In [2]:
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
import time
nltk.download("punkt")

import os
from openai import OpenAI,AzureOpenAI

from datasets import load_dataset, load_from_disk, load_metric
from transformers import pipeline, set_seed,AutoModelForSeq2SeqLM, AutoTokenizer, BertTokenizer,EncoderDecoderModel

import logging
import time
import datetime

import torch

import warnings
warnings.filterwarnings("ignore")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!





In [3]:
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO, 
    filename='./logs/Hybrid Text Summary Generation.log')   

In [4]:
logging.info("==========================================================================================================")
logging.info("Hybrid Text Summarization Started ")

In [5]:
NO_OF_TEST_RECORDS = 100
logging.info(f"No of Test Records considered for analysis - {NO_OF_TEST_RECORDS}")

start_time_hts=time.time()

sample_test_df = pd.read_csv('./input/test_cleaned.csv', nrows=NO_OF_TEST_RECORDS)
sample_test_df.head()

Unnamed: 0.1,Unnamed: 0,id,highlights,article
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...


In [6]:
summaries = {}

In [7]:
duration={}

# Baseline Hybrid Summarization

### Lead 3 + Seq2Seq (Ext - Abs Hybrid summarization)

In [8]:
logging.info("Lead 3 + Seq2Seq hybrid summarization")
def generate_ext_baseline(text):
    return "\n".join(sent_tokenize(text)[:3]) # First 3 sentences

pipe = pipeline("summarization", model="bert-base-uncased", max_new_tokens=80)

def generate_abs_seq2seq(text):
   pipe_out = pipe(text)
   return "\n".join(sent_tokenize(pipe_out[0]["summary_text"]))

def generate_hyb_baseline(text):
    temp_summary = generate_ext_baseline(text)
    summary = generate_abs_seq2seq(temp_summary)
    return summary

def roundTS(startTime, endTime):
    return round((endTime -startTime),4)

def avgTimePerRecord(startTime, endTime, no_of_recs):
     return round((endTime -startTime)/no_of_recs ,4)

logging.info("Generating Baseline Hybrid Summaries...")
st_baseline_hts=time.time()
sample_test_df['baseline-hyb'] = sample_test_df['article'].apply(generate_hyb_baseline)
end_baseline_hts=time.time()
logging.info(f"Baseline ATS Duration - {roundTS(st_baseline_hts, end_baseline_hts)} seconds") 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The model 'BertForMaskedLM' is not supported for summarization. Supported models are ['BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseFo

In [9]:
duration['Baseline'] = avgTimePerRecord(st_baseline_hts, end_baseline_hts, NO_OF_TEST_RECORDS)

In [10]:
sample_test_df.head()  

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,ever noticed how plane seats appear to be gett...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,a drunk teenage boy had to be rescued by secur...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,dougie freedman is on the verge of agreeing a ...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,liverpool target neto is also wanted by psg an...
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,bruce jenner will break his silence in a two h...


### Lead 3 + BERT (Ext - Abs Hybrid summarization)

In [11]:
logging.info("Lead 3 + BERT hybrid summarization")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
bert_tokenizer = BertTokenizer.from_pretrained("./models/bert-base-cnn-finetuned")
bert_model_ft = EncoderDecoderModel.from_pretrained("./models/bert-base-cnn-finetuned")
bert_model_ft.to(device)


def generate_abs_bert(text):
    # cut off at BERT max length 512
    inputs = bert_tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)
    output = bert_model_ft.generate(input_ids, attention_mask=attention_mask)
    return bert_tokenizer.decode(output[0], skip_special_tokens=True)

In [12]:
def generate_hyb_bert(text):
    temp_summary = generate_ext_baseline(text)
    summary = generate_abs_bert(temp_summary)
    return summary

logging.info("Generating BERT Hybrid Summaries...")
st_bert_hyb_hts=time.time()
sample_test_df['bert-hyb'] = sample_test_df['article'].apply(generate_hyb_bert)
end_bert_hyb_hts=time.time()
logging.info(f"BERT HTS Duration - {roundTS(st_bert_hyb_hts, end_bert_hyb_hts)} seconds")  

In [13]:
sample_test_df.head() 

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb,bert-hyb
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,ever noticed how plane seats appear to be gett...,the department of transportation is happy to s...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,a drunk teenage boy had to be rescued by secur...,rahul kumar jumped into a lions enclosure at a...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,dougie freedman is on the verge of agreeing a ...,dougie freedman is set to sign a new two year ...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,liverpool target neto is also wanted by psg an...,liverpool were linked with a move for the 25 -...
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,bruce jenner will break his silence in a two h...,the interview comes amid growing speculation a...


In [14]:
duration['bert-hyb'] = avgTimePerRecord(st_bert_hyb_hts, end_bert_hyb_hts, NO_OF_TEST_RECORDS)

### Lead 3 + T5 (Ext - Abs Hybrid summarization)

In [15]:
logging.info("Lead 3 + T5 hybrid summarization")
from transformers import pipeline
summarizer = pipeline("summarization", model="./models/t5-small-cnn-hf-finetuned")

def generate_abs_t5(text):
    response = summarizer(text)
    return response[0]['summary_text']

In [16]:

def generate_hyb_t5(text):
    temp_summary = generate_ext_baseline(text)
    summary = generate_abs_t5(temp_summary)
    return summary

logging.info("Generating T5 Hybrid Summaries...")
st_t5_hyb_hts=time.time()
sample_test_df['t5-hyb'] = sample_test_df['article'].apply(generate_hyb_t5)
end_t5_hyb_hts=time.time()
logging.info(f"T5 HTS Duration - {roundTS(st_t5_hyb_hts, end_t5_hyb_hts)} seconds")  


Your max_length is set to 200, but your input_length is only 148. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=74)
Your max_length is set to 200, but your input_length is only 99. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)
Your max_length is set to 200, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)
Your max_length is set to 200, but your input_length is only 120. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=60)
Yo

Your max_length is set to 200, but your input_length is only 149. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=74)
Your max_length is set to 200, but your input_length is only 136. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)
Your max_length is set to 200, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)
Your max_length is set to 200, but your input_length is only 91. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=45)
Y

Your max_length is set to 200, but your input_length is only 122. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)
Your max_length is set to 200, but your input_length is only 108. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
Your max_length is set to 200, but your input_length is only 131. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=65)
Your max_length is set to 200, but your input_length is only 88. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)
Y

In [17]:
sample_test_df.head()  

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb,bert-hyb,t5-hyb
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,ever noticed how plane seats appear to be gett...,the department of transportation is happy to s...,The shrinking space on aeroplanes is not only ...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,a drunk teenage boy had to be rescued by secur...,rahul kumar jumped into a lions enclosure at a...,Rahul Kumar 17 climbed over the enclosure fenc...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,dougie freedman is on the verge of agreeing a ...,dougie freedman is set to sign a new two year ...,Dougie Freedman is set to sign a new two year ...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,liverpool target neto is also wanted by psg an...,liverpool were linked with a move for the 25 -...,Liverpool target Neto is also wanted by PSG an...
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,bruce jenner will break his silence in a two h...,the interview comes amid growing speculation a...,The former Olympian will speak in a two hour i...


In [18]:
duration['t5-hyb'] = avgTimePerRecord(st_t5_hyb_hts, end_t5_hyb_hts, NO_OF_TEST_RECORDS)

### Lead 3 + GPT 3.5 (Ext - Abs Hybrid summarization)

In [19]:
logging.info("Lead 3 + GPT3.5 hybrid summarization")
client = OpenAI(
    # This is` the default and can be omitted
    api_key='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',
)
AZURE_OPENAI_KEY = "ee16f3418f0740ce8a1a21e262a839a3"
AZURE_OPENAI_ENDPOINT = "https://tmap-openai.openai.azure.com"

In [20]:
client = AzureOpenAI(
  azure_endpoint = "https://tmap-openai.openai.azure.com/", 
  api_key="ee16f3418f0740ce8a1a21e262a839a3",  
  api_version="2023-09-15-preview")

In [21]:
# Computation of Length of article and hughlights
def article_len(row):
    return len(row['article'].split())

def highlights_len(row):
    return len(row['highlights'].split())

sample_test_df['article_len'] = sample_test_df.apply(lambda r: article_len(r), axis= 1)
sample_test_df['highlights_len'] = sample_test_df.apply(lambda r: highlights_len(r), axis= 1)

In [22]:
sorted_by_article_size_df = sample_test_df.sort_values('article_len')

In [23]:
def generate_GPT_Hyb_1_Shot(temp_summary, model, temp, max_tokens):
    summary = ""
    abs_pmt = f"""
                Your task is to create a concise, factual summary, 
                
                by selecting and combining key sentences from  the original text. 
                
                Text is delimited by triple backticks. 

                TEXT: ```{temp_summary}```
            """
    
    #create a concise summary by selecting and combining key sentences from the original text : {text}"
    try:
        response = client.chat.completions.create(
        model=model, # model = "deployment_name".
        messages=[
            {"role": "system", "content": "You are a LLM trained by OpenAI."},
            {"role":"user","content":sorted_by_article_size_df.iloc[0]['article']},
            {"role":"assistant","content":sorted_by_article_size_df.iloc[0]['highlights']},
            {"role": "user", "content": abs_pmt},
            ],
        max_tokens = max_tokens,
        temperature = temp, #top_p=.9
        n=1,
        )
        summary = response.choices[0].message.content
    except Exception as e:
        logging.info("Exception occured - {e}") 
        summary = "ERROR"
    
    return summary

In [24]:
def generate_hyb_gpt35(record, model, temp, max_tokens):
    temp_summary = generate_ext_baseline(record['article'])
    summary = generate_GPT_Hyb_1_Shot(temp_summary,model, temp, max_tokens)
    return summary

logging.info("Generating GPT3.5 Hybrid Summaries...")
st_gpt35_hyb_hts=time.time()
sample_test_df['gpt35-hyb'] = sample_test_df.apply(lambda rec: generate_hyb_gpt35(rec, 'gpt35', 0, 60), axis=1)
end_gpt35_hyb_hts=time.time()
logging.info(f"GPT 3.5 HTS Duration - {roundTS(st_gpt35_hyb_hts, end_gpt35_hyb_hts)} seconds")  


In [25]:
duration['gpt35-hyb'] = avgTimePerRecord(st_gpt35_hyb_hts, end_gpt35_hyb_hts, NO_OF_TEST_RECORDS)

In [26]:
sample_test_df.head()  

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb,bert-hyb,t5-hyb,article_len,highlights_len,gpt35-hyb
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,ever noticed how plane seats appear to be gett...,the department of transportation is happy to s...,The shrinking space on aeroplanes is not only ...,374,36,Experts are concerned that the shrinking space...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,a drunk teenage boy had to be rescued by secur...,rahul kumar jumped into a lions enclosure at a...,Rahul Kumar 17 climbed over the enclosure fenc...,317,38,
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,dougie freedman is on the verge of agreeing a ...,dougie freedman is set to sign a new two year ...,Dougie Freedman is set to sign a new two year ...,114,35,Dougie Freedman is set to sign a new two-year ...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,liverpool target neto is also wanted by psg an...,liverpool were linked with a move for the 25 -...,Liverpool target Neto is also wanted by PSG an...,316,44,Liverpool face competition from PSG and Spanis...
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,bruce jenner will break his silence in a two h...,the interview comes amid growing speculation a...,The former Olympian will speak in a two hour i...,780,61,"Bruce Jenner, former Olympian and reality TV s..."


### Lead 3 + GPT 4 (Ext - Abs Hybrid summarization)

In [27]:
logging.info("Lead 3 + GPT 4 hybrid summarization")
client = AzureOpenAI(
  azure_endpoint = "https://tmap-openai.openai.azure.com/", 
  api_key="ee16f3418f0740ce8a1a21e262a839a3",  
  api_version="2023-09-15-preview")

In [28]:
def generate_hyb_gpt4(record, model, temp, max_tokens):
    temp_summary = generate_ext_baseline(record['article'])
    summary = generate_GPT_Hyb_1_Shot(temp_summary,model, temp, max_tokens)
    return summary

#sample_test_df['gpt4-hyb'] = sample_test_df['article'].apply(generate_hyb_gpt4)
logging.info("Generating GPT 4 Hybrid Summaries...")
st_gpt4_hyb_hts=time.time()
sample_test_df['gpt4-hyb'] = sample_test_df.apply(lambda rec: generate_hyb_gpt35(rec, 'gpt4', 0, 60), axis=1)
end_gpt4_hyb_hts=time.time()
logging.info(f"GPT 4 HTS Duration - {roundTS(st_gpt4_hyb_hts, end_gpt4_hyb_hts)} seconds") 


In [29]:
duration['gpt4-hyb'] = avgTimePerRecord(st_gpt4_hyb_hts, end_gpt4_hyb_hts, NO_OF_TEST_RECORDS)

In [30]:
sample_test_df.head()  

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb,bert-hyb,t5-hyb,article_len,highlights_len,gpt35-hyb,gpt4-hyb
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,ever noticed how plane seats appear to be gett...,the department of transportation is happy to s...,The shrinking space on aeroplanes is not only ...,374,36,Experts are concerned that the shrinking space...,Experts are raising concerns that the decreasi...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,a drunk teenage boy had to be rescued by secur...,rahul kumar jumped into a lions enclosure at a...,Rahul Kumar 17 climbed over the enclosure fenc...,317,38,,"A drunk teenager, Rahul Kumar, was rescued by ..."
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,dougie freedman is on the verge of agreeing a ...,dougie freedman is set to sign a new two year ...,Dougie Freedman is set to sign a new two year ...,114,35,Dougie Freedman is set to sign a new two-year ...,Dougie Freedman is close to signing a new two-...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,liverpool target neto is also wanted by psg an...,liverpool were linked with a move for the 25 -...,Liverpool target Neto is also wanted by PSG an...,316,44,Liverpool face competition from PSG and Spanis...,"Liverpool, PSG, and Spanish clubs are interest..."
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,bruce jenner will break his silence in a two h...,the interview comes amid growing speculation a...,The former Olympian will speak in a two hour i...,780,61,"Bruce Jenner, former Olympian and reality TV s...",Bruce Jenner will discuss his transition to a ...


In [31]:
end_time_hts=time.time()
logging.info(f"Total HTS Duration - {roundTS(start_time_hts, end_time_hts)} seconds")


In [32]:
duration

{'Baseline': 7.4758,
 'bert-hyb': 0.4406,
 't5-hyb': 0.8835,
 'gpt35-hyb': 1.1983,
 'gpt4-hyb': 4.4695}

In [33]:
duration_df = pd.DataFrame(duration.items(), columns=['models', 'avg_inf_time'])
duration_df

Unnamed: 0,models,avg_inf_time
0,Baseline,7.4758
1,bert-hyb,0.4406
2,t5-hyb,0.8835
3,gpt35-hyb,1.1983
4,gpt4-hyb,4.4695


In [34]:
logging.info(f"MAP - HTS Average Inference Time Per Request - {duration}")

In [35]:
duration_file_path = './output/hyb-ts-duration-final.csv'
logging.info(f"Duration will be saved in this file : {duration_file_path}")
duration_df.to_csv(duration_file_path,  mode="w+")

In [36]:
file_path = './output/hyb-ts-final.csv'
logging.info(f"Summaries will be saved in this file : {file_path}")

In [37]:
sample_test_df.head(5)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb,bert-hyb,t5-hyb,article_len,highlights_len,gpt35-hyb,gpt4-hyb
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,ever noticed how plane seats appear to be gett...,the department of transportation is happy to s...,The shrinking space on aeroplanes is not only ...,374,36,Experts are concerned that the shrinking space...,Experts are raising concerns that the decreasi...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,a drunk teenage boy had to be rescued by secur...,rahul kumar jumped into a lions enclosure at a...,Rahul Kumar 17 climbed over the enclosure fenc...,317,38,,"A drunk teenager, Rahul Kumar, was rescued by ..."
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,dougie freedman is on the verge of agreeing a ...,dougie freedman is set to sign a new two year ...,Dougie Freedman is set to sign a new two year ...,114,35,Dougie Freedman is set to sign a new two-year ...,Dougie Freedman is close to signing a new two-...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,liverpool target neto is also wanted by psg an...,liverpool were linked with a move for the 25 -...,Liverpool target Neto is also wanted by PSG an...,316,44,Liverpool face competition from PSG and Spanis...,"Liverpool, PSG, and Spanish clubs are interest..."
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,bruce jenner will break his silence in a two h...,the interview comes amid growing speculation a...,The former Olympian will speak in a two hour i...,780,61,"Bruce Jenner, former Olympian and reality TV s...",Bruce Jenner will discuss his transition to a ...


In [38]:
sample_test_df.to_csv(file_path,  mode="w+")

In [39]:
read_content = pd.read_csv(file_path)
read_content.tail(5)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-hyb,bert-hyb,t5-hyb,article_len,highlights_len,gpt35-hyb,gpt4-hyb
95,95,95,64ee7c9eb9f1efbb7da0ce80498434c623615b84,Zlatan Ibrahimovic will line up against former...,As Zlatan Ibrahimovic famously believes the Wo...,as zlatan ibrahimovic famously believes the wo...,barcelona beat paris saint germain 3 - 1 in th...,Zlatan Ibrahimovic famously believes the World...,718,61,Barcelona are expected to progress into the Ch...,"Zlatan Ibrahimovic, who famously believes the ..."
96,96,96,5cf4682cd03238d5867027ce9492b626cd1ed011,"Jameela Jamil, 29, is convinced dental work tr...",Jameela spent GBP3 000 on having all her amalg...,jameela spent gbp3 000 on having all her amalg...,jameela jamil has the dazzling smile that you ...,Jameela Jamil spent GBP3 000 on having all her...,1346,46,"Jameela Jamil, a television presenter and form...","Jameela Jamil, a 29-year-old television presen..."
97,97,97,3815d19af18ff22be6ad6095722d7367bb7271af,"Christopher Bridger, 25, attacked three women ...",A paramedic who pretended he was gay to get cl...,a paramedic who pretended he was gay to get cl...,christopher bridger 25 attacked three women af...,Christopher Bridger 25 from Stevenage Hertford...,738,60,"Christopher Bridger, a paramedic from Stevenag...","Christopher Bridger, a 25-year-old paramedic f..."
98,98,98,fb207604ffa7e8371c622840445825db8993d4d2,Paris Saint-Germain captain Thiago Silva suffe...,Paris Saint Germain face Nice on Saturday hopi...,paris saint germain face nice on saturday hopi...,paris saint germain face nice on saturday in l...,Thiago Silva is recovering at home from a thig...,565,40,Paris Saint Germain will face Nice on Saturday...,"Paris Saint Germain, aiming to overtake Lyon f..."
99,99,99,d25d52c434a13c1df5faa593e8a097d2f501a2b6,.50-caliber bullets equipped with optical sens...,CNN You know the phrase dodging a bullet Forge...,cnn you know the phrase dodging a bullet forge...,the u. s. military says it has made great prog...,The U.S. military said this week it has made g...,331,39,The U.S. military has made progress in develop...,The U.S. military has made significant advance...


In [40]:
logging.info("Hybrid Text Summarization Completed ")