In [2]:
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize

nltk.download("punkt")

import os
from openai import OpenAI,AzureOpenAI

from summarizer import Summarizer

from datasets import load_dataset, load_from_disk, load_metric
from transformers import pipeline, set_seed,AutoModelForSeq2SeqLM, AutoTokenizer, BertTokenizer,EncoderDecoderModel
import logging
import time
import torch

import warnings
warnings.filterwarnings("ignore")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!





In [3]:
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO, 
    filename='./logs/Extractive Text Summary Generation.log')    

In [4]:
logging.info("==========================================================================================================")
logging.info("Extractive Text Summarization Start ")

In [5]:
NO_OF_TEST_RECORDS = 100
logging.info(f"No of test records - {NO_OF_TEST_RECORDS}")

In [6]:
start_time_ets=time.time()

# Load sample records from test data
sample_test_df = pd.read_csv('./input/test_cleaned.csv', nrows=NO_OF_TEST_RECORDS)
sample_test_df.head(2)

Unnamed: 0.1,Unnamed: 0,id,highlights,article
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...


In [7]:
summaries = {}

In [8]:
duration={}

# Zero Shot Extractive Summarization

#### Baseline Model (Lead 5)

In [9]:
def generate_ext_baseline(text):
    return "\n".join(sent_tokenize(text)[:5]) # First 5 sentences

In [10]:
def roundTS(startTime, endTime):
    return round((endTime -startTime),4)

def avgTimePerRecord(startTime, endTime, no_of_recs):
     return round((endTime -startTime)/no_of_recs ,4)

In [11]:
logging.info("Generating Baseline Extractive Summaries...")
st_baseline_ets=time.time()
sample_test_df['baseline-ext'] = sample_test_df['article'].apply(generate_ext_baseline)
end_baseline_ets=time.time()
logging.info(f"Baseline ETS Duration - {roundTS(st_baseline_ets, end_baseline_ets)} seconds")    
sample_test_df['baseline-ext'][:2]

0    Ever noticed how plane seats appear to be gett...
1    A drunk teenage boy had to be rescued by secur...
Name: baseline-ext, dtype: object

In [12]:
duration['Baseline'] = avgTimePerRecord(st_baseline_ets, end_baseline_ets, NO_OF_TEST_RECORDS)

In [13]:
print("Article:\n",sample_test_df.iloc[0]['article'][:512])
print("Summary:\n",sample_test_df.iloc[0]['baseline-ext'])

Article:
 Ever noticed how plane seats appear to be getting smaller and smaller With increasing numbers of people taking to the skies some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable it s putting our health and safety in danger. More than squabbling over the arm rest shrinking space on planes putting our health and safety in danger This week a U.S consumer advisory group set up by the Department of Tr
Summary:
 Ever noticed how plane seats appear to be getting smaller and smaller With increasing numbers of people taking to the skies some experts are questioning if having such packed out planes is putting passengers at risk.
They say that the shrinking space on aeroplanes is not only uncomfortable it s putting our health and safety in danger.
More than squabbling over the arm rest shrinking space on planes putting our health and safety in danger This week a U.S consumer 

#### BERT Extractive Summarization

In [14]:
model = Summarizer()
def generate_ext_bert_zs(text):
    result = model(text, min_length=40)
    return "".join(result)

logging.info("Generating BERT Extractive Summaries (Before Fine Tuning) ...")    
st_bert_zs_ets=time.time()
sample_test_df['bert-base-ext-zs'] = sample_test_df['article'].apply(generate_ext_bert_zs)
end_bert_zs_ets=time.time()
logging.info(f"BERT ZS ETS Duration - {roundTS(st_bert_zs_ets,end_bert_zs_ets)} seconds")    
sample_test_df.head(3)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...


In [15]:
duration['BERT'] = avgTimePerRecord(st_bert_zs_ets, end_bert_zs_ets, NO_OF_TEST_RECORDS)

In [16]:
print("Article\n",sample_test_df.iloc[0]['article'])
print("Summary\n",sample_test_df.iloc[0]['bert-base-ext-zs'])

Article
 Ever noticed how plane seats appear to be getting smaller and smaller With increasing numbers of people taking to the skies some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable it s putting our health and safety in danger. More than squabbling over the arm rest shrinking space on planes putting our health and safety in danger This week a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes it does not stipulate a minimum amount of space for humans. In a world where animals have more rights to space and food than humans said Charlie Leocha consumer representative on the committee. It is time that the DOT and FAA take a stand for humane treatment of passengers. But could crowding on planes lead to more serious issues than fighting for space in t

#### T5 Extractive Summarization

In [17]:
def generate_ext_t5_small_zs(text):
    pipe = pipeline('summarization', model = 't5-small', min_length=40)
    pipe_out = pipe(text)
    return 'n'.join(sent_tokenize(pipe_out[0]['summary_text']))

In [18]:
logging.info("Generating T5 Extractive Summaries ...")    
st_t5_zs_ets=time.time()
sample_test_df['t5-small-ext-zs'] = sample_test_df['article'].apply(generate_ext_t5_small_zs)
end_t5_zs_ets=time.time()
logging.info(f"T5 ZS ETS Duration - {roundTS(st_t5_zs_ets, end_t5_zs_ets) } seconds")    
sample_test_df.head()


Your max_length is set to 200, but your input_length is only 162. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=81)
Token indices sequence length is longer than the specified maximum sequence length for this model (970 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (569 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1120 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1125 > 512). Running this sequence through the model will result in indexing errors
Token indices sequen

Token indices sequence length is longer than the specified maximum sequence length for this model (1750 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (835 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2034 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (990 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for t

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,Liverpool target Neto is also wanted by PSG an...,Liverpool target Neto is also wanted by PSG an...,neto is wanted by a number of top european clu...
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,Bruce Jenner will break his silence in a two h...,Bruce Jenner will break his silence in a two h...,the former Olympian will speak in a two hour i...


In [19]:
duration['T5'] = avgTimePerRecord(st_t5_zs_ets, end_t5_zs_ets, NO_OF_TEST_RECORDS)

In [20]:
print("Article\n", sample_test_df.iloc[0]['article'])
print("Summary\n", sample_test_df.iloc[0]['t5-small-ext-zs'])

Article
 Ever noticed how plane seats appear to be getting smaller and smaller With increasing numbers of people taking to the skies some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable it s putting our health and safety in danger. More than squabbling over the arm rest shrinking space on planes putting our health and safety in danger This week a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes it does not stipulate a minimum amount of space for humans. In a world where animals have more rights to space and food than humans said Charlie Leocha consumer representative on the committee. It is time that the DOT and FAA take a stand for humane treatment of passengers. But could crowding on planes lead to more serious issues than fighting for space in t

#### GPT 3.5 Extractive Summarization

In [21]:
client = OpenAI(
    api_key='[API KEY]',
)
AZURE_OPENAI_KEY = "[API KEY]"
AZURE_OPENAI_ENDPOINT = "https://tmap-openai.openai.azure.com"

In [22]:
client = AzureOpenAI(
  azure_endpoint = "https://tmap-openai.openai.azure.com/", 
  api_key="[API KEY]",  
  api_version="2023-09-15-preview")

def generate_ext_gpt_35_zs(text):
    GPT35_ETS_PMT = "Generate extractive summary from the original text : " + text
    summary = ""
    try:
        response = client.chat.completions.create(
            model="gpt35", 
            messages=[
                {"role": "system", "content": "You are a LLM trained by OpenAI."},
                {"role": "user", "content": GPT35_ETS_PMT}
            ]
        )
        summary = response.choices[0].message.content
    except Exception as e:
        logging.info(f"Exception occured - {e}")
        summary = "ERROR"
    return summary


In [23]:
logging.info("Generating GPT3.5 Extractive Summaries (Before Prompt Tuning) ...") 
st_gpt35_zs_ets=time.time()
sample_test_df['gpt-3.5-ext-zs'] = sample_test_df['article'].apply(generate_ext_gpt_35_zs)
end_gpt35_zs_ets=time.time()
logging.info(f"GPT35 ZS ETS Duration - {roundTS(st_gpt35_zs_ets, end_gpt35_zs_ets)} seconds")    
sample_test_df.head(3)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...


In [24]:
duration['GPT35'] = avgTimePerRecord(st_gpt35_zs_ets, end_gpt35_zs_ets, NO_OF_TEST_RECORDS)

In [25]:
print("Article\n",sample_test_df.iloc[0]['article'])
print("Summary\n",sample_test_df.iloc[0]['gpt-3.5-ext-zs'])

Article
 Ever noticed how plane seats appear to be getting smaller and smaller With increasing numbers of people taking to the skies some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable it s putting our health and safety in danger. More than squabbling over the arm rest shrinking space on planes putting our health and safety in danger This week a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes it does not stipulate a minimum amount of space for humans. In a world where animals have more rights to space and food than humans said Charlie Leocha consumer representative on the committee. It is time that the DOT and FAA take a stand for humane treatment of passengers. But could crowding on planes lead to more serious issues than fighting for space in t

#### GPT 4 Extractive Summarization

In [26]:
client = AzureOpenAI(
  azure_endpoint = "https://tmap-openai.openai.azure.com/", 
  api_key="[API KEY]",  
  api_version="2023-09-15-preview")

In [27]:
def generate_ext_gpt_4_zs(text):
    GPT40_ETS_PMT = "create a concise summary by selecting and combining key sentences from the original text : " + text
    summary = ""
    try:
        response = client.chat.completions.create(
            model="gpt4", 
            messages=[
                {"role": "system", "content": "You are a LLM trained by OpenAI."},
                {"role": "user", "content": GPT40_ETS_PMT}

            ]
        )
        summary = response.choices[0].message.content
    except Exception as e:
        logging.info(f"Exception Occured - {e}")
        summary = "ERROR"
        
    return summary

In [28]:
logging.info("Generating GPT4 Extractive Summaries (Before Prompt Tuning) ...") 
st_gpt4_zs_ets=time.time()
sample_test_df['gpt-4-ext-zs'] = sample_test_df['article'].apply(generate_ext_gpt_4_zs)
end_gpt4_zs_ets=time.time()
logging.info(f"GPT4 ZS ETS Duration - {roundTS(st_gpt4_zs_ets, end_gpt4_zs_ets)} seconds")    
sample_test_df.head(3)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs,gpt-4-ext-zs
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...,Experts argue that the diminishing space on pl...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,,Intoxicated 17-year-old Rahul Kumar jumped int...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...,Dougie Freedman is poised to sign a new two-ye...


In [29]:
end_gpt4_zs_ets, st_gpt4_zs_ets

(1715079923.429398, 1715078412.6961112)

In [30]:
duration['GPT4'] = avgTimePerRecord(st_gpt4_zs_ets, end_gpt4_zs_ets, NO_OF_TEST_RECORDS)

In [31]:
print("Article\n",sample_test_df.iloc[0]['article'])
print("Summary\n",sample_test_df.iloc[0]['gpt-4-ext-zs'])

Article
 Ever noticed how plane seats appear to be getting smaller and smaller With increasing numbers of people taking to the skies some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable it s putting our health and safety in danger. More than squabbling over the arm rest shrinking space on planes putting our health and safety in danger This week a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes it does not stipulate a minimum amount of space for humans. In a world where animals have more rights to space and food than humans said Charlie Leocha consumer representative on the committee. It is time that the DOT and FAA take a stand for humane treatment of passengers. But could crowding on planes lead to more serious issues than fighting for space in t

# Fine Tuned - Extractractive Text Summarization

###### Prerequisite - Fine Tuned BERT, T5 Pretrained Models 

#### BERT Extractive Summarization

In [32]:
sample_test_df

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs,gpt-4-ext-zs
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...,Experts argue that the diminishing space on pl...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,,Intoxicated 17-year-old Rahul Kumar jumped int...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...,Dougie Freedman is poised to sign a new two-ye...
3,3,caabf9cbdf96eb1410295a673e953d304391bfbb,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...,Liverpool target Neto is also wanted by PSG an...,Liverpool target Neto is also wanted by PSG an...,neto is wanted by a number of top european clu...,"Fiorentina goalkeeper Neto, who has been linke...","Liverpool are interested in signing Neto, the ..."
4,4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will break his silence in a two h...,Bruce Jenner will break his silence in a two h...,Bruce Jenner will break his silence in a two h...,the former Olympian will speak in a two hour i...,"Former Olympian and reality star, Bruce Jenner...",Bruce Jenner will discuss his life and recent ...
...,...,...,...,...,...,...,...,...,...
95,95,64ee7c9eb9f1efbb7da0ce80498434c623615b84,Zlatan Ibrahimovic will line up against former...,As Zlatan Ibrahimovic famously believes the Wo...,As Zlatan Ibrahimovic famously believes the Wo...,As Zlatan Ibrahimovic famously believes the Wo...,Zlatan Ibrahimovic will take centre stage agai...,"Zlatan Ibrahimovic, the main attraction for PS...","Zlatan Ibrahimovic, renowned for his confidenc..."
96,96,5cf4682cd03238d5867027ce9492b626cd1ed011,"Jameela Jamil, 29, is convinced dental work tr...",Jameela spent GBP3 000 on having all her amalg...,Jameela spent GBP3 000 on having all her amalg...,Jameela spent GBP3 000 on having all her amalg...,dental amalgam has been used for more than 150...,"Jameela Jamil spent GBP3,000 on removing and r...","Jameela Jamil, a former model and television p..."
97,97,3815d19af18ff22be6ad6095722d7367bb7271af,"Christopher Bridger, 25, attacked three women ...",A paramedic who pretended he was gay to get cl...,A paramedic who pretended he was gay to get cl...,A paramedic who pretended he was gay to get cl...,"Christopher Bridger 25, from Stevenage Hertfor...","Christopher Bridger, a former paramedic who wa...","Christopher Bridger, a 25-year-old former para..."
98,98,fb207604ffa7e8371c622840445825db8993d4d2,Paris Saint-Germain captain Thiago Silva suffe...,Paris Saint Germain face Nice on Saturday hopi...,Paris Saint Germain face Nice on Saturday hopi...,Paris Saint Germain face Nice on Saturday hopi...,Thiago Silva is recovering at home from a thig...,Paris Saint Germain will face Nice without sev...,Paris Saint Germain (PSG) aims to take the top...


In [33]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [34]:
bert_tokenizer = BertTokenizer.from_pretrained("./models/bert-base-cnn-finetuned")
bert_model_ft = EncoderDecoderModel.from_pretrained("./models/bert-base-cnn-finetuned")
bert_model_ft.to(device)


def generate_ext_bert_ft(text):
    # cut off at BERT max length 512
    inputs = bert_tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)
    output = bert_model_ft.generate(input_ids, attention_mask=attention_mask)
    return bert_tokenizer.decode(output[0], skip_special_tokens=True)

logging.info("Generating BERT Extractive Summaries (After Fine Tuning) ...")
st_bert_ft_ets=time.time()
sample_test_df['bert-base-ext-ft'] = sample_test_df['article'].apply(generate_ext_bert_ft)
end_bert_ft_ets=time.time()
logging.info(f"BERT FT ETS Duration - {roundTS(st_bert_ft_ets, end_bert_ft_ets)} seconds")    
sample_test_df.head(3)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs,gpt-4-ext-zs,bert-base-ext-ft
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...,Experts argue that the diminishing space on pl...,the dot and faa are happy to set standards for...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,,Intoxicated 17-year-old Rahul Kumar jumped int...,rahul kumar 17 climbed into the enclosure fenc...
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...,Dougie Freedman is poised to sign a new two-ye...,dougie freedman is set to sign a new two year ...


In [35]:
duration['BERT-FT'] = avgTimePerRecord(st_bert_ft_ets, end_bert_ft_ets,NO_OF_TEST_RECORDS)

#### T5 Extractive Summarization

In [36]:
print("T5 Doesnot support Extractive Summarization !")

T5 Doesnot support Extractive Summarization !


#### GPT 3.5 Extractive Summarization

In [37]:
# Computation of Length of article and hughlights
def article_len(row):
    return len(row['article'].split())

def highlights_len(row):
    return len(row['highlights'].split())


sample_test_df['article_len'] = sample_test_df.apply(lambda r: article_len(r), axis= 1)
sample_test_df['highlights_len'] = sample_test_df.apply(lambda r: highlights_len(r), axis= 1)

In [38]:
sorted_by_article_size_df = sample_test_df.sort_values('article_len')

In [39]:
def generate_GPT_ETS_1_Shot(record, model, temp, max_tokens):
    summary = ""
    ext_pmt = f"""
                Your task is to Generate extractive summary from the original text, 
                
                by identifying 4-5 key sentences from the original text and 
                
                DO NOT rewrite or paraphrase the content.
                
                Text is delimited by triple backticks. 

                TEXT: ```{record['article']}```
            """
    
    #create a concise summary by selecting and combining key sentences from the original text : {text}"
    try:
        response = client.chat.completions.create(
        model=model, # model = "deployment_name".
        messages=[
            {"role": "system", "content": "You are a LLM trained by OpenAI."},
            {"role":"user","content":sorted_by_article_size_df.iloc[0]['article']},
            {"role":"assistant","content":sorted_by_article_size_df.iloc[0]['highlights']},
            {"role": "user", "content": ext_pmt},
            ],
        max_tokens = max_tokens,
        temperature = temp, 
        n=1,
        )
        summary = response.choices[0].message.content
    except Exception as e:
        logging.info(f"Exception Occured - {e}")
        summary = "ERROR"
    
    return summary

In [40]:
logging.info("Generating GPT35 Extractive Summaries (After Prompt Tuning) ...") 
st_gpt35_ft_ets=time.time()
sample_test_df['gpt35-ext-ft'] = sample_test_df.apply(lambda rec: generate_GPT_ETS_1_Shot(rec, 'gpt35', 0, 60), axis=1)
end_gpt35_ft_ets=time.time()
logging.info(f"GPT35 FT ETS Duration - {roundTS(st_gpt35_ft_ets, end_gpt35_ft_ets)} seconds")    
sample_test_df.head(3)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs,gpt-4-ext-zs,bert-base-ext-ft,article_len,highlights_len,gpt35-ext-ft
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...,Experts argue that the diminishing space on pl...,the dot and faa are happy to set standards for...,374,36,Experts are questioning if having such packed ...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,,Intoxicated 17-year-old Rahul Kumar jumped int...,rahul kumar 17 climbed into the enclosure fenc...,317,38,
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...,Dougie Freedman is poised to sign a new two-ye...,dougie freedman is set to sign a new two year ...,114,35,"Nottingham Forest's manager, Dougie Freedman, ..."


In [41]:
duration['GPT35-FT'] = avgTimePerRecord(st_gpt35_ft_ets, end_gpt35_ft_ets,NO_OF_TEST_RECORDS)

#### GPT 4 Extractive Summarization

In [42]:
logging.info("Generating GPT4 Extractive Summaries (After Prompt Tuning) ...") 
st_gpt4_ft_ets=time.time()
sample_test_df['gpt40-ext-ft'] = sample_test_df.apply(lambda rec: generate_GPT_ETS_1_Shot(rec, 'gpt4', 0, 60), axis=1)
end_gpt4_ft_ets=time.time()
logging.info(f"GPT4 FT ETS Duration - {roundTS(st_gpt4_ft_ets, end_gpt4_ft_ets)} seconds")   


In [43]:
duration['GPT4-FT'] = avgTimePerRecord(st_gpt4_ft_ets, end_gpt4_ft_ets,NO_OF_TEST_RECORDS)

In [44]:
sample_test_df=sample_test_df.drop(['article_len', 'highlights_len'], axis=1)

#### Saving the generated summaries for evaluation

In [45]:
end_time_ets=time.time()
logging.info(f"Total ETS Duration - {roundTS(start_time_ets, end_time_ets)} seconds")

In [46]:
duration

{'Baseline': 0.0004,
 'BERT': 1.1654,
 'T5': 2.3829,
 'GPT35': 2.2201,
 'GPT4': 15.1073,
 'BERT-FT': 0.4883,
 'GPT35-FT': 1.14,
 'GPT4-FT': 5.6538}

In [47]:
duration_df = pd.DataFrame(duration.items(), columns=['models', 'avg_inf_time'])
duration_df

Unnamed: 0,models,avg_inf_time
0,Baseline,0.0004
1,BERT,1.1654
2,T5,2.3829
3,GPT35,2.2201
4,GPT4,15.1073
5,BERT-FT,0.4883
6,GPT35-FT,1.14
7,GPT4-FT,5.6538


In [48]:
logging.info(f"MAP - Average Inference Time Per Request - {duration}")

In [49]:
file_path = './output/ext-ts-final.csv'
logging.info(f"Summaries will be saved in this file : {file_path}")
sample_test_df.to_csv(file_path,  mode="w+")

In [50]:
duration_file_path = './output/ext-ts-duration-final.csv'
logging.info(f"Duration will be saved in this file : {duration_file_path}")
duration_df.to_csv(duration_file_path,  mode="w+")

In [51]:
sample_test_df.head(3)

Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs,gpt-4-ext-zs,bert-base-ext-ft,gpt35-ext-ft,gpt40-ext-ft
0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...,Experts argue that the diminishing space on pl...,the dot and faa are happy to set standards for...,Experts are questioning if having such packed ...,```\nEver noticed how plane seats appear to be...
1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,,Intoxicated 17-year-old Rahul Kumar jumped int...,rahul kumar 17 climbed into the enclosure fenc...,,
2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...,Dougie Freedman is poised to sign a new two-ye...,dougie freedman is set to sign a new two year ...,"Nottingham Forest's manager, Dougie Freedman, ...",```\nDougie Freedman is on the verge of agreei...


In [52]:
logging.info("Extractive Text Summarization End ")
read_content = pd.read_csv(file_path)
read_content.head(3)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,id,highlights,article,baseline-ext,bert-base-ext-zs,t5-small-ext-zs,gpt-3.5-ext-zs,gpt-4-ext-zs,bert-base-ext-ft,gpt35-ext-ft,gpt40-ext-ft
0,0,0,92c514c913c0bdfe25341af9fd72b29db544099b,Experts question if packed out planes are put...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,Ever noticed how plane seats appear to be gett...,some experts are questioning if shrinking spac...,Experts are concerned that the shrinking space...,Experts argue that the diminishing space on pl...,the dot and faa are happy to set standards for...,Experts are questioning if having such packed ...,```\nEver noticed how plane seats appear to be...
1,1,1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,A drunk teenage boy had to be rescued by secur...,Rahul Kumar 17 climbed into the enclosure fenc...,,Intoxicated 17-year-old Rahul Kumar jumped int...,rahul kumar 17 climbed into the enclosure fenc...,,
2,2,2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is on the verge of agreeing a ...,Dougie Freedman is set to sign a new two year ...,Dougie Freedman is close to agreeing to a new ...,Dougie Freedman is poised to sign a new two-ye...,dougie freedman is set to sign a new two year ...,"Nottingham Forest's manager, Dougie Freedman, ...",```\nDougie Freedman is on the verge of agreei...
