In [1]:
import re
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from nltk.stem.snowball import SnowballStemmer
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
from tqdm import tqdm


def clean_text(text):
    # Convert text to lowercase
    text = text.lower()

    # Remove special characters and numbers
    text = re.sub('[^a-zA-Z. ]', ' ', text)


    # Remove extra whitespace
    text = re.sub('\s+', ' ', text).strip()

    return text

def filter_sentences(article, remove_stopwords=True, lemmatize=True, stem=True):
    # Clean the article
    article = clean_text(article)

    # Tokenize the article into sentences
    sentences = sent_tokenize(article)

    # Create a list to store the filtered sentences
    filtered_sentences = []

    # Define functions for removing stop words, lemmatizing, and stemming
    if remove_stopwords:
        stop_words = set(stopwords.words('english'))
        remove_stopwords = lambda x: x.lower() not in stop_words
    if lemmatize:
        lemmatizer = WordNetLemmatizer().lemmatize
    if stem:
        stemmer = PorterStemmer().stem

    # Loop through each sentence and apply the filters
    for sentence in sentences:
        # Tokenize the sentence into words
        words = word_tokenize(sentence)

        # Remove stopwords if necessary
        if remove_stopwords:
            words = list(filter(remove_stopwords, words))

        # Lemmatize words if necessary
        if lemmatize:
            words = [lemmatizer(word) for word in words]

        # Stem words if necessary
        if stem:
            words = [stemmer(word) for word in words]

        # Join the filtered words back into a sentence
        filtered_sentence = ' '.join(words)

        # Add the filtered sentence to the list
        if filtered_sentence:
            filtered_sentences.append(filtered_sentence)

    return sentences, filtered_sentences


def filter_sentences_spacy(article, remove_stopwords=True, lemmatize=True, stem=True):
    # Clean the article
    article = clean_text(article)

    # Load the English language model
    nlp = spacy.load('en_core_web_sm')

    # Parse the article into sentences using Spacy
    doc = nlp(article)
    sentences = []
    sentences = list(doc.sents)
    sentences = list(map(str,sentences))#change list f spans to list of string

    # Create a list to store the filtered sentences
    filtered_sentences = []

    # Define functions for removing stop words, lemmatizing, and stemming
    if remove_stopwords:
        stop_words = STOP_WORDS
        remove_stopwords = lambda x: x not in stop_words
    if lemmatize:
        lemmatizer = lambda x: x.lemma_
    if stem:
        stemmer = SnowballStemmer('english').stem

    # Loop through each sentence and apply the filters
    for sentence in doc.sents:
        # Tokenize the sentence into words
        words = [token.text for token in sentence]

        # Remove stop words if necessary
        if remove_stopwords:
            words = list(filter(remove_stopwords, words))

        # Lemmatize words if necessary
        if lemmatize:
            words = [lemmatizer(nlp(word)[0]) for word in words]

        # Stem words if necessary
        if stem:
            words = [stemmer(word) for word in words]

        # Join the filtered words back into a sentence
        filtered_sentence = ' '.join(words)

         # Add the filtered sentence to the list
        if filtered_sentence:
            filtered_sentences.append(filtered_sentence)
        else:
            # If the filtered sentence is empty, add an empty sentence
            filtered_sentences.append('')

    return  sentences,filtered_sentences


In [2]:
import pandas as pd
from efficiency_scores import *
from preprocessing_algorithms import *
from summarization_algorithm import*

In [3]:
df1 = pd.read_csv('DUC/main_dataset/Duc_dataset_first_ref_summary.csv')
df1 = df1.rename(columns={'Original Summary': 'Original Summary_1'})
df1 = df1.reset_index(drop=True)
df2 = pd.read_csv('DUC/main_dataset/Duc_dataset_second_ref_summary.csv')
df2 = df2.rename(columns={'Original Summary': 'Original Summary_2'})
df3 = pd.read_csv('DUC/main_dataset/Duc_dataset_third_ref_summary.csv')
df3 = df3.rename(columns={'Original Summary': 'Original Summary_3'})
df4 = pd.read_csv('DUC/main_dataset/Duc_dataset_fourth_ref_summary.csv')
df4 = df4.rename(columns={'Original Summary': 'Original Summary_4'})

In [4]:
import pandas as pd

# Assume we have three dataframes: df1, df2, df3
# We want to concatenate them and keep only the second column

# Concatenate the dataframes
df_concat = pd.concat([df1, df2.iloc[:, 1], df3.iloc[:, 1], df4.iloc[:, 1]], axis=1)

# Reset the index
df_concat = df_concat.reset_index(drop=True)
# Output the result
df_concat


Unnamed: 0,Original Article,Original Summary_1,Original Summary_2,Original Summary_3,Original Summary_4
0,\r\nCambodian leader Hun Sen on Friday rejecte...,Prospects were dim for resolution of the polit...,Cambodian prime minister Hun Sen rejects deman...,Cambodia King Norodom Sihanouk praised formati...,"Cambodian elections, fraudulent according to o..."
1,\r\nHonduras braced for potential catastrophe ...,Hurricane Mitch approached Honduras on Oct. 27...,Honduras braced as category 5 Hurricane Mitch ...,"Hurricane Mitch, category 5 hurricane, brought...","A category 5 storm, Hurricane Mitch roared acr..."
2,\r\nCuban President Fidel Castro said Sunday h...,Britain caused international controversy and C...,Former Chilean dictator Augusto Pinochet has b...,Pinochet arrested in London on Oct. 16 at a Sp...,"On Oct. 16, 1998 British police arrested forme..."
3,"\r\nMUNICH, Germany (AP) _ U.S. prosecutors ha...",After the bombing of U.S. embassies in East Af...,Evidence shows Sudanese factory bombed by US i...,In the aftermath of the almost simultaneous bo...,Following the bombings of the embassies in Afr...
4,\r\nIn a critical ruling for the North America...,In a dispute over a new collective bargaining ...,The Larry Bird exception has been used to pay ...,The National Basketball Association joined the...,In a dispute between the owners and NBA player...
5,\r\nRebels attacked a village in western Ugand...,"After years of civil war, Congo in October 199...","Likely ADF rebels attack Chiondo, Uganda, kill...","Rebel groups, mostly Tutsis, but backed by Raw...","Civil strife, tribal rivalry and rebellion has..."
6,\r\nIndonesian President B.J. Habibie finds at...,Prospects for the Asia-Pacific Economic Cooper...,Philippine and Indonesian presidents may not a...,The coming Asian-Pacific Economic Cooperation ...,A gloomy economic picture is facing Pacific-ri...
7,\r\nIsrael's Cabinet announced within hours of...,"On Nov. 6, 1998 a suicide auto-bomb attack on ...","After a bombing in a Jerusalem market Fri., th...",A car bomb exploded prematurely near a busy Je...,The Wye River accord has not been implemented....
8,\r\nBruises on the face of jailed dissident An...,Malaysian Prime Minister Mahathir Mohamad rule...,Mahathir's 17-year rule saw great advances.\r\...,Newspaper photos of the bruised face of Thaila...,Malaysian PM Mahathir Mohamad fired Deputy PM ...
9,"\r\nBRUSSELS, Belgium (AP) _ U.S. special envo...","On Oct. 4, 1998 Yugoslav President Milosevic o...","Under threat of NATO attack, Milosevic orders ...",Yugoslav President Slbodan Milosevic does not ...,Yugoslavia failed to comply with a U.N. resolu...


In [5]:
def process_one_column_df_with(df,processing_function,remove_stopwords=True, lemmatize=True, stem=True):
    rows = len(df)
    sentences,processed_sentences =[],[] 
    for row in tqdm(range(rows)):
        articles_sentence , filtered_article = processing_function(df.iloc[row],remove_stopwords,lemmatize,stem)
        sentences.append(articles_sentence)
        processed_sentences.append(filtered_article)
    
    return sentences, processed_sentences

In [36]:
nltk_sentences_ttt, nltk_filtered_sentences_ttt = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=True, lemmatize=True, stem=True)
nltk_sentences_fff, nltk_filtered_sentences_fff = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=False, lemmatize=False, stem=False)
nltk_sentences_tff, nltk_filtered_sentences_tff = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=True, lemmatize=False, stem=False)
nltk_sentences_ftf, nltk_filtered_sentences_ftf = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=False, lemmatize=True, stem=False)
nltk_sentences_fft, nltk_filtered_sentences_fft = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=False, lemmatize=False, stem=True)
nltk_sentences_ttf, nltk_filtered_sentences_ttf = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=True, lemmatize=True, stem=False)
nltk_sentences_tft, nltk_filtered_sentences_tft = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=True, lemmatize=False, stem=True)
nltk_sentences_ftt, nltk_filtered_sentences_ftt = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences,remove_stopwords=False, lemmatize=True, stem=True)

100%|██████████| 49/49 [00:04<00:00, 11.13it/s]
100%|██████████| 49/49 [00:01<00:00, 32.41it/s]
100%|██████████| 49/49 [00:01<00:00, 31.78it/s]
100%|██████████| 49/49 [00:02<00:00, 21.75it/s]
100%|██████████| 49/49 [00:04<00:00, 10.50it/s]
100%|██████████| 49/49 [00:02<00:00, 23.40it/s]
100%|██████████| 49/49 [00:03<00:00, 12.84it/s]
100%|██████████| 49/49 [00:05<00:00,  9.25it/s]


In [11]:
spacy_sentences_ttt, spacy_filtered_sentences_ttt = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=True, lemmatize=True, stem=True)
spacy_sentences_fff, spacy_filtered_sentences_fff = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=False, lemmatize=False, stem=False)
spacy_sentences_tff, spacy_filtered_sentences_tff = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=True, lemmatize=False, stem=False)
spacy_sentences_ftf, spacy_filtered_sentences_ftf = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=False, lemmatize=True, stem=False)
spacy_sentences_fft, spacy_filtered_sentences_fft = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=False, lemmatize=False, stem=True)
spacy_sentences_ttf, spacy_filtered_sentences_ttf = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=True, lemmatize=True, stem=False)
spacy_sentences_tft, spacy_filtered_sentences_tft = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=True, lemmatize=False, stem=True)
spacy_sentences_ftt, spacy_filtered_sentences_ftt = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= filter_sentences_spacy,remove_stopwords=False, lemmatize=True, stem=True)

100%|██████████| 49/49 [10:22<00:00, 12.71s/it]
100%|██████████| 49/49 [01:00<00:00,  1.23s/it]
100%|██████████| 49/49 [01:00<00:00,  1.24s/it]
100%|██████████| 49/49 [17:38<00:00, 21.60s/it]
100%|██████████| 49/49 [01:02<00:00,  1.27s/it]
100%|██████████| 49/49 [10:00<00:00, 12.25s/it]
100%|██████████| 49/49 [00:59<00:00,  1.21s/it]
100%|██████████| 49/49 [18:46<00:00, 22.99s/it]


In [12]:
our_sentences_ttt, our_filtered_sentence_ttt = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= preprocessing_text_with_spacy,remove_stopwords=True, lemmatize=True, stem=True)
our_sentences_fff, our_filtered_sentence_fff = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= preprocessing_text_with_spacy,remove_stopwords=False, lemmatize=False, stem=False)
our_sentences_tff, our_filtered_sentence_tff = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= preprocessing_text_with_spacy,remove_stopwords=True, lemmatize=False, stem=False)
our_sentences_ftf, our_filtered_sentence_ftf = process_one_column_df_with(df = df_concat['Original Article'] , 
                                                      processing_function= preprocessing_text_with_spacy,remove_stopwords=False, lemmatize=True, stem=False)


100%|██████████| 49/49 [03:02<00:00,  3.73s/it]
100%|██████████| 49/49 [01:22<00:00,  1.69s/it]
100%|██████████| 49/49 [01:23<00:00,  1.70s/it]
100%|██████████| 49/49 [03:17<00:00,  4.02s/it]


In [13]:
def summarize_with(list_of_filtered_articles, list_of_articles, summary_algorithm, summary_ratio=0.2):
    if not isinstance(list_of_filtered_articles[0], list):
        # If list_of_filtered_articles is not a list of lists, convert it into a list of lists
        list_of_filtered_articles = [list_of_filtered_articles]
        list_of_articles = [list_of_articles]
    
    rows = len(list_of_filtered_articles)
    summarized_text = []
    
    for row in tqdm(range(rows)):
        filtered_sentences = list_of_filtered_articles[row]
        sentences = list_of_articles[row]
        
        if not filtered_sentences:
            summarized_text.append('')
            continue
        
        # summary_size = int(summary_ratio * len(filtered_sentences))
        summary = summary_algorithm(filtered_sentences, sentences, size=summary_ratio)
        summarized_text.append(summary)
    
    summary_df = pd.DataFrame(summarized_text, columns=[f'{summary_algorithm.__name__} summary'])
    return summary_df

In [43]:
nltk_luhn_ttt = summarize_with(nltk_filtered_sentences_ttt,nltk_sentences_ttt, luhn_algorithm,5)
nltk_luhn_fff = summarize_with(nltk_filtered_sentences_fff,nltk_sentences_fff, luhn_algorithm,5)
nltk_luhn_tff = summarize_with(nltk_filtered_sentences_tff,nltk_sentences_tff, luhn_algorithm,5)
nltk_luhn_ftf = summarize_with(nltk_filtered_sentences_ftf,nltk_sentences_ftf, luhn_algorithm,5)
nltk_luhn_fft = summarize_with(nltk_filtered_sentences_fft,nltk_sentences_fft, luhn_algorithm,5)
nltk_luhn_ttf = summarize_with(nltk_filtered_sentences_ttf,nltk_sentences_ttf, luhn_algorithm,5)
nltk_luhn_tft = summarize_with(nltk_filtered_sentences_tft,nltk_sentences_tft, luhn_algorithm,5)
nltk_luhn_ftt = summarize_with(nltk_filtered_sentences_ftt,nltk_sentences_ftt, luhn_algorithm,5)

100%|██████████| 49/49 [00:00<00:00, 1531.13it/s]
100%|██████████| 49/49 [00:00<00:00, 1580.54it/s]
100%|██████████| 49/49 [00:00<00:00, 1885.01it/s]
100%|██████████| 49/49 [00:00<00:00, 1814.75it/s]
100%|██████████| 49/49 [00:00<00:00, 1750.31it/s]
100%|██████████| 49/49 [00:00<00:00, 2451.08it/s]
100%|██████████| 49/49 [00:00<00:00, 2579.26it/s]
100%|██████████| 49/49 [00:00<00:00, 1750.14it/s]


In [44]:
spacy_luhn_ttt = summarize_with(spacy_filtered_sentences_ttt,spacy_sentences_ttt,luhn_algorithm,5)
spacy_luhn_fff = summarize_with(spacy_filtered_sentences_fff,spacy_sentences_fff,luhn_algorithm,5)
spacy_luhn_tff = summarize_with(spacy_filtered_sentences_tff,spacy_sentences_tff,luhn_algorithm,5)
spacy_luhn_ftf = summarize_with(spacy_filtered_sentences_ftf,spacy_sentences_ftf,luhn_algorithm,5)
spacy_luhn_fft = summarize_with(spacy_filtered_sentences_fft,spacy_sentences_fft,luhn_algorithm,5)
spacy_luhn_ttf = summarize_with(spacy_filtered_sentences_ttf,spacy_sentences_ttf,luhn_algorithm,5)
spacy_luhn_tft = summarize_with(spacy_filtered_sentences_tft,spacy_sentences_tft,luhn_algorithm,5)
spacy_luhn_ftt = summarize_with(spacy_filtered_sentences_ftt,spacy_sentences_ftt,luhn_algorithm,5)

100%|██████████| 49/49 [00:00<00:00, 2130.46it/s]
100%|██████████| 49/49 [00:00<00:00, 1689.87it/s]
100%|██████████| 49/49 [00:00<00:00, 2450.53it/s]
100%|██████████| 49/49 [00:00<00:00, 1634.06it/s]
100%|██████████| 49/49 [00:00<00:00, 1689.53it/s]
100%|██████████| 49/49 [00:00<00:00, 2578.94it/s]
100%|██████████| 49/49 [00:00<00:00, 2333.50it/s]
100%|██████████| 49/49 [00:00<00:00, 1749.96it/s]


In [45]:
our_luhn_ttt = summarize_with(our_filtered_sentence_ttt,our_sentences_ttt, luhn_algorithm,5)
our_luhn_fff = summarize_with(our_filtered_sentence_fff,our_sentences_fff, luhn_algorithm,5)
our_luhn_tff = summarize_with(our_filtered_sentence_tff,our_sentences_tff, luhn_algorithm,5)
our_luhn_ftf = summarize_with(our_filtered_sentence_ftf,our_sentences_ftf, luhn_algorithm,5)


100%|██████████| 49/49 [00:00<00:00, 2578.68it/s]
100%|██████████| 49/49 [00:00<00:00, 1256.85it/s]
100%|██████████| 49/49 [00:00<00:00, 2330.06it/s]
100%|██████████| 49/49 [00:00<00:00, 1812.95it/s]


In [46]:
nltk_LexRank_ttt = summarize_with(nltk_filtered_sentences_ttt,nltk_sentences_ttt, LexRank_algorithm,5)
nltk_LexRank_fff = summarize_with(nltk_filtered_sentences_fff,nltk_sentences_fff, LexRank_algorithm,5)
nltk_LexRank_tff = summarize_with(nltk_filtered_sentences_tff,nltk_sentences_tff, LexRank_algorithm,5)
nltk_LexRank_ftf = summarize_with(nltk_filtered_sentences_ftf,nltk_sentences_ftf, LexRank_algorithm,5)
nltk_LexRank_fft = summarize_with(nltk_filtered_sentences_fft,nltk_sentences_fft, LexRank_algorithm,5)
nltk_LexRank_ttf = summarize_with(nltk_filtered_sentences_ttf,nltk_sentences_ttf, LexRank_algorithm,5)
nltk_LexRank_tft = summarize_with(nltk_filtered_sentences_tft,nltk_sentences_tft, LexRank_algorithm,5)
nltk_LexRank_ftt = summarize_with(nltk_filtered_sentences_ftt,nltk_sentences_ftt, LexRank_algorithm,5)

100%|██████████| 49/49 [00:07<00:00,  6.29it/s]
100%|██████████| 49/49 [00:13<00:00,  3.65it/s]
100%|██████████| 49/49 [00:07<00:00,  6.71it/s]
100%|██████████| 49/49 [00:12<00:00,  3.79it/s]
100%|██████████| 49/49 [00:12<00:00,  3.85it/s]
100%|██████████| 49/49 [00:07<00:00,  6.56it/s]
100%|██████████| 49/49 [00:07<00:00,  6.19it/s]
100%|██████████| 49/49 [00:13<00:00,  3.75it/s]


In [47]:
spacy_LexRank_ttt = summarize_with(spacy_filtered_sentences_ttt,spacy_sentences_ttt,LexRank_algorithm,5)
spacy_LexRank_fff = summarize_with(spacy_filtered_sentences_fff,spacy_sentences_fff,LexRank_algorithm,5)
spacy_LexRank_tff = summarize_with(spacy_filtered_sentences_tff,spacy_sentences_tff,LexRank_algorithm,5)
spacy_LexRank_ftf = summarize_with(spacy_filtered_sentences_ftf,spacy_sentences_ftf,LexRank_algorithm,5)
spacy_LexRank_fft = summarize_with(spacy_filtered_sentences_fft,spacy_sentences_fft,LexRank_algorithm,5)
spacy_LexRank_ttf = summarize_with(spacy_filtered_sentences_ttf,spacy_sentences_ttf,LexRank_algorithm,5)
spacy_LexRank_tft = summarize_with(spacy_filtered_sentences_tft,spacy_sentences_tft,LexRank_algorithm,5)
spacy_LexRank_ftt = summarize_with(spacy_filtered_sentences_ftt,spacy_sentences_ftt,LexRank_algorithm,5)

100%|██████████| 49/49 [00:07<00:00,  6.42it/s]
100%|██████████| 49/49 [00:13<00:00,  3.72it/s]
100%|██████████| 49/49 [00:07<00:00,  6.87it/s]
100%|██████████| 49/49 [00:13<00:00,  3.67it/s]
100%|██████████| 49/49 [00:13<00:00,  3.59it/s]
100%|██████████| 49/49 [00:07<00:00,  6.39it/s]
100%|██████████| 49/49 [00:07<00:00,  6.68it/s]
100%|██████████| 49/49 [00:13<00:00,  3.73it/s]


In [48]:
our_LexRank_ttt = summarize_with(our_filtered_sentence_ttt,our_sentences_ttt, LexRank_algorithm,5)
our_LexRank_fff = summarize_with(our_filtered_sentence_fff,our_sentences_fff, LexRank_algorithm,5)
our_LexRank_tff = summarize_with(our_filtered_sentence_tff,our_sentences_tff, LexRank_algorithm,5)
our_LexRank_ftf = summarize_with(our_filtered_sentence_ftf,our_sentences_ftf, LexRank_algorithm,5)


100%|██████████| 49/49 [00:06<00:00,  7.36it/s]
100%|██████████| 49/49 [00:12<00:00,  3.78it/s]
100%|██████████| 49/49 [00:06<00:00,  7.68it/s]
100%|██████████| 49/49 [00:13<00:00,  3.67it/s]


In [49]:
nltk_luhn_ttt_scores = rouge_scores_df(df1,nltk_luhn_ttt,summary_column_name=df1.columns[1])
nltk_luhn_fff_scores = rouge_scores_df(df1,nltk_luhn_fff,summary_column_name=df1.columns[1])
nltk_luhn_tff_scores = rouge_scores_df(df1,nltk_luhn_tff,summary_column_name=df1.columns[1])
nltk_luhn_ftf_scores = rouge_scores_df(df1,nltk_luhn_ftf,summary_column_name=df1.columns[1])
nltk_luhn_fft_scores = rouge_scores_df(df1,nltk_luhn_fft,summary_column_name=df1.columns[1])
nltk_luhn_ttf_scores = rouge_scores_df(df1,nltk_luhn_ttf,summary_column_name=df1.columns[1])
nltk_luhn_tft_scores = rouge_scores_df(df1,nltk_luhn_tft,summary_column_name=df1.columns[1])
nltk_luhn_ftt_scores = rouge_scores_df(df1,nltk_luhn_ftt,summary_column_name=df1.columns[1])

100%|██████████| 49/49 [01:03<00:00,  1.29s/it]
100%|██████████| 49/49 [01:01<00:00,  1.25s/it]
100%|██████████| 49/49 [01:01<00:00,  1.25s/it]
100%|██████████| 49/49 [01:00<00:00,  1.24s/it]
100%|██████████| 49/49 [01:00<00:00,  1.24s/it]
100%|██████████| 49/49 [01:01<00:00,  1.26s/it]
100%|██████████| 49/49 [01:02<00:00,  1.27s/it]
100%|██████████| 49/49 [01:04<00:00,  1.31s/it]


In [50]:
spacy_luhn_ttt_scores = rouge_scores_df(df1,spacy_luhn_ttt,summary_column_name=df1.columns[1])
spacy_luhn_fff_scores = rouge_scores_df(df1,spacy_luhn_fff,summary_column_name=df1.columns[1])
spacy_luhn_tff_scores = rouge_scores_df(df1,spacy_luhn_tff,summary_column_name=df1.columns[1])
spacy_luhn_ftf_scores = rouge_scores_df(df1,spacy_luhn_ftf,summary_column_name=df1.columns[1])
spacy_luhn_fft_scores = rouge_scores_df(df1,spacy_luhn_fft,summary_column_name=df1.columns[1])
spacy_luhn_ttf_scores = rouge_scores_df(df1,spacy_luhn_ttf,summary_column_name=df1.columns[1])
spacy_luhn_tft_scores = rouge_scores_df(df1,spacy_luhn_tft,summary_column_name=df1.columns[1])
spacy_luhn_ftt_scores = rouge_scores_df(df1,spacy_luhn_ftt,summary_column_name=df1.columns[1])

100%|██████████| 49/49 [01:02<00:00,  1.28s/it]
100%|██████████| 49/49 [01:01<00:00,  1.26s/it]
100%|██████████| 49/49 [01:03<00:00,  1.29s/it]
100%|██████████| 49/49 [01:01<00:00,  1.26s/it]
100%|██████████| 49/49 [01:00<00:00,  1.24s/it]
100%|██████████| 49/49 [01:01<00:00,  1.26s/it]
100%|██████████| 49/49 [01:01<00:00,  1.25s/it]
100%|██████████| 49/49 [01:02<00:00,  1.27s/it]


In [51]:
our_luhn_ttt_scores = rouge_scores_df(df1,our_luhn_ttt,summary_column_name=df1.columns[1])
our_luhn_fff_scores = rouge_scores_df(df1,our_luhn_fff,summary_column_name=df1.columns[1])
our_luhn_tff_scores = rouge_scores_df(df1,our_luhn_tff,summary_column_name=df1.columns[1])
our_luhn_ftf_scores = rouge_scores_df(df1,our_luhn_ftf,summary_column_name=df1.columns[1])


100%|██████████| 49/49 [00:59<00:00,  1.22s/it]
100%|██████████| 49/49 [00:58<00:00,  1.19s/it]
100%|██████████| 49/49 [00:58<00:00,  1.19s/it]
100%|██████████| 49/49 [01:00<00:00,  1.23s/it]


In [52]:
nltk_LexRank_ttt_scores = rouge_scores_df(df1,nltk_LexRank_ttt,summary_column_name=df1.columns[1])
nltk_LexRank_fff_scores = rouge_scores_df(df1,nltk_LexRank_fff,summary_column_name=df1.columns[1])
nltk_LexRank_tff_scores = rouge_scores_df(df1,nltk_LexRank_tff,summary_column_name=df1.columns[1])
nltk_LexRank_ftf_scores = rouge_scores_df(df1,nltk_LexRank_ftf,summary_column_name=df1.columns[1])
nltk_LexRank_fft_scores = rouge_scores_df(df1,nltk_LexRank_fft,summary_column_name=df1.columns[1])
nltk_LexRank_ttf_scores = rouge_scores_df(df1,nltk_LexRank_ttf,summary_column_name=df1.columns[1])
nltk_LexRank_tft_scores = rouge_scores_df(df1,nltk_LexRank_tft,summary_column_name=df1.columns[1])
nltk_LexRank_ftt_scores = rouge_scores_df(df1,nltk_LexRank_ftt,summary_column_name=df1.columns[1])

100%|██████████| 49/49 [01:01<00:00,  1.26s/it]
100%|██████████| 49/49 [01:04<00:00,  1.32s/it]
100%|██████████| 49/49 [01:02<00:00,  1.27s/it]
100%|██████████| 49/49 [01:01<00:00,  1.25s/it]
100%|██████████| 49/49 [01:01<00:00,  1.25s/it]
100%|██████████| 49/49 [01:01<00:00,  1.26s/it]
100%|██████████| 49/49 [01:03<00:00,  1.29s/it]
100%|██████████| 49/49 [01:04<00:00,  1.32s/it]


In [53]:
spacy_LexRank_ttt_scores = rouge_scores_df(df1,spacy_LexRank_ttt,summary_column_name=df1.columns[1])
spacy_LexRank_fff_scores = rouge_scores_df(df1,spacy_LexRank_fff,summary_column_name=df1.columns[1])
spacy_LexRank_tff_scores = rouge_scores_df(df1,spacy_LexRank_tff,summary_column_name=df1.columns[1])
spacy_LexRank_ftf_scores = rouge_scores_df(df1,spacy_LexRank_ftf,summary_column_name=df1.columns[1])
spacy_LexRank_fft_scores = rouge_scores_df(df1,spacy_LexRank_fft,summary_column_name=df1.columns[1])
spacy_LexRank_ttf_scores = rouge_scores_df(df1,spacy_LexRank_ttf,summary_column_name=df1.columns[1])
spacy_LexRank_tft_scores = rouge_scores_df(df1,spacy_LexRank_tft,summary_column_name=df1.columns[1])
spacy_LexRank_ftt_scores = rouge_scores_df(df1,spacy_LexRank_ftt,summary_column_name=df1.columns[1])

100%|██████████| 49/49 [01:03<00:00,  1.30s/it]
100%|██████████| 49/49 [01:02<00:00,  1.27s/it]
100%|██████████| 49/49 [01:02<00:00,  1.27s/it]
100%|██████████| 49/49 [01:01<00:00,  1.25s/it]
100%|██████████| 49/49 [01:03<00:00,  1.30s/it]
100%|██████████| 49/49 [01:03<00:00,  1.29s/it]
100%|██████████| 49/49 [01:02<00:00,  1.28s/it]
100%|██████████| 49/49 [01:04<00:00,  1.31s/it]


In [54]:
our_LexRank_ttt_scores = rouge_scores_df(df1,our_LexRank_ttt,summary_column_name=df1.columns[1])
our_LexRank_fff_scores = rouge_scores_df(df1,our_LexRank_fff,summary_column_name=df1.columns[1])
our_LexRank_tff_scores = rouge_scores_df(df1,our_LexRank_tff,summary_column_name=df1.columns[1])
our_LexRank_ftf_scores = rouge_scores_df(df1,our_LexRank_ftf,summary_column_name=df1.columns[1])


100%|██████████| 49/49 [01:04<00:00,  1.31s/it]
100%|██████████| 49/49 [01:03<00:00,  1.29s/it]
100%|██████████| 49/49 [01:02<00:00,  1.28s/it]
100%|██████████| 49/49 [01:06<00:00,  1.36s/it]


In [55]:
nltk_luhn_ttt_scores_avg = df_avg_by_column(nltk_luhn_ttt_scores)
nltk_luhn_fff_scores_avg = df_avg_by_column(nltk_luhn_fff_scores)
nltk_luhn_tff_scores_avg = df_avg_by_column(nltk_luhn_tff_scores)
nltk_luhn_ftf_scores_avg = df_avg_by_column(nltk_luhn_ftf_scores)
nltk_luhn_fft_scores_avg = df_avg_by_column(nltk_luhn_fft_scores)
nltk_luhn_ttf_scores_avg = df_avg_by_column(nltk_luhn_ttf_scores)
nltk_luhn_tft_scores_avg = df_avg_by_column(nltk_luhn_tft_scores)
nltk_luhn_ftt_scores_avg = df_avg_by_column(nltk_luhn_ftt_scores)


In [56]:
spacy_luhn_ttt_scores_avg = df_avg_by_column(spacy_luhn_ttt_scores)
spacy_luhn_fff_scores_avg = df_avg_by_column(spacy_luhn_fff_scores)
spacy_luhn_tff_scores_avg = df_avg_by_column(spacy_luhn_tff_scores)
spacy_luhn_ftf_scores_avg = df_avg_by_column(spacy_luhn_ftf_scores)
spacy_luhn_fft_scores_avg = df_avg_by_column(spacy_luhn_fft_scores)
spacy_luhn_ttf_scores_avg = df_avg_by_column(spacy_luhn_ttf_scores)
spacy_luhn_tft_scores_avg = df_avg_by_column(spacy_luhn_tft_scores)
spacy_luhn_ftt_scores_avg = df_avg_by_column(spacy_luhn_ftt_scores)

In [57]:
our_luhn_ttt_scores_avg = df_avg_by_column(our_luhn_ttt_scores)
our_luhn_fff_scores_avg = df_avg_by_column(our_luhn_fff_scores)
our_luhn_tff_scores_avg = df_avg_by_column(our_luhn_tff_scores)
our_luhn_ftf_scores_avg = df_avg_by_column(our_luhn_ftf_scores)

In [58]:
nltk_LexRank_ttt_scores_avg = df_avg_by_column(nltk_LexRank_ttt_scores)
nltk_LexRank_fff_scores_avg = df_avg_by_column(nltk_LexRank_fff_scores)
nltk_LexRank_tff_scores_avg = df_avg_by_column(nltk_LexRank_tff_scores)
nltk_LexRank_ftf_scores_avg = df_avg_by_column(nltk_LexRank_ftf_scores)
nltk_LexRank_fft_scores_avg = df_avg_by_column(nltk_LexRank_fft_scores)
nltk_LexRank_ttf_scores_avg = df_avg_by_column(nltk_LexRank_ttf_scores)
nltk_LexRank_tft_scores_avg = df_avg_by_column(nltk_LexRank_tft_scores)
nltk_LexRank_ftt_scores_avg = df_avg_by_column(nltk_LexRank_ftt_scores)

In [59]:
spacy_LexRank_ttt_scores_avg = df_avg_by_column(spacy_LexRank_ttt_scores)
spacy_LexRank_fff_scores_avg = df_avg_by_column(spacy_LexRank_fff_scores)
spacy_LexRank_tff_scores_avg = df_avg_by_column(spacy_LexRank_tff_scores)
spacy_LexRank_ftf_scores_avg = df_avg_by_column(spacy_LexRank_ftf_scores)
spacy_LexRank_fft_scores_avg = df_avg_by_column(spacy_LexRank_fft_scores)
spacy_LexRank_ttf_scores_avg = df_avg_by_column(spacy_LexRank_ttf_scores)
spacy_LexRank_tft_scores_avg = df_avg_by_column(spacy_LexRank_tft_scores)
spacy_LexRank_ftt_scores_avg = df_avg_by_column(spacy_LexRank_ftt_scores)

In [60]:
our_LexRank_ttt_scores_avg = df_avg_by_column(our_LexRank_ttt_scores)
our_LexRank_fff_scores_avg = df_avg_by_column(our_LexRank_fff_scores)
our_LexRank_tff_scores_avg = df_avg_by_column(our_LexRank_tff_scores)
our_LexRank_ftf_scores_avg = df_avg_by_column(our_LexRank_ftf_scores)

In [61]:
# nltk_LexRank_ttt_scores_avg.to_csv('preprocessing/nltk_LexRank_ttt_scores_avg.csv')
# nltk_LexRank_fff_scores_avg.to_csv('preprocessing/nltk_LexRank_fff_scores_avg.csv')
# nltk_LexRank_tff_scores_avg.to_csv('preprocessing/nltk_LexRank_tff_scores_avg.csv')
# nltk_LexRank_ftf_scores_avg.to_csv('preprocessing/nltk_LexRank_ftf_scores_avg.csv')
# nltk_LexRank_fft_scores_avg.to_csv('preprocessing/nltk_LexRank_fft_scores_avg.csv')
# nltk_LexRank_ttf_scores_avg.to_csv('preprocessing/nltk_LexRank_ttf_scores_avg.csv')
# nltk_LexRank_tft_scores_avg.to_csv('preprocessing/nltk_LexRank_tft_scores_avg.csv')
# nltk_LexRank_ftt_scores_avg.to_csv('preprocessing/nltk_LexRank_ftt_scores_avg.csv')

In [62]:
# spacy_LexRank_ttt_scores_avg.to_csv('preprocessing/spacy_LexRank_ttt_scores_avg.csv')
# spacy_LexRank_fff_scores_avg.to_csv('preprocessing/spacy_LexRank_fff_scores_avg.csv')
# spacy_LexRank_tff_scores_avg.to_csv('preprocessing/spacy_LexRank_tff_scores_avg.csv')
# spacy_LexRank_ftf_scores_avg.to_csv('preprocessing/spacy_LexRank_ftf_scores_avg.csv')
# spacy_LexRank_fft_scores_avg.to_csv('preprocessing/spacy_LexRank_fft_scores_avg.csv')
# spacy_LexRank_ttf_scores_avg.to_csv('preprocessing/spacy_LexRank_ttf_scores_avg.csv')
# spacy_LexRank_tft_scores_avg.to_csv('preprocessing/spacy_LexRank_tft_scores_avg.csv')
# spacy_LexRank_ftt_scores_avg.to_csv('preprocessing/spacy_LexRank_ftt_scores_avg.csv')

In [63]:
# our_LexRank_ttt_scores_avg.to_csv('preprocessing/our_LexRank_ttt_scores_avg.csv')
# our_LexRank_fff_scores_avg.to_csv('preprocessing/our_LexRank_fff_scores_avg.csv')
# our_LexRank_tff_scores_avg.to_csv('preprocessing/our_LexRank_tff_scores_avg.csv')
# our_LexRank_ftf_scores_avg.to_csv('preprocessing/our_LexRank_ftf_scores_avg.csv')

In [64]:
# nltk_luhn_ttt_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_ttt_scores_avg.csv')
# nltk_luhn_fff_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_fff_scores_avg.csv')
# nltk_luhn_tff_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_tff_scores_avg.csv')
# nltk_luhn_ftf_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_ftf_scores_avg.csv')
# nltk_luhn_fft_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_fft_scores_avg.csv')
# nltk_luhn_ttf_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_ttf_scores_avg.csv')
# nltk_luhn_tft_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_tft_scores_avg.csv')
# nltk_luhn_ftt_scores_avg.to_csv('preprocessing/luhn/nltk_luhn_ftt_scores_avg.csv')

In [65]:
# spacy_luhn_ttt_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_ttt_scores_avg.csv')
# spacy_luhn_fff_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_fff_scores_avg.csv')
# spacy_luhn_tff_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_tff_scores_avg.csv')
# spacy_luhn_ftf_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_ftf_scores_avg.csv')
# spacy_luhn_fft_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_fft_scores_avg.csv')
# spacy_luhn_ttf_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_ttf_scores_avg.csv')
# spacy_luhn_tft_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_tft_scores_avg.csv')
# spacy_luhn_ftt_scores_avg.to_csv('preprocessing/luhn/spacy_luhn_ftt_scores_avg.csv')

In [66]:
# our_luhn_ttt_scores_avg.to_csv('preprocessing/luhn/our_luhn_ttt_scores_avg.csv')
# our_luhn_fff_scores_avg.to_csv('preprocessing/luhn/our_luhn_fff_scores_avg.csv')
# our_luhn_tff_scores_avg.to_csv('preprocessing/luhn/our_luhn_tff_scores_avg.csv')
# our_luhn_ftf_scores_avg.to_csv('preprocessing/luhn/our_luhn_ftf_scores_avg.csv')