<a href="https://colab.research.google.com/github/nikhilm21/RapidReads/blob/main/BART_LARGE_model_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Installing Packages

In [1]:
!pip install datasets transformers rouge-score nltk

import datasets
from datasets import Dataset
from datasets import load_dataset, load_metric

import nltk
nltk.download('punkt')

import os
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from IPython.display import display, HTML
import matplotlib.pyplot as plt

import torch
from transformers import AutoTokenizer,T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

import re
from bs4 import BeautifulSoup

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [2]:
# Mount Google Drive at /content/drive (Colab-only API); the model directory
# and test CSV read later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# The tokenizer is loaded from the public base checkpoint named here.
# NOTE(review): the model weights are loaded from a separate Drive directory,
# so this assumes the saved model uses the base vocabulary unchanged — confirm.
base_model = 'facebook/bart-large'
tokenizer = AutoTokenizer.from_pretrained(base_model)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

### Loading the Model

In [4]:
# Directory on the mounted Drive holding the saved seq2seq model
# (presumably the fine-tuned BART checkpoint — confirm).
PATH = '/content/drive/MyDrive/Models/model/'

model_summ_loaded = AutoModelForSeq2SeqLM.from_pretrained(PATH)
# Run on the first GPU when CUDA is available; otherwise fall back to CPU
# (device = -1) so the notebook still runs on a runtime without a GPU.
summarizer_loaded = pipeline(
    "summarization",
    model = model_summ_loaded,
    tokenizer = tokenizer,
    device = 0 if torch.cuda.is_available() else -1,
)

### Data Preprocessing

In [5]:
def get_summary(summarizer_object, text, min_length = 10, max_length = 50):
    """Summarize ``text`` and render inputs beside their summaries as an HTML table.

    Args:
        summarizer_object: Callable invoked as
            ``summarizer_object(text, min_length=..., max_length=...)``. It must
            return an iterable of dicts that each carry a ``'summary_text'`` key.
        text: Input forwarded unchanged to ``summarizer_object``; also used as
            the ``text`` column of the rendered table.
        min_length: Forwarded as the ``min_length`` keyword.
        max_length: Forwarded as the ``max_length`` keyword.

    Returns:
        None. The table is shown through ``display`` rather than returned.
    """
    outputs = summarizer_object(text, min_length = min_length, max_length = max_length)

    # Keep only the generated text from each result dict.
    summaries = []
    for item in outputs:
        summaries.append(item['summary_text'])

    table = pd.DataFrame({"text": text, "summary": summaries})
    display(HTML(table.to_html()))

In [6]:
# Lookup table from English contractions to their expanded forms.
# `text_cleaner` consults it one space-delimited token at a time, after
# lowercasing the text, so the capitalised keys (e.g. "I'd", "I'm") can never
# match there; only the lowercase keys take effect in that function.
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have",
                       "couldn't": "could not", "didn't": "did not",  "doesn't": "does not", "don't": "do not", "hadn't": "had not",
                       "hasn't": "has not", "haven't": "have not", "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did",
                       "how'd'y": "how do you", "how'll": "how will", "how's": "how is", "I'd": "I would", "I'd've": "I would have",
                       "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would", "i'd've": "i would have",
                       "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",
                       "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us",
                       "ma'am": "madam", "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have",
                       "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not",
                       "needn't've": "need not have","o'clock": "of the clock", "oughtn't": "ought not", "oughtn't've": "ought not have",
                       "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would",
                       "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",
                       "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",
                       "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",
                       "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would",
                       "they'd've": "they would have", "they'll": "they will", "they'll've": "they will have", "they're": "they are",
                       "they've": "they have", "to've": "to have", "wasn't": "was not", "we'd": "we would", "we'd've": "we would have",
                       "we'll": "we will", "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not",
                       "what'll": "what will", "what'll've": "what will have", "what're": "what are", "what's": "what is", "what've": "what have",
                       "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is", "where've": "where have",
                       "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have", "why's": "why is",
                       "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have", "would've": "would have",
                       "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would",
                       "y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have", "you'd": "you would",
                       "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have", "you're": "you are", "you've": "you have"}

In [7]:
def text_cleaner(text):
    """Normalise a raw text string into lowercase letters, periods and single spaces.

    Steps, in order:
      1. lowercase the input;
      2. parse it with BeautifulSoup ("lxml") and keep only the text content;
      3. remove parenthesised spans and double-quote characters;
      4. replace each space-delimited token that is a key of
         ``contraction_mapping`` with its expansion (a contraction with
         punctuation attached is a different token and is left alone);
      5. remove ``'s`` where it ends a word;
      6. replace runs of two or more periods with a space;
      7. replace every character other than an ASCII letter or ``.`` with a
         space (digits are removed by this step);
      8. collapse whitespace runs to one space and strip both ends.

    Args:
        text: String to clean; it must provide ``.lower()``.

    Returns:
        The cleaned string.
    """
    lowered = text.lower()
    plain = BeautifulSoup(lowered, "lxml").text

    # Parenthesised asides and double quotes are removed outright.
    plain = re.sub(r'\([^)]*\)', '', plain)
    plain = plain.replace('"', '')

    # Contractions are expanded token by token, splitting on a single space.
    expanded = ' '.join(contraction_mapping.get(token, token) for token in plain.split(" "))

    # Remaining possessives, ellipses and non-letter characters.
    expanded = re.sub(r"'s\b", "", expanded)
    expanded = re.sub(r'\.{2,}', ' ', expanded)
    expanded = re.sub("[^a-zA-Z.]", " ", expanded)

    return re.sub(r'\s+', ' ', expanded).strip()

In [9]:
# Load the held-out test set from Drive and preview it.
# NOTE(review): the preview shows an `Unnamed: 0` column, presumably an index
# written when the CSV was saved; only `text` and `summary` are used in the
# cells visible here — confirm before dropping it.
test_data = pd.read_csv('/content/drive/MyDrive/Models/test_data.csv')
test_data.head()

Unnamed: 0,text,summary
0,"First things first - yes we're teenagers, youn...",Two months into relationship and I found out ...
1,"My partner's cousin, E, often invites us over ...",Partner's cousin and her husband frequently i...
2,It was my 21st birthday. My gf went with me to...,"Had drunken sex with my gf, only to find out ..."


In [10]:
# Clean the source texts with text_cleaner. This overwrites the raw column,
# so re-running the cell cleans text that has already been cleaned.
test_data['text'] = test_data['text'].map(text_cleaner)

In [11]:
# Apply the same cleaning to the reference summaries (also overwritten in place).
test_data['summary'] = test_data['summary'].map(text_cleaner)

In [12]:
# Preview the frame after cleaning.
test_data.head()

Unnamed: 0,text,summary
0,first things first yes we are teenagers young ...,two months into relationship and i found out t...
1,my partner cousin e often invites us over for ...,partner cousin and her husband frequently invi...
2,it was my st birthday. my gf went with me to a...,had drunken sex with my gf only to find out sh...


In [13]:
# Summarise every cleaned test text and render the text/summary table.
# get_summary displays the table itself and has no return value.
get_summary(summarizer_loaded, test_data.text.tolist())

Unnamed: 0,text,summary
0,first things first yes we are teenagers young and immature or whatever. please help me out anyways ok so. there was this girl i had a crush on since freshman year of high school. we have been really good friends since then and she means a lot to me. this summer i decided nothing was going to happen because it had already been over a year and nothing happened. after i made this decision i met another girl over summer. she was really pretty and i got her number. i asked her out and we have been together for a little over two months. i love her. she is been amazing to me so nice and caring and just seems like the perfect girl. but i have recently been told that the girl i had a long crush on the first girl is heartbroken that i am with this new girl and it breaks her heart a little more each time she sees me because she loves me. her friend told me this. and finding this out broke my heart i felt so bad because i liked her too. finding this out has made me think that i could make her happy by breaking up with my girlfriend and going with her. but i could never do that to my girlfriend i love her. does anyone have advice i can answer any questions.,i am in a relationship with a girl i had a crush on for a long time and she is heartbroken that i am with a new girl. i want to break up with my girlfriend but i love her.
1,my partner cousin e often invites us over for dinner. this is a very nice gesture and we have been many times. i like to spend time with e and her family. the problem is that my boyfriend b and i have gotten explosive diarrhea from e meals on more than one occasion. it has happened at least times now and we are starting to see the pattern. e does not seem to have a very good understanding of safe food handling. for example i have seen her serve appetizers to people when they have been sitting out for several hours. also i once saw her leave some steaks on the counter for almost hours before cooking them. i know that it is good to let steak rest at room temperature for a short time before cooking them but definitely not for hours both me and b have had the shits so bad from e meals that we have had to miss work. it does not seem as though e her husband m or kids m ever really get sick like we do perhaps they are just used to it. i really do not know how to approach this issue. we do not want to stop hanging out with e and her family because they are good friends . it is really nice of her to always be inviting us for dinner and i do not know how we can tell her about this issue without sounding rude.,my partner cousin e often invites us over for dinner. i like to spend time with her and her family but my boyfriend and i have gotten explosive diarrhea from e meals on more than one occasion. how do we tell her about this without
2,it was my st birthday. my gf went with me to a strip club first strip club i have ever been to in my life. we both get completely shit faced and head back to her place. drunk as fuck we both pass out. i woke up in the middle of the night and began to make out with her because i was horny as fuck and my buzz was wearing off. she began to kiss back and was giving me a hj so i figured she wanted it bad. we began to get our sex on and i noticed she was really wet so dumbass me thinks that i have made her horny as fuck. so here i am thinking i am the god of sex and we both finish. wake up the next morning with a nasty hangover and i noticed that my hand was covered in something dark and red. at this moment i realized something horrible. i removed the sheets only to find out she was on her period the whole time. her sheets pants my pants and pillows were ruined. what felt like an awesome sex experience turned out to be the worst thing that has ever happened to me. your turn reddit.,got drunk and had sex with my gf she was on her period the whole time.


In [14]:
# ROUGE scorer used for the evaluation cells below.
# NOTE(review): `datasets.load_metric` is deprecated in newer `datasets`
# releases in favour of the separate `evaluate` package, whose ROUGE result
# format may differ from the `.mid.fmeasure` access used later — confirm
# before upgrading.
metric = load_metric("rouge")

  metric = load_metric("rouge")


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

In [15]:
# Plain Python lists for evaluation: cleaned inputs, cleaned references, and
# an initially empty accumulator for predictions, which a later cell fills.
text_test = test_data.text.tolist()
summary_test = test_data.summary.tolist()
summary_pred = []

In [16]:
# Generate predictions in batches, starting at start_idx and stopping after
# max_batches batches so the run stays short.
batch_size = 8
start_idx = 0
max_batches = 3  # upper bound on the number of batches processed per run

# Drop predictions at or beyond start_idx before appending, so re-running
# this cell replaces them instead of appending duplicates. Without this,
# summary_pred stops lining up with summary_test after a second run.
del summary_pred[start_idx:]

for i in range(0, len(text_test[start_idx:]), batch_size):
    batch = i // batch_size  # zero-based batch counter for this run
    if batch >= max_batches:
        break
    batch_text = text_test[start_idx+i : start_idx+i+batch_size]
    summary_pred += [elem['summary_text'] for elem in summarizer_loaded(batch_text, min_length = 10, max_length = 50)]

In [17]:
# Aggregate ROUGE over all predictions so far, scored against the same-length
# prefix of the references. NOTE: the per-sample loop in the next cell rebinds
# `results`, so this aggregate is no longer available after that cell runs.
results = metric.compute(predictions = summary_pred, references = summary_test[:len(summary_pred)])

In [18]:
# Per-sample ROUGE F-measures, index-aligned with summary_pred.
rouge1 = []
rouge2 = []
rougeL = []

for i in range(len(summary_pred)):
    # Score one prediction against its reference. This rebinds `results`,
    # replacing the aggregate scores computed in the previous cell.
    results = metric.compute(predictions = [summary_pred[i]], references = [summary_test[i]])
    # Append each score directly; looping over the keys only to branch on
    # each one added nothing.
    rouge1.append(results["rouge1"].mid.fmeasure)
    rouge2.append(results["rouge2"].mid.fmeasure)
    rougeL.append(results["rougeL"].mid.fmeasure)

In [19]:
# Collect inputs, references, predictions and per-sample ROUGE scores in one
# frame; the text and reference lists are cut to the number of predictions.
n = len(summary_pred)
df_results_sample = pd.DataFrame(
    {
        "text": text_test[:n],
        "summary": summary_test[:n],
        "summary_pred": summary_pred[:n],
        "rouge1": rouge1,
        "rouge2": rouge2,
        "rougeL": rougeL,
    }
)

df_results_sample

Unnamed: 0,text,summary,summary_pred,rouge1,rouge2,rougeL
0,first things first yes we are teenagers young ...,two months into relationship and i found out t...,i am in a relationship with a girl i had a cru...,0.529412,0.363636,0.470588
1,my partner cousin e often invites us over for ...,partner cousin and her husband frequently invi...,my partner cousin e often invites us over for ...,0.506667,0.273973,0.426667
2,it was my st birthday. my gf went with me to a...,had drunken sex with my gf only to find out sh...,got drunk and had sex with my gf she was on he...,0.764706,0.625,0.764706


In [20]:
# Print each evaluated sample in full: source text, reference summary, prediction.
for sample_text, reference, prediction in zip(df_results_sample['text'],
                                               df_results_sample['summary'],
                                               df_results_sample['summary_pred']):
  print('Text:')
  print(sample_text)
  print()
  print('Reference summary')
  print(reference)
  print()
  print('Predicted summary')
  print(prediction)
  print()

Text:
first things first yes we are teenagers young and immature or whatever. please help me out anyways ok so. there was this girl i had a crush on since freshman year of high school. we have been really good friends since then and she means a lot to me. this summer i decided nothing was going to happen because it had already been over a year and nothing happened. after i made this decision i met another girl over summer. she was really pretty and i got her number. i asked her out and we have been together for a little over two months. i love her. she is been amazing to me so nice and caring and just seems like the perfect girl. but i have recently been told that the girl i had a long crush on the first girl is heartbroken that i am with this new girl and it breaks her heart a little more each time she sees me because she loves me. her friend told me this. and finding this out broke my heart i felt so bad because i liked her too. finding this out has made me think that i could make he