# 1. Installing Transformers and Importing Dependencies

In [77]:
!pip install transformers



In [78]:
!pip install sentencepiece



In [79]:
!pip install bert-extractive-summarizer



In [80]:
from transformers import pipeline
from bs4 import BeautifulSoup
import requests
import pandas as pd

# 2. Loading Summarization Pipeline

In [81]:
from summarizer import Summarizer, TransformerSummarizer
from transformers import pipeline

## GPT-2

In [82]:
GPT2_model = TransformerSummarizer(transformer_type="GPT2",transformer_model_key="gpt2-medium")

def gptSum(body):
  return ''.join(GPT2_model(body, min_length=60))

## BERT

In [83]:
bert_model = Summarizer()

def bertSum(body):
  return ''.join(bert_model(body, min_length=60))

## BART

In [109]:
bart_model = pipeline('summarization', model='facebook/bart-large-cnn', tokenizer='facebook/bart-large-cnn')

def bartSum(body):
   summary=bart_model(body, min_length = round(0.1 * len(body.split(' '))), max_length = round(0.2 * len(body.split(' '))), do_sample=False, truncation=True)
   return summary[0]['summary_text']

# 3. Get Blog Post

In [85]:
URL = "https://medium.com/@kevinnokiawriting/why-reading-is-part-of-success-6c9cea0c1edb"

In [86]:
r = requests.get(URL)

In [87]:
r.text

'<!doctype html><html lang="en"><head><title data-rh="true">Why Reading Is Part of Success. I have read for almost 4 years. Even… | by Kevin Nokia | Medium</title><meta data-rh="true" charset="utf-8"/><meta data-rh="true" name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1,maximum-scale=1"/><meta data-rh="true" name="theme-color" content="#000000"/><meta data-rh="true" name="twitter:app:name:iphone" content="Medium"/><meta data-rh="true" name="twitter:app:id:iphone" content="828256236"/><meta data-rh="true" property="al:ios:app_name" content="Medium"/><meta data-rh="true" property="al:ios:app_store_id" content="828256236"/><meta data-rh="true" property="al:android:package" content="com.medium.reader"/><meta data-rh="true" property="fb:app_id" content="542599432471018"/><meta data-rh="true" property="og:site_name" content="Medium"/><meta data-rh="true" property="og:type" content="article"/><meta data-rh="true" property="article:published_time" content="2023-06-07

In [88]:
soup = BeautifulSoup(r.text, 'html.parser')
results = soup.find_all(['h1', 'p'])

In [89]:
results

[<h1 class="pw-post-title et eu ev be ew ex ey ez fa fb fc fd fe ff fg fh fi fj fk fl fm fn fo fp fq fr fs ft fu fv bj" data-testid="storyTitle" id="4dd7">Why Reading Is Part of Success</h1>,
 <p class="be b gr gs bj"><a class="af ag ah ai aj ak al am an ao ap aq ar gt" data-testid="authorName" href="/@kevinnokiawriting?source=post_page-----6c9cea0c1edb--------------------------------" rel="noopener follow">Kevin Nokia</a></p>,
 <p class="be b gr gs gw"><span><a class="gx gy ah ai aj ak al am an ao ap aq ar gz ha hb" href="/m/signin?actionUrl=https%3A%2F%2Fmedium.com%2F_%2Fsubscribe%2Fuser%2F5ea8423844cb&amp;operation=register&amp;redirect=https%3A%2F%2Fmedium.com%2F%40kevinnokiawriting%2Fwhy-reading-is-part-of-success-6c9cea0c1edb&amp;user=Kevin+Nokia&amp;userId=5ea8423844cb&amp;source=post_page-5ea8423844cb----6c9cea0c1edb---------------------post_header-----------" rel="noopener follow">Follow</a></span></p>,
 <p class="be b jm z gw"><span class="iw">--</span></p>,
 <p class="be b j

In [90]:
text = [result.text for result in results]
ARTICLE = ' '.join(text)

In [91]:
ARTICLE

'Why Reading Is Part of Success Kevin Nokia Follow -- 73 Listen Share I have read for almost 4 years. Even though my reading skills are still decent, I found it interesting that it separates readers and non-readers. What I’m going to tell you about is my opinion. This is not a generalization because I know there’s someone out there who doesn’t need reading for their part of success. I always feel enjoyment after reading a couple of books. I love reading as much as I love writing. Reading is like my input, and my output is through my writing. The articles that I wrote are not perfect. I know that I need to learn and improve more through reading and, of course, evaluation. My journey started with reading. I will never get this far without reading. I haven’t succeeded yet, but I know that my reading skills will be part of my success in the future. It’s not because of how the knowledge is implemented in my brain from reading, even though it’s one of the parts of success. It’s imbecility. W

# 4. Data Preprocessing

## Chunk Article

In [92]:
ARTICLE = ARTICLE.replace('.', '.<eos>')
ARTICLE = ARTICLE.replace('!', '!<eos>')
ARTICLE = ARTICLE.replace('?', '?<eos>')
sentences = ARTICLE.split('<eos>')

In [93]:
sentences

['Why Reading Is Part of Success Kevin Nokia Follow -- 73 Listen Share I have read for almost 4 years.',
 ' Even though my reading skills are still decent, I found it interesting that it separates readers and non-readers.',
 ' What I’m going to tell you about is my opinion.',
 ' This is not a generalization because I know there’s someone out there who doesn’t need reading for their part of success.',
 ' I always feel enjoyment after reading a couple of books.',
 ' I love reading as much as I love writing.',
 ' Reading is like my input, and my output is through my writing.',
 ' The articles that I wrote are not perfect.',
 ' I know that I need to learn and improve more through reading and, of course, evaluation.',
 ' My journey started with reading.',
 ' I will never get this far without reading.',
 ' I haven’t succeeded yet, but I know that my reading skills will be part of my success in the future.',
 ' It’s not because of how the knowledge is implemented in my brain from reading, eve

In [94]:
max_chunk = 500
current_chunk = 0 
chunks = []
for sentence in sentences:
    if len(chunks) == current_chunk + 1: 
        if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
            chunks[current_chunk].extend(sentence.split(' '))
        else:
            current_chunk += 1
            chunks.append(sentence.split(' '))
    else:
        print(current_chunk)
        chunks.append(sentence.split(' '))

for chunk_id in range(len(chunks)):
    chunks[chunk_id] = ' '.join(chunks[chunk_id])

0


In [95]:
len(chunks)

2

In [96]:
chunks

['Why Reading Is Part of Success Kevin Nokia Follow -- 73 Listen Share I have read for almost 4 years.  Even though my reading skills are still decent, I found it interesting that it separates readers and non-readers.  What I’m going to tell you about is my opinion.  This is not a generalization because I know there’s someone out there who doesn’t need reading for their part of success.  I always feel enjoyment after reading a couple of books.  I love reading as much as I love writing.  Reading is like my input, and my output is through my writing.  The articles that I wrote are not perfect.  I know that I need to learn and improve more through reading and, of course, evaluation.  My journey started with reading.  I will never get this far without reading.  I haven’t succeeded yet, but I know that my reading skills will be part of my success in the future.  It’s not because of how the knowledge is implemented in my brain from reading, even though it’s one of the parts of success.  It’s

# 5. Generating Summaries

In [97]:
gptResult = []
bertResult = []
bartResult = []

In [110]:
i = 1
for article in chunks:
  gptResult.append(gptSum(article))
  bertResult.append(bertSum(article))
  bartResult.append(bartSum(article))
  i = i + 1

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


In [112]:
# Assuming gptResult, bertResult, and bartResult have different lengths
max_length = max(len(gptResult), len(bertResult), len(bartResult))

# Pad shorter lists with a placeholder value (e.g., an empty string)
gptResult += [''] * (max_length - len(gptResult))
bertResult += [''] * (max_length - len(bertResult))
bartResult += [''] * (max_length - len(bartResult))

In [124]:
# Create a DataFrame to store the data
data = {'gpt-summary':gptResult,'bert-summary':bertResult,'bart-summary':bartResult}
df = pd.DataFrame(data)
# Save the data to a CSV file
df.to_csv("article.csv", index=False)
print("Data saved to 'article.csv'")

Data saved to 'article.csv'


In [125]:
df.head(10)

Unnamed: 0,gpt-summary,bert-summary,bart-summary
0,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...
1,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...,"Even so, making yourself sit and read is actua..."
2,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...,"Kevin Nokia: Reading is like my input, and my ..."
3,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...,Kevin Nokia says reading is part of success be...
4,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...,
5,"Even so, making yourself sit and read is actua...","Even so, making yourself sit and read is actua...",
6,Why Reading Is Part of Success Kevin Nokia Fol...,Why Reading Is Part of Success Kevin Nokia Fol...,
7,"Even so, making yourself sit and read is actua...","Even so, making yourself sit and read is actua...",


# 6. Rouge and Blue Score Calculation

In [114]:
!pip install rouge-score



In [115]:
gptRouge = []
bertRouge = []
bartRouge = []

In [116]:
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

In [117]:
for i in range(0,len(chunks)):
  gptRouge.append(scorer.score(chunks[i], gptResult[i]))
  bertRouge.append(scorer.score(chunks[i], bertResult[i]))
  bartRouge.append(scorer.score(chunks[i], bartResult[i]))

In [118]:
print('GPT Rouge Scores: ', gptRouge)
print('BERT Rouge Scores: ', bertRouge)
print('BART Rouge Scores: ', bartRouge)

GPT Rouge Scores:  [{'rouge1': Score(precision=1.0, recall=0.15778688524590165, fmeasure=0.27256637168141595), 'rougeL': Score(precision=1.0, recall=0.15778688524590165, fmeasure=0.27256637168141595)}, {'rouge1': Score(precision=0.7402597402597403, recall=0.16521739130434782, fmeasure=0.2701421800947867), 'rougeL': Score(precision=0.38961038961038963, recall=0.08695652173913043, fmeasure=0.14218009478672985)}]
BERT Rouge Scores:  [{'rouge1': Score(precision=1.0, recall=0.1557377049180328, fmeasure=0.2695035460992908), 'rougeL': Score(precision=1.0, recall=0.1557377049180328, fmeasure=0.2695035460992908)}, {'rouge1': Score(precision=0.618421052631579, recall=0.13623188405797101, fmeasure=0.22327790973871733), 'rougeL': Score(precision=0.34210526315789475, recall=0.07536231884057971, fmeasure=0.12351543942992875)}]
BART Rouge Scores:  [{'rouge1': Score(precision=1.0, recall=0.1557377049180328, fmeasure=0.2695035460992908), 'rougeL': Score(precision=1.0, recall=0.1557377049180328, fmeasur

In [119]:
gptFmeasure = []
bertFmeasure = []
bartFmeasure = []

In [120]:
for i in range(0,len(chunks)):
  gptFmeasure.append(gptRouge[i]['rougeL'].fmeasure)
  bertFmeasure.append(bertRouge[i]['rougeL'].fmeasure)
  bartFmeasure.append(bartRouge[i]['rougeL'].fmeasure)

In [121]:
from statistics import mean

print("Cumulative F Measure")
print("GPT-2 : ", round(mean(gptFmeasure), 6))
print("BERT : ", round(mean(bertFmeasure), 6))
print("BART : ", round(mean(bartFmeasure), 6))

Cumulative F Measure
GPT-2 :  0.207373
BERT :  0.196509
BART :  0.259117


### Here, on evaluation, we can see that BART outperforms all other models (GPT-2 and BERT). Thus, we will be leveraging BART in our web application to summarize more such articles.

# 7. Output to Text File

In [130]:
# output_file_path = "bart_summary.txt"
# with open(output_file_path, "w", encoding="utf-8") as file:
#     for summary in bartResult:
#         file.write(summary + "\n")
# print(f"BART summary saved to {output_file_path}")

BART summary saved to bart_summary.txt
