In [None]:
# Install necessary packages
!pip install datasets
!pip install sentencepiece
!pip install transformers==4.21.3 

In [None]:
# Load the AnswerSumm dataset and work out which torch device is available.
import torch
from datasets import load_dataset

# Prefer the first CUDA GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Fetch the dataset published under "alexfabbri/answersumm".
answersumm = load_dataset("alexfabbri/answersumm")



  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Build a single passage for training example 1 out of all of its answers.
example_answers = answersumm["train"][1]["answers"]

# Each answer is stored as a list of sentence records; glue the sentence texts
# back together with single spaces to recover the full answer.
all_full_answers = [
    " ".join(sentence["text"] for sentence in answer["sents"])
    for answer in example_answers
]

# All answers joined by single spaces, with no trailing space.
passage = " ".join(all_full_answers)

# Number of examples in the training split (displayed as the cell output).
len(answersumm["train"])

2783

In [None]:
# Summarisation model: facebook/bart-large-cnn
from transformers import pipeline

# The pinned transformers release takes an integer device index:
# 0 selects the first CUDA GPU, -1 keeps the model on the CPU.
pipeline_device = 0 if torch.cuda.is_available() else -1

summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=pipeline_device,
)

In [None]:
# Summarise the passage built for training example 1.
# `passage` already has its trailing space removed, so it is passed whole;
# slicing it again with [:-1] would cut off its last real character.
# truncation=True clips inputs longer than the model's maximum input length
# instead of letting the model fail on them.
summary_text = summarizer(
    passage,
    truncation=True,
    max_length=100,
    min_length=5,
    do_sample=False,
)[0]['summary_text']
print(summary_text)

To imitate her writing style read as much of her writing as you can. Check her correspondence, and cut-and-paste from it. Create yourself a library of templates from her correspondence. Create a listing of her favorite expressions.


In [None]:
# Collect the reference summaries the dataset provides for training example 1.
reference_summaries = answersumm["train"]["summaries"][1]

# Every reference summary is a sequence of text pieces; join each one into a
# single space-separated string.
summaries = [" ".join(list(pieces)) for pieces in reference_summaries]

# Use the first reference summary for scoring (shown as the cell output).
reference_text = summaries[0]
reference_text

"Read as much of her writing as you can for inspiration and make a list of her favourite expressions that you can cut and paste into your writing. Write a draft letter and ask for her opinion on it.  An important step when trying to imitate your boss' writing style would be to read her previous correspondence for inspiration. From this, you can also try to identify commonly used expressions and idioms or even create a template for future correspondence. Of course, you may also wish to get her feedback on any draft letters that you do write."

In [None]:
!pip install rouge

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Score the generated summary for example 1 against its reference summary.
from rouge import Rouge

# Scorer instance; later cells reuse it under the name `rouge`.
rouge = Rouge()

# get_scores takes the hypothesis first and the reference second.
example_scores = rouge.get_scores(summary_text, reference_text)
example_scores

[{'rouge-1': {'r': 0.23076923076923078,
   'p': 0.5769230769230769,
   'f': 0.329670325588697},
  'rouge-2': {'r': 0.08695652173913043,
   'p': 0.23529411764705882,
   'f': 0.12698412304358792},
  'rouge-l': {'r': 0.23076923076923078,
   'p': 0.5769230769230769,
   'f': 0.329670325588697}}]

In [None]:
# Initialise parameters
# Number of training examples to extract from the dataset; used as the loop
# bound when the passages and reference summaries are built.
MAX_COUNT = 100

In [None]:
# Build the model inputs and the reference summaries for the first MAX_COUNT
# training examples.
#
# NOTE: despite its name, `questions` does not hold question text: each entry is
# all answers of one example concatenated into a single passage. The name is
# kept because later cells refer to it.
train_split = answersumm["train"]

questions = []
summaries = []
for i in range(MAX_COUNT):
  # Fetch the row once. Indexing the split by column first
  # (train_split["summaries"][i]) would rebuild the whole column on every
  # iteration.
  example = train_split[i]

  # Rebuild each answer from its sentence texts, then join all answers of the
  # example into one space-separated passage.
  all_full_answers = [
    " ".join(sentence["text"] for sentence in answer["sents"])
    for answer in example["answers"]
  ]
  questions.append(" ".join(all_full_answers))

  # Keep exactly one reference per example so that summaries[i] lines up with
  # questions[i]. The last reference summary of the example is used.
  # NOTE(review): the single-example cell earlier in the notebook scores against
  # the first reference summary instead - confirm which one is intended.
  reference_summaries = example["summaries"]
  if not reference_summaries:
    # Fail loudly rather than silently reusing the previous example's summary.
    raise ValueError(f"training example {i} has no reference summary")
  summaries.append(" ".join(reference_summaries[-1]))

In [None]:
from tqdm import tqdm

In [None]:
# Run the model
# Summarise every extracted passage. Iterating over `questions` itself (rather
# than a hard-coded count) keeps this loop in step with however many passages
# were extracted.
model_text = []
for passage_text in tqdm(questions):
  # truncation=True clips over-long passages to the model's maximum input
  # length instead of letting the model fail on them.
  result = summarizer(passage_text, truncation=True, max_length=100, min_length=5, do_sample=False)
  model_text.append(result[0]['summary_text'])

In [None]:
# Calculate Rouge Scores
# Mean ROUGE-1 / ROUGE-2 / ROUGE-L F-scores, reported as percentages.
# Each generated summary is scored against the reference at the same index.
# The totals are divided by the number of pairs actually scored, so the mean
# stays correct even when fewer than MAX_COUNT summaries were generated.
n_scored = min(len(model_text), len(summaries))
if n_scored == 0:
  raise ValueError("no (model summary, reference summary) pairs to score")

avg_r1 = 0
avg_r2 = 0
avg_rl = 0
# zip stops at the shorter list, so exactly n_scored pairs are visited.
for hypothesis, reference in zip(model_text, summaries):
  scores = rouge.get_scores(hypothesis, reference)[0]
  avg_r1 += scores["rouge-1"]['f']
  avg_r2 += scores["rouge-2"]['f']
  avg_rl += scores["rouge-l"]['f']

# Convert the summed F-scores into mean percentages.
avg_r1 = avg_r1 * 100.0 / n_scored
avg_r2 = avg_r2 * 100.0 / n_scored
avg_rl = avg_rl * 100.0 / n_scored
avg_r1,avg_r2,avg_rl

(3.524440287429318, 0.7791491352038669, 3.069827975152606)

(24.737984077420254, 5.344360780333648, 21.933849116934454)


In [None]:
# Persist the generated summaries to model_text.txt, each one followed by a
# blank line.
with open('model_text.txt', 'w') as out_file:
  out_file.writelines("%s\n\n" % generated for generated in model_text)