In [1]:
!pip3 install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessorList, MinLengthLogitsProcessor

#importing the pretrained large language Model
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

In [3]:
#Setting the prompt
prompt = "Today I believe we can finally"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

In [4]:
# Calculate perplexity for the generated sequence
def get_perplexity(generated_sequence):
    input_ids_generated = tokenizer(generated_sequence, return_tensors='pt').input_ids
    logits = model(input_ids_generated).logits
    loss_fn = torch.nn.CrossEntropyLoss()
    loss = loss_fn(logits.view(-1, logits.size(-1)), input_ids_generated.view(-1))
    perplexity = torch.exp(loss)
    print("perplexity: ", perplexity.item())


# Calculate likelihood for the generated sequence
def get_likelihoold(generated_sequence):
    input_ids_generated = tokenizer(generated_sequence, return_tensors='pt').input_ids
    logits = model(input_ids_generated).logits
    log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
    total_log_prob = 0
    for i in range(len(input_ids_generated[0]) - 1):
        total_log_prob += log_probs[0, i, input_ids_generated[0, i+1]]
    likelihood = torch.exp(total_log_prob)
    print("likelihood: ",likelihood.item())

In [5]:
#generate up to 30 tokens
#using default decoder and setting the parameters
#Greedy Search
torch.manual_seed(1)
outputs1 = model.generate(input_ids, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence1 = tokenizer.batch_decode(outputs1, skip_special_tokens=True)
print("Using Greedy Search Decoding:")
print(generated_sequence1[0])

get_perplexity(generated_sequence1[0])
get_likelihoold(generated_sequence1[0])

Using Greedy Search Decoding:
Today I believe we can finally get to the point where we can make a difference in the lives of the people of the United States of America.

perplexity:  14537.9736328125
likelihood:  9.171019205060845e-24


In [6]:
#Beam Search
torch.manual_seed(1)
outputs2 = model.generate(input_ids, num_beams=3, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence2 = tokenizer.batch_decode(outputs2, skip_special_tokens=True)
print("Using Beam Search Decoding:")
print(generated_sequence2[0])

get_perplexity(generated_sequence2[0])
get_likelihoold(generated_sequence2[0])

Using Beam Search Decoding:
Today I believe we can finally get to the point where we can make a difference in the lives of all of our children.

I believe that
perplexity:  10024.8564453125
likelihood:  2.1889369590488264e-24


In [7]:
#Top k sampling
torch.manual_seed(1)
outputs3 = model.generate(input_ids, top_k=4, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence3 = tokenizer.batch_decode(outputs3, skip_special_tokens=True)
print("Using top-k sample Decoding:")
print(generated_sequence3[0])

get_perplexity(generated_sequence3[0])
get_likelihoold(generated_sequence3[0])

Using top-k sample Decoding:
Today I believe we can finally get a fair shake with the current state of affairs, with the new leadership and with the new direction of this country,
perplexity:  8688.6572265625
likelihood:  1.3506946665613972e-34


In [8]:
#Top p sampling
torch.manual_seed(1)
outputs4 = model.generate(input_ids, top_p=0.8, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence4 = tokenizer.batch_decode(outputs4, skip_special_tokens=True)
print("Using top-p Decoding:")
print(generated_sequence4[0])

get_perplexity(generated_sequence4[0])
get_likelihoold(generated_sequence4[0])

Using top-p Decoding:
Today I believe we can finally get a fair shake of these political issues. I think it's really important that we move forward in a bipartisan way,
perplexity:  10947.0478515625
likelihood:  6.463449259511617e-34


In [9]:
!pip3 install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
!pip3 install py7zr

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [11]:
#Task Dialogue Summerization
#importing dataset
import datasets
data = datasets.load_dataset('samsum')



  0%|          | 0/3 [00:00<?, ?it/s]

In [12]:
len(data['test'])

819

In [13]:
#Set the number of samples from test set for task
data_test = data['test'][:50]
data_test['dialogue']

["Hannah: Hey, do you have Betty's number?\nAmanda: Lemme check\nHannah: <file_gif>\nAmanda: Sorry, can't find it.\nAmanda: Ask Larry\nAmanda: He called her last time we were at the park together\nHannah: I don't know him well\nHannah: <file_gif>\nAmanda: Don't be shy, he's very nice\nHannah: If you say so..\nHannah: I'd rather you texted him\nAmanda: Just text him 🙂\nHannah: Urgh.. Alright\nHannah: Bye\nAmanda: Bye bye",
 "Eric: MACHINE!\r\nRob: That's so gr8!\r\nEric: I know! And shows how Americans see Russian ;)\r\nRob: And it's really funny!\r\nEric: I know! I especially like the train part!\r\nRob: Hahaha! No one talks to the machine like that!\r\nEric: Is this his only stand-up?\r\nRob: Idk. I'll check.\r\nEric: Sure.\r\nRob: Turns out no! There are some of his stand-ups on youtube.\r\nEric: Gr8! I'll watch them now!\r\nRob: Me too!\r\nEric: MACHINE!\r\nRob: MACHINE!\r\nEric: TTYL?\r\nRob: Sure :)",
 "Lenny: Babe, can you help me with something?\r\nBob: Sure, what's up?\r\nLenny

In [14]:
from transformers import AutoModelForSeq2SeqLM

tokenizer.pad_token = tokenizer.eos_token
input_ids = tokenizer(data_test['dialogue'], return_tensors="pt", padding=True, truncation=True).input_ids

tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")

model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")

In [15]:
#Dialogue Summerization
#Greedy search
outputs1 = model.generate(input_ids, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=50)
test_output1 = tokenizer.batch_decode(outputs1, skip_special_tokens=True)
print(test_output1)

['creat as costumes in 9. There is a lot of change around tastekey and hurry.', 'There is a lot of people around Legion. They are not sure if they are right or left.', 'The football June won Re by us aroundTOR and PetroleumK League left Walluau arm by them aroundTOR as a result of a football match.', 'Narendra is around thratt as a tacticnum. There are sports byann disorders by the group around Thratt. There is a lot of sexual activity in the group.', 'There is a lot of people around. There is a group of people. They are all in the same group. There are also some people in the group.', "Airports as costumesside in January 30 look limited. They are around fever Airways as a result of modernization. They're around the number 7 Player 8 Ballu Issue for left Korea.", 'Ful function family is around Primary. They are around Primary as well.', 'There is a lot of sports for us around their cur as left Korea earnings. There is also a family system for them around the cur. The family system is n

In [16]:
#Beam Search
outputs2 = model.generate(input_ids, num_beams=3, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=50)
test_output2 = tokenizer.batch_decode(outputs2, skip_special_tokens=True)
print(test_output2)

['creat as costumes in 9. There is a lot of change around tastekey and hurry.', 'There is a lot of people around Legion. They are not sure if they are right or left.', 'The football June won Re by us around TOR as well as PetroleumK League. The football June left Walluau arm by them aroundTOR as a result of a football match. They are going to meet on Tuesday.', 'There is a tacticnum in January. There is a new tacticnum.', 'There is a lot of people around. There is also a group of people who are in the same place. There are also some people in the group.', 'Airports as costumesside in January 30 look limited. They are around fever Airways as a result of modernization.', 'Ful function family is around Primary. They are around Primary as well.', 'There are sports for us around their cur as left ask lot. There is a family system for them around the cur. The family system was nominated by Jackson Forb.', 'There is a family 7 Program for Star. There is an International familylaw for Jersey f

In [17]:
#Top k sampling
outputs3 = model.generate(input_ids, top_k=4, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=50)
test_output3 = tokenizer.batch_decode(outputs3, skip_special_tokens=True)
print(test_output3)

['creat as costumes in 9. There is a lot of change around tastekey and hurry.', 'They are around Legion as a lot of people around them. They are also around the club.', 'The football June won Re by us aroundTOR as well as PetroleumK League. They left Walluau arm by them around TOR. They are going to meet on Tuesday.', "There is a new tacticnum in January. There is a lot of sports byann disorders by the group around thratt as well as some of them around Dj as well. There's a lot going on in January and there's a", 'There is a lot of people around. There is a group of people who are not in the group. There are also some people from the group who are in it.', "Airports as costumesside in January 30 look limited. They are around fever Airways as a result of modernization. They're around the number 7 Player 8 Ballu Issue for left Korea.", 'Ful function family is around Primary. They are around Primary as well.', 'The cur as left Koreael pipes marking for reversible Cindy. The cur is left Ko

In [18]:
#Top p sampling
outputs4 = model.generate(input_ids, top_p=0.8, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=50)
test_output4 = tokenizer.batch_decode(outputs4, skip_special_tokens=True)
print(test_output4)

["There is a lot of new costumes in the new season. There's a lot around tastekey and hurry. There is also a lot going on around them.", 'There are 40 people around Legion. They are all from the same family.', 'The football June won Re by us aroundTOR and PetroleumK League left Walluau arm by them around TOR as well as 6 countries surrendered to them.', 'There is a tacticnum in January. There is a new tacticnum.', "There is a lot of people around March. There is a group of people who don't agree with them.", "Airports as costumesside in January 30 look limited. They are around fever Airways as well as a number of countries. They're around the number 7 Player 8 ballu Issue for left Korea.", 'Ful function family is around Primary. They are around Primary as well.', 'They are celebrating their anniversary. They are celebrating 15 years of independence. They will celebrate their anniversary on January 15.', 'The family 7 Program for Star is going to be in Jersey first.', 'Therean is bringi

In [23]:
!pip3 install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: evaluate
Successfully installed evaluate-0.4.0


In [25]:
!pip3 install rouge_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24954 sha256=0615bb9cc8fc37c3a269bcb4dffc70be0d331b2508a9d27dee3d9daf7a4ea78f
  Stored in directory: /root/.cache/pip/wheels/9b/3d/39/09558097d3119ca0a4d462df68f22c6f3c1b345ac63a09b86e
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [37]:
#content overlap metrics
import evaluate
#ROUGE, or Recall-Oriented Understudy for Gisting Evaluation, is a set of metrics and a software package used for evaluating automatic summarization and machine translation software in natural language processing. The metrics compare an automatically produced summary or translation against a reference or a set of references (human-produced) summary or translation.

def get_rogue(predictions, references):
    rouge = evaluate.load('rouge')
    results = rouge.compute(predictions=predictions, references=references, rouge_types=['rougeL'], use_aggregator=False)
    print(results)
    return results

print("ROGUE scores for greedy, beam, top-k, top-p respectively: ")
rougeL1 = get_rogue(test_output1, data_test['summary'])
rougeL2 = get_rogue(test_output2, data_test['summary'])
rougeL3 = get_rogue(test_output3, data_test['summary'])
rougeL4 = get_rogue(test_output4, data_test['summary'])


ROGUE scores for greedy, beam, top-k, top-p respectively: 
{'rougeL': [0.0, 0.06666666666666667, 0.07547169811320754, 0.0, 0.11267605633802816, 0.0909090909090909, 0.04761904761904761, 0.0425531914893617, 0.1132075471698113, 0.05263157894736842, 0.11538461538461539, 0.05128205128205128, 0.13636363636363635, 0.07407407407407408, 0.05128205128205128, 0.047619047619047616, 0.0, 0.14634146341463414, 0.0, 0.05405405405405406, 0.1, 0.0, 0.12244897959183673, 0.0, 0.1, 0.10526315789473684, 0.06666666666666667, 0.13333333333333333, 0.03773584905660377, 0.1276595744680851, 0.12903225806451615, 0.0, 0.0, 0.16666666666666669, 0.03571428571428571, 0.16666666666666666, 0.0, 0.0, 0.125, 0.0, 0.05, 0.11764705882352941, 0.10526315789473685, 0.0, 0.0, 0.0, 0.057971014492753624, 0.13793103448275862, 0.0, 0.06666666666666667]}
{'rougeL': [0.0, 0.06666666666666667, 0.06060606060606061, 0.0, 0.08450704225352113, 0.02666666666666667, 0.04761904761904761, 0.09302325581395349, 0.11764705882352941, 0.0476190476

In [29]:
!pip3 install bert_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 KB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert_score
Successfully installed bert_score-0.3.13


In [31]:
#BERTScore leverages the pre-trained contextual embeddings from BERT and matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall, and F1 measure, which can be useful for evaluating different language generation tasks.
from evaluate import load

bertscore = load("bertscore")

results1 = bertscore.compute(predictions=test_output1, references=data_test['summary'], lang="en")
results2 = bertscore.compute(predictions=test_output2, references=data_test['summary'], lang="en")
results3 = bertscore.compute(predictions=test_output3, references=data_test['summary'], lang="en")
results4 = bertscore.compute(predictions=test_output4, references=data_test['summary'], lang="en")

print(results1)
print(results2)
print(results3)
print(results4)

{'precision': [0.8512061238288879, 0.8538724780082703, 0.8141111135482788, 0.83831787109375, 0.8623378276824951, 0.8118817806243896, 0.8599495887756348, 0.8231745958328247, 0.8285024166107178, 0.8392717242240906, 0.84035325050354, 0.8663447499275208, 0.8512718677520752, 0.8545701503753662, 0.8322279453277588, 0.8223929405212402, 0.8287087678909302, 0.8674830198287964, 0.8342915773391724, 0.8173035383224487, 0.8356386423110962, 0.7968676686286926, 0.8470924496650696, 0.8017774820327759, 0.8517205119132996, 0.8537673354148865, 0.8411814570426941, 0.8161758184432983, 0.8336976766586304, 0.8807542324066162, 0.8399636149406433, 0.8183467984199524, 0.871692955493927, 0.871242880821228, 0.8408223390579224, 0.8412129878997803, 0.8152247667312622, 0.8335275650024414, 0.8668780326843262, 0.7972748279571533, 0.8352828025817871, 0.8591717481613159, 0.8599562644958496, 0.855533242225647, 0.8236486911773682, 0.834702730178833, 0.8140403032302856, 0.8708922863006592, 0.8192157745361328, 0.80088633298

In [38]:
#CSV generate
import csv

rows = zip(data_test['dialogue'],
           test_output1, rougeL1['rougeL'], results1['f1'],
           test_output2, rougeL2['rougeL'], results2['f1'],
           test_output3, rougeL3['rougeL'], results3['f1'],
           test_output4, rougeL4['rougeL'], results4['f1'],)

with open('/content/drive/Shareddrives/IronCodeBenders/HW3/DecodedSamples.csv', "w") as f:
    writer = csv.writer(f)
    for row in rows:
        writer.writerow(row)