In [23]:
!pip3 install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [24]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessorList, MinLengthLogitsProcessor

#importing the pretrained large language Model
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

In [25]:
#Setting the prompt
prompt = "Today I believe we can finally"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

In [26]:
# Calculate perplexity for the generated sequence
def get_perplexity(generated_sequence):
    input_ids_generated = tokenizer(generated_sequence, return_tensors='pt').input_ids
    logits = model(input_ids_generated).logits
    loss_fn = torch.nn.CrossEntropyLoss()
    loss = loss_fn(logits.view(-1, logits.size(-1)), input_ids_generated.view(-1))
    perplexity = torch.exp(loss)
    print("perplexity: ", perplexity.item())


# Calculate likelihood for the generated sequence
def get_likelihoold(generated_sequence):
    input_ids_generated = tokenizer(generated_sequence, return_tensors='pt').input_ids
    logits = model(input_ids_generated).logits
    log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
    total_log_prob = 0
    for i in range(len(input_ids_generated[0]) - 1):
        total_log_prob += log_probs[0, i, input_ids_generated[0, i+1]]
    likelihood = torch.exp(total_log_prob)
    print("likelihood: ",likelihood.item())

In [27]:
#generate up to 30 tokens
#using default decoder and setting the parameters
#Greedy Search
torch.manual_seed(1)
outputs1 = model.generate(input_ids, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence1 = tokenizer.batch_decode(outputs1, skip_special_tokens=True)
print("Using Greedy Search Decoding:")
print(generated_sequence1[0])

get_perplexity(generated_sequence1[0])
get_likelihoold(generated_sequence1[0])

Using Greedy Search Decoding:
Today I believe we can finally get to the point where we can make a difference in the lives of the people of the United States of America.

perplexity:  14537.9736328125
likelihood:  9.171019205060845e-24


In [28]:
#Beam Search
torch.manual_seed(1)
outputs2 = model.generate(input_ids, num_beams=3, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence2 = tokenizer.batch_decode(outputs2, skip_special_tokens=True)
print("Using Beam Search Decoding:")
print(generated_sequence2[0])

get_perplexity(generated_sequence2[0])
get_likelihoold(generated_sequence2[0])

Using Beam Search Decoding:
Today I believe we can finally get to the point where we can make a difference in the lives of all of our children.

I believe that
perplexity:  10024.8564453125
likelihood:  2.1889369590488264e-24


In [29]:
#Top k sampling
torch.manual_seed(1)
outputs3 = model.generate(input_ids, top_k=4, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence3 = tokenizer.batch_decode(outputs3, skip_special_tokens=True)
print("Using top-k sample Decoding:")
print(generated_sequence3[0])

get_perplexity(generated_sequence3[0])
get_likelihoold(generated_sequence3[0])

Using top-k sample Decoding:
Today I believe we can finally get a fair shake with the current state of affairs, with the new leadership and with the new direction of this country,
perplexity:  8688.6572265625
likelihood:  1.3506946665613972e-34


In [30]:
#Top p sampling
torch.manual_seed(1)
outputs4 = model.generate(input_ids, top_p=0.8, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=30)
generated_sequence4 = tokenizer.batch_decode(outputs4, skip_special_tokens=True)
print("Using top-p Decoding:")
print(generated_sequence4[0])

get_perplexity(generated_sequence4[0])
get_likelihoold(generated_sequence4[0])

Using top-p Decoding:
Today I believe we can finally get a fair shake of these political issues. I think it's really important that we move forward in a bipartisan way,
perplexity:  10947.0478515625
likelihood:  6.463449259511617e-34


In [31]:
!pip3 install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [32]:
!pip3 install py7zr

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [33]:
#Task Dialogue Summerization
#importing dataset
import datasets
data = datasets.load_dataset('samsum')



  0%|          | 0/3 [00:00<?, ?it/s]

In [34]:
len(data['test'])

819

In [35]:
#Set the number of samples from test set for task
data_test = data['test'][:50]
data_test['dialogue']

["Hannah: Hey, do you have Betty's number?\nAmanda: Lemme check\nHannah: <file_gif>\nAmanda: Sorry, can't find it.\nAmanda: Ask Larry\nAmanda: He called her last time we were at the park together\nHannah: I don't know him well\nHannah: <file_gif>\nAmanda: Don't be shy, he's very nice\nHannah: If you say so..\nHannah: I'd rather you texted him\nAmanda: Just text him 🙂\nHannah: Urgh.. Alright\nHannah: Bye\nAmanda: Bye bye",
 "Eric: MACHINE!\r\nRob: That's so gr8!\r\nEric: I know! And shows how Americans see Russian ;)\r\nRob: And it's really funny!\r\nEric: I know! I especially like the train part!\r\nRob: Hahaha! No one talks to the machine like that!\r\nEric: Is this his only stand-up?\r\nRob: Idk. I'll check.\r\nEric: Sure.\r\nRob: Turns out no! There are some of his stand-ups on youtube.\r\nEric: Gr8! I'll watch them now!\r\nRob: Me too!\r\nEric: MACHINE!\r\nRob: MACHINE!\r\nEric: TTYL?\r\nRob: Sure :)",
 "Lenny: Babe, can you help me with something?\r\nBob: Sure, what's up?\r\nLenny

In [36]:
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")

tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")

input_ids = tokenizer(data_test['dialogue'], return_tensors="pt", padding=True, truncation=True).input_ids


In [37]:
#Dialogue Summerization
#Greedy search
outputs1 = model.generate(input_ids, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=60)
test_output1 = tokenizer.batch_decode(outputs1, skip_special_tokens=True)
print(test_output1)

["Hannah doesn't know Betty's number. She texted Larry last time they were at the park together.", 'Rob and Eric like MACHINE. Eric will watch them now. ', 'Bob will help Lenny with a pair of purple trousers.', 'Emma will be home soon. Will will pick her up when he gets home.', 'Ollie and Jane are going to meet on Friday at 6 pm after their courses. Ollie will bring tea and pastries.', 'Benjamin and Elliot are meeting at the entrance to the conference hall at 2 pm to discuss their subject of research. Hilary will take the keys and take a nap.', 'Payton is looking for good clothes to buy from. He likes browsing, looking in the mirror and seeing how he looks. He usually buys clothes from 2 or 3 of them.', "Rita is falling asleep at work. She doesn't want to work because she's tired. ", 'Beatrice is in town, shopping. They have nice scarfs in the shop next to the church. She will get a scarf.', 'Eric is coming to the wedding with his brother. He has a lot to do at home, plus he has to tak

In [38]:
#Beam Search
outputs2 = model.generate(input_ids, num_beams=3, do_sample=False, pad_token_id = model.config.eos_token_id, max_length=50)
test_output2 = tokenizer.batch_decode(outputs2, skip_special_tokens=True)
print(test_output2)

["Hannah is looking for Betty's number. ", 'Rob and Eric are watching MACHINE. Eric likes the train and Rob likes the stand-up.', 'Bob will help Lenny with a pair of purple trousers. Lenny will buy the first pair and the third pair.', 'Emma will be home soon. Will will pick her up when he gets home.', 'Ollie is in Warsaw. Jane is going to Morocco on Friday. Ollie will bring tea and pastries.', 'Benjamin and Elliot are meeting at the entrance to the conference hall at 2 pm to discuss their subject of research. Hilary will take the keys and take a nap.', 'Payton is looking for good clothes to buy from. He likes browsing, looking in the mirror and seeing how he looks. He usually buys clothes from 2 or 3 of them.', "Rita is tired at work. She is nodding off at her keyboard hoping that the boss doesn't notice. ", 'Beatrice is in town, shopping. They have nice scarfs in the shop next to the church. She will get a scarf.', 'Eric is coming to the wedding with his brother. He has a lot to do at

In [39]:
#Top k sampling
outputs3 = model.generate(input_ids, top_k=4, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=50)
test_output3 = tokenizer.batch_decode(outputs3, skip_special_tokens=True)
print(test_output3)

["Hannah doesn't know Betty's number. She texted Larry last time they were at the park together.", 'Rob and Eric are watching MACHINE. Eric likes the train and Rob likes the stand-up.', 'Bob will help Lenny with a pair of purple trousers. Lenny will buy the first pair and the third pair.', 'Emma will be home soon and will tell Will when she gets home.', 'Jane is in Warsaw. Ollie will meet her on Friday at 6 pm after her courses.', 'Benjamin and Elliot are meeting at the entrance to the conference hall at 2 pm to discuss their subject of research. Hilary will take the keys and take a nap.', 'Payton likes shopping. He usually buys clothes from 2 or 3 of the sites. He prefers browsing, looking in the mirror and seeing how he looks.', "Rita is tired. She is nodding off at her keyboard hoping that the boss doesn't notice. ", 'Beatrice is in town shopping. They have nice scarfs in the shop next to the church. She will get a scarf.', 'Eric is coming to the wedding with his brother. ', "Wanda 

In [40]:
#Top p sampling
outputs4 = model.generate(input_ids, top_p=0.8, do_sample=True, pad_token_id = model.config.eos_token_id, max_length=50)
test_output4 = tokenizer.batch_decode(outputs4, skip_special_tokens=True)
print(test_output4)

["Hannah doesn't have Betty's number. She texted Larry last time they were at the park together.", 'Rob and Eric are going to watch MACHINE. ', 'Bob will help Lenny with a pair of purple trousers. Lenny will buy the first pair and the third pair.', 'Emma will be home soon and will tell Will when she gets home.', 'Jane is back in Warsaw. Ollie and Jane will have lunch on Friday at 6 pm after their courses.', 'Benjamin and Elliot are meeting at the entrance to the conference hall at 2 pm to discuss their subject of research. Hilary will take the keys and go take a nap.', 'Payton likes shopping. He prefers browsing, looking in the mirror and seeing how he looks.', "Rita is tired. She is looking at the clock and there's still 4 hours of this drudgery to go. ", "Beatrice is shopping. They have nice scarfs in the shop next to the church. She doesn't need a scarf.", 'Eric is coming to the wedding with his brother. ', "Wanda and Gina will make a party on Friday. Gina will take her father's car

In [41]:
!pip3 install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [42]:
!pip3 install rouge_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [43]:
#content overlap metrics
import evaluate
#ROUGE, or Recall-Oriented Understudy for Gisting Evaluation, is a set of metrics and a software package used for evaluating automatic summarization and machine translation software in natural language processing. The metrics compare an automatically produced summary or translation against a reference or a set of references (human-produced) summary or translation.

def get_rogue(predictions, references):
    rouge = evaluate.load('rouge')
    results = rouge.compute(predictions=predictions, references=references, rouge_types=['rougeL'], use_aggregator=False)
    print(results)
    return results

print("ROGUE scores for greedy, beam, top-k, top-p respectively: ")
rougeL1 = get_rogue(test_output1, data_test['summary'])
rougeL2 = get_rogue(test_output2, data_test['summary'])
rougeL3 = get_rogue(test_output3, data_test['summary'])
rougeL4 = get_rogue(test_output4, data_test['summary'])


ROGUE scores for greedy, beam, top-k, top-p respectively: 
{'rougeL': [0.35294117647058826, 0.1818181818181818, 0.20512820512820515, 0.56, 0.25, 0.3218390804597701, 0.26229508196721313, 0.20689655172413793, 0.24489795918367344, 0.1702127659574468, 0.5217391304347826, 0.409090909090909, 0.35897435897435903, 0.3076923076923077, 0.32142857142857145, 0.14814814814814817, 0.6666666666666666, 0.4444444444444445, 0.1694915254237288, 0.5, 0.2571428571428572, 0.25531914893617025, 0.10526315789473685, 0.33333333333333326, 0.21739130434782608, 0.2777777777777778, 0.17391304347826086, 0.2631578947368421, 0.28, 0.19999999999999998, 0.16666666666666669, 0.15000000000000002, 0.7999999999999999, 0.2, 0.23333333333333336, 0.19047619047619044, 0.2424242424242424, 0.3, 0.4285714285714285, 0.35999999999999993, 0.09090909090909091, 0.25, 0.3880597014925373, 0.5263157894736842, 0.34782608695652173, 0.5185185185185186, 0.16666666666666666, 0.5555555555555556, 0.27272727272727276, 0.15789473684210525]}
{'roug

In [44]:
!pip3 install bert_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [45]:
#BERTScore leverages the pre-trained contextual embeddings from BERT and matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall, and F1 measure, which can be useful for evaluating different language generation tasks.
from evaluate import load

bertscore = load("bertscore")

results1 = bertscore.compute(predictions=test_output1, references=data_test['summary'], lang="en")
results2 = bertscore.compute(predictions=test_output2, references=data_test['summary'], lang="en")
results3 = bertscore.compute(predictions=test_output3, references=data_test['summary'], lang="en")
results4 = bertscore.compute(predictions=test_output4, references=data_test['summary'], lang="en")

print(results1)
print(results2)
print(results3)
print(results4)

{'precision': [0.9006770849227905, 0.8934921622276306, 0.9034929275512695, 0.9231748580932617, 0.8815594911575317, 0.9246814250946045, 0.8971133828163147, 0.8945333957672119, 0.8994042873382568, 0.8926117420196533, 0.9400418996810913, 0.9473161697387695, 0.9361087083816528, 0.9107649326324463, 0.897466242313385, 0.8762146234512329, 0.925209105014801, 0.9262514710426331, 0.837440013885498, 0.9235422611236572, 0.8928099870681763, 0.9416254758834839, 0.8761715292930603, 0.9187290668487549, 0.9046262502670288, 0.9124165773391724, 0.8951793313026428, 0.9096864461898804, 0.9446636438369751, 0.889872133731842, 0.8787652254104614, 0.875282883644104, 0.9713918566703796, 0.9066333770751953, 0.9099509716033936, 0.8644928336143494, 0.8991104960441589, 0.909485936164856, 0.9012040495872498, 0.9133927822113037, 0.8647751212120056, 0.9101158380508423, 0.921950101852417, 0.8976958990097046, 0.9170227646827698, 0.9339193105697632, 0.8968753814697266, 0.9773547649383545, 0.9047852754592896, 0.8730415105

In [46]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [47]:
#CSV generate
import csv

rows = zip(data_test['dialogue'],
           test_output1, rougeL1['rougeL'], results1['f1'],
           test_output2, rougeL2['rougeL'], results2['f1'],
           test_output3, rougeL3['rougeL'], results3['f1'],
           test_output4, rougeL4['rougeL'], results4['f1'],)

with open('/content/drive/Shareddrives/IronCodeBenders/HW3/DecodedSamples.csv', "w") as f:
    writer = csv.writer(f)
    for row in rows:
        writer.writerow(row)