In [1]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    StoppingCriteriaList,
    MaxLengthCriteria,
    AutoModelForSeq2SeqLM,
)
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)

# prompt = "Today I believe we can finally"
prompt = "It might be possible to"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Greedy Search TASK 1

outputs = model.generate(input_ids, num_beams=1, do_sample=False, max_length=30,return_dict_in_generate=True, output_scores=True)
transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")



["It might be possible to get a better understanding of the nature of the problem, but it's not clear how to do that.\n\nThe problem"]
|   651 |  get     | -3.341 | 3.54%
|   257 |  a       | -1.936 | 14.43%
|  1365 |  better  | -3.269 | 3.80%
|  4547 |  understanding | -1.486 | 22.63%
|   286 |  of      | -0.151 | 86.03%
|   262 |  the     | -1.247 | 28.73%
|  3450 |  nature  | -4.258 | 1.41%
|   286 |  of      | -0.083 | 92.02%
|   262 |  the     | -1.289 | 27.57%
|  1917 |  problem | -4.050 | 1.74%
|    11 | ,        | -1.583 | 20.54%
|   475 |  but     | -0.740 | 47.72%
|   340 |  it      | -1.707 | 18.14%
|   338 | 's       | -1.193 | 30.33%
|   407 |  not     | -1.242 | 28.88%
|  1598 |  clear   | -2.580 | 7.58%
|   703 |  how     | -1.511 | 22.07%
|   284 |  to      | -1.713 | 18.03%
|   466 |  do      | -1.116 | 32.77%
|   326 |  that    | -1.147 | 31.77%
|    13 | .        | -0.663 | 51.54%
|   198 | 
        | -1.139 | 32.01%
|   198 | 
        | -0.002 | 99.84%
|   464 | The

  beam_indices[beam_indices_mask] = 0
  beam_indices[beam_indices_mask] = 0


In [3]:
#Beam Search TASK 1

outputs = model.generate(
    input_ids,
    num_beams=3,
    early_stopping=True,
    max_length=30,
    return_dict_in_generate=True,
    output_scores=True,
)
transition_scores = model.compute_transition_scores(
    outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
)
# If you sum the generated tokens' scores and apply the length penalty, you'll get the sequence scores.
# Tip: recomputing the scores is only guaranteed to match with `normalize_logits=False`. Depending on the
# use case, you might want to recompute it with `normalize_logits=True`.
output_length = input_length + np.sum(transition_scores.numpy() < 0, axis=1)
length_penalty = model.generation_config.length_penalty
reconstructed_scores = transition_scores.sum(axis=1) / (output_length**length_penalty)

print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

# input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

["It might be possible to get a better understanding of how the system works, but it's not going to be easy.\n\nIn the meantime,"]
|   651 |  get     | -3.341 | 3.54%
|   257 |  a       | -1.936 | 14.43%
|  1365 |  better  | -3.269 | 3.80%
|  4547 |  understanding | -1.486 | 22.63%
|   286 |  of      | -0.151 | 86.02%
|   703 |  how     | -1.795 | 16.61%
|   262 |  the     | -1.791 | 16.67%
|  1080 |  system  | -4.110 | 1.64%
|  2499 |  works   | -0.338 | 71.32%
|    11 | ,        | -1.423 | 24.09%
|   475 |  but     | -0.623 | 53.65%
|   340 |  it      | -1.656 | 19.08%
|   338 | 's       | -0.925 | 39.67%
|   407 |  not     | -1.277 | 27.90%
|  1016 |  going   | -2.657 | 7.02%
|   284 |  to      | -0.006 | 99.38%
|   307 |  be      | -1.001 | 36.75%
|  2562 |  easy    | -1.028 | 35.76%
|    13 | .        | -0.485 | 61.58%
|   198 | 
        | -1.203 | 30.02%
|   198 | 
        | -0.001 | 99.91%
|   818 | In       | -3.445 | 3.19%
|   262 |  the     | -1.601 | 20.17%
| 14324 |  meanti

In [4]:
#Top-K Sampling TASK 1

outputs = model.generate(input_ids, do_sample=True, max_length=30, return_dict_in_generate=True, output_scores=True, top_k=50)

print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

["It might be possible to make a new game with these features but it would be very hard to do. You'd have to learn something new about coding"]
|   787 |  make    | -2.534 | 7.93%
|   257 |  a       | -1.169 | 31.07%
|   649 |  new     | -3.294 | 3.71%
|   983 |  game    | -2.451 | 8.62%
|   351 |  with    | -1.891 | 15.09%
|   777 |  these   | -3.688 | 2.50%
|  3033 |  features | -2.157 | 11.57%
|   475 |  but     | -3.300 | 3.69%
|   340 |  it      | -1.692 | 18.42%
|   561 |  would   | -1.570 | 20.81%
|   307 |  be      | -1.086 | 33.75%
|   845 |  very    | -1.930 | 14.51%
|  1327 |  hard    | -1.324 | 26.61%
|   284 |  to      | -0.510 | 60.08%
|   466 |  do      | -1.742 | 17.52%
|    13 | .        | -3.164 | 4.22%
|   921 |  You     | -4.072 | 1.70%
|  1549 | 'd       | -2.950 | 5.23%
|   423 |  have    | -0.649 | 52.24%
|   284 |  to      | -0.034 | 96.68%
|  2193 |  learn   | -4.167 | 1.55%
|  1223 |  something | -4.189 | 1.52%
|   649 |  new     | -0.601 | 54.84%
|   546 |  a

In [5]:
#Top-P Sampling TASK 1 
#Need to figure out a good value for top_p
#top_p = 4 gave good values but it's supposed to be bounded (0,1)

outputs = model.generate(input_ids, top_p = 0.92, top_k=0, do_sample=True, max_length=30, return_dict_in_generate=True, output_scores=True)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

['It might be possible to play rythm from here:\n\nThe man sat on the bench and gave the people what they asked for. This is']
|   711 |  play    | -5.174 | 0.57%
|   374 |  r       | -8.634 | 0.02%
| 34853 | ythm     | -3.827 | 2.18%
|   422 |  from    | -5.755 | 0.32%
|   994 |  here    | -4.302 | 1.35%
|    25 | :        | -4.675 | 0.93%
|   198 | 
        | -0.955 | 38.50%
|   198 | 
        | 0.000 | 100.00%
|   464 | The      | -3.849 | 2.13%
|   582 |  man     | -7.734 | 0.04%
|  3332 |  sat     | -7.012 | 0.09%
|   319 |  on      | -1.629 | 19.62%
|   262 |  the     | -0.707 | 49.29%
|  7624 |  bench   | -2.561 | 7.72%
|   290 |  and     | -2.089 | 12.38%
|  2921 |  gave    | -4.772 | 0.85%
|   262 |  the     | -2.039 | 13.01%
|   661 |  people  | -6.264 | 0.19%
|   644 |  what    | -3.606 | 2.72%
|   484 |  they    | -0.325 | 72.25%
|  1965 |  asked   | -2.079 | 12.51%
|   329 |  for     | -0.614 | 54.12%
|    13 | .        | -0.703 | 49.50%
|   770 |  This    | -4.817 | 0.81%

  likelihood += np.log(-1 * score.detach().numpy())


**************************************************
Task 2 starts here

In [6]:
#TASK 2
#Load the dataset

from datasets import load_dataset
import torch
dataset = load_dataset("cnn_dailymail", "3.0.0")

max_length = 100

Found cached dataset cnn_dailymail (C:/Users/zebzi/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
100%|██████████| 3/3 [00:00<00:00,  9.37it/s]


In [7]:
#TASK 2 (switched from a downstream BERT because things were failing)
tokenizer2 = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model2 = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")#, pad_token_id=tokenizer.eos_token_id)

In [8]:
# input_ids = dataset["test"][:50]
tokenizerInputs =  []
inputGroundTruths = []
for i in range(50):
    tokenizerInputs.append(dataset["test"][i]["article"])
    inputGroundTruths.append(dataset["test"][i]["highlights"])
print(tokenizerInputs[0])

encoder_input_ids = torch.LongTensor()

for i in range(50):
    encoder_input_ids = torch.cat((encoder_input_ids, tokenizer2(tokenizerInputs[i], return_tensors="pt", padding='max_length', truncation=True).input_ids))
print(tokenizer2.decode(encoder_input_ids[0]))

(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, sa

In [9]:
#Task 2 
outputs2 = []
#Greedy Search
outputs2.append(model2.generate(encoder_input_ids, num_beams=1, do_sample=False, max_length=max_length, no_repeat_ngram_size=2))

In [10]:
#Beam Search
outputs2.append(model2.generate(encoder_input_ids, num_beams=3, early_stopping=True, max_length=max_length, no_repeat_ngram_size=2))

In [11]:
#Top-K Sampling
outputs2.append(model2.generate(encoder_input_ids, do_sample=True, top_k=30, max_length=max_length, no_repeat_ngram_size=2))

#max_length = 50 with top_k>=40 gave the "index out of range in self" error

In [12]:
#Top-P Sampling
outputs2.append(model2.generate(encoder_input_ids, do_sample=True, top_p=0.8, top_k=0, max_length=max_length, no_repeat_ngram_size=2)) 

#max_length = 30 with top_p>=0.8 gave the "index out of range in self" error
#max_length = 50 with top_p>=0.4 gave the "index out of range in self" error

In [13]:
print(outputs[0][0])
print(tokenizer2.decode(outputs2[0][0], skip_special_tokens=True))
print(tokenizer2.decode(outputs2[1][0], skip_special_tokens=True))
print(tokenizer2.decode(outputs2[2][0], skip_special_tokens=True))
print(tokenizer2.decode(outputs2[3][0], skip_special_tokens=True))

tensor([ 1026,  1244,   307,  1744,   284,   711,   374, 34853,   422,   994,
           25,   198,   198,   464,   582,  3332,   319,   262,  7624,   290,
         2921,   262,   661,   644,   484,  1965,   329,    13,   770,   318])
The Palestinian Authority becomes the 123rd member of the International Criminal Court. The move gives the court jurisdiction over alleged crimes in Palestinian territories. Israel and the United States opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki says it is a move toward greater justice.
Palestinian Authority becomes 123rd member of the International Criminal Court. The move gives the court jurisdiction over alleged crimes in Palestinian territories. Israel and the United States opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki said it was a move toward greater justice.
The Palestinian Authority becomes the 123rd member of the International Criminal Cou

In [14]:
from evaluate import load
meteorScore = load("meteor")
bertScore = load("bertscore")
rougeScore = load("rouge")
perplexityScore = load("perplexity", module_type="metric")


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\zebzi\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\zebzi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\zebzi\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [24]:
references = []
predictions = []
resultsMeteor = []
resultsBert = []
resultsRouge = []
resultsPerplexity = []
groundPerplexity = []

# references.append([inputGroundTruths[0], inputGroundTruths[0], inputGroundTruths[0], inputGroundTruths[0]])
# predictions.append([tokenizer2.decode(outputs2[0][0], skip_special_tokens=True), tokenizer2.decode(outputs2[1][0], skip_special_tokens=True), tokenizer2.decode(outputs2[2][0], skip_special_tokens=True), tokenizer2.decode(outputs2[3][0], skip_special_tokens=True)])
# results.append([roguescore.compute(predictions=predictions[0], references=references[0])])
# results.append([roguescore.compute(predictions=[tokenizer2.decode(outputs2[0][0], skip_special_tokens=True)], references=[inputGroundTruths[0]])])
# results.append([roguescore.compute(predictions=["hey"], references=[["hey"]])])

# print(predictions[0])
# print(references[0])
# print(results)
# print(outputs2)
with open("generatedText12.txt", "w") as f:
    for i in range(len(outputs2[0])):
        references.append([inputGroundTruths[i], inputGroundTruths[i], inputGroundTruths[i], inputGroundTruths[i]])
        predictions.append([tokenizer2.decode(outputs2[0][i], skip_special_tokens=True), tokenizer2.decode(outputs2[1][i], skip_special_tokens=True), tokenizer2.decode(outputs2[2][i], skip_special_tokens=True), tokenizer2.decode(outputs2[3][i], skip_special_tokens=True)])
        resultsBert.append([bertScore.compute(predictions=predictions[i], references=references[i], model_type="t5-base")])
        resultsRouge.append([rougeScore.compute(predictions=predictions[i], references=references[i], use_aggregator=False)])
        resultsPerplexity.append([perplexityScore.compute(predictions=predictions[i], model_id="facebook/bart-large-cnn")])
        groundPerplexity.append([perplexityScore.compute(predictions=[references[i][0]], model_id="facebook/bart-large-cnn")])
        # print(predictions[i][0])
        # print(references[i][0])
        temp = []
        for j in range(4):
            temp.append([meteorScore.compute(predictions=[predictions[i][j]], references=[references[i][j]])])
        resultsMeteor.append(temp)

        f.write(f"Ground Truth: {inputGroundTruths[i]} \n")
        f.write(f"Greedy Search: {predictions[i][0]} \n")
        f.write(f"Beam Search: {predictions[i][1]} \n")
        f.write(f"Top-K Sampling: {predictions[i][2]} \n")
        f.write(f"Top-P Sampling: {predictions[i][3]} \n")
        f.write(f"Meteor Score: {resultsMeteor[i]} \n")
        f.write(f"Bert Score: {resultsBert[i]} \n")
        f.write(f"Rouge Score: {resultsRouge[i]} \n")
        f.write(f"Perplexty: {resultsPerplexity[i]} \n")
        f.write(f"Ground Perplexity {groundPerplexity[i]} \n")
        f.write("\n \n")

#generatedText is with no capped max_length so it defaulted to 20 I think top_p=0.92 and top_k=50
#generatedText2 is with max_length = 30
#generatedText3 is also max_length = 30 but top_p=0.7 ~ 6mins of runtime
#generatedText4 is at max_length = 40 with top_p=0.7 ~5mins of runtime thanks to CPU acceleration
#generatedText5 is at max_length = 50 with top_p=0.3 and top_k=25 ~Xmins of runtime but model_max_length = 1024

Some weights of the model checkpoint at facebook/bart-large-cnn were not used when initializing BartForCausalLM: ['model.encoder.layers.4.fc1.bias', 'model.encoder.layers.2.self_attn.v_proj.weight', 'model.encoder.layers.7.fc2.bias', 'model.encoder.layers.1.self_attn_layer_norm.bias', 'model.encoder.layers.6.self_attn.q_proj.bias', 'model.encoder.layers.0.fc1.weight', 'model.encoder.layers.2.self_attn.out_proj.weight', 'model.encoder.layers.0.final_layer_norm.weight', 'model.encoder.layers.10.self_attn.v_proj.bias', 'model.encoder.layers.3.self_attn.k_proj.bias', 'model.encoder.layers.0.self_attn.k_proj.weight', 'model.encoder.layers.0.self_attn.out_proj.weight', 'model.encoder.layers.4.self_attn.q_proj.bias', 'model.encoder.layers.8.final_layer_norm.bias', 'model.encoder.layers.7.self_attn.v_proj.weight', 'model.encoder.layers.3.self_attn.v_proj.weight', 'model.encoder.layers.5.final_layer_norm.weight', 'model.encoder.layers.8.fc1.bias', 'model.encoder.layers.3.final_layer_norm.weight

In [54]:
import csv

with open("outputs4.csv", 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Ground Truth", "Greedy Search", "Beam Search", "Top-K Sampling", "Top-P Sampling", " ", " ", "Meteor Score", "BERT Precision Score", "Rouge1 Score", "Perplexity of Predicitons", "Perplexity of Ground Truth"])

    for i in range(len(predictions)):
        writer.writerow([inputGroundTruths[i], predictions[i][0], predictions[i][1], predictions[i][2], predictions[i][3], " ", " ", "Meteor Greedy: " + str(resultsMeteor[i][0][0]["meteor"]) + "\n Meteor Beam: " + str(resultsMeteor[i][1][0]["meteor"]) + "\n Meteor Top-K: " + str(resultsMeteor[i][2][0]["meteor"]) + "\n Meteor Top-P: " + str(resultsMeteor[i][3][0]["meteor"]), "BERT Greedy Precision " + str(resultsBert[i][0]["precision"][0]) + "\nBERT Beam Precision " + str(resultsBert[i][0]["precision"][1]) + "\nBERT Top-K Precision " + str(resultsBert[i][0]["precision"][2]) + "\nBERT Top-P Precision " + str(resultsBert[i][0]["precision"][3]),"Rouge1 Greedy: " + str(resultsRouge[i][0]["rouge1"][0]) + "\nRouge1 Beam: " + str(resultsRouge[i][0]["rouge1"][1]) + "\nRouge1 Top-K: " + str(resultsRouge[i][0]["rouge1"][2]) + "\nRouge1 Top-P: " + str(resultsRouge[i][0]["rouge1"][3]), "Greedy Preplexity: " + str(resultsPerplexity[i][0]["perplexities"][0]) + "\nBeam Preplexity: " + str(resultsPerplexity[i][0]["perplexities"][1]) + "\nTop-K Preplexity: " + str(resultsPerplexity[i][0]["perplexities"][2]) + "\nTop-P Preplexity: " + str(resultsPerplexity[i][0]["perplexities"][3]), "Ground Truth Perplexity: " + str(groundPerplexity[i][0]["perplexities"][0])])

In [None]:
for i in range(10):
    print(resultsMeteor[i])
    print(resultsBert[i])