In [None]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    StoppingCriteriaList,
    MaxLengthCriteria,
    AutoModelForSeq2SeqLM,
)
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)

# prompt = "Today I believe we can finally"
prompt = "It might be possible to"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

In [None]:
#Greedy Search TASK 1

outputs = model.generate(input_ids, num_beams=1, do_sample=False, max_length=30,return_dict_in_generate=True, output_scores=True)
transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

In [None]:
#Beam Search TASK 1

outputs = model.generate(
    input_ids,
    num_beams=3,
    early_stopping=True,
    max_length=30,
    return_dict_in_generate=True,
    output_scores=True,
)
transition_scores = model.compute_transition_scores(
    outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
)
# If you sum the generated tokens' scores and apply the length penalty, you'll get the sequence scores.
# Tip: recomputing the scores is only guaranteed to match with `normalize_logits=False`. Depending on the
# use case, you might want to recompute it with `normalize_logits=True`.
output_length = input_length + np.sum(transition_scores.numpy() < 0, axis=1)
length_penalty = model.generation_config.length_penalty
reconstructed_scores = transition_scores.sum(axis=1) / (output_length**length_penalty)

print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

# input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

In [None]:
#Top-K Sampling TASK 1

outputs = model.generate(input_ids, do_sample=True, max_length=30, return_dict_in_generate=True, output_scores=True, top_k=50)

print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

In [None]:
#Top-P Sampling TASK 1 
#Need to figure out a good value for top_p
#top_p = 4 gave good values but it's supposed to be bounded (0,1)

outputs = model.generate(input_ids, top_p = 0.92, top_k=0, do_sample=True, max_length=30, return_dict_in_generate=True, output_scores=True)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))

input_length = 1 if model.config.is_encoder_decoder else input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]

transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)

perplexity = 0
likelihood = 0

for tok, score in zip(generated_tokens[0], transition_scores[0]):
    # | token | token string | logits | probability
    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.detach().numpy():.3f} | {np.exp(score.detach().numpy()):.2%}")
    likelihood += np.log(-1 * score.detach().numpy())
    perplexity += np.log(np.exp(score.detach().numpy()))

print(f"Length of the output: {generated_tokens.shape[1]}")
print(f"Perplexity: {np.exp((-1/ generated_tokens.shape[1]) * perplexity)}")
print(f"likelihood: {likelihood}")

**************************************************
Task 2 starts here

In [None]:
#TASK 2
#Load the dataset

from datasets import load_dataset
import torch
dataset = load_dataset("cnn_dailymail", "3.0.0")

max_length = 30

In [None]:
#TASK 2 (switched from a downstream BERT because things were failing)
tokenizer2 = AutoTokenizer.from_pretrained("t5-base")
model2 = AutoModelForSeq2SeqLM.from_pretrained("t5-base", pad_token_id=tokenizer.eos_token_id)

In [None]:
# input_ids = dataset["test"][:50]
tokenizerInputs =  []
inputGroundTruths = []
for i in range(50):
    tokenizerInputs.append(dataset["test"][i]["article"])
    inputGroundTruths.append(dataset["test"][i]["highlights"])
print(tokenizerInputs[0])

encoder_input_ids = torch.LongTensor()

for i in range(50):
    encoder_input_ids = torch.cat((encoder_input_ids, tokenizer2(tokenizerInputs[i], return_tensors="pt", padding='max_length', truncation=True).input_ids))
print(tokenizer2.decode(encoder_input_ids[0]))

In [None]:
#Task 2 
outputs2 = []
#Greedy Search
outputs2.append(model2.generate(encoder_input_ids, num_beams=1, do_sample=False, max_length=max_length, no_repeat_ngram_size=2))

In [None]:
#Beam Search
outputs2.append(model2.generate(encoder_input_ids, num_beams=3, early_stopping=True, max_length=max_length, no_repeat_ngram_size=2))

In [None]:
#Top-K Sampling
outputs2.append(model2.generate(encoder_input_ids, do_sample=True, top_k=30, max_length=max_length, no_repeat_ngram_size=2))

#max_length = 50 with top_k>=40 gave the "index out of range in self" error

In [None]:
#Top-P Sampling
outputs2.append(model2.generate(encoder_input_ids, do_sample=True, top_p=0.8, top_k=0, max_length=max_length, no_repeat_ngram_size=2)) 

#max_length = 30 with top_p>=0.8 gave the "index out of range in self" error
#max_length = 50 with top_p>=0.4 gave the "index out of range in self" error

In [None]:
print(outputs[0][0])
print(tokenizer2.decode(outputs2[0][0], skip_special_tokens=True))
print(tokenizer2.decode(outputs2[1][0], skip_special_tokens=True))
print(tokenizer2.decode(outputs2[2][0], skip_special_tokens=True))
print(tokenizer2.decode(outputs2[3][0], skip_special_tokens=True))

In [43]:
from evaluate import load
roguescore = load("meteor")

Downloading builder script: 100%|██████████| 6.81k/6.81k [00:00<00:00, 6.80MB/s]
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\zebzi\AppData\Roaming\nltk_data...
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\zebzi\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\zebzi\AppData\Roaming\nltk_data...


In [62]:
references = []
predictions = []
results = []

# references.append([inputGroundTruths[0], inputGroundTruths[0], inputGroundTruths[0], inputGroundTruths[0]])
# predictions.append([tokenizer2.decode(outputs2[0][0], skip_special_tokens=True), tokenizer2.decode(outputs2[1][0], skip_special_tokens=True), tokenizer2.decode(outputs2[2][0], skip_special_tokens=True), tokenizer2.decode(outputs2[3][0], skip_special_tokens=True)])
# results.append([roguescore.compute(predictions=predictions[0], references=references[0])])
# results.append([roguescore.compute(predictions=[tokenizer2.decode(outputs2[0][0], skip_special_tokens=True)], references=[inputGroundTruths[0]])])
# results.append([roguescore.compute(predictions=["hey"], references=[["hey"]])])

# print(predictions[0])
# print(references[0])
# print(results)
# print(outputs2)
with open("generatedText10.txt", "w") as f:
    for i in range(len(outputs2[0])):
        references.append([inputGroundTruths[i], inputGroundTruths[i], inputGroundTruths[i], inputGroundTruths[i]])
        predictions.append([tokenizer2.decode(outputs2[0][i], skip_special_tokens=True), tokenizer2.decode(outputs2[1][i], skip_special_tokens=True), tokenizer2.decode(outputs2[2][i], skip_special_tokens=True), tokenizer2.decode(outputs2[3][i], skip_special_tokens=True)])
        # print(predictions[i][0])
        # print(references[i][0])
        temp = []
        for j in range(4):
            temp.append([roguescore.compute(predictions=[predictions[i][j]], references=[references[i][j]])])
        results.append(temp)

        f.write(f"Ground Truth: {inputGroundTruths[i]} \n")
        f.write(f"Greedy Search: {predictions[i][0]} \n")
        f.write(f"Beam Search: {predictions[i][1]} \n")
        f.write(f"Top-K Sampling: {predictions[i][2]} \n")
        f.write(f"Top-P Sampling: {predictions[i][3]} \n")
        f.write(f"Scores: {results[i]}")
        f.write("\n \n")

#generatedText is with no capped max_length so it defaulted to 20 I think top_p=0.92 and top_k=50
#generatedText2 is with max_length = 30
#generatedText3 is also max_length = 30 but top_p=0.7 ~ 6mins of runtime
#generatedText4 is at max_length = 40 with top_p=0.7 ~5mins of runtime thanks to CPU acceleration
#generatedText5 is at max_length = 50 with top_p=0.3 and top_k=25 ~Xmins of runtime but model_max_length = 1024

In [59]:
for i in range(10):
    print(results[i])

[[{'meteor': 0.08474576271186442}], [{'meteor': 0.08450704225352114}], [{'meteor': 0.09749303621169916}], [{'meteor': 0.14044943820224717}]]
[[{'meteor': 0.11893087928390034}], [{'meteor': 0.20450957471167402}], [{'meteor': 0.09164969450101833}], [{'meteor': 0.08230452674897121}]]
[[{'meteor': 0.11778698588090851}], [{'meteor': 0.08264462809917356}], [{'meteor': 0.08196721311475409}], [{'meteor': 0.05555555555555555}]]
[[{'meteor': 0.0}], [{'meteor': 0.0}], [{'meteor': 0.07125890736342043}], [{'meteor': 0.02380952380952381}]]
[[{'meteor': 0.08512585812356978}], [{'meteor': 0.0453514739229025}], [{'meteor': 0.12443438914027148}], [{'meteor': 0.0894854586129754}]]
[[{'meteor': 0.11673151750972763}], [{'meteor': 0.27542892156862747}], [{'meteor': 0.1372549019607843}], [{'meteor': 0.09727626459143969}]]
[[{'meteor': 0.16827783745076977}], [{'meteor': 0.16827783745076977}], [{'meteor': 0.08488351092649449}], [{'meteor': 0.1296743800039055}]]
[[{'meteor': 0.16710961789188644}], [{'meteor': 0