## Install

In [1]:
!pip install --upgrade huggingface_hub



In [2]:
!pip install transformers
!pip install accelerate>=0.20.1
!pip install transformers[torch]



In [3]:
!pip install evaluate
!pip install torchmetrics
!pip install rouge_score

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19 (from evaluate)
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting pyarrow-hotfix (from datasets>=2.0.0->ev

In [4]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config, EvalPrediction
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

## Load Datasets

In [7]:
!git clone https://github.com/XingshuoXiao/cs221_love_poem

Cloning into 'cs221_love_poem'...
remote: Enumerating objects: 42, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (36/36), done.[K
remote: Total 42 (delta 15), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (42/42), 102.07 KiB | 849.00 KiB/s, done.
Resolving deltas: 100% (15/15), done.


In [21]:
train_file_path = "/content/cs221_love_poem/dataset/lovepoem200.txt"
valid_file_path = "/content/cs221_love_poem/dataset/lovepoem25.txt"
output_dir = "/content/cs221_love_poem/models/perplexity/"

In [9]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [10]:
train_dataset = TextDataset(
        tokenizer=tokenizer,
        file_path=train_file_path,
        block_size=128)

val_dataset = TextDataset(
        tokenizer=tokenizer,
        file_path=valid_file_path,
        block_size=128)

data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )



## Train

In [11]:
# Add compute_metrics function

import evaluate
import numpy as np
import math
from scipy.special import softmax
from sklearn.metrics import log_loss
import torch
import torch.nn.functional as F


def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]

    return preds, labels

def compute_metrics(eval_preds):
    metric_bleu = evaluate.load("bleu")
    metric_rouge = evaluate.load("rouge")

    preds, labels = eval_preds

    # Perplexity
    logits_np = torch.from_numpy(preds)
    labels_np = torch.from_numpy(labels)
    loss = F.cross_entropy(logits_np.view(-1, tokenizer.vocab_size), labels_np.view(-1))
    perplexity = math.exp(loss)

    preds = preds.argmax(axis=-1)


    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Some simple post-processing
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result_bleu = metric_bleu.compute(predictions=decoded_preds, references=decoded_labels)
    result_rouge = metric_rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    result_rouge["bleu"] = result_bleu["bleu"]
    result_rouge["perplexity"] = perplexity
    result_rouge["calculated_loss"] = loss

    return result_rouge

In [28]:
training_args = TrainingArguments(
          output_dir=output_dir,
          overwrite_output_dir=False,
          per_device_train_batch_size=8,
          num_train_epochs=5.0,
          # save_steps=500,
          logging_steps = 25,
          load_best_model_at_end=True,
          evaluation_strategy='epoch',
          save_strategy='epoch',
          metric_for_best_model='perplexity',
          greater_is_better=False,
          save_safetensors=False,
          push_to_hub=False,
          learning_rate=1e-5
          )

In [29]:
trainer = Trainer(
          model=model,
          args=training_args,
          data_collator=data_collator,
          train_dataset=train_dataset,
          eval_dataset=val_dataset,
          compute_metrics=compute_metrics
          )

In [30]:
trainer.train()
trainer.save_model()

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Bleu,Perplexity,Calculated Loss,Runtime,Samples Per Second,Steps Per Second
1,3.7608,4.566612,0.382568,0.051258,0.230364,0.374088,0.017754,12090.117029,9.400144,5.3977,5.187,0.741
2,3.9827,4.536182,0.385467,0.051529,0.230373,0.375457,0.016062,10597.91529,9.268413,4.5412,6.166,0.881
3,3.9078,4.537906,0.38896,0.052174,0.232501,0.378612,0.01296,11012.334903,9.306771,4.5876,6.103,0.872
4,3.9109,4.535024,0.388164,0.050574,0.231026,0.37814,0.01288,10745.586642,9.28225,4.4424,6.303,0.9
5,3.8646,4.53533,0.388805,0.051849,0.232234,0.378412,0.012947,10828.718957,9.289957,4.3698,6.408,0.915


## Generation

In [31]:
def generate_poem(sequence, max_length, model_path):
    model = GPT2LMHeadModel.from_pretrained(model_path)
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    ids = tokenizer.encode(f'{sequence}', return_tensors='pt')
    final_outputs = model.generate(
        ids,
        do_sample=True,
        max_length=max_length,
        pad_token_id=model.config.eos_token_id,
        top_k=50,
        top_p=0.95,
    )
    return tokenizer.decode(final_outputs[0], skip_special_tokens=True)

In [40]:
sequence = "Love"
max_len = 200
model_path = "/content/cs221_love_poem/models/perplexity"
poem = generate_poem(sequence, max_len, model_path)
print(poem)

Love at first sight but after the long and dark nights I wish I could be yours
That it is like a summer evening
That we should not be separated
That the wild flowers bloom within a long summer sky
That I dream it never ending I could not be you 


## Evaluation

### Perplexity

In [41]:
oracle_path = "/content/cs221_love_poem/dataset/oracle10.txt"

with open(oracle_path, "r",) as file:
    content = file.read()

oracle = [poem.strip() for poem in content.split('\n\n')]

In [42]:
oracle

['Love a fire that never fades\nThrough all seasons in cascades\nYour smile a sunlit ray\nLighting up my darkest day',
 "Love a melody in the silence\nEchoes of passion sweet and intense\nIn your eyes constellations align\nA universe of feelings forever thine\nYour touch a sonnet in every embrace\nIn love's embrace we find our grace\nMoonlight whispers secrets untold\nIn the language of hearts our story unfolds\nTogether we paint a canvas of dreams\nLove's symphony in infinite streams\nIn your laughter the joy of a sunrise\nOur love a journey with no goodbyes\nLove the eternal flame that won't cease\nIn your arms my heart finds its peace",
 "Love a symphony in the heart's embrace\nWhispers of passion a timeless grace\nIn your eyes galaxies of dreams unfold\nA canvas painted in hues of love untold\nYour touch a melody that dances on the skin\nIgniting sparks a flame that burns within\nMoonlight serenades a gentle caress\nIn the realm of love where feelings confess\nTogether we sail on w

In [44]:
import evaluate
model_id_path = '/content/cs221_love_poem/models/perplexity'
tokenizer.save_pretrained(model_id_path)
perplexity = evaluate.load("perplexity", module_type="metric")
results_perp = perplexity.compute(model_id=model_id_path,
                             predictions=oracle)

  0%|          | 0/1 [00:00<?, ?it/s]

In [45]:
results_perp

{'perplexities': [106.11487579345703,
  74.45262145996094,
  75.77159881591797,
  74.0783920288086,
  106.98212432861328,
  64.9191665649414,
  66.83291625976562,
  73.13457489013672,
  50.14946365356445,
  65.4713134765625],
 'mean_perplexity': 75.79070472717285}

### BLEU

In [46]:
references = [oracle]
predictions = [poem] # generated

In [48]:
bleu = evaluate.load("bleu")
results_bleu = bleu.compute(predictions=predictions, references=references, tokenizer=tokenizer)

In [49]:
results_bleu

{'bleu': 0.0,
 'precisions': [0.0, 3.0, 0.0, 0.0],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0,
 'translation_length': 2,
 'reference_length': 2}

### ROUGE

In [52]:
rouge = evaluate.load("rouge")
results_rouge = rouge.compute(predictions=predictions, references=references)

In [53]:
results_rouge

{'rouge1': 0.15503875968992248,
 'rouge2': 0.0,
 'rougeL': 0.14545454545454545,
 'rougeLsum': 0.15503875968992248}