In [1]:
!pip install transformers datasets accelerate peft sentencepiece bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [2]:
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split

from transformers import (
    BartTokenizer,
    BartForConditionalGeneration,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer
)

from peft import LoraConfig, get_peft_model
import torch

In [3]:
df = pd.read_csv("PoetryFoundationData_Cleaned.csv")

train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)

train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

In [4]:
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
model = BartForConditionalGeneration.from_pretrained(
    "facebook/bart-large",
    load_in_8bit=True,
    device_map="auto"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

In [5]:
lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_2_SEQ_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 4,718,592 || all params: 411,010,048 || trainable%: 1.1480


In [6]:
def preprocess_function(examples):

    inputs = examples["Title"]
    targets = examples["Poem"]

    model_inputs = tokenizer(
        inputs,
        truncation=True
    )

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            targets,
            truncation=True
        )["input_ids"]

    # mask padding tokens
    labels = [
        [(token if token != tokenizer.pad_token_id else -100) for token in seq]
        for seq in labels
    ]

    model_inputs["labels"] = labels
    return model_inputs


In [7]:
tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_val = val_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/12306 [00:00<?, ? examples/s]



Map:   0%|          | 0/1368 [00:00<?, ? examples/s]

In [8]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [9]:
training_args = TrainingArguments(
    output_dir="./bart_lora_poetry",
    save_strategy="epoch",
    learning_rate=1e-3,                 # FASTER learning
    num_train_epochs=4,                 # Less time, still good
    per_device_train_batch_size=2,      # Faster & lighter
    gradient_accumulation_steps=4,      # Equivalent to batch_size = 8
    warmup_steps=100,
    logging_steps=20,
    fp16=True,
    save_total_limit=2,
    optim="adamw_torch",
)

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator
)
trainer.train()

  trainer = Trainer(
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myashadatembe07[0m ([33myashadatembe07-github[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
20,11.7302
40,7.5901
60,5.0459
80,4.2215
100,4.0551
120,3.8969
140,3.8326
160,3.8662
180,3.7887
200,3.8257




TrainOutput(global_step=6156, training_loss=3.6167045181799895, metrics={'train_runtime': 9073.1153, 'train_samples_per_second': 5.425, 'train_steps_per_second': 0.678, 'total_flos': 5253922889318400.0, 'train_loss': 3.6167045181799895, 'epoch': 4.0})

In [11]:
model.save_pretrained("./bart_lora_poetry/final")
tokenizer.save_pretrained("./bart_lora_poetry/final")

('./bart_lora_poetry/final/tokenizer_config.json',
 './bart_lora_poetry/final/special_tokens_map.json',
 './bart_lora_poetry/final/vocab.json',
 './bart_lora_poetry/final/merges.txt',
 './bart_lora_poetry/final/added_tokens.json')

In [12]:
def generate_poem(prompt, max_tokens=180):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output_ids = model.generate(
        **inputs,
        max_length=max_tokens,
        do_sample=True,
        top_p=0.92,         # nucleus sampling → human creativity
        top_k=40,
        temperature=0.85,   # warm creativity
        repetition_penalty=1.2,
    )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

In [13]:
prompts = [
    "Beyond the last horizon",
    "Under electric skies",
    "When the forest breathes",
    "At the edge of starlight",
    "Where laughter lingers",
    "Inside quite storm",
    "Beneath the neon glow",
    "As the shadows bloom",
    "Among the drifted worlds",
    "In the cradel of silence",
    "Before the dream unravels",
    "Where the past hums",
    "Under the watchful moon",
    "When the colors melt",
    "At the doorway of becoming"
]

generated_poems = []

for p in prompts:
    poem = generate_poem(p)
    print(f"\nPrompt: {p}\n--- Generated Poem ---\n{poem}\n")
    generated_poems.append(poem)


Prompt: Beyond the last horizon
--- Generated Poem ---

It’s hard to know what you’re doing.    You’ve never done it before.    I don’t want to do it, but I’m going to try. I want to be a good mother to my children. 


Prompt: Under electric skies
--- Generated Poem ---

I’ve seen my mother’s face in my mind, and I’m thinking of her today, and she said, “You’re the one.” And I said to her,  —   “I am the one,” and I said: “I don’t want you to know that. You are the only one, and you are the last one. I love you, but you are not the one!” She said: — “But I am the only person in your life who is the last person in my life. I am not the first one in your family. I have the last name of every person you know, and the last names of all the people you love. I belong to the family, and we are the


Prompt: When the forest breathes
--- Generated Poem ---

It was the first day of spring, and I had been waiting for it for a long time.    I was sitting on the edge of the porch railing, watch

In [14]:
!pip install evaluate bert-score nltk rouge_score
!pip install sentence-transformers
import evaluate
import math
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=09357f392255720242d77dbe35eb6a0a1815dbca2dbd2f0747737b5b0835f097
  Stored in directory: /root/.cache/pip/wheels/85/9d/af/01feefbe7d55ef546

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [15]:
def calculate_perplexity(text):
    encodings = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model(**encodings, labels=encodings["input_ids"])
        loss = outputs.loss
    return math.exp(loss.item())

In [16]:
def distinct_n(text, n=1):
    tokens = word_tokenize(text.lower())
    ngrams_list = list(ngrams(tokens, n))
    if len(tokens) == 0:
        return 0
    return len(set(ngrams_list)) / len(tokens)

In [17]:
def avg_length(text):
    tokens = word_tokenize(text)
    return len(tokens)

In [18]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Load small, fast model
coherence_model = SentenceTransformer("all-MiniLM-L6-v2")

def calculate_coherence(poem):
    # Sentence-level coherence instead of newline-level
    lines = nltk.sent_tokenize(poem)

    if len(lines) < 2:
        return 0.0

    embeddings = coherence_model.encode(lines, convert_to_tensor=True)

    sims = []
    for i in range(len(lines) - 1):
        sims.append(util.cos_sim(embeddings[i], embeddings[i+1]).item())

    return float(np.mean(sims))


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [19]:

metrics_output = []

for poem in generated_poems:
    ppl = calculate_perplexity(poem)
    bleu_score = bleu.compute(predictions=[poem], references=[poem])["bleu"]
    rouge_score = rouge.compute(predictions=[poem], references=[poem])
    bert = bertscore.compute(predictions=[poem], references=[poem], lang="en")["f1"][0]
    d1 = distinct_n(poem, 1)
    d2 = distinct_n(poem, 2)
    length = avg_length(poem)
    coh = calculate_coherence(poem)

    metrics_output.append({
        "poem": poem,
        "perplexity": ppl,
        "bleu": bleu_score,
        "rouge": rouge_score,
        "bertscore": bert,
        "distinct1": d1,
        "distinct2": d2,
        "length": length,
        "coherence": coh
    })



tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
for i, m in enumerate(metrics_output, 1):
    print(f"\n=== Poem {i} Metrics ===")
    print(f"Perplexity: {m['perplexity']:.4f}")
    print(f"BLEU: {m['bleu']:.4f}")
    print(f"ROUGE: {m['rouge']}")
    print(f"BERTScore (F1): {m['bertscore']:.4f}")
    print(f"Distinct-1: {m['distinct1']:.4f}")
    print(f"Distinct-2: {m['distinct2']:.4f}")
    print(f"Length (tokens): {m['length']}")
    print(f"Coherence: {m['coherence']:.4f}")



=== Poem 1 Metrics ===
Perplexity: 6.2010
BLEU: 1.0000
ROUGE: {'rouge1': np.float64(1.0), 'rouge2': np.float64(1.0), 'rougeL': np.float64(1.0), 'rougeLsum': np.float64(1.0)}
BERTScore (F1): 1.0000
Distinct-1: 0.6458
Distinct-2: 0.9167
Length (tokens): 48
Coherence: 0.3079

=== Poem 2 Metrics ===
Perplexity: 6.8488
BLEU: 1.0000
ROUGE: {'rouge1': np.float64(1.0), 'rouge2': np.float64(1.0), 'rougeL': np.float64(1.0), 'rougeLsum': np.float64(1.0)}
BERTScore (F1): 1.0000
Distinct-1: 0.3816
Distinct-2: 0.7763
Length (tokens): 152
Coherence: 0.4055

=== Poem 3 Metrics ===
Perplexity: 6.5487
BLEU: 1.0000
ROUGE: {'rouge1': np.float64(1.0), 'rouge2': np.float64(1.0), 'rougeL': np.float64(1.0), 'rougeLsum': np.float64(1.0)}
BERTScore (F1): 1.0000
Distinct-1: 0.4072
Distinct-2: 0.7904
Length (tokens): 167
Coherence: 0.5136

=== Poem 4 Metrics ===
Perplexity: 7.7785
BLEU: 1.0000
ROUGE: {'rouge1': np.float64(1.0), 'rouge2': np.float64(1.0), 'rougeL': np.float64(1.0), 'rougeLsum': np.float64(1.0)}
B