In [None]:
!pip install -q transformers datasets peft accelerate tqdm

In [None]:
import torch
from datasets import load_dataset
from transformers import GPT2TokenizerFast, AutoModelForCausalLM
from peft import LoraConfig, TaskType, get_peft_model
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm


In [None]:
# Load the Tiny Shakespeare text file with the generic "text" builder.
dataset = load_dataset(
    "text",
    data_files="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
)

# Hold out 10% of the rows as a test split; the seed keeps the split repeatable.
# NOTE(review): train_test_split presumably shuffles rows by default -- confirm.
# If so, the lines later concatenated into blocks are no longer in play order.
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)
print("Dataset loaded")
print(dataset)


Downloading data:   0%|          | 0.00/1.12M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset loaded
DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 36000
    })
    test: Dataset({
        features: ['text'],
        num_rows: 4000
    })
})


In [None]:
# Load the GPT-2 tokenizer and reuse its end-of-sequence token as the padding token.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer ready")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Tokenizer ready


In [None]:
def tokenize_fn(examples):
    """Tokenize a batch of rows from the "text" column without special tokens.

    NOTE(review): the generated samples further down contain no line breaks,
    so the rows presumably arrive without their trailing newline -- confirm,
    and consider appending "\\n" to each row before tokenizing.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, add_special_tokens=False)
    return encoded


# Tokenize every split in batches and drop the raw "text" column.
tokenized = dataset.map(tokenize_fn, batched=True, remove_columns=["text"])

print("Tokenization complete")
print(tokenized)


Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Tokenization complete
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 36000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 4000
    })
})


In [None]:
# NOTE(review): this cell repeats the previous tokenization cell verbatim;
# it redefines tokenize_fn and recomputes `tokenized` with the same result.
def tokenize_fn(examples):
    """Tokenize a batch of rows from the "text" column without special tokens."""
    return tokenizer(
        examples["text"],
        add_special_tokens=False
    )

tokenized = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"]
)

print("Tokenization complete")
print(tokenized)


Tokenization complete
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 36000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 4000
    })
})


In [None]:
# Drop attention_mask so only input_ids remains.
# NOTE(review): presumably needed because the group_texts defined in the next cell
# only emits input_ids/labels, and a leftover column would have a different row count -- confirm.
tokenized = tokenized.remove_columns("attention_mask")

print("attention_mask removed")
print(tokenized)


attention_mask removed
DatasetDict({
    train: Dataset({
        features: ['input_ids'],
        num_rows: 36000
    })
    test: Dataset({
        features: ['input_ids'],
        num_rows: 4000
    })
})


In [None]:
block_size = 256  # number of tokens in each training example

def group_texts(examples):
    """Concatenate a batch of tokenized rows and cut it into fixed-size blocks.

    Args:
        examples: Batch mapping whose "input_ids" entry holds one list of
            token ids per row.

    Returns:
        Dict with "input_ids" and "labels", two equal lists of blocks that are
        each exactly ``block_size`` tokens long. Tokens at the end of the batch
        that do not fill a whole block are dropped.
    """
    from itertools import chain  # local import keeps this cell self-contained

    # chain flattens in linear time; sum(lists, []) re-copies the accumulator per row.
    concatenated = list(chain.from_iterable(examples["input_ids"]))
    total_length = (len(concatenated) // block_size) * block_size

    input_ids = [
        concatenated[start:start + block_size]
        for start in range(0, total_length, block_size)
    ]

    return {
        "input_ids": input_ids,
        # Independent copies so the label blocks never alias the input blocks.
        "labels": [list(block) for block in input_ids],
    }

# Regroup every split into block_size-token examples (batched, so each call sees many rows).
lm_dataset = tokenized.map(group_texts, batched=True)

print("lm_dataset created")
print(lm_dataset)


Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

lm_dataset created
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1029
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 115
    })
})


In [None]:
# Shuffled training batches of 4 examples.
# NOTE(review): no collate_fn is passed here; a later cell rebuilds train_dataloader
# with collate_fn before the training loop, so this loader appears to be superseded.
train_dataloader = DataLoader(
    lm_dataset["train"],
    batch_size=4,
    shuffle=True
)

print("DataLoader ready")


DataLoader ready


In [None]:
# Load the pretrained "gpt2" checkpoint as a causal language model.
model = AutoModelForCausalLM.from_pretrained("gpt2")
print(" GPT-2 loaded")


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

 GPT-2 loaded


In [None]:
# LoRA adapter settings for causal language modeling: rank 8, alpha 32,
# dropout 0.1, applied only to modules named "c_attn".
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["c_attn"],  # GPT-2 attention
    bias="none",
)

print(" LoRA config created")


 LoRA config created


In [None]:
# Wrap the base model with the LoRA adapters and report the trainable parameter count.
model = get_peft_model(model, lora_config)

print(" LoRA applied")
model.print_trainable_parameters()


 LoRA applied
trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.2364




In [None]:
# AdamW over the model's parameters with a learning rate of 3e-4.
optimizer = AdamW(model.parameters(), lr=3e-4)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

print("Optimizer & device ready:", device)


Optimizer & device ready: cuda


In [None]:
def collate_fn(batch):
    """Stack the per-example token lists of a batch into tensors.

    Args:
        batch: List of examples, each a mapping with "input_ids" and "labels".

    Returns:
        Dict with "input_ids" and "labels" tensors, one row per example.
    """
    columns = ("input_ids", "labels")
    return {
        name: torch.tensor([example[name] for example in batch])
        for name in columns
    }


In [None]:
from torch.utils.data import DataLoader

# Rebuild the training loader so batches are assembled by collate_fn
# (dict of "input_ids" / "labels" tensors), shuffled, 4 examples per batch.
train_dataloader = DataLoader(
    lm_dataset["train"],
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn
)

print("DataLoader ready with collate_fn")


DataLoader ready with collate_fn


In [None]:
from tqdm import tqdm

num_epochs = 3
model.train()  # switch the model to training mode before the loop

for epoch in range(num_epochs):
    print(f"\n🔁 Epoch {epoch+1}/{num_epochs}")
    total_loss = 0

    for batch in tqdm(train_dataloader):
        # Forward pass on the training device; passing labels makes the model return the loss.
        outputs = model(
            input_ids=batch["input_ids"].to(device),
            labels=batch["labels"].to(device),
        )
        loss = outputs.loss
        total_loss += loss.item()

        # One optimizer update per batch, then clear gradients for the next one.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Mean of the per-batch losses over the epoch.
    avg_loss = total_loss / len(train_dataloader)
    print(f" Average loss: {avg_loss:.4f}")



🔁 Epoch 1/3


  0%|          | 0/258 [00:00<?, ?it/s]`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.
100%|██████████| 258/258 [00:46<00:00,  5.53it/s]


 Average loss: 4.9814

🔁 Epoch 2/3


100%|██████████| 258/258 [00:47<00:00,  5.42it/s]


 Average loss: 4.7633

🔁 Epoch 3/3


100%|██████████| 258/258 [00:50<00:00,  5.06it/s]

 Average loss: 4.6806





In [None]:
# Write the (LoRA-wrapped) model and the tokenizer to the same output directory.
model.save_pretrained("lora-shakespeare")
tokenizer.save_pretrained("lora-shakespeare")

print(" LoRA adapters saved (few MB)")


 LoRA adapters saved (few MB)


In [None]:
model.eval()  # inference mode for generation

prompt = "Hello I am Reading"
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Sample up to 120 new tokens with nucleus sampling.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,
        temperature=0.8,
        do_sample=True,
        top_p=0.95,
        # Explicit pad token: generate() otherwise falls back to eos and logs a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

print("GENERATED TEXT")

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


GENERATED TEXT
Hello I am Reading your letter,And I will tell you your name, but I will tell you your name.I am, in your own words, the one that will take my life:By-come, come, let me go.What is this?The truth, the truth, the truth!Now what are you?You say that I may find her, but I am notA true soldier, but a soldier who is not.I have given a command to thee.But yet you know it, thou art not to be found with my word:Is it true that you are a man?No


In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from torch.utils.data import DataLoader
import math

# Settings for this evaluation cell.
model_name = "gpt2"
block_size = 256
batch_size = 4

# Load Tiny Shakespeare and hold out 10% of the rows as the test split (seeded).
# NOTE(review): train_test_split presumably shuffles rows by default -- confirm.
dataset = load_dataset("text", data_files="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt")
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Tokenizer for model_name; the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(examples):
    """Tokenize a batch of rows from the "text" column without special tokens."""
    texts = examples["text"]
    encoded = tokenizer(texts, add_special_tokens=False)
    return encoded

# Tokenize every split in batches and drop the raw "text" column.
tokenized = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"],
)

def group_texts(examples):
    """Concatenate a batch of tokenized rows and cut it into block_size chunks.

    Args:
        examples: Batch mapping with "input_ids" and "attention_mask", each a
            list holding one list per row.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels"; every entry is a
        list of chunks exactly ``block_size`` long, and "labels" repeats the
        "input_ids" chunks. Leftover tokens that do not fill a whole chunk at
        the end of the batch are dropped.
    """
    from itertools import chain  # local import keeps this cell self-contained

    # chain flattens in linear time; sum(lists, []) re-copies the accumulator per row.
    all_ids = list(chain.from_iterable(examples["input_ids"]))
    all_masks = list(chain.from_iterable(examples["attention_mask"]))
    total_len = (len(all_ids) // block_size) * block_size
    starts = range(0, total_len, block_size)  # chunk offsets, shared by all columns

    return {
        "input_ids": [all_ids[i:i+block_size] for i in starts],
        "attention_mask": [all_masks[i:i+block_size] for i in starts],
        "labels": [all_ids[i:i+block_size] for i in starts]
    }

# Regroup every split into fixed-length language-modeling examples.
lm_dataset = tokenized.map(group_texts, batched=True)

def collate_fn(batch):
    """Stack the per-example lists of a batch into one tensor per column.

    Args:
        batch: List of examples, each a mapping with "input_ids",
            "attention_mask" and "labels".

    Returns:
        Dict mapping those three keys to tensors with one row per example.
    """
    columns = ("input_ids", "attention_mask", "labels")
    return {
        name: torch.tensor([example[name] for example in batch])
        for name in columns
    }

# Test batches in fixed order (shuffle=False), assembled by collate_fn.
test_loader = DataLoader(lm_dataset["test"], batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

def calc_perplexity(model, loader):
    """Compute the perplexity and mean loss of ``model`` over ``loader``.

    Args:
        model: Module whose forward accepts ``input_ids``, ``attention_mask``
            and ``labels`` and returns an object with a scalar ``loss``.
        loader: Iterable of batches, each a dict of tensors under those keys.

    Returns:
        Tuple ``(perplexity, avg_loss)`` with ``perplexity == exp(avg_loss)``.

    Raises:
        ValueError: If ``loader`` yields no examples.
    """
    model.eval()
    # Follow the model's own device instead of assuming a GPU is present.
    device = next(model.parameters()).device
    total_loss = 0.0
    count = 0

    with torch.no_grad():
        for batch in loader:
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            out = model(input_ids=ids, attention_mask=mask, labels=labels)
            # Weight each batch loss by its row count so a short final batch is not over-counted.
            total_loss += out.loss.item() * ids.size(0)
            count += ids.size(0)

    if count == 0:
        raise ValueError("calc_perplexity received a loader with no examples")

    avg_loss = total_loss / count
    return math.exp(avg_loss), avg_loss

# Load the original (not fine-tuned) GPT-2 and measure it on the test split.
# Prefer the GPU but fall back to CPU so the cell does not crash without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("EVALUATION GPT-2 (Before Finetuning)")
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
base_ppl, base_loss = calc_perplexity(model, test_loader)

print(f"\nTest Loss: {base_loss:.4f}")
print(f"Perplexity: {base_ppl:.2f}")

# Sample a continuation for each prompt as a qualitative baseline.
print("GENERATION SAMPLES - BEFORE FINE-TUNING")
prompts = ["To be or not to be", "Once upon a time", "The king said"]
for p in prompts:
    print(f"\nPrompt: '{p}'")
    inp = tokenizer(p, return_tensors="pt").to(device)

    with torch.no_grad():
        out = model.generate(
            **inp,
            max_new_tokens=80,
            temperature=0.8,
            do_sample=True,
            top_p=0.9,
            # Explicit pad token: generate() otherwise falls back to eos and logs a warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    print(f"Output: {tokenizer.decode(out[0], skip_special_tokens=True)}")


print("\n evaluation complete!")
print(f"Save these metrics: Loss={base_loss:.4f}, Perplexity={base_ppl:.2f}")

Downloading data:   0%|          | 0.00/1.12M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

EVALUATION GPT-2 (Before Finetuning)


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Test Loss: 5.2251
Perplexity: 185.88
GENERATION SAMPLES - BEFORE FINE-TUNING

Prompt: 'To be or not to be'


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: To be or not to be, this is a great way to do it.

In addition to the great value it has, it also comes with an extra $30. You get $10 off your purchase.

And if you are looking for an instant "buy it now" deal, it is also available on Amazon.

Here is a video to help you get started:

We hope this helps

Prompt: 'Once upon a time'


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: Once upon a time, it would have been good to have some sort of political change that would be able to keep the regime from falling apart. But now the whole world has turned against us and our interests are at stake. I can't believe how stupid we have been, how ridiculous we have been. I know what you mean by "unacceptable". I know what you mean by "unacceptable". It's not

Prompt: 'The king said'
Output: The king said to him, "The men of the city are very strong; there is no other way."

The king said, "You shall not leave your people to the wolves. It is your duty to put an end to them; do not allow them to come out of your way."

The king said, "Do not be afraid; we will be with you, and it will be

 evaluation complete!
Save these metrics: Loss=5.2251, Perplexity=185.88


In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm
import shutil
from google.colab import files

# Hyperparameters for this fine-tuning cell.
model_name = "gpt2"
block_size = 256
batch_size = 4
epochs = 15
lr = 1e-4

# Output locations: adapter weights and the merged full model.
adapter_path = "gpt2-lora-adapters"
merged_path = "gpt2-lora-merged"
# NOTE(review): best_checkpoint is not referenced anywhere else in this cell.
best_checkpoint = "best_model_checkpoint"

# Load Tiny Shakespeare and hold out 10% of the rows as the test split (seeded).
# NOTE(review): train_test_split presumably shuffles rows by default -- confirm.
dataset = load_dataset("text", data_files="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt")
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Tokenizer for model_name; the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(examples):
    """Tokenize a batch of rows from the "text" column without special tokens.

    NOTE(review): samples generated by the fine-tuned model contain no line
    breaks, so the rows presumably arrive without their trailing newline --
    confirm, and consider appending "\\n" to each row before tokenizing.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, add_special_tokens=False)
    return encoded

# Tokenize every split in batches and drop the raw "text" column.
tokenized = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"],
)

def group_texts(examples):
    """Concatenate a batch of tokenized rows and cut it into block_size chunks.

    Args:
        examples: Batch mapping with "input_ids" and "attention_mask", each a
            list holding one list per row.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels"; every entry is a
        list of chunks exactly ``block_size`` long, and "labels" repeats the
        "input_ids" chunks. Leftover tokens that do not fill a whole chunk at
        the end of the batch are dropped.
    """
    from itertools import chain  # local import keeps this cell self-contained

    # chain flattens in linear time; sum(lists, []) re-copies the accumulator per row.
    all_ids = list(chain.from_iterable(examples["input_ids"]))
    all_masks = list(chain.from_iterable(examples["attention_mask"]))
    total_len = (len(all_ids) // block_size) * block_size
    starts = range(0, total_len, block_size)  # chunk offsets, shared by all columns

    return {
        "input_ids": [all_ids[i:i+block_size] for i in starts],
        "attention_mask": [all_masks[i:i+block_size] for i in starts],
        "labels": [all_ids[i:i+block_size] for i in starts]
    }

# Regroup every split into fixed-length language-modeling examples.
lm_dataset = tokenized.map(group_texts, batched=True)

def collate_fn(batch):
    """Stack the per-example lists of a batch into one tensor per column.

    Args:
        batch: List of examples, each a mapping with "input_ids",
            "attention_mask" and "labels".

    Returns:
        Dict mapping those three keys to tensors with one row per example.
    """
    columns = ("input_ids", "attention_mask", "labels")
    return {
        name: torch.tensor([example[name] for example in batch])
        for name in columns
    }

# Prefer the GPU but fall back to CPU so the cell does not crash without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Shuffled training batches assembled by collate_fn.
train_loader = DataLoader(lm_dataset["train"], batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

# Load GPT-2 and wrap it with LoRA adapters.
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# Rank-16 adapters on the modules named "c_attn" and "c_proj".
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.15,
    target_modules=["c_attn", "c_proj"],
    bias="none"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

optimizer = AdamW(model.parameters(), lr=lr)

# Training: one optimizer step per batch, mean batch loss reported per epoch.
print(f"\nFINE-TUNING GPT-2 WITH LORA")

model.train()

for epoch in range(epochs):
    print(f"\nepoch {epoch+1}/{epochs}")
    running_loss = 0

    for batch in tqdm(train_loader):
        ids = batch["input_ids"].to(device)
        mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        out = model(input_ids=ids, attention_mask=mask, labels=labels)
        loss = out.loss
        running_loss += loss.item()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    avg_loss = running_loss / len(train_loader)
    print(f"avg training loss: {avg_loss:.4f}")

# Save the adapter weights together with the tokenizer.
model.save_pretrained(adapter_path)
tokenizer.save_pretrained(adapter_path)

# Reload a fresh base model, attach the saved adapters and fold them into its weights.
print("\nmerging lora with base model...")
base = AutoModelForCausalLM.from_pretrained(model_name).to(device)
model = PeftModel.from_pretrained(base, adapter_path)
model = model.merge_and_unload()

model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)

print(f"\n Fine-tuning complete! Model saved to {merged_path}")

# Zip the merged model directory and trigger a browser download (Colab only).
shutil.make_archive(merged_path, "zip", merged_path)
files.download(f"{merged_path}.zip")

Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]



trainable params: 1,622,016 || all params: 126,061,824 || trainable%: 1.2867

FINE-TUNING GPT-2 WITH LORA

epoch 1/15


100%|██████████| 258/258 [00:51<00:00,  5.02it/s]


avg training loss: 5.0204

epoch 2/15


100%|██████████| 258/258 [00:53<00:00,  4.86it/s]


avg training loss: 4.7976

epoch 3/15


100%|██████████| 258/258 [00:54<00:00,  4.73it/s]


avg training loss: 4.6966

epoch 4/15


100%|██████████| 258/258 [00:54<00:00,  4.76it/s]


avg training loss: 4.6358

epoch 5/15


100%|██████████| 258/258 [00:54<00:00,  4.74it/s]


avg training loss: 4.5894

epoch 6/15


100%|██████████| 258/258 [00:54<00:00,  4.74it/s]


avg training loss: 4.5579

epoch 7/15


100%|██████████| 258/258 [00:54<00:00,  4.76it/s]


avg training loss: 4.5306

epoch 8/15


100%|██████████| 258/258 [00:54<00:00,  4.76it/s]


avg training loss: 4.5066

epoch 9/15


100%|██████████| 258/258 [00:54<00:00,  4.76it/s]


avg training loss: 4.4886

epoch 10/15


100%|██████████| 258/258 [00:54<00:00,  4.75it/s]


avg training loss: 4.4709

epoch 11/15


100%|██████████| 258/258 [00:54<00:00,  4.75it/s]


avg training loss: 4.4547

epoch 12/15


100%|██████████| 258/258 [00:54<00:00,  4.75it/s]


avg training loss: 4.4379

epoch 13/15


100%|██████████| 258/258 [00:54<00:00,  4.75it/s]


avg training loss: 4.4243

epoch 14/15


100%|██████████| 258/258 [00:54<00:00,  4.75it/s]


avg training loss: 4.4103

epoch 15/15


100%|██████████| 258/258 [00:54<00:00,  4.75it/s]


avg training loss: 4.4012

merging lora with base model...

 Fine-tuning complete! Model saved to gpt2-lora-merged


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
import os

# Report whether the merged model directory is present and, if so, list its files.
merged_dir = "gpt2-lora-merged"
if not os.path.exists(merged_dir):
    print("✗ Model not found")
else:
    print("✓ Model found!")
    print("\nFiles in the model directory:")
    print(os.listdir(merged_dir))

✓ Model found!

Files in the model directory:
['merges.txt', 'special_tokens_map.json', 'vocab.json', 'model.safetensors', 'generation_config.json', 'config.json', 'tokenizer.json', 'tokenizer_config.json']


In [4]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from torch.utils.data import DataLoader
import math
# Settings for this evaluation cell.
model_name = "gpt2"
finetuned_path = "gpt2-lora-merged"  # directory holding the merged fine-tuned model
block_size = 256
batch_size = 4

# Baseline metrics copied by hand from the pre-fine-tuning evaluation output
# ("Save these metrics: Loss=5.2251, Perplexity=185.88"); update them if that cell is re-run.
baseline_loss = 5.2251
baseline_ppl = 185.88

# Load Tiny Shakespeare with the same split arguments (test_size, seed) as the other cells.
dataset = load_dataset("text", data_files="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt")
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Tokenizer for model_name; the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(examples):
    """Tokenize a batch of rows from the "text" column without special tokens."""
    texts = examples["text"]
    encoded = tokenizer(texts, add_special_tokens=False)
    return encoded

# Tokenize every split in batches and drop the raw "text" column.
tokenized = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"],
)

def group_texts(examples):
    """Concatenate a batch of tokenized rows and cut it into block_size chunks.

    Args:
        examples: Batch mapping with "input_ids" and "attention_mask", each a
            list holding one list per row.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels"; every entry is a
        list of chunks exactly ``block_size`` long, and "labels" repeats the
        "input_ids" chunks. Leftover tokens that do not fill a whole chunk at
        the end of the batch are dropped.
    """
    from itertools import chain  # local import keeps this cell self-contained

    # chain flattens in linear time; sum(lists, []) re-copies the accumulator per row.
    all_ids = list(chain.from_iterable(examples["input_ids"]))
    all_masks = list(chain.from_iterable(examples["attention_mask"]))
    total_len = (len(all_ids) // block_size) * block_size
    starts = range(0, total_len, block_size)  # chunk offsets, shared by all columns

    return {
        "input_ids": [all_ids[i:i+block_size] for i in starts],
        "attention_mask": [all_masks[i:i+block_size] for i in starts],
        "labels": [all_ids[i:i+block_size] for i in starts]
    }

# Regroup every split into fixed-length language-modeling examples.
lm_dataset = tokenized.map(group_texts, batched=True)

def collate_fn(batch):
    """Stack the per-example lists of a batch into one tensor per column.

    Args:
        batch: List of examples, each a mapping with "input_ids",
            "attention_mask" and "labels".

    Returns:
        Dict mapping those three keys to tensors with one row per example.
    """
    columns = ("input_ids", "attention_mask", "labels")
    return {
        name: torch.tensor([example[name] for example in batch])
        for name in columns
    }

# Test batches in fixed order (shuffle=False), assembled by collate_fn.
test_loader = DataLoader(lm_dataset["test"], batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

def calc_perplexity(model, loader):
    """Compute the perplexity and mean loss of ``model`` over ``loader``.

    Args:
        model: Module whose forward accepts ``input_ids``, ``attention_mask``
            and ``labels`` and returns an object with a scalar ``loss``.
        loader: Iterable of batches, each a dict of tensors under those keys.

    Returns:
        Tuple ``(perplexity, avg_loss)`` with ``perplexity == exp(avg_loss)``.

    Raises:
        ValueError: If ``loader`` yields no examples.
    """
    model.eval()
    # Follow the model's own device instead of assuming a GPU is present.
    device = next(model.parameters()).device
    total_loss = 0.0
    count = 0

    with torch.no_grad():
        for batch in loader:
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            out = model(input_ids=ids, attention_mask=mask, labels=labels)
            # Weight each batch loss by its row count so a short final batch is not over-counted.
            total_loss += out.loss.item() * ids.size(0)
            count += ids.size(0)

    if count == 0:
        raise ValueError("calc_perplexity received a loader with no examples")

    avg_loss = total_loss / count
    return math.exp(avg_loss), avg_loss

# Load the merged fine-tuned model and measure it on the test split.
# Prefer the GPU but fall back to CPU so the cell does not crash without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("EVALUATION - GPT-2 (After Fine-tuning)")


finetuned_model = AutoModelForCausalLM.from_pretrained(finetuned_path).to(device)
ft_ppl, ft_loss = calc_perplexity(finetuned_model, test_loader)

print(f"\nTest Loss: {ft_loss:.4f}")
print(f"Perplexity: {ft_ppl:.2f}")

# Comparison against the hand-copied baseline numbers.

print("PERFORMANCE COMPARISON")


# Relative reduction in percent: a positive value means the metric went down.
loss_change = ((baseline_loss - ft_loss) / baseline_loss) * 100
ppl_change = ((baseline_ppl - ft_ppl) / baseline_ppl) * 100

print(f"\nMetric Before After Change")

print(f"Loss:           {baseline_loss:.4f}      {ft_loss:.4f}      {loss_change:+.1f}%")
print(f"Perplexity:     {baseline_ppl:.2f}     {ft_ppl:.2f}      {ppl_change:+.1f}%")

if ppl_change > 0:
    print(f"\n Model improved! Perplexity decreased by {ppl_change:.1f}%")
else:
    print(f"\n Model got worse. Perplexity increased by {abs(ppl_change):.1f}%")

# Side-by-side samples from the original and the fine-tuned model.

print("GENERATION COMPARISON")

base_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
prompts = ["To be or not to be", "Once upon a time", "The king said"]

# Shared sampling settings; the explicit pad token avoids generate()'s eos-fallback warning.
gen_kwargs = dict(
    max_new_tokens=80,
    temperature=0.8,
    do_sample=True,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
)

for p in prompts:
    print(f"\nPrompt: '{p}'")
    inp = tokenizer(p, return_tensors="pt").to(device)

    # before
    with torch.no_grad():
        base_out = base_model.generate(**inp, **gen_kwargs)
    print(f"BEFORE: {tokenizer.decode(base_out[0], skip_special_tokens=True)}")

    # after
    with torch.no_grad():
        ft_out = finetuned_model.generate(**inp, **gen_kwargs)
    print(f"AFTER:  {tokenizer.decode(ft_out[0], skip_special_tokens=True)}")
print("Evaluation complete!")


Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

EVALUATION - GPT-2 (After Fine-tuning)

Test Loss: 4.3850
Perplexity: 80.24
PERFORMANCE COMPARISON

Metric Before After Change
Loss:           5.2251      4.3850      +16.1%
Perplexity:     185.88     80.24      +56.8%

 Model improved! Perplexity decreased by 56.8%
GENERATION COMPARISON


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Prompt: 'To be or not to be'


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


BEFORE: To be or not to be, if the individual is an atheist, he or she is not a member of the family or the church. A person who is an atheist may not be a member of the family or church. He or she must be married or a parent, parent, child, sibling or spouse of the person.

A person who is an atheist may not be a member of the family or church. He


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


AFTER:  To be or not to be, to be.What we are doing, we have done; but to be not.What, you say? and so you, that be in a hurry, I pray,You are not a thing to be afraid of.I am not so; and I'll have you tell me.GLOUCESTER:And with all his powers I will not do it.Away, sir

Prompt: 'Once upon a time'


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


BEFORE: Once upon a time the two factions met, the First, Second and Third Kingdoms were divided into two different governments. The First Kingdom, led by King Jarl Greymane, sought to secure the kingdom of Jarlswood, and the Second, Third and Fourth Kingdoms sought to secure the kingdom of Kingsport. Both governments were in a war of attrition between the two kingdoms, and the Kingsport Rebellion broke out


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


AFTER:  Once upon a time of war, I had to stay;I'll have you here, and so he shall be.But I am in haste to find my cause.I'll give you all your time, and your leave,But I have got your house to yourself.And then the land is gone, and I shall be gone.And how he hath made so many of them,KING RICHARD III:

Prompt: 'The king said'


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


BEFORE: The king said that if I had not become an apostle of the church, there would not be any place for me. He said that when I saw the King, I would not be able to come into the kingdom of God. And I said, What is it that I do not know? He said, That is, because I am not an apostle of the church. And I said, If I had not
AFTER:  The king said, 'What then?'--My lord,--what is it?GLOUCESTER:ROMEO:LUCIO:And be sure to keep him from me.That, by my own choice, my lord, is but an idle man,The king of England, the king of Wales,KING RICHARD II:KING RICHARD II:KING RICHARD III:
Evaluation complete!
