<a href="https://colab.research.google.com/github/dietmarja/LLM-Elements/blob/main/Fine_Tuning_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Fine Tuning

In [9]:
import torch
from torch import nn, optim
from transformers import GPT2Tokenizer, GPT2LMHeadModel

In [10]:
# Load the pretrained "gpt2" checkpoint: first the causal language model
# (with its LM head), then the tokenizer that belongs to the same checkpoint.
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [11]:
# Make sure the tokenizer can pad: when it has no pad token configured,
# register the end-of-sequence token in that role.
if tokenizer.pad_token is None:
    pad_token_spec = {"pad_token": tokenizer.eos_token}
    tokenizer.add_special_tokens(pad_token_spec)
    # Keep the model's embedding table in sync with the tokenizer's size.
    model.resize_token_embeddings(len(tokenizer))

In [12]:
# Prepare the data (we use a simple dummy dataset of two sentences)
texts = [
    "Example sentence one.",
    "Example sentence two.",
]

# Tokenize the whole list at once into PyTorch tensors, with padding enabled
# and sequences truncated to at most 128 tokens.
inputs = tokenizer(
    texts,
    max_length=128,
    truncation=True,
    padding=True,
    return_tensors="pt",
)


In [13]:
# Set up the loss criterion and the optimizer.
# NOTE(review): the training loop in this notebook reads the loss from the
# model's own output (`outputs.loss`) and never calls `loss_fn` -- confirm
# whether a later cell still needs it.
loss_fn = nn.CrossEntropyLoss()

# AdamW over every model parameter, learning rate 5e-5.
optimizer = optim.AdamW(params=model.parameters(), lr=5e-5)


In [17]:
# Training loop: `epochs` passes over the data, one optimizer step per sentence.
epochs = 3
for epoch in range(epochs):
    # Put the model in training mode.
    model.train()
    for i in range(len(inputs["input_ids"])):
        # Select one example and restore the leading batch dimension.
        input_ids = inputs["input_ids"][i].unsqueeze(0)
        attention_mask = inputs["attention_mask"][i].unsqueeze(0)

        # Build the language-modeling targets from the inputs, but exclude
        # padded positions from the loss. The pad token may be the same id as
        # the EOS token, so padding is identified through the attention mask
        # rather than by token id. Label value -100 is ignored by the loss.
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100

        # Passing `labels` makes the model return the loss itself.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Standard update: clear stale gradients, backpropagate, step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print loss for every batch (we have here 1 sentence per batch)
        print(f"Epoch: {epoch}, Batch: {i}, Loss: {loss.item()}")


Epoch: 0, Batch: 0, Loss: 0.19336621463298798
Epoch: 0, Batch: 1, Loss: 0.7131404280662537
Epoch: 1, Batch: 0, Loss: 0.23843832314014435
Epoch: 1, Batch: 1, Loss: 3.7839481830596924
Epoch: 2, Batch: 0, Loss: 0.13806819915771484
Epoch: 2, Batch: 1, Loss: 0.3653947412967682
