In [1]:
!pip install transformers datasets




In [3]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import Dataset

# Step 1: Load the pre-trained model and tokenizer
model = "gpt2"
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

tokenizer.pad_token = tokenizer.eos_token

# Step 2: Prepare the dataset
# For demonstration purposes, we will create a small dataset
data = {
    "text": [
        "Hello, how can I help you?",
        "I am looking for a restaurant recommendation.",
        "Sure, what type of cuisine do you prefer?",
        "I like Italian food.",
        "There is a great Italian restaurant nearby called Luigi's."
    ]
}

# Convert to Hugging Face dataset
dataset = Dataset.from_dict(data)
dataset = dataset.train_test_split(test_size=0.1)  # Split into train and test sets

# Tokenize the dataset
def tokenize_function(examples):
    tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Step 3: Fine-tune the model
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

trainer.train()

# Step 4: Save the fine-tuned model
model.save_pretrained("./fine-tuned-gpt2")
tokenizer.save_pretrained("./fine-tuned-gpt2")

print("Model fine-tuning complete and saved!")


Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,No log,6.550166
2,No log,5.057351
3,No log,4.362455


Model fine-tuning complete and saved!
