# Training Notebook

This notebook fine-tunes a GPT-2 language model for the chatbot using the Hugging Face `Trainer` and saves the resulting model.

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from src.data.dataset import ChatbotDataset
from src.models.train import train_model

# Load the tokenizer and the base GPT-2 model from local directories.
tokenizer = GPT2Tokenizer.from_pretrained('../models/tokenizer')
model = GPT2LMHeadModel.from_pretrained('../models/base')

# Load the training set. ChatbotDataset is a project class; presumably it
# yields tokenized examples in the form Trainer expects -- TODO confirm in
# src/data/dataset.py.
train_dataset = ChatbotDataset('../data/processed/train.jsonl', tokenizer)

# Training arguments
training_args = TrainingArguments(
    output_dir='../models/fine_tuned',  # periodic checkpoints are written here
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=500,       # write a checkpoint every 500 update steps
    save_total_limit=2,   # older checkpoints beyond this count are deleted
    logging_dir='../logs',
)

# Train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

trainer.train()

# NOTE(review): this path lives inside output_dir and follows the Trainer's
# own "checkpoint-<step>" naming, so it can overwrite (or be rotated away with)
# an automatic step-1000 checkpoint. It is kept unchanged because other code
# may read from it; consider moving the final model to a dedicated directory.
final_model_dir = '../models/fine_tuned/checkpoint-1000'
trainer.save_model(final_model_dir)

# Store the tokenizer next to the weights so the saved directory is
# self-contained and can be loaded on its own with from_pretrained().
tokenizer.save_pretrained(final_model_dir)

print("Training completed")