In [None]:
import pandas as pd
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Load dataset
# Each CSV row is one outreach example; the tokenization step further down
# reads its `prospect_name`, `company` and `message` columns.
dataset_path = "data/outreach_messages.csv"
df = pd.read_csv(dataset_path)

# Load pre-trained model and tokenizer
# Both are resolved from the same checkpoint name so the tokenizer's
# vocabulary matches the model weights.
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize dataset
def tokenize_function(row):
    """Encode one outreach example as a single instruction-plus-message text.

    ``row`` is one DataFrame row supplying the ``prospect_name``, ``company``
    and ``message`` fields. The three are merged into one string and passed
    to the module-level ``tokenizer`` with truncation enabled; the tokenizer's
    encoding is returned unchanged.
    """
    # NOTE(review): no end-of-sequence token is appended to the text, so the
    # training examples carry no explicit "message ends here" marker --
    # confirm whether that is intended.
    text = f"Write a personalized message to {row['prospect_name']} at {row['company']}: {row['message']}"
    return tokenizer(text, truncation=True)


# One encoding per DataFrame row, collected into a plain Python list.
encoded_rows = df.apply(tokenize_function, axis=1)
tokenized_data = encoded_rows.tolist()

# Define training arguments
# The hyperparameters are gathered in one mapping and unpacked into
# TrainingArguments as keyword arguments.
run_config = {
    "output_dir": "./fine_tuned_model",  # where checkpoints are written
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "save_steps": 10_000,  # checkpoint interval, in update steps
    "save_total_limit": 2,  # cap on the number of checkpoints kept on disk
}
training_args = TrainingArguments(**run_config)

# Train the model
# GPT-2 ships without a padding token, yet examples of different lengths have
# to be padded to a common length before they can be stacked into a batch.
# Reuse the end-of-sequence token for padding when none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# With mlm=False the collator pads every batch and copies `input_ids` into
# `labels` (padding positions are excluded from the loss). Without `labels`
# the causal-LM model returns no loss and Trainer cannot run a training step.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data,
    data_collator=data_collator,
)

trainer.train()
trainer.save_model("./fine_tuned_model")
# Store the tokenizer next to the weights so the output directory can be
# loaded on its own with `from_pretrained`.
tokenizer.save_pretrained("./fine_tuned_model")