# Question 2: Generative Model

In [None]:
import os
import warnings

import torch
from transformers import AdamW, GPT2LMHeadModel, GPT2Tokenizer
from utils.data_loader_gen import MultipleChoiceDataloader, read_file, read_json_data
from utils.printing import print_rouge_scores
from utils.train_gen import train_loop
from utils.valid_gen import valid_loop

In [None]:
# Install a blanket "ignore" filter: from here on, warnings of every category
# and from every module are suppressed for this session.
warnings.filterwarnings(action="ignore")

In [None]:
# Run configuration shared by the cells below.
NUM_EPOCHS: int = 3  # number of iterations of the epoch loop
BATCH_SIZE: int = 50  # batch size handed to every MultipleChoiceDataloader
LOG_EVERY: int = 250  # forwarded to train_loop / valid_loop; presumably a logging interval (confirm in utils)
TEST_MODE: bool = True  # not read by any cell visible here; NOTE(review): confirm where it is consumed

## Dataloader

In [None]:
# Load the pretrained "gpt2" tokenizer and make its padding token the same as
# its end-of-sequence token.
# NOTE(review): presumably required because the dataloader pads batches and
# the stock tokenizer has no pad token of its own; confirm in
# utils.data_loader_gen.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Locations of the train / dev / test splits (".jsonl" files), given relative
# to the notebook's working directory.
train_file_name: str = "data/train_complete.jsonl"
dev_file_name: str = "data/dev_complete.jsonl"
test_file_name: str = "data/test_complete.jsonl"

In [None]:
# Read the three split files with the project's `read_file` helper, in
# train -> dev -> test order.
train_json, dev_json, test_json = map(
    read_file,
    (train_file_name, dev_file_name, test_file_name),
)

In [None]:
# The training split uses read_json_data's default `permute` setting.
train_dataset = read_json_data(train_json)

# The dev and test splits are built with permutation explicitly disabled.
# NOTE(review): `permute` presumably shuffles answer choices as augmentation;
# confirm in utils.data_loader_gen.
dev_dataset, test_dataset = (
    read_json_data(raw_split, permute=False) for raw_split in (dev_json, test_json)
)

In [None]:
# Every split is wrapped in a MultipleChoiceDataloader with the same batch
# size and tokenizer; only the underlying data differs.
loader_kwargs = {"batch_size": BATCH_SIZE, "tokenizer": tokenizer}

train_dataloader = MultipleChoiceDataloader(data=train_dataset, **loader_kwargs)
dev_dataloader = MultipleChoiceDataloader(data=dev_dataset, **loader_kwargs)
test_dataloader = MultipleChoiceDataloader(data=test_dataset, **loader_kwargs)

## Model Training

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Dropout probability used for GPT-2's residual, embedding and attention
# dropout layers.
DROPOUT_RATE = 0.3

# Load the pretrained "gpt2" weights and move the model to the chosen device.
# GPT-2's configuration has no `dropout_rate` field (its dropout settings are
# `resid_pdrop`, `embd_pdrop` and `attn_pdrop`), and the dropout modules are
# created while the model is constructed, so setting a config attribute after
# loading does not change the network. Passing the rates as configuration
# overrides to `from_pretrained` makes the requested dropout take effect.
# NOTE(review): dropout is only active in training mode; this assumes
# train_loop switches the model to train() -- confirm in utils.train_gen.
model = GPT2LMHeadModel.from_pretrained(
    "gpt2",
    resid_pdrop=DROPOUT_RATE,
    embd_pdrop=DROPOUT_RATE,
    attn_pdrop=DROPOUT_RATE,
).to(device)

In [None]:
# Cross-entropy with reduction="none": the loss comes back un-reduced, one
# value per element, so any averaging or masking is left to the caller.
loss_fn = torch.nn.CrossEntropyLoss(reduction="none")

# AdamW over all of the model's parameters, with a learning rate of 1e-5 and
# a weight decay of 0.01.
optimizer = AdamW(params=model.parameters(), lr=1e-5, weight_decay=0.01)

In [None]:
# Per-epoch history: element 0 of train_loop's result (stored as the training
# loss), element 1 of it (training ROUGE scores), and valid_loop's result
# (validation ROUGE scores).
epoch_train_losses, epoch_train_rouge_scores, epoch_valid_rouge_scores = [], [], []

for epoch in range(NUM_EPOCHS):
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}")

    # --- Training pass: reshuffle the data, then run one epoch. ---
    print("Training...")
    train_dataloader.shuffle_data()
    train_metrics = train_loop(
        train_dataloader, tokenizer, model, loss_fn, optimizer, device, LOG_EVERY
    )
    train_loss, train_rouge = train_metrics[0], train_metrics[1]
    epoch_train_losses.append(train_loss)
    epoch_train_rouge_scores.append(train_rouge)
    print("--Train ROUGE Metrics--")
    print_rouge_scores(train_rouge)

    # --- Validation pass on the dev split. ---
    print("Validating...")
    valid_metrics = valid_loop(dev_dataloader, tokenizer, model, device, LOG_EVERY)
    epoch_valid_rouge_scores.append(valid_metrics)
    print("--Valid ROUGE Metrics--")
    print_rouge_scores(valid_metrics)

In [None]:
# Persist the current model weights. `epoch` is the loop variable left over
# from the training cell, so the file name carries the 1-based number of the
# last epoch that ran.
model_save_path = f"model_weights/model_gen_epoch_{epoch+1}.pth"

# Create the target directory on first use; torch.save does not create
# missing parent directories and would otherwise fail on a fresh checkout.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

Training for additional epochs.