# Question 1: RNN

In [None]:
import itertools
import math
import time

import pandas as pd
import torch

from models.RNNModel import RNNModel
from training_testing.training import train_model
from utils.batchifier import batchify
from utils.tokenizer import Vocabulary, process_corpus, read_corpus

## Read and Process Data

In [None]:
# Batch size shared by the whole notebook: it is passed to batchify,
# train_model, and model.init_hidden.
batch_size = 32

In [None]:
# Locations of the training and validation splits of the corpus.
train_file_path, valid_file_path = (
    "data/wiki2.train.txt",
    "data/wiki2.valid.txt",
)

In [None]:
# Run the training split through process_corpus, build the vocabulary from the
# resulting tokens, and convert those tokens with the vocabulary.
# NOTE(review): the training split uses process_corpus while the validation
# and test splits use read_corpus — confirm the difference is intentional.
train_processed_tokens = process_corpus(train_file_path)
train_vocab = Vocabulary()
train_vocab.build_vocab(train_processed_tokens)
train_numericalized_tokens = train_vocab.numericalize(train_processed_tokens)
# Trailing expression: the notebook displays the result of .size().
train_numericalized_tokens.size()

In [None]:
# Batch the numericalized training tokens with batchify using batch_size.
train_batched_data = batchify(train_numericalized_tokens, batch_size)
# Trailing expression: the notebook displays the batched shape.
train_batched_data.shape

In [None]:
# Read the validation split and numericalize it with train_vocab (the
# vocabulary built from the training split), not with a new vocabulary.
# NOTE(review): how numericalize treats tokens missing from train_vocab is not
# visible here — confirm in utils.tokenizer.
valid_processed_tokens = read_corpus(valid_file_path)
valid_numericalized_tokens = train_vocab.numericalize(valid_processed_tokens)
# Trailing expression: the notebook displays the result of .size().
valid_numericalized_tokens.size()

In [None]:
# Batch the numericalized validation tokens with the same batch_size as the
# training data.
valid_batched_data = batchify(valid_numericalized_tokens, batch_size)
# Trailing expression: the notebook displays the batched shape.
valid_batched_data.shape

## Training

In [None]:
# Hyperparameter grid for the search below. Each *_values list is one axis of
# the Cartesian product; the remaining names are shared by every run.
bptt_values = [30, 20, 10]  # candidate bptt values forwarded to train_model
clip_threshold_values = [0.25]  # candidate clip_threshold values (single value)
log_interval = 1000  # forwarded to train_model as log_interval
lr_values = [0.01]  # candidate Adam learning rates (single value)
epochs = 22  # epochs per grid-search run
dropout_values = [0.3, 0.5]  # candidate dropout values passed to RNNModel
ntokens = train_vocab.size  # vocabulary size taken from the training vocabulary

# Loss function handed to train_model for every run.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Expand the candidate lists into every (bptt, clip_threshold, lr, dropout)
# tuple, in that field order, materialised as a list.
hyperparam_combinations = [
    *itertools.product(
        bptt_values,
        clip_threshold_values,
        lr_values,
        dropout_values,
    )
]

In [None]:
# Grid search: for each hyperparameter combination, train a freshly created
# model and record the four metrics returned by train_model next to the
# settings that produced them.
results = []

for bptt, clip_threshold, lr, dropout in hyperparam_combinations:
    # A new model and optimizer per combination, so every run starts from a
    # fresh initialisation.
    model = RNNModel(ntokens=ntokens, ninp=100, nhid=100, dropout=dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_loss, val_loss, train_ppl, val_ppl = train_model(
        model=model,
        train_batched_data=train_batched_data,
        valid_batched_data=valid_batched_data,
        epochs=epochs,
        batch_size=batch_size,
        bptt=bptt,
        criterion=criterion,
        optimizer=optimizer,
        clip_threshold=clip_threshold,
        log_interval=log_interval,
        lr=lr,
        dropout=dropout,
    )

    # One row per run; the keys become the columns of the results table.
    results.append(
        dict(
            bptt=bptt,
            clip_threshold=clip_threshold,
            lr=lr,
            dropout=dropout,
            epochs=epochs,
            train_loss=train_loss,
            val_loss=val_loss,
            train_ppl=train_ppl,
            val_ppl=val_ppl,
        )
    )

In [None]:
# Turn the list of per-run result dicts into a table: one row per
# hyperparameter combination, one column per dict key.
df_results = pd.DataFrame(results)

In [None]:
# Persist the grid-search results as CSV (without the index column).
# DataFrame.to_csv does not create missing parent directories, so make sure
# "results/" exists first; otherwise the save fails only after the whole
# search has already run.
import os

os.makedirs("results", exist_ok=True)
df_results.to_csv("results/rnn_hyperparam_results.csv", index=False)

## Training Best Model

In [None]:
# Reload the saved grid-search results from disk, so this section can be run
# without repeating the search.
df_results = pd.read_csv("results/rnn_hyperparam_results.csv")

In [None]:
# Trailing expression: the notebook renders the results table.
df_results

In [None]:
# Hyperparameters for the final training run, set by hand.
# NOTE(review): presumably the best row of df_results — confirm against the
# table. The grid search ran with epochs = 22 while this run uses 20; confirm
# that is intended.
bptt = 30
clip_threshold = 0.25
log_interval = 1000
lr = 0.01
epochs = 20
dropout = 0.3
ntokens = train_vocab.size  # vocabulary size taken from the training vocabulary

In [None]:
# Fresh model, Adam optimizer, and cross-entropy loss for the final run, using
# the same ninp/nhid sizes as the grid search.
model = RNNModel(ntokens=ntokens, ninp=100, nhid=100, dropout=dropout)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Four separate, initially empty containers for loss and perplexity histories
# of the training and validation splits.
train_losses, valid_losses, train_perplexities, valid_perplexities = (
    [],
    [],
    [],
    [],
)

In [None]:
# Train the final model with the hand-picked hyperparameters. The model is
# updated in place; the four values returned by train_model are discarded.
# NOTE(review): the train_losses / valid_losses / *_perplexities lists created
# earlier are not filled here — capture the return values if they are needed.
# NOTE(review): architecture="LSTM" is passed although the model is an
# RNNModel and the grid-search call passes no architecture at all — confirm
# what train_model does with this argument and whether "LSTM" is intended.
_, _, _, _ = train_model(
    model=model,
    train_batched_data=train_batched_data,
    valid_batched_data=valid_batched_data,
    epochs=epochs,
    batch_size=batch_size,
    bptt=bptt,
    criterion=criterion,
    optimizer=optimizer,
    clip_threshold=clip_threshold,
    log_interval=log_interval,
    lr=lr,
    dropout=dropout,
    architecture="LSTM",
)

In [None]:
# Save the trained weights (state_dict only, not the whole module).
# torch.save does not create missing parent directories, so make sure
# "models/saved_weights/" exists first; otherwise the save fails only after
# training has finished.
import os

os.makedirs("models/saved_weights", exist_ok=True)
torch.save(model.state_dict(), "models/saved_weights/rnn_model_weights.pth")

## Testing Best Model

In [None]:
from utils.batchifier import get_batch
from utils.modeler import repackage_hidden

In [None]:
# Settings for evaluating the saved model on the test split.
# NOTE(review): bptt is 20 here while the final model was trained with
# bptt = 30 — confirm the evaluation chunk length is intentional.
ntokens = train_vocab.size  # vocabulary size taken from the training vocabulary
dropout = 0.3
bptt = 20
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Rebuild the model with the same constructor arguments used for training
# (ntokens, ninp=100, nhid=100) and restore the saved weights into it.
model = RNNModel(ntokens=ntokens, ninp=100, nhid=100, dropout=dropout)
model.load_state_dict(torch.load("models/saved_weights/rnn_model_weights.pth"))

In [None]:
# Location of the test split of the corpus.
test_file_path = "data/wiki2.test.txt"

In [None]:
# Read the test split and numericalize it with train_vocab (the vocabulary
# built from the training split), as was done for the validation split.
test_processed_tokens = read_corpus(test_file_path)
test_numericalized_tokens = train_vocab.numericalize(test_processed_tokens)
# Trailing expression: the notebook displays the result of .size().
test_numericalized_tokens.size()

In [None]:
# Batch the numericalized test tokens with the same batch_size as the
# training and validation data.
test_batched_data = batchify(test_numericalized_tokens, batch_size)
# Trailing expression: the notebook displays the batched shape.
test_batched_data.shape

In [None]:
# Evaluate the restored model on the test split and report the average
# per-token cross-entropy loss and the corresponding perplexity.
test_loss = 0.0  # running sum of per-token losses over the whole test set
test_token_count = 0  # number of target tokens accumulated into test_loss

with torch.no_grad():
    model.eval()
    hidden = model.init_hidden(batch_size)

    # Walk the batched data in chunks of at most bptt steps, carrying the
    # hidden state from one chunk to the next.
    for i in range(0, test_batched_data.size(0) - 1, bptt):
        test_data, test_targets = get_batch(test_batched_data, i, bptt)
        test_output, hidden = model(test_data, hidden)
        # NOTE(review): presumably detaches the hidden state from the previous
        # chunk's graph — confirm in utils.modeler.
        hidden = repackage_hidden(hidden)
        loss = criterion(test_output.view(-1, ntokens), test_targets)

        # criterion (CrossEntropyLoss with default reduction) returns the mean
        # over this chunk's targets. Weight it by the number of targets so a
        # shorter final chunk does not count as much as a full one.
        chunk_tokens = test_targets.numel()
        test_loss += loss.item() * chunk_tokens
        test_token_count += chunk_tokens

if test_token_count == 0:
    raise ValueError("test_batched_data is too short to evaluate a single chunk")

# Divide by the number of tokens actually evaluated. The previous divisor,
# size(0) // bptt, could be smaller than the number of loop iterations when a
# trailing partial chunk exists, which inflated the reported loss and PPL.
avg_test_loss = test_loss / test_token_count
avg_test_ppl = math.exp(avg_test_loss)

print(f"Test Loss: {avg_test_loss:.4f} | Test PPL: {avg_test_ppl:.2f}")