## Imports

In [None]:
from model_training import RNNLanguageModel
import numpy as np
import os
import pickle
import random
import torch
from tqdm import tqdm

## Setup

In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Seed every random number generator used in this notebook with one value.
seed = 42

# Python's and NumPy's global generators.
random.seed(seed)
np.random.seed(seed)

# PyTorch generators (CPU, current CUDA device, then all CUDA devices).
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# cuDNN configuration: request deterministic kernels, turn off the autotuner,
# and finally switch cuDNN off altogether.
# NOTE(review): with cuDNN disabled, the `deterministic` and `benchmark` flags
# are presumably redundant, and disabling cuDNN may slow down GPU training —
# confirm this is intentional.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

## Load Data

In [None]:
# Both pickles are read from ../data, resolved relative to the current working directory.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
_data_dir = os.path.join("..", "data")
_docs_path = os.path.normpath(os.path.join(_data_dir, "wikitext2_data.pkl"))
_vocab_path = os.path.normpath(os.path.join(_data_dir, "wikitext2_train_vocab.pkl"))

# Contents of wikitext2_data.pkl.
with open(_docs_path, "rb") as f:
    converted_tokenised_docs = pickle.load(f)

# Contents of wikitext2_train_vocab.pkl; later cells take its length and look up "<pad>" in it.
with open(_vocab_path, "rb") as f:
    train_vocab = pickle.load(f)

## Set Model Hyperparameters

In [None]:
# Architecture settings; each is forwarded to the RNN constructor in the "RNN" section.
NUM_LAYERS = 2          # forwarded as `num_layers`
HIDDEN_SIZE = 256       # forwarded as `hidden_size`
EMBEDDING_SIZE = 128    # forwarded as `embed_size`

# Number of entries in the loaded training vocabulary; forwarded as `vocab_size`.
vocab_size = len(train_vocab)

## Set Training Hyperparameters

In [None]:
# Settings for the training run (consumed by code outside this section).
LEARNING_RATE = 0.001
NUM_EPOCHS = 10
BATCH_SIZE = 32
SEQUENCE_LENGTH = 30

# Early-stopping patience: the maximum number of consecutive epochs in which the
# validation loss fails to improve before training is stopped early.
# NOTE(review): PATIENCE equals NUM_EPOCHS, so early stopping probably can never
# trigger within a single run — confirm against the training loop.
PATIENCE = 10

## RNN

In [None]:
# Build the RNN language model from the hyperparameters defined in the cells above.
rnn = RNNLanguageModel(
    vocab_size=vocab_size,
    embed_size=EMBEDDING_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    # Index of the "<pad>" entry in the training vocabulary.
    pad_idx=train_vocab["<pad>"],
)

# Name associated with this model; presumably used when saving it — confirm against later cells.
rnn_save_name = "rnn"

## LSTM