# RNN Classifier

**Note: There are some random processes within this notebook, so different runs of the notebook may result in different outcomes.**

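**Note: This notebook assumes the data being loaded has already been randomly shuffled. The train/validation split below takes rows in order (the first 95% for training, the rest for validation), so unshuffled data would give an unrepresentative validation set.**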

In [11]:
import data_utils
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from data_utils import BOWEncoding, WordEmbeddingEncoding, WordTokenDataset
from time import time
from torch.utils.data import DataLoader

## Load the Dataset

In [2]:
# Load word vectors from the GloVe 6B 100d text file; `embedding_dim` matches
# the "100d" in the file name.
# NOTE(review): the structure returned by `load_embeddings` is defined in
# `data_utils` and is not visible here — confirm before relying on its layout.
embeddings = data_utils.load_embeddings('./data/glove.6B/glove.6B.100d.txt',
                                        embedding_dim=100)

In [3]:
# Read the full labelled data set (a JSON list of record objects) into a DataFrame.
# Both the training and validation subsets are later sliced from this frame.
data = pd.read_json('./data/train_data.json', orient='records')


In [4]:
# Hold-out split: the leading 95% of rows are used for training and the
# remaining rows for validation. Rows are taken in order, so this relies on
# the data already being shuffled.
train_test_split = 0.95
split_idx = math.floor(train_test_split * len(data))

train_data, valid_data = data.iloc[:split_idx], data.iloc[split_idx:]


In [5]:
# Build the word-embedding encoding from the *entire* frame (training and
# validation rows together) plus the loaded embeddings.
# NOTE(review): what `prepare()` computes is defined in `data_utils` — presumably
# it builds the vocabulary/label mappings; confirm against that module.
emb_encoding = WordEmbeddingEncoding(data, embeddings)
emb_encoding.prepare()


In [8]:
# Build the bag-of-words encoding from the *entire* frame (training and
# validation rows together).
# NOTE(review): `min_word_freq=5` presumably drops words seen fewer than five
# times; the exact rule lives in `data_utils` — confirm.
bow_encoding = BOWEncoding(data, min_word_freq=5)
bow_encoding.prepare()


In [9]:
# Training datasets: the same training rows wrapped once per encoding, so the
# embedding-based and bag-of-words variants see identical samples.
# Each dataset is constructed and then prepared before the next one is built.
emb_train_dataset = WordTokenDataset(train_data, emb_encoding)
emb_train_dataset.prepare()

bow_train_dataset = WordTokenDataset(train_data, bow_encoding)
bow_train_dataset.prepare()


In [10]:
# Validation datasets: the held-out rows wrapped with the same two encoding
# objects that are used for the training datasets.
emb_valid_dataset = WordTokenDataset(valid_data, emb_encoding)
emb_valid_dataset.prepare()

bow_valid_dataset = WordTokenDataset(valid_data, bow_encoding)
bow_valid_dataset.prepare()


## Defining the Model

In [None]:
class Model(nn.Module):
    """Scaffold for a recurrent classifier on top of a text encoding.

    The constructor creates an encoding layer and two linear layers that both
    take ``input_size + hidden_size`` features. ``forward`` is not implemented
    yet and returns ``None``.

    Attributes:
        encoding: The encoding object passed to the constructor.
        input_size: Value of ``encoding.n_inputs()``.
        output_size: Value of ``encoding.n_classes()``.
        hidden_size: The ``hidden_size`` constructor argument.
        encoding_layer: ``nn.Identity`` when ``encoding.encoding_type`` is
            ``'bow'``; otherwise a sum-mode ``nn.EmbeddingBag`` created from
            ``encoding.embeddings``.
        i2o: Linear layer producing ``output_size`` features.
        i2h: Linear layer producing ``hidden_size`` features.
    """

    def __init__(self, encoding, hidden_size):
        """Create the layers.

        Args:
            encoding: Object exposing ``n_inputs()``, ``n_classes()`` and
                ``encoding_type``; ``embeddings`` is also read unless the
                type is ``'bow'``.
            hidden_size: Number of hidden features.
        """
        super().__init__()

        self.encoding = encoding
        self.input_size = encoding.n_inputs()
        self.output_size = encoding.n_classes()
        self.hidden_size = hidden_size

        # Bag-of-words inputs pass through unchanged; any other encoding type
        # goes through an EmbeddingBag built from the encoding's embeddings.
        uses_bow = encoding.encoding_type == 'bow'
        self.encoding_layer = (
            nn.Identity()
            if uses_bow
            else nn.EmbeddingBag.from_pretrained(encoding.embeddings, mode='sum')
        )

        # Both linear layers share the same input width. They are created in
        # the order i2o, i2h so parameter ordering and seeded initialisation
        # stay stable.
        combined_size = self.input_size + hidden_size
        self.i2o = nn.Linear(combined_size, self.output_size)
        self.i2h = nn.Linear(combined_size, hidden_size)

    def forward(self, samples):
        """Placeholder forward pass.

        TODO: not implemented yet; currently ignores ``samples`` and returns
        ``None``.
        """
        return None
