# Collaborative Filtering with Neural Nets

In [54]:
# for data manipulation
import numpy as np
import pandas as pd
import os
import pickle
from tqdm import tqdm

# use surprise for collaborative filtering
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


# plot
import matplotlib.pyplot as plt

## Read in data

In [71]:
game_data_path = "data/neural_net_data/"
files = sorted(os.listdir(game_data_path))

files = ["2012-2013.pkl", "2013-2014.pkl", "2014-2015.pkl", "2015-2016.pkl","2016-2017.pkl", "2017-2018.pkl"]

X_train = np.zeros(5)

for file in files[:-2]:
    if ".pkl" not in file: continue
    
    with open(game_data_path + file, 'rb') as f:
        print(file)
        if X_train.shape[0] == 5:
            X_train, y_train = pickle.load(f, encoding='latin1')
        else:
            X_add, y_add = pickle.load(f, encoding='latin1')
            print(X_train.shape)
            print(X_add.shape)
            X_train = np.concatenate((X_train, X_add), axis = 0)
            y_train = np.concatenate((y_train, y_add), axis = 0)

with open(game_data_path + files[-2], 'rb') as f:
        print(files[-2])
        X_val, y_val = pickle.load(f, encoding='latin1')
        
with open(game_data_path + files[-1], 'rb') as f:
        print(files[-1])
        X_test, y_test = pickle.load(f, encoding='latin1')

2012-2013.pkl
2013-2014.pkl
(1265, 3, 508)
(1272, 3, 508)
2014-2015.pkl
(2537, 3, 508)
(1263, 3, 508)
2015-2016.pkl
(3800, 3, 508)
(1269, 3, 508)
2016-2017.pkl
2017-2018.pkl


## Clean Data

In [73]:
def clean_data(X, y):
    X = X[y > 0]
    y = y[y > 0]
    
    return X, y

In [74]:
X_train, y_train = clean_data(X_train, y_train)
X_val, y_val = clean_data(X_val, y_val)
X_test, y_test = clean_data(X_test, y_test)

## LSTM Model

In [75]:
#### Specify the model architecture
class LSTMModel(nn.Module):

    def __init__(self, input_dim, hidden_dim, target_size, num_layers, batch_size, time_steps, dropout):
        super(LSTMModel, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.time_steps = time_steps
        self.dropout = dropout
        
        # Initialize LSTM unit
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=False)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2out = nn.Linear(hidden_dim, target_size)
        self.hidden = self.init_hidden()
        
        self.drop = nn.Dropout(dropout)

    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size , hidden_dim)
        return (torch.zeros(self.num_layers, self.time_steps, self.hidden_dim),
                torch.zeros(self.num_layers, self.time_steps, self.hidden_dim))

    def forward(self, input_seq):
        lstm_out, self.hidden = self.lstm(input_seq, self.hidden)
        
        drop_out = self.drop(lstm_out)
        
        pred = self.hidden2out(drop_out)
        
        return pred

In [119]:
## Define the model
model = LSTMModel(input_dim = 508,
                     hidden_dim = 15,
                     target_size = 1,
                     num_layers = 2,
                     batch_size = 10, 
                     time_steps = 3,
                     dropout = 0.2
                 )
                     
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [120]:
losses = []
val_losses = []

for epoch in range(5000):
    train_loss = 0
    for i in range(0, len(X_train), model.batch_size):
        if i + model.batch_size >= len(X_train) : continue
        
        #Pytorch accumulates gradients. We need to clear them out before each instance
        model.zero_grad()

        # Also, we need to clear out the hidden state of the LSTM, detaching it from its history on the last instance.
        model.hidden = model.init_hidden()

        # Step 2. Get our inputs ready for the network.
        batch_input = X_train[i : i + model.batch_size] #.reshape((X.shape[1], model.batch_size, X.shape[2]))
        batch = Variable(torch.from_numpy(batch_input)).type(torch.FloatTensor)
                                                    
        targets = Variable(torch.from_numpy(y_train[i : i + model.batch_size])).type(torch.FloatTensor)

        # Step 3. Run our forward pass.
        scores = model(batch)
        scores = scores[:, -1].reshape((model.batch_size)) # we only care about the last output

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(scores, targets)
        loss.backward()
        optimizer.step()
        
        #optimizer.zero_grad()   # clear gradients for next train
        
        train_loss += loss.detach().numpy()
        
    ## validation loss
    if (epoch + 1) % 1 == 0:
        print("----------")
        print("Losses after {} iterations:".format(epoch + 1))
        n_batches = len(X_train)/model.batch_size
        print("Train: {}".format(train_loss/n_batches))
        with torch.no_grad():
            batch = Variable(torch.from_numpy(X_val)).type(torch.FloatTensor)
            targets = Variable(torch.from_numpy(y_val)).type(torch.FloatTensor)
            scores = model(batch)
            scores = scores[:, -1].reshape((len(y_val))) # we only care about the last output
            val_loss = loss_function(scores, targets)
            print("Val: {}".format(val_loss))
            val_losses.append(val_loss)
            losses.append(train_loss/n_batches)

----------
Losses after 1 iterations:
Train: 1255.0549522173835
Val: 1041.327880859375
----------
Losses after 2 iterations:
Train: 726.2589502594111
Val: 746.7875366210938
----------
Losses after 3 iterations:
Train: 597.5694204160125
Val: 621.0652465820312
----------
Losses after 4 iterations:
Train: 524.226513466415
Val: 615.1679077148438
----------
Losses after 5 iterations:
Train: 487.689893938221
Val: 572.86083984375
----------
Losses after 6 iterations:
Train: 463.28413303762017
Val: 573.881591796875
----------
Losses after 7 iterations:
Train: 440.418165153468
Val: 506.3221740722656
----------
Losses after 8 iterations:
Train: 414.465350570325
Val: 476.4446716308594
----------
Losses after 9 iterations:
Train: 401.25444685588604
Val: 511.23443603515625
----------
Losses after 10 iterations:
Train: 399.959305584659
Val: 487.2259216308594
----------
Losses after 11 iterations:
Train: 388.79255546626723
Val: 477.43731689453125
----------
Losses after 12 iterations:
Train: 383.7882

----------
Losses after 95 iterations:
Train: 370.3850030312733
Val: 460.06634521484375
----------
Losses after 96 iterations:
Train: 369.96007192281434
Val: 465.0225524902344
----------
Losses after 97 iterations:
Train: 371.27682116646315
Val: 461.7191162109375
----------
Losses after 98 iterations:
Train: 371.2206217791375
Val: 463.70111083984375
----------
Losses after 99 iterations:
Train: 370.58059830022563
Val: 456.49420166015625
----------
Losses after 100 iterations:
Train: 370.9482998301055
Val: 458.39984130859375
----------
Losses after 101 iterations:
Train: 370.77368158218576
Val: 456.3157043457031
----------
Losses after 102 iterations:
Train: 371.1057179763754
Val: 460.3294982910156
----------
Losses after 103 iterations:
Train: 370.1240917637184
Val: 459.9916687011719
----------
Losses after 104 iterations:
Train: 371.80735322213707
Val: 469.43902587890625
----------
Losses after 105 iterations:
Train: 371.13513349069024
Val: 456.73504638671875
----------
Losses after 1

KeyboardInterrupt: 

In [121]:
np.min(val_losses)

447.9333

In [122]:
losses[np.argmin(val_losses)]

371.8974472942479