# Collaborative Filtering with Neural Nets

In [2]:
# for data manipulation
import numpy as np
import pandas as pd
import os
import pickle

# use surprise for collaborative filtering
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


# plot
import matplotlib.pyplot as plt

## Read in data

In [64]:
game_data_path = "data/neural_net_data_short/"
files = sorted(os.listdir(game_data_path))

X_train = np.zeros(5)

for file in files[:-2]:
    if ".pkl" not in file: continue
    
    with open(game_data_path + file, 'rb') as f:
        print(file)
        if X_train.shape[0] == 5:
            X_train, y_train = pickle.load(f, encoding='latin1')
        else:
            X_add, y_add = pickle.load(f, encoding='latin1')
            print(X_train.shape)
            print(X_add.shape)
            X_train = np.concatenate((X_train, X_add), axis = 0)
            y_train = np.concatenate((y_train, y_add), axis = 0)

with open(game_data_path + files[-2], 'rb') as f:
        print(files[-2])
        X_val, y_val = pickle.load(f, encoding='latin1')
        
with open(game_data_path + files[-1], 'rb') as f:
        print(files[-1])
        X_test, y_test = pickle.load(f, encoding='latin1')

2007-2008.pkl
2008-2009.pkl
(1267, 3, 240)
(1266, 3, 240)
2009-2010.pkl
(2533, 3, 240)
(1263, 3, 240)
2010-2011.pkl
(3796, 3, 240)
(1263, 3, 240)
2012-2013.pkl
(5059, 3, 240)
(1265, 3, 240)
2013-2014.pkl
(6324, 3, 240)
(1272, 3, 240)
2014-2015.pkl
(7596, 3, 240)
(1263, 3, 240)
2015-2016.pkl
(8859, 3, 240)
(1269, 3, 240)
2016-2017.pkl
2017-2018.pkl


## Clean Data

In [65]:
def clean_data(X, y):
    X = X[y > 0]
    y = y[y > 0]
    
    return X, y

In [66]:
X_train, y_train = clean_data(X_train, y_train)
X_val, y_val = clean_data(X_val, y_val)
X_test, y_test = clean_data(X_test, y_test)

## LSTM Model

In [67]:
#### Specify the model architecture
class LSTMModel(nn.Module):

    def __init__(self, input_dim, hidden_dim, target_size, num_layers, batch_size, time_steps):
        super(LSTMModel, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.time_steps = time_steps
        
        # Initialize LSTM unit
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=False)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2out = nn.Linear(hidden_dim, target_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size , hidden_dim)
        return (torch.zeros(self.num_layers, self.time_steps, self.hidden_dim),
                torch.zeros(self.num_layers, self.time_steps, self.hidden_dim))
        
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input_seq):
        lstm_out, self.hidden = self.lstm(input_seq, self.hidden)
        
        pred = self.hidden2out(lstm_out)
        
        return pred

In [71]:
## Define the model
model = LSTMModel(input_dim = 240,
                     hidden_dim = 50,
                     target_size = 1,
                     num_layers = 2,
                     batch_size = 10, 
                     time_steps = 3)
                     
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [72]:
losses = []
val_losses = []

for epoch in range(5000):   # again, normally you would NOT do 300 epochs, it is toy data
    train_loss = 0
    for i in range(0, len(X_train), model.batch_size):
        if i + model.batch_size >= len(X_train) : continue
        if i > 100: break
        
        #Pytorch accumulates gradients. We need to clear them out before each instance
        model.zero_grad()

        # Also, we need to clear out the hidden state of the LSTM, detaching it from its history on the last instance.
        model.hidden = model.init_hidden()

        # Step 2. Get our inputs ready for the network.
        batch_input = X_train[i : i + model.batch_size] #.reshape((X.shape[1], model.batch_size, X.shape[2]))
        batch = Variable(torch.from_numpy(batch_input)).type(torch.FloatTensor)
                                                    
        targets = Variable(torch.from_numpy(y[i : i + model.batch_size])).type(torch.FloatTensor)

        # Step 3. Run our forward pass.
        scores = model(batch)
        scores = scores[:, -1].reshape((model.batch_size)) # we only care about the last output

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(scores, targets)
        loss.backward()
        optimizer.step()
        
        optimizer.zero_grad()   # clear gradients for next train
        
        train_loss += loss.detach().numpy()
        
    ## validation loss
    if (epoch + 1) % 50 == 0:
        print("----------")
        print("Losses after {} iterations:".format(epoch))
        print("Train: {}".format(loss.detach().numpy()))
#        with torch.no_grad():
#             batch_input = val_X
#             batch = Variable(torch.from_numpy(batch_input)).type(torch.FloatTensor)
#             targets = Variable(torch.from_numpy(val_y)).type(torch.FloatTensor)
#             scores = model(batch)
#             scores = scores[:, -1].reshape((len(val_y))) # we only care about the last output
#             val_loss = loss_function(scores, targets)
#             print("Val: {}".format(val_loss))
#             val_losses.append(val_loss)
#             losses.append(train_loss/len(X))

----------
Losses after 49 iterations:
Train: 4647.05224609375
----------
Losses after 99 iterations:
Train: 4645.2138671875
----------
Losses after 149 iterations:
Train: 4619.85546875
----------
Losses after 199 iterations:
Train: 4597.67041015625
----------
Losses after 249 iterations:
Train: 4581.478515625
----------
Losses after 299 iterations:
Train: 4612.57568359375
----------
Losses after 349 iterations:
Train: 4629.36767578125
----------
Losses after 399 iterations:
Train: 4643.70458984375
----------
Losses after 449 iterations:
Train: 4632.33349609375
----------
Losses after 499 iterations:
Train: 4637.7353515625
----------
Losses after 549 iterations:
Train: 4594.4013671875
----------
Losses after 599 iterations:
Train: 4419.1787109375
----------
Losses after 649 iterations:
Train: 4283.677734375
----------
Losses after 699 iterations:
Train: 4405.4775390625
----------
Losses after 749 iterations:
Train: 4449.09912109375
----------
Losses after 799 iterations:
Train: 4474.68

In [None]:
# See what the scores are after training
with torch.no_grad():
    batch_input = val_X
    batch = Variable(torch.from_numpy(batch_input)).type(torch.FloatTensor)
    targets = Variable(torch.from_numpy(val_y)).type(torch.FloatTensor)
    scores = model(batch)
    scores = scores[:, -1].reshape((len(val_y))) # we only care about the last output
    val_loss = loss_function(scores, targets)
    print(val_loss)