# Collaborative Filtering with Neural Nets

In [1]:
# for data manipulation
import numpy as np
import pandas as pd
import os
import pickle

# use PyTorch for the neural-network model
import torch
import torch.nn as nn
from torch.autograd import Variable

## Read in data

In [2]:
# Directory holding the pickled (X, y) datasets for the neural net.
game_data_path = "data/neural_net_data/"
# os.listdir returns entries in arbitrary order; sort so that files[0] is the
# same file on every run and on every machine.
files = sorted(os.listdir(game_data_path))
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load datasets that come from a trusted source.
with open(os.path.join(game_data_path, files[0]), 'rb') as f:
    X, y = pickle.load(f)

In [3]:
# Dimensions of the loaded feature array (the recorded output follows).
X.shape

(1267, 3, 504)

In [4]:
# Dimensions of the loaded target array (the recorded output follows).
y.shape

(1267,)

In [5]:
def trainValSplit(X, y, val_frac=0.2, seed=None):
    """Filter, shuffle and split a dataset into training and validation parts.

    Samples whose target is not strictly positive are discarded first. The
    remaining samples are shuffled (features and targets with the same
    permutation); the first ``round(n * val_frac)`` of them become the
    validation set and the rest the training set.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of numeric targets, one per sample in ``X``.
        val_frac: Fraction of the kept samples held out for validation.
            Must lie in ``[0, 1]``. Defaults to 0.2.
        seed: Optional seed for a private random generator, which makes the
            split reproducible. When ``None`` the global NumPy random state
            is used (so ``np.random.seed`` still controls the shuffle).

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val)`` of NumPy arrays.

    Raises:
        ValueError: If ``val_frac`` is outside ``[0, 1]``.
    """
    if not 0 <= val_frac <= 1:
        raise ValueError(
            "val_frac must be between 0 and 1, got %r" % (val_frac,))

    X = np.asarray(X)
    y = np.asarray(y)

    # Keep only samples with a strictly positive target.
    keep = y > 0
    X, y = X[keep], y[keep]

    # One shared permutation keeps every sample paired with its target.
    rng = np.random if seed is None else np.random.RandomState(seed)
    order = rng.permutation(len(X))
    X, y = X[order], y[order]

    # int() guards against round() returning a float-like for NumPy scalars.
    n_val = int(round(len(X) * val_frac))
    return X[n_val:], y[n_val:], X[:n_val], y[:n_val]

In [6]:
# Split the loaded data into training and validation sets.
# NOTE: X and y are rebound to the training portion, so re-running this cell
# without reloading the data would split the already-reduced set again.
X, y, X_val, y_val = trainValSplit(X, y)

In [7]:
# Convert the NumPy splits into float32 tensors for PyTorch.
# Targets get a trailing axis inserted at position 1 (column layout).
y = torch.from_numpy(y).float().unsqueeze(1)
y_val = torch.from_numpy(y_val).float().unsqueeze(1)
# Features: every sample is flattened into a single row.
X = torch.from_numpy(X.reshape(len(X), -1)).float()
X_val = torch.from_numpy(X_val.reshape(len(X_val), -1)).float()

In [8]:
# Peek at training targets at indices 1-9 (the slice skips index 0):
print(y[1:10])

tensor([[220.],
        [196.],
        [221.],
        [234.],
        [214.],
        [199.],
        [199.],
        [191.],
        [193.]])


In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

class Net(torch.nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear output.

    Layer widths are ``n_feature -> n_hidden1 -> n_hidden2 -> n_output``.

    Args:
        n_feature: Width of each (flattened) input row.
        n_output: Width of the output layer.
        n_hidden1: Units in the first hidden layer. Defaults to 500.
        n_hidden2: Units in the second hidden layer. Defaults to 100.
        use_dropout: When True, apply dropout to the input (p=0.5) and after
            each hidden activation (p=0.4, then p=0.25). Defaults to False,
            in which case the dropout modules are registered but never
            applied.
    """

    def __init__(self, n_feature, n_output, n_hidden1=500, n_hidden2=100,
                 use_dropout=False):
        super(Net, self).__init__()

        # layer 1: fully connected, n_hidden1 units
        self.lin1 = nn.Linear(n_feature, n_hidden1)

        # layer 2: fully connected, n_hidden2 units
        self.lin2 = nn.Linear(n_hidden1, n_hidden2)

        # layer 3: fully connected output layer, n_output units
        self.lin3 = nn.Linear(n_hidden2, n_output)

        # Dropout modules are always created so the module list (and the
        # printed architecture) stays the same whether or not they are used.
        self.drop1 = nn.Dropout(0.5)
        self.drop2 = nn.Dropout(0.4)
        self.drop3 = nn.Dropout(0.25)
        self.use_dropout = use_dropout

    def forward(self, x):
        """Return the network output for a batch of input rows ``x``."""
        if self.use_dropout:
            x = self.drop1(x)
        x = F.relu(self.lin1(x))
        if self.use_dropout:
            x = self.drop2(x)
        x = F.relu(self.lin2(x))
        if self.use_dropout:
            x = self.drop3(x)
        # No activation on the output layer: raw linear values are returned.
        return self.lin3(x)

# The input width is read from the flattened feature matrix instead of being
# hard-coded, so the model keeps matching the data if its shape changes.
net = Net(n_feature=X.shape[1], n_output=1)     # define the network
print(net)  # net architecture

optimizer = torch.optim.SGD(net.parameters(), lr=0.000001)
loss_func = torch.nn.MSELoss()  # mean squared error, for regression

n_steps = 5000    # number of full-batch gradient steps
log_every = 50    # report train/validation loss every this many steps

for t in range(n_steps):
    prediction = net(X)     # forward pass on the whole training set

    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

    optimizer.zero_grad()   # clear gradients left over from the last step
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    if t % log_every == 0:
        # Validation loss is only needed when it is reported, so the extra
        # forward pass is skipped on the other steps. It is taken after the
        # optimizer step, while the training loss printed below is the one
        # computed before that step.
        with torch.no_grad():
            pred_val = net(X_val)
            loss_val = loss_func(pred_val, y_val)

        print("-----")
        print(t)
        print('Loss=%.4f' % loss.item())
        print('Val Loss=%.4f' % loss_val.item())

# Final validation pass so pred_val / loss_val describe the fully trained
# weights once the loop has finished.
with torch.no_grad():
    pred_val = net(X_val)
    loss_val = loss_func(pred_val, y_val)

Net(
  (lin1): Linear(in_features=1512, out_features=500, bias=True)
  (lin2): Linear(in_features=500, out_features=100, bias=True)
  (lin3): Linear(in_features=100, out_features=1, bias=True)
  (drop1): Dropout(p=0.5)
  (drop2): Dropout(p=0.4)
  (drop3): Dropout(p=0.25)
)
Loss=35566.2539
Val Loss=18328.8203
Loss=1673.8973
Val Loss=2457.8010
Loss=879.7480
Val Loss=1054.4308
Loss=700.6231
Val Loss=799.0931
Loss=613.9303
Val Loss=683.0653
Loss=564.8362
Val Loss=621.2802
Loss=529.2913
Val Loss=577.1072
Loss=495.0400
Val Loss=534.9725
Loss=468.7276
Val Loss=503.0815
Loss=443.9324
Val Loss=473.0127
Loss=426.0743
Val Loss=451.5848
Loss=406.8761
Val Loss=428.6893
Loss=389.2118
Val Loss=407.7606
Loss=376.5348
Val Loss=393.1826
Loss=366.7103
Val Loss=382.3437
Loss=354.8146
Val Loss=368.7314
Loss=344.3857
Val Loss=357.1766
Loss=338.6741
Val Loss=351.2607
Loss=333.6332
Val Loss=346.2420
Loss=326.8127
Val Loss=338.7486
Loss=320.1507
Val Loss=331.6693
Loss=313.5034
Val Loss=324.6197
Loss=307.1900
V