# Collaborative Filtering with Neural Nets

In [2]:
# for data manipulation
import numpy as np
import pandas as pd
import os
import pickle

# use PyTorch for the neural-network model
import torch
import torch.nn as nn
from torch.autograd import Variable

## Read in data

In [5]:
# Load the (X, y) pair stored in the first pickle file of the data directory.
game_data_path = "data/neural_net_data/"

# os.listdir() returns entries in arbitrary order; sort so that "the first
# file" is the same one on every run and on every machine.
files = sorted(os.listdir(game_data_path))
if not files:
    raise FileNotFoundError("no data files found in " + game_data_path)

# NOTE: pickle.load() can execute arbitrary code while deserialising --
# only point this at files that come from a trusted source.
with open(os.path.join(game_data_path, files[0]), 'rb') as f:
    X, y = pickle.load(f)

In [41]:
# Display the dimensions of X.
# NOTE(review): the execution counters suggest this cell ran after the split
# cell, so the shape shown is likely that of the training portion -- confirm.
X.shape

(1004, 3, 504)

In [42]:
# Display the dimensions of y (one target per row of X).
# NOTE(review): likely evaluated after the split cell as well -- confirm.
y.shape

(1004,)

In [39]:
def trainValSplit(X, y, val_frac=0.2, rng=None):
    """Shuffle the samples and split them into training and validation sets.

    Samples whose target is not strictly positive are dropped before the
    split.

    Args:
        X: Array-like of samples; the first axis indexes samples.
        y: Array-like of targets, one per sample.
        val_frac: Fraction of the remaining samples held out for validation.
            Must lie in [0, 1]. Defaults to 0.2.
        rng: Optional object exposing ``permutation(n)`` (for example
            ``numpy.random.default_rng(seed)``) used for the shuffle. When
            None, NumPy's global random state is used.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val)`` of NumPy arrays.

    Raises:
        ValueError: If ``val_frac`` is outside [0, 1].
    """
    if not 0.0 <= val_frac <= 1.0:
        raise ValueError("val_frac must be between 0 and 1, got %r" % (val_frac,))

    X = np.asarray(X)
    y = np.asarray(y)

    # Keep only samples with a strictly positive target.
    keep = y > 0
    X, y = X[keep], y[keep]

    # Apply one shared permutation so samples and targets stay aligned.
    permute = np.random.permutation if rng is None else rng.permutation
    order = permute(len(X))
    X, y = X[order], y[order]

    # The first n_val shuffled rows become the validation set.
    n_val = int(round(len(X) * val_frac))
    return X[n_val:], y[n_val:], X[:n_val], y[:n_val]

In [40]:
# Overwrites X and y with the training portion; X_val and y_val receive the
# held-out rows. Because X and y are rebound, re-running this cell splits the
# already-reduced training data again.
X, y, X_val, y_val = trainValSplit(X, y)

In [43]:
# Convert the NumPy splits to float32 tensors: targets gain a trailing column
# axis, and every sample is flattened into a single feature row.
y = torch.from_numpy(y[:, None]).float()
X = torch.from_numpy(X.reshape(len(X), -1)).float()
y_val = torch.from_numpy(y_val[:, None]).float()
X_val = torch.from_numpy(X_val.reshape(len(X_val), -1)).float()

In [44]:
# Peek at nine training targets (rows 1 through 9; row 0 is not shown).
print(y[1:10])

tensor([[213.],
        [210.],
        [202.],
        [191.],
        [247.],
        [207.],
        [167.],
        [229.],
        [179.]])


In [106]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

class Net(torch.nn.Module):
    """Fully connected regression network with two ReLU hidden layers.

    Args:
        n_feature: Length of each flattened input vector.
        n_output: Number of values predicted per sample.
        hidden1: Width of the first hidden layer. Defaults to 500.
        hidden2: Width of the second hidden layer. Defaults to 100.
        use_dropout: When True, ``drop1`` is applied to the input and
            ``drop2`` / ``drop3`` after the first / second hidden activation.
            Defaults to False, in which case the dropout layers are
            constructed but never applied in ``forward``.
    """

    def __init__(self, n_feature, n_output, hidden1=500, hidden2=100,
                 use_dropout=False):
        super().__init__()
        self.use_dropout = use_dropout

        # layer 1: fully connected, n_feature -> hidden1
        self.lin1 = nn.Linear(n_feature, hidden1)

        # layer 2: fully connected, hidden1 -> hidden2
        self.lin2 = nn.Linear(hidden1, hidden2)

        # layer 3: fully connected output, hidden2 -> n_output
        self.lin3 = nn.Linear(hidden2, n_output)

        # Dropout layers are always registered (they appear in print(net));
        # they only take part in forward() when use_dropout is True.
        self.drop1 = nn.Dropout(0.5)
        self.drop2 = nn.Dropout(0.4)
        self.drop3 = nn.Dropout(0.25)

    def forward(self, x):
        """Return the network output for a batch ``x`` of input rows."""
        if self.use_dropout:
            x = self.drop1(x)
        x = F.relu(self.lin1(x))
        if self.use_dropout:
            x = self.drop2(x)
        x = F.relu(self.lin2(x))
        if self.use_dropout:
            x = self.drop3(x)
        # No activation on the output layer: this is a regression head.
        return self.lin3(x)

# Build the model. The input width is taken from the flattened feature matrix
# rather than hard-coded, so the cell keeps working if the data layout changes.
net = Net(n_feature=X.shape[1], n_output=1)
print(net)  # net architecture

optimizer = torch.optim.SGD(net.parameters(), lr=0.000001)
loss_func = torch.nn.MSELoss()  # mean squared error, for regression

n_steps = 500
log_every = 5

# Full-batch gradient descent: every step uses the whole training set.
for t in range(n_steps):
    net.train()
    prediction = net(X)
    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

    optimizer.zero_grad()   # clear gradients left over from the previous step
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    if t % log_every == 0:
        # Validation is only needed when reporting. Run it in eval mode and
        # without gradient tracking so it cannot affect training.
        net.eval()
        with torch.no_grad():
            loss_val = loss_func(net(X_val), y_val)

        # `loss` is the training loss measured before this step's update.
        print('Loss=%.4f' % loss.item())
        print('Val Loss=%.4f' % loss_val.item())

Net(
  (lin1): Linear(in_features=1512, out_features=500, bias=True)
  (lin2): Linear(in_features=500, out_features=100, bias=True)
  (lin3): Linear(in_features=100, out_features=1, bias=True)
  (drop1): Dropout(p=0.5)
  (drop2): Dropout(p=0.4)
  (drop3): Dropout(p=0.25)
)
Loss=40131.9453
Loss=536.7881
Loss=495.8045
Loss=489.6784
Loss=483.7281
Loss=477.7469
Loss=471.0899
Loss=465.7350
Loss=460.8159
Loss=456.0045
Loss=451.1470
Loss=446.2653
Loss=441.5219
Loss=436.9084
Loss=432.3907
Loss=427.9353
Loss=423.5046
Loss=419.0998
Loss=414.7785
Loss=410.5339
Loss=406.3308
Loss=402.1418
Loss=397.9559
Loss=393.7902
Loss=389.6584
Loss=385.5572
Loss=381.4409
Loss=377.2537
Loss=372.9801
Loss=368.7366
Loss=364.6370
Loss=360.6323
Loss=356.6897
Loss=352.7888
Loss=348.9171
Loss=345.0619
Loss=341.2221
Loss=337.3980
Loss=333.5872
Loss=329.7929
Loss=326.0143
Loss=322.2527
Loss=318.5057
Loss=314.7734
Loss=311.0550
Loss=307.3494
Loss=303.6575
Loss=299.9797
Loss=296.3138
Loss=292.6609
Loss=289.0222
Loss=285.3