In [1]:
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import torch.nn as nn
import torch.nn.functional as functional
from torch.autograd import Variable

## Test - no user info

In [3]:
# Item-only test features for the baseline model.
# iloc[:, 1:] discards the CSV's first column -- presumably a saved index; TODO confirm.
tmp = pd.read_csv('../../../data/input_formated/final/x_item_test.csv').iloc[:,1:]
category_columns = [u'category_Action', u'category_Adventure',
       u'category_Animation', u'category_Children\'s', u'category_Comedy',
       u'category_Crime', u'category_Documentary', u'category_Drama',
       u'category_Fantasy', u'category_Film-Noir', u'category_Horror',
       u'category_Musical', u'category_Mystery', u'category_Romance',
       u'category_Sci-Fi', u'category_Thriller', u'category_War',
       u'category_Western']
# movie_id takes part in the de-duplication only; the trailing [:, 1:] strips
# it again so the matrix holds just the category columns.
item_rows = tmp[[u'movie_id'] + category_columns].drop_duplicates()
x_test_0 = np.array(item_rows, dtype = np.float32)[:, 1:]
# Parenthesised single-argument print gives the same output on Python 2 and 3.
print(x_test_0.shape)

(742, 18)


In [4]:
# Targets for the baseline model: the mean_rate of each distinct
# (movie_id, mean_rate) pair, as a float32 column vector.
# iloc[:, 1:] discards the CSV's first column -- presumably a saved index; TODO confirm.
# NOTE(review): row order is assumed to line up with x_test_0, which is
# de-duplicated separately from a different file -- confirm.
tmp = pd.read_csv('../../../data/input_formated/final/y_test.csv').iloc[:,1:]
rating_rows = tmp[['movie_id', 'mean_rate']].drop_duplicates()
# Drop movie_id (column 0) and keep mean_rate as shape (n, 1).
y_test_0 = np.array(rating_rows, dtype = np.float32)[:, 1:].reshape((-1, 1))
# Parenthesised single-argument print gives the same output on Python 2 and 3.
print(y_test_0.shape)

(742, 1)


In [5]:
# Baseline network: one sigmoid hidden layer, then a batch-normalised linear output.
class ANN_1hidden(nn.Module):
    """Feed-forward network: Linear -> Sigmoid -> Linear -> BatchNorm1d.

    Args:
        input_size: number of input features.
        hidden_size: width of the hidden layer.
        output_size: number of output units.

    The attribute names (fc1, activate, fc2, bn) are what ``forward`` looks
    up, so they must stay in sync with any stored model that is restored
    against this class.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(ANN_1hidden, self).__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.activate = nn.Sigmoid()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        # The second positional argument of BatchNorm1d is `eps`, so the
        # original call set eps = output_size; spelled out here, value unchanged.
        # NOTE(review): an eps this large looks unintentional -- confirm
        # against the training code before changing it.
        self.bn = nn.BatchNorm1d(num_features=output_size, eps=output_size)

    def forward(self, x):
        """Return the batch-normalised output for input batch ``x``."""
        hidden = self.activate(self.fc1(x))
        return self.bn(self.fc2(hidden))

In [6]:
def rmse(y, y_hat):
    """Root mean squared error between two tensors.

    The squared element-wise difference of ``y`` and ``y_hat`` is averaged
    over all elements and the square root of that mean is returned.
    """
    squared_error = (y - y_hat).pow(2)
    mean_squared_error = torch.mean(squared_error)
    return torch.sqrt(mean_squared_error)

In [7]:
# Restore the baseline model from its checkpoint file.
# NOTE(review): torch.load unpickles Python objects -- only load files from a trusted source.
# Presumably the file holds a whole pickled ANN_1hidden instance, which is why
# the class is defined before this cell -- TODO confirm.
model = torch.load('baseline.pth')

In [8]:
# Score the baseline model on the de-duplicated item features.
inputs = Variable(torch.from_numpy(x_test_0))
# Predictions and targets are both multiplied by 5 before the error is taken.
# `inputs` is reused here instead of building the same tensor a second time.
predict = model(inputs) * 5
# y_test_0 is already float32, so scaling it needs no extra cast.
targets = Variable(torch.from_numpy(y_test_0 * 5))
# rmse is symmetric in its arguments, so the (predict, targets) order is harmless.
# .data[0] pulls the Python float out of the one-element result.
# NOTE(review): on PyTorch >= 0.4 this would be written `.item()`.
baseline = rmse(predict, targets).sum().data[0]
baseline

0.6170470714569092

### All ratings (item features only)

In [9]:
class ANN_1input(nn.Module):
    """Single-input network: Linear -> Sigmoid -> Linear -> BatchNorm1d -> Sigmoid.

    Args:
        input_0_size: number of input features.
        hidden_size: width of the hidden layer.
        output_size: number of output units.

    The final sigmoid keeps every output strictly between 0 and 1.
    """

    def __init__(self, input_0_size, hidden_size, output_size):
        super(ANN_1input, self).__init__()
        self.fc0 = nn.Linear(in_features=input_0_size, out_features=hidden_size)
        self.activate0 = nn.Sigmoid()
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)
        # The second positional argument of BatchNorm1d is `eps`, so the
        # original call set eps = output_size; spelled out here, value unchanged.
        # NOTE(review): an eps this large looks unintentional -- confirm
        # against the training code before changing it.
        self.bn = nn.BatchNorm1d(num_features=output_size, eps=output_size)
        self.activate = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-squashed, batch-normalised output for ``x``."""
        hidden = self.activate0(self.fc0(x))
        normalised = self.bn(self.fc(hidden))
        return self.activate(normalised)

In [10]:
def test_withoutuser(model, x_0, y):
    inputs_0 = Variable(torch.from_numpy(np.array(x_0, dtype = np.float32)))
    outputs = model(inputs_0).data[:,0]
    tmp = y
    tmp['output'] = outputs
    target = np.array(tmp.groupby('movie_id')['mean_rate'].mean().reset_index() * 5, dtype = np.float32)[:, 1].reshape((-1, 1))
    output = np.array(tmp.groupby(['movie_id'])['output'].mean().reset_index() * 5, dtype = np.float32)[: ,1].reshape((-1,1))
    return np.sqrt(np.mean((target - output) * (target - output)))

In [11]:
# Training split: item features, user features and the rating table.
# iloc[:, 1:] discards each CSV's first column -- presumably a saved index; TODO confirm.
x_item_train = pd.read_csv('../../../data/input_formated/final/x_item_train.csv').iloc[:, 1:]
x_user_train = pd.read_csv('../../../data/input_formated/final/x_user_train.csv').iloc[:, 1:]
y_train = pd.read_csv('../../../data/input_formated/final/y_train.csv').iloc[:, 1:]


In [12]:
# Test split: item features, user features and the rating table.
# iloc[:, 1:] discards each CSV's first column -- presumably a saved index; TODO confirm.
x_item_test = pd.read_csv('../../../data/input_formated/final/x_item_test.csv').iloc[:, 1:]
x_user_test = pd.read_csv('../../../data/input_formated/final/x_user_test.csv').iloc[:, 1:]
y_test = pd.read_csv('../../../data/input_formated/final/y_test.csv').iloc[:, 1:]


In [13]:
# Restore the item-only model from its checkpoint file.
# NOTE(review): torch.load unpickles Python objects -- only load files from a trusted source.
# Presumably a whole pickled ANN_1input instance -- TODO confirm.
model_withoutuser = torch.load('final_without-user/ex-1_70.pth')

In [14]:
# Per-movie RMSE of the item-only model on the training split.
# iloc[:, 2:] feeds the model every item column after the first two
# (presumably identifier columns -- TODO confirm).
test_withoutuser(model_withoutuser, x_item_train.iloc[:, 2:], y_train)

0.62964219

In [15]:
# Per-movie RMSE of the item-only model on the test split; kept in `nouser`
# for the comparisons at the end of the notebook.
nouser = test_withoutuser(model_withoutuser, x_item_test.iloc[:, 2:], y_test)
nouser

0.59877807

## Test - With User

In [16]:
def test_withuser(model, x_0, x_1, y):
    inputs_0 = Variable(torch.from_numpy(np.array(x_0, dtype = np.float32)))
    inputs_1 = Variable(torch.from_numpy(np.array(x_1, dtype = np.float32)))
    outputs = model(inputs_0, inputs_1).data[:,0]
    tmp = y
    tmp['output'] = outputs
    target = np.array(tmp.groupby('movie_id')['mean_rate'].mean().reset_index() * 5, dtype = np.float32)[:, 1].reshape((-1, 1))
    output = np.array(tmp.groupby(['movie_id'])['output'].mean().reset_index() * 5, dtype = np.float32)[: ,1].reshape((-1,1))
    return np.sqrt(np.mean((target - output) * (target - output)))

In [17]:
class ANN_2input(nn.Module):
    """Two-branch network whose branches are summed before the output head.

    Each input goes through its own Linear -> Sigmoid branch and its own
    projection to ``merge_size``. The two projections are added,
    batch-normalised and passed through a sigmoid, then fed to a
    Linear -> BatchNorm1d -> Sigmoid output head.

    Args:
        input_0_size: feature count of the first input.
        input_1_size: feature count of the second input.
        hidden_size: width of each branch's hidden layer.
        merge_size: width of the merged representation.
        output_size: number of output units.
    """

    def __init__(self, input_0_size, input_1_size, hidden_size, merge_size, output_size):
        super(ANN_2input, self).__init__()
        # Per-input branches.
        self.fc0 = nn.Linear(in_features=input_0_size, out_features=hidden_size)
        self.fc1 = nn.Linear(in_features=input_1_size, out_features=hidden_size)
        self.activate0 = nn.Sigmoid()
        self.activate1 = nn.Sigmoid()
        # Projections into the shared merge space.
        self.fc_merge0 = nn.Linear(in_features=hidden_size, out_features=merge_size)
        self.fc_merge1 = nn.Linear(in_features=hidden_size, out_features=merge_size)
        # The second positional argument of BatchNorm1d is `eps`, so the
        # original calls set eps = merge_size and eps = output_size; spelled
        # out here, values unchanged.
        # NOTE(review): eps values this large look unintentional -- confirm
        # against the training code before changing them.
        self.bn_merge = nn.BatchNorm1d(num_features=merge_size, eps=merge_size)
        self.activate_merge = nn.Sigmoid()
        # Output head.
        self.fc = nn.Linear(in_features=merge_size, out_features=output_size)
        self.bn = nn.BatchNorm1d(num_features=output_size, eps=output_size)
        self.activate = nn.Sigmoid()

    def forward(self, x_0, x_1):
        """Return the sigmoid output for the input pair ``(x_0, x_1)``."""
        branch_0 = self.activate0(self.fc0(x_0))
        branch_1 = self.activate1(self.fc1(x_1))
        summed = self.fc_merge0(branch_0) + self.fc_merge1(branch_1)
        merged = self.activate_merge(self.bn_merge(summed))
        return self.activate(self.bn(self.fc(merged)))

In [18]:
# Restore the item+user model from its checkpoint file.
# NOTE(review): torch.load unpickles Python objects -- only load files from a trusted source.
# Presumably a whole pickled ANN_2input instance -- TODO confirm.
model_withuser = torch.load('final_with-user/ex-1_170.pth')

In [19]:
# Per-movie RMSE of the item+user model on the training split.
# iloc[:, 2:] feeds the model every column after the first two of each table
# (presumably identifier columns -- TODO confirm).
test_withuser(model_withuser, x_item_train.iloc[:, 2:], x_user_train.iloc[:, 2:], y_train)

0.62205952

In [20]:
# Per-movie RMSE of the item+user model on the test split; kept in `withuser`
# for the comparisons at the end of the notebook.
withuser = test_withuser(model_withuser, x_item_test.iloc[:, 2:], x_user_test.iloc[:, 2:], y_test)
withuser

0.5875532

In [27]:
# Test RMSE for the ex-2 checkpoints numbered 740 to 960 in steps of 10.
# A separate name holds each loaded checkpoint so the sweep does not replace
# `model_withuser` (the ex-1_170 checkpoint loaded earlier in the notebook).
# NOTE(review): torch.load unpickles Python objects -- only load files from a trusted source.
for i in range(740, 970, 10):
    checkpoint_model = torch.load('final_with-user/ex-2_%d.pth' % i)
    # Parenthesised single-argument print gives the same output on Python 2 and 3.
    print(test_withuser(checkpoint_model, x_item_test.iloc[:, 2:], x_user_test.iloc[:, 2:], y_test))

0.592821
0.592196
0.591634
0.591132
0.590687
0.590295
0.589956
0.589669
0.589434
0.589252
0.589124
0.589053
0.589039
0.589084
0.589186
0.589345
0.589555
0.589813
0.59012
0.590474
0.590875
0.591319
0.591801


In [22]:
# Percentage reduction in test RMSE of the item+user model relative to the item-only model.
100 - withuser * 100.0 / nouser

1.8746287638382029

In [23]:
# Percentage reduction in test RMSE of the item-only model relative to the baseline.
100 - nouser * 100.0 / baseline

2.960714552044891

In [24]:
# Percentage reduction in test RMSE of the item+user model relative to the baseline.
100 - withuser * 100.0 / baseline

4.7798409092753218

In [25]:
# The three test RMSE values side by side: baseline, item-only, item+user.
# %s formatting applies str() to each value, like the Python 2 print
# statement did, and this form also runs on Python 3.
print('%s %s %s' % (baseline, nouser, withuser))

0.617047071457 0.598778 0.587553


In [26]:
# Absolute RMSE improvements: item-only -> item+user, then baseline -> item-only.
# %s formatting applies str() to each value, like the Python 2 print
# statement did, and this form also runs on Python 3.
print('%s %s' % (nouser - withuser, baseline - nouser))

0.0112249 0.0182690024376
