In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [3]:
movies = pd.read_csv('ml-1m/movies.dat', sep='::', header = None, engine='python', encoding = 'latin-1')
user = pd.read_csv('ml-1m/users.dat', sep='::', header = None, engine='python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header = None, engine='python', encoding = 'latin-1')

In [4]:
# Creating training and test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t')
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t')
test_set = np.array(test_set, dtype='int')

In [5]:
# Obtaining the total number of users and movies to create 2d matrix that contains the movies corresponding to each user

# Largest id for customer and movie might be on training or test set, thus this verification
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

In [6]:
# Converting 2d array where x axis = users (observations) and y axis = movies (features), which is what the RBM expects
def convert(data):
#     Using lists for pytorch
    new_data = []
    for id_users in range(1,nb_users + 1):
#         Getting all the movies and their ratings per user, and adding it to new_data
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
#         If user did not watch movie, rating must be 0
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data

In [7]:
training_set = convert(training_set)
test_set = convert(test_set)

In [8]:
# Converting lists to torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [9]:
# Creating model for SAE
class SAE(nn.Module):
    def __init__(self, ):
#         Inheritance from Module class
        super(SAE, self).__init__()
    
#         First full connection (layer)
#         Number of features (input values) == nb_movies, number of hidden nodes == 20 (tested diff values)
        self.fc1 = nn.Linear(nb_movies, 20)
#       Second full connection
        self.fc2 = nn.Linear(20, 10)
#       Third full connection (where decoding starts as its last layer before output layer)
        self.fc3 = nn.Linear(10, 20)
#       Last full connection (output layer, # input nodes == # output nodes)
        self.fc4 = nn.Linear(20, nb_movies)
#      Sigmoid performed better than relu
        self.activation = nn.Sigmoid()
    
#     For encoding and decoding data, and applying activation functions at each full connection
#     Returns vector of predicted ratings 
#     X == input data of features vector
    def forward(self, x):
#         Applying linear transformation to input vector and then activating function
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x

In [10]:
sae = SAE()
# Using mean square error for loss
criterion = nn.MSELoss()
# Performed better with RMSProp rather than Adam
optimizer = optim.RMSprop(sae.parameters(),lr = 0.01, weight_decay = 0.5)

In [13]:
# Training model
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_users in range(nb_users):
#         Creating the 2d vector needed for pytorch (like adding batch #)
        input = Variable(training_set[id_users]).unsqueeze(0)
        target = input.clone()
#         To save memory exclude customers that do not rate movies, for those who do have ratings call the forward fucntion to train model
        if torch.sum(target.data > 0) > 0:
            output = sae(input) 
#             Do not compute gradient based on the target (to reduce computations)
            target.require_grad = False
#            Ensure that movies that were not rated stay with 0 value 
            output[target == 0] = 0
            loss = criterion(output, target)
#         Mean corrector is used to ensure that loss is relative to each customer (total movies/movies rated by customer)
#         Added 1e-10 (small number) to ensure that denominator is never 0 and avoid infinite loops
            mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
#           Checking if weights should be increased or decreased (w/backward method) and then getting state of the art error (1 degree loss)
            loss.backward()
            train_loss += np.sqrt(loss.data * mean_corrector)
            s += 1.
#             Computing the intensity of weight's update (amount)
            optimizer.step()
#     Loss is be mean difference in ratings from predicted to output
    print('Epoch: ' + str(epoch) + ' Loss: ' + str(train_loss / s))

Epoch: 1 Loss: tensor(1.7672)
Epoch: 2 Loss: tensor(1.0966)
Epoch: 3 Loss: tensor(1.0535)
Epoch: 4 Loss: tensor(1.0382)
Epoch: 5 Loss: tensor(1.0309)
Epoch: 6 Loss: tensor(1.0267)
Epoch: 7 Loss: tensor(1.0237)
Epoch: 8 Loss: tensor(1.0220)
Epoch: 9 Loss: tensor(1.0206)
Epoch: 10 Loss: tensor(1.0195)
Epoch: 11 Loss: tensor(1.0188)
Epoch: 12 Loss: tensor(1.0185)
Epoch: 13 Loss: tensor(1.0178)
Epoch: 14 Loss: tensor(1.0176)
Epoch: 15 Loss: tensor(1.0173)
Epoch: 16 Loss: tensor(1.0169)
Epoch: 17 Loss: tensor(1.0165)
Epoch: 18 Loss: tensor(1.0165)
Epoch: 19 Loss: tensor(1.0164)
Epoch: 20 Loss: tensor(1.0162)
Epoch: 21 Loss: tensor(1.0162)
Epoch: 22 Loss: tensor(1.0159)
Epoch: 23 Loss: tensor(1.0156)
Epoch: 24 Loss: tensor(1.0160)
Epoch: 25 Loss: tensor(1.0157)
Epoch: 26 Loss: tensor(1.0156)
Epoch: 27 Loss: tensor(1.0153)
Epoch: 28 Loss: tensor(1.0149)
Epoch: 29 Loss: tensor(1.0127)
Epoch: 30 Loss: tensor(1.0113)
Epoch: 31 Loss: tensor(1.0095)
Epoch: 32 Loss: tensor(1.0063)
Epoch: 33 Loss: t

In [14]:
# Testing SAE
test_loss = 0
s = 0.
for id_users in range(nb_users):
    input = Variable(training_set[id_users]).unsqueeze(0)
    target = Variable(test_set[id_users]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
        output = sae(input) 
        target.require_grad = False
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data * mean_corrector)
        s += 1.
# Loss of below 1 star
print('Loss: ' + str(test_loss / s))

Loss: tensor(0.9465)


tensor(0., grad_fn=<SelectBackward>)