In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
# Load the three MovieLens 1M tables (movies, users, ratings) in that order.
# A multi-character separator such as '::' is only supported by pandas'
# python engine; the files are read without treating any row as a header.
movies, user, ratings = (
    pd.read_csv(path, sep='::', header=None, engine='python', encoding='latin-1')
    for path in ('ml-1m/movies.dat', 'ml-1m/users.dat', 'ml-1m/ratings.dat')
)

In [3]:
# Creating training and test set
# The u1.base / u1.test splits are tab-separated and carry no header row, so
# header=None is required: without it pandas uses the first rating of each file
# as column names and that rating is silently dropped from the data.
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t', header=None)
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t', header=None)
test_set = np.array(test_set, dtype='int')

In [4]:
# Dimensions of the user x movie matrix built later: the highest user id
# (column 0) and the highest movie id (column 1) found in the data.
# The largest id can sit in either split, so both arrays are inspected.
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))

In [5]:
# Converting the ratings into a 2d structure where rows = users (observations)
# and columns = movies (features), which is the input layout of the autoencoder
def convert(data, n_users=None, n_movies=None):
    """Build one dense ratings row per user from a (user, movie, rating, ...) array.

    Parameters
    ----------
    data : numpy integer array
        Column 0 holds 1-based user ids, column 1 holds 1-based movie ids and
        column 2 holds the rating. Any further columns are ignored.
    n_users : int, optional
        Number of rows to produce (user ids 1..n_users). Defaults to the
        notebook-level ``nb_users``.
    n_movies : int, optional
        Length of every row (movie ids 1..n_movies). Defaults to the
        notebook-level ``nb_movies``.

    Returns
    -------
    list of list of float
        ``result[u - 1][m - 1]`` is the rating user ``u`` gave movie ``m``,
        or 0 when that user has no rating for the movie.
    """
    # Fall back to the notebook globals so existing convert(data) calls keep working
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies

    user_column = data[:, 0]
    # Using lists for pytorch
    new_data = []
    for id_users in range(1, n_users + 1):
        # Select this user's rows once and reuse the mask for movies and ratings
        rated_by_user = user_column == id_users
        id_movies = data[:, 1][rated_by_user]
        id_ratings = data[:, 2][rated_by_user]
        # If user did not watch movie, rating must be 0
        user_ratings = np.zeros(n_movies)
        # Movie ids are 1-based, row positions are 0-based
        user_ratings[id_movies - 1] = id_ratings
        new_data.append(list(user_ratings))
    return new_data

In [6]:
# Swap each raw ratings array for its per-user list-of-lists form.
# The names are rebound, so re-running this cell would pass the already
# converted lists back into convert; reload the data first if you need to.
training_set, test_set = map(convert, (training_set, test_set))

In [7]:
# Turn the nested Python lists into float tensors for PyTorch
training_set, test_set = (
    torch.FloatTensor(rows) for rows in (training_set, test_set)
)

In [8]:
# Creating model for SAE
class SAE(nn.Module):
    """Stacked autoencoder made of four fully connected layers.

    Layout: input -> hidden_units -> code_units -> hidden_units -> output,
    where the output has as many nodes as the input. A sigmoid follows each
    of the first three layers; the output layer is left linear.

    Parameters
    ----------
    nb_features : int, optional
        Width of the input (and output) vector. Defaults to the notebook-level
        ``nb_movies`` so that a bare ``SAE()`` keeps working.
    hidden_units : int, optional
        Nodes in the first and third layers (default 20).
    code_units : int, optional
        Nodes in the middle layer (default 10).
    """

    def __init__(self, nb_features=None, hidden_units=20, code_units=10):
        # Inheritance from Module class
        super(SAE, self).__init__()

        # Resolve the global only when the caller did not supply a size
        if nb_features is None:
            nb_features = nb_movies

        # First full connection: input features -> hidden nodes
        # (20 hidden nodes by default, chosen after testing different values)
        self.fc1 = nn.Linear(nb_features, hidden_units)
        # Second full connection
        self.fc2 = nn.Linear(hidden_units, code_units)
        # Third full connection (decoding starts here, last layer before the output layer)
        self.fc3 = nn.Linear(code_units, hidden_units)
        # Last full connection (output layer, # output nodes == # input nodes)
        self.fc4 = nn.Linear(hidden_units, nb_features)
        # Sigmoid performed better than relu
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Encode then decode ``x`` and return the vector of predicted ratings.

        ``x`` is the input feature vector (its last dimension must match the
        input width of ``fc1``).
        """
        # Linear transformation followed by the activation, layer by layer
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        # No activation on the output layer
        x = self.fc4(x)
        return x

In [10]:
# Using mean square error for loss
criterion = nn.MSELoss()
sae = SAE()
# Performed better with RMSProp rather than Adam
optimizer = optim.RMSprop(
    params=sae.parameters(),
    lr=0.01,
    weight_decay=0.5,
)

In [None]:
# Training model
