In [None]:
# Todo, make a function from the two codes for loops, they are very similar

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel as parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable


In [2]:
# Importing the movies.dat dataset, user.dat dataset, rating.dat dataset
movies_df = pd.read_csv("../archive/ml-1m/movies.dat", 
                           sep="::", header=None, engine="python", encoding="latin-1")

users_df = pd.read_csv("../archive/ml-1m/users.dat", 
                           sep="::", header=None, engine="python", encoding="latin-1")

ratings_df = pd.read_csv("../archive/ml-1m/ratings.dat", 
                           sep="::", header=None, engine="python", encoding="latin-1")

# Preparing the training set and testing set
training_set = pd.read_csv("../archive/ml-100k/u1.base", delimiter="\t")
training_set_array = np.array(training_set, dtype='int')

testing_set = pd.read_csv("../archive/ml-100k/u1.test", delimiter="\t")
testing_set_array = np.array(testing_set, dtype='int')

In [3]:
# Getting the total number of users and movies
# The last user is user 943 and the last movie is the movie 1330

# Performing the double max to get the max from the training set and testing set
nb_users = int(max(max(training_set_array[:, 0]), max(testing_set_array[:,0]))) # All rows for column 0
nb_movies = int(max(max(training_set_array[:, 1]), max(testing_set_array[:, 1])))

In [4]:
# Converting the data into an array with users in rows and movies in columns
# We will have to create a list with the sublist being every user with info. on their ratings for each movies

def convert(data):
    new_data = []
    for id_users in range(1, nb_users+1):
        id_movies = data[:, 1][data[:,0] == id_users] # the 2nd column represents the movies and only for the i user
        id_ratings = data[:, 2][data[:,0] == id_users] # the 3rd column represents the rating for the i user
        
        # Initilializing an array with all the possible movies
        ratings = np.zeros(nb_movies) 
        
        # If we find the movie, we can replace the rating to the 0
        ratings[id_movies-1] = id_ratings
        
        # Creating a large list that contains all the information
        new_data.append(list(ratings))
    return new_data 

In [5]:
# Applying the convert function to the training and testing set

training_set_array = convert(training_set_array)
testing_set_array = convert(testing_set_array)

In [6]:
# Converting the arrays into Tensors
training_set_array = torch.FloatTensor(training_set_array)
testing_set_array = torch.FloatTensor(testing_set_array)

In [7]:
# Creating the archiecture of the neural network (stacked auto encoders)
class SAE(nn.Module):
    def __init__(self, ):
        
        # We are using the superfunction to get the inheritance from nn.Module class
        super(SAE, self).__init__()
        
        # Keep in mind, the 20 nodes that are used the first output layer, was calc. by the author trail and error
        # One of the 20 nodes could be a horror movies genre, thus, a nn could learn that a user loves horror movies
        # When the nn attempts to predict movies, if the user has a favorable look to horror movies
        # so the nn at this step will be activated when a horror movies shows up
        
        # Also, every variabe must be connected with one another. As we see, the # of inputs connect to 
        # the # outputs in the next layer (common sense?)
        
        # As we go the nn, we are finding a more granular type of feature based from the previous layer
        
        # Also, for the third layer, I mentioned I am decoding. Meaning, I am recreating the autoencoder 
        # to provide the # of movies predicted, thus, it must be symmetrical
        
        # The activation function works as a sign of whether the nn should be activated based on the input from 
        # the user. E.g a person must like a certain amount of horror movies before the nn is activated
        
        self.fc1 = nn.Linear(nb_movies, 20) # 20 nodes are in the first hidden layer and nb_movies are used to find the predictions 
        self.fc2 = nn.Linear(20, 10) # 20 is the input, and 10 is the output from the hidden layer
        self.fc3 = nn.Linear(10, 20) # 20 is the output, we are starting to decode
        self.fc4 = nn.Linear(20, nb_movies) # the last full connection
        self.activation = nn.Sigmoid() # activated with with a sigmoid function
        
        
    # forward represent forward propogation
    # the activation function is used through each step of the layer and checking which nodes should be activated
    # based by the response of the user
    
    # Also noticed how the x is updated. Well, as proceed in the nn, we are updating what the rating of the movies
    # and the final x is the our prediction of the movies based on what the nn learned
        
    def forward(self, x): # x is the input vector of features (with all the ratings of movies for a specific user)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
    
sae = SAE()       

- Confusion:
    - The total number of movies is inserted into the autoencoder... are we rating every single of these movies? 
    - Of course dummy. We have to figure what our model will predict for the movies we insert based on the algo that is build on the model. Thus, we need to insert every one of these movies, make a prediciton, and check how well our models does at prediction!

In [8]:
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr=0.01, weight_decay=0.5)

In [10]:
# Training the SAE 
nb_epoch = 200
for epoch in range(1, nb_epoch+1):
    train_loss = 0
    s = 0. # The number of user that rated at least one movie
    
    for id_user in range(nb_users):
        
        # We have to an additional dimension (the batch) 
        # Using the Variable function, we can create an addtional dim
        # The 0 is the index that this new dimenison will go in
        input_from_user = Variable(training_set_array[id_user]).unsqueeze(0)
        
        # Cloning the input
        target = input_from_user.clone()
        
        # We would only like to user that have rated movies...
        # Checking the movies rated in target (a copy of the movies that user rated)
        if torch.sum(target.data > 0) > 0:
            
            # Next, we next to get a vector of the predicted ratings
            output = sae(input_from_user)
            
            # Even though it seems common sense, we do not want to compute the gradient descent of the target
            # we only need the gradient descent of the input variable
            # this will make the code run quicker and more efficient
            target.requires_grad = False
            
            # We need to reassure the code that we will have the movies that were not rated as 0 
            # everything will be computed
            output[target == 0] = 0
            
            # Computing the loss function
            loss = criterion(output, target)
            
            # This represents the error average from the movies that were rated
            mean_corrector = nb_movies/float(torch.sum(target.data>0) + 1e-10)
            
            # This could either be backward or forward, depending on the goal for the loss function
            # in our case, we want to decrease the loss function, hence, we use backward
            loss.backward()
            
            # Updating the loss function, the index 0 contains the error 
            # We also need to multiply it with the mean_corrector (for adjustment)
            # np.sqrt for the one-degree loss
            train_loss += np.sqrt(loss.data[0]*mean_corrector)
            
            # Keep tracking of the users that rated the movie
            s += 1.
            
            # Need to use the optimizer to find the best way to update the weights
            # It decides the intensity of the amnt. that the weight will be updated
            optimizer.step()
            
    # The train loss needs to the average for all the movies rated
    print('epoch: {0} loss: {1}'.format(str(epoch), str(train_loss/s)))
    
    # We should a loss function less than 1

epoch: 1 loss: 1.7721426468278383
epoch: 2 loss: 1.0966669397249384
epoch: 3 loss: 1.0533056132552618
epoch: 4 loss: 1.0382905200946646
epoch: 5 loss: 1.0309276614943297
epoch: 6 loss: 1.0267279783142178
epoch: 7 loss: 1.0241626594370508
epoch: 8 loss: 1.0220179193553465
epoch: 9 loss: 1.0208516102313108
epoch: 10 loss: 1.019618077291776
epoch: 11 loss: 1.018904396326807
epoch: 12 loss: 1.018341789953803
epoch: 13 loss: 1.0180642615068993
epoch: 14 loss: 1.0176610987716836
epoch: 15 loss: 1.0172370566377509
epoch: 16 loss: 1.0170501330414188
epoch: 17 loss: 1.0165847879140542
epoch: 18 loss: 1.0165423313586974
epoch: 19 loss: 1.0163735627745438
epoch: 20 loss: 1.0161017672341948
epoch: 21 loss: 1.0162266738700232
epoch: 22 loss: 1.016090570773161
epoch: 23 loss: 1.0158842624685263
epoch: 24 loss: 1.0160723270439764
epoch: 25 loss: 1.0155523835615834
epoch: 26 loss: 1.015781173921301
epoch: 27 loss: 1.0155908856347717
epoch: 28 loss: 1.014939148401639
epoch: 29 loss: 1.0122200408728812


In [25]:
# Testing the SAE 
# Our goal is to predict whether a user will or will not like movies they have not watched
# We then use the test (sometime in the future) where we get the actual ratings
test_loss = 0
s = 0.
for id_user in range(nb_users):

    input_from_user = Variable(training_set_array[id_user]).unsqueeze(0)
    target = Variable(testing_set_array[id_user])

    if torch.sum(target.data > 0) > 0:
        output = sae(input_from_user)
        target.requires_grad = False
        output[target == 0] = 0

        # Computing the loss function
        loss = criterion(output, target)

        # This represents the error average from the movies that were rated
        mean_corrector = nb_movies/float(torch.sum(target.data>0) + 1e-10)
        test_loss += np.sqrt(loss.data[0]*mean_corrector)

        s += 1.

# The test loss needs to the average for all the movies rated
print('test loss: {0}'.format(str(test_loss/s)))

# We should a loss function less than 1

# What does it mean? That if you were to predict the movie some rating (say 3 starts)... 
# The recommendaer would have predicted the rating btw 3 and 5 stars

test loss: 0.9474370465403681


In [None]:
def run_autoencoder():
    
    

In [None]:
# Todo, make a function from the two codes for loops, they are very similar