#AutoEncoders

##Downloading the dataset

###ML-100K

In [1]:
# !wget "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
# !unzip ml-100k.zip
# !ls

###ML-1M

In [2]:
# !wget "http://files.grouplens.org/datasets/movielens/ml-1m.zip"
# !unzip ml-1m.zip
# !ls

##Importing the libraries

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

## Importing the dataset


In [4]:
# We won't be using this dataset.
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

## Preparing the training set and the test set


In [5]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

## Getting the number of users and movies


In [6]:
nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))

## Converting the data into an array with users in lines and movies in columns


In [7]:
def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

## Converting the data into Torch tensors


In [8]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

## Creating the architecture of the Neural Network


In [12]:
#in order to make an autoencoder we need to define a lot of things
#first how many layers we want to have
#how many nodes in each layer
#activation function 
#optimization function
#module contains several classes and a library contains multiple modules
# we will take advantage of inheritance of the Pytorch library
#we will make a stacked autoencoder
#we will take the parent nn.Modules because we want all the variables and modules from the parent class
#stacked autoencoder -> many hidden layers -> many encodings
#in the parenthesis we will add the parent class
class SAE(nn.Module):
    def __init__(self, ): #we always need to define our init function. No need to add anything else other than
        #the self because we will take advantage of the methods and variables of inheritance
        #super will get the inheritance methods from parent class
        super(SAE, self).__init__() 
        #full connection between the input and the first hidden layer. Shorter vector than input vector
        #we need to use self to specify that fc1 is related to our autoencoders object
        #linear is inherited from nn.module. First parameter is number of features. second is the num 
        #of nodes in the first hidden layer. Based on expirement we choose 20!
        #these 20 nodes will represent some features that from unsupervised learning the AE detects
        #from the input vector that are liked from simillar people. ex oscar or an actor
        self.fc1 = nn.Linear(nb_movies, 20)
        #second full connection of our hidden layer. neurons of first hidden and 10 the nodes of the second hidden
        #it will detect more features but will be based the previous hidden layer
        self.fc2 = nn.Linear(20, 10)
        #since we are doing deep learning lets add a third hidden layer
        self.fc3 = nn.Linear(10, 20)
        #output layer with an output layer of the total movies we had in the input. 
        #in autoencoders we are reconstructing the input vector
        self.fc4 = nn.Linear(20, nb_movies)
        #activation function. we will define it activation. We need self. We tried rectifier and Sigmoid and
        #we got better activation with the latter
        #Sigmoid is taken from the parent class. 
        self.activation = nn.Sigmoid()
        #will proceed to the different encodings and ecodings and apply the activation functions inside
        #first argument is self. We need to put it every time and we need to use it in order to access our object
        #second argument is our input vector.
    def forward(self, x):
        #self represents our object!
        #we are activating the x in each layer 
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        #now we are decoding and not encoding and we dont need to do an activation function since its the last node
        x = self.fc4(x)
        return x
#create the object of this class. We have to use non capital letters for our object.
#since we didn't specify any arguments during our creation process we don't have to do it now.
sae = SAE()
#Define the criterion we will need to use after
#criterion for the loss function. Mean square error. Criterion will be an object of the class.
criterion = nn.MSELoss()
#stohastic gradient decent to lower the error after each epoch.
#we have a class for each optimizer. one class for RMSpror, one for adam etc.
#we did some expirement and the rmsprop was better for our model.
#first argument -> all the parameters of our autoencoders. the parameters about the number of hidden layers
#plus the amount of neurons each layer has and the activation function. Parameters that define the architecture of
#our model
#second argument ->is the learning rate. Experimental 0.01
#third argument -> weight decay = used to reduce the learning rate after a few epochs and that's in order to regulate
#the convergence. 0.5 based on experimenting, just like lr
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

## Training the SAE


In [15]:
#we need optimized code that saves the memory if we want to do a project with a lot of ratings etc
#define our number of epoch. Weights updated after epoch. Number is based on expirementing
nb_epoch = 200
#in each epoch we will loop into our all users and the ratings each user has.
for epoch in range(1, nb_epoch + 1): #+1 because upper bound is excluded.
  #init our train loss error
  train_loss = 0
  #init a counter. it will count the number of users that rated at least one movie.
  #we don't want to computate for users that didn't give rating for any movie. 0. = float.
  s = 0.
  for id_user in range(nb_users): #we just want the indexes which start from 0 to 942 so we are ok with just nb_users
    #for each user. we get the input vector that contains all the ratings 
    #take our training_set[id_user]. Pytorch cannot accept a single vector but only a batch. The functions like forward
    #cannot take a single vector of one dimension. We need to add a fake dimension like in keras which will correspond
    #to the batch. Pytorch tecnique. Variable function with imput training_set[id_user].
    #. unsqueeze(index of the new dimension). means where do we want the new dimension? se poia thesi? first index
    input = Variable(training_set[id_user]).unsqueeze(0)
    #what about the target? We need to do the same. We will modify the origiran input and since we want 
    #the original input before the modification we will create target var. clone -> copy of input.
    target = input.clone()
    #MEMORY OPTIMIZATION : we won't care if our user who didn't rate any movie.
    #take all the values of target which is the input vector and sum them up(all the ratings) larget than 0
    #check if the sums of 1,2,3,4,5 is larger than 0. If it is then it contains at least one rating
    if torch.sum(target.data > 0) > 0:
        #get the ouptut vector of predicted ratings, by applying the sae in our input vector.
      output = sae(input)
        #we want to make sure the gradient is calculated only with respect to the input and not the target
        #reduce the computation. Require_grad = this will make sure we don't compute the gradient with respect 
        #to the target. That optimizes our code.
      target.require_grad = False
        #another optimization. in the future computations of our stohasstic gradient descent we only want to include in
        #the computation the non zero values. We don;t want to deal with the movies the user didn't rate.
        #only for the output vector.
        #take the values of the output such as target == 0 and zero them.
        #these values will not count in the computation of the error and will not have an impact after we measure
        #the error because they will not count in the rsmprop computation.
      output[target == 0] = 0
        #loss error. 
      loss = criterion(output, target)
      mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
      loss.backward()
      train_loss += np.sqrt(loss.data*mean_corrector)
      s += 1.
      optimizer.step()
  print('epoch: '+str(epoch)+'loss: '+ str(train_loss/s))

epoch: 1loss: tensor(0.8504)
epoch: 2loss: tensor(0.8499)
epoch: 3loss: tensor(0.8501)
epoch: 4loss: tensor(0.8496)
epoch: 5loss: tensor(0.8494)
epoch: 6loss: tensor(0.8490)
epoch: 7loss: tensor(0.8488)
epoch: 8loss: tensor(0.8485)
epoch: 9loss: tensor(0.8482)
epoch: 10loss: tensor(0.8482)
epoch: 11loss: tensor(0.8477)
epoch: 12loss: tensor(0.8477)
epoch: 13loss: tensor(0.8473)
epoch: 14loss: tensor(0.8472)
epoch: 15loss: tensor(0.8470)
epoch: 16loss: tensor(0.8484)
epoch: 17loss: tensor(0.8463)
epoch: 18loss: tensor(0.8462)
epoch: 19loss: tensor(0.8458)
epoch: 20loss: tensor(0.8458)
epoch: 21loss: tensor(0.8454)
epoch: 22loss: tensor(0.8451)
epoch: 23loss: tensor(0.8450)
epoch: 24loss: tensor(0.8447)
epoch: 25loss: tensor(0.8449)
epoch: 26loss: tensor(0.8444)
epoch: 27loss: tensor(0.8447)
epoch: 28loss: tensor(0.8443)
epoch: 29loss: tensor(0.8438)
epoch: 30loss: tensor(0.8437)
epoch: 31loss: tensor(0.8434)
epoch: 32loss: tensor(0.8432)
epoch: 33loss: tensor(0.8428)
epoch: 34loss: tens

epoch: 269loss: tensor(0.7920)
epoch: 270loss: tensor(0.7911)
epoch: 271loss: tensor(0.7911)
epoch: 272loss: tensor(0.7915)
epoch: 273loss: tensor(0.7905)
epoch: 274loss: tensor(0.7904)
epoch: 275loss: tensor(0.7900)
epoch: 276loss: tensor(0.7901)
epoch: 277loss: tensor(0.7899)
epoch: 278loss: tensor(0.7903)
epoch: 279loss: tensor(0.7897)
epoch: 280loss: tensor(0.7903)
epoch: 281loss: tensor(0.7888)
epoch: 282loss: tensor(0.7895)
epoch: 283loss: tensor(0.7889)
epoch: 284loss: tensor(0.7889)
epoch: 285loss: tensor(0.7882)
epoch: 286loss: tensor(0.7888)
epoch: 287loss: tensor(0.7896)
epoch: 288loss: tensor(0.7884)
epoch: 289loss: tensor(0.7876)
epoch: 290loss: tensor(0.7875)
epoch: 291loss: tensor(0.7871)
epoch: 292loss: tensor(0.7873)
epoch: 293loss: tensor(0.7868)
epoch: 294loss: tensor(0.7863)
epoch: 295loss: tensor(0.7863)
epoch: 296loss: tensor(0.7866)
epoch: 297loss: tensor(0.7863)
epoch: 298loss: tensor(0.7856)
epoch: 299loss: tensor(0.7853)
epoch: 300loss: tensor(0.7853)
epoch: 3

## Testing the SAE


In [0]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
  input = Variable(training_set[id_user]).unsqueeze(0)
  target = Variable(test_set[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0) > 0:
    output = sae(input)
    target.require_grad = False
    output[target == 0] = 0
    loss = criterion(output, target)
    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
    test_loss += np.sqrt(loss.data*mean_corrector)
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.9681)
