#AutoEncoders

##Downloading the dataset

###ML-100K

###ML-1M

##Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
from torch.autograd import Variable

## Importing the dataset


In [2]:
movies= pd.read_csv('movies.dat', sep= '::', header= None, engine= 'python', encoding='latin-1')
users= pd.read_csv('users.dat', sep= '::', header= None, engine= 'python', encoding='latin-1')
ratings= pd.read_csv('ratings.dat', sep= '::', header= None, engine= 'python', encoding='latin-1')

## Preparing the training set and the test set


In [4]:
training_data= pd.read_csv('u1.base', delimiter= '\t')
training_data= np.array(training_data)
test_data= pd.read_csv('u1.test', delimiter= '\t')
test_data= np.array(test_data)

## Getting the number of users and movies


In [5]:
nb_users= max(max(training_data[:,0]), max(test_data[:,0]))
nb_movies= max(max(training_data[:,1]), max(test_data[:,1]))

## Converting the data into an array with users in lines and movies in columns


In [6]:
def convert(data):
  new_data=[]
  for id_users in range(1, nb_users+1):
    id_movies= data[:,1][data[:,0]==id_users]
    id_ratings= data[:,2][data[:,0]==id_users]
    ratings= np.zeros( nb_movies)
    ratings[id_movies-1]= id_ratings
    new_data.append(list(ratings))
  return new_data

training_data= convert(training_data)
test_data= convert(test_data)

## Converting the data into Torch tensors


In [7]:
training_data= torch.FloatTensor(training_data)
test_data= torch.FloatTensor(test_data)

## Creating the architecture of the Neural Network


In [11]:
class SAE(nn.Module):
  def __init__(self, ):
    super(SAE, self).__init__()
    self.fc1= nn.Linear(nb_movies, 20)
    self.fc2= nn.Linear(20, 10)
    self.fc3= nn.Linear(10, 20)
    self.fc4= nn.Linear(20, nb_movies)
    self.activation= nn.Sigmoid()
  def forward(self, x):
    x= self.activation(self.fc1(x))
    x= self.activation(self.fc2(x))
    x= self.activation(self.fc3(x))
    x= self.fc4(x)
    return x

sae= SAE()
criterion= nn.MSELoss()
optimizer= optim.RMSprop(sae.parameters(), lr=0.01, weight_decay=0.5)

## Training the SAE


In [15]:
nb_epoch= 200

for epoch in range(0, nb_epoch+1):
  train_loss=0
  s=0. 
  for id_user in range(nb_users):
    input= Variable(training_data[id_user]).unsqueeze(0)
    target= input.clone()
    if torch.sum(target.data > 0) > 0:
      output= sae(input)
      target.require_grad= False
      output[target==0]=0
      loss= criterion(output, target)
      mean_corrector= nb_movies/float(torch.sum(target.data>0)+1e-10)
      loss.backward()
      train_loss+= np.sqrt(loss.data*mean_corrector)
      s+=1. 
      optimizer.step()
  print('Epoch: '+str(epoch)+" Loss: "+str(train_loss/s))

Epoch: 0 Loss: tensor(1.7658)
Epoch: 1 Loss: tensor(1.0966)
Epoch: 2 Loss: tensor(1.0536)
Epoch: 3 Loss: tensor(1.0383)
Epoch: 4 Loss: tensor(1.0309)
Epoch: 5 Loss: tensor(1.0267)
Epoch: 6 Loss: tensor(1.0239)
Epoch: 7 Loss: tensor(1.0218)
Epoch: 8 Loss: tensor(1.0208)
Epoch: 9 Loss: tensor(1.0197)
Epoch: 10 Loss: tensor(1.0189)
Epoch: 11 Loss: tensor(1.0184)
Epoch: 12 Loss: tensor(1.0179)
Epoch: 13 Loss: tensor(1.0177)
Epoch: 14 Loss: tensor(1.0172)
Epoch: 15 Loss: tensor(1.0170)
Epoch: 16 Loss: tensor(1.0167)
Epoch: 17 Loss: tensor(1.0166)
Epoch: 18 Loss: tensor(1.0162)
Epoch: 19 Loss: tensor(1.0163)
Epoch: 20 Loss: tensor(1.0160)
Epoch: 21 Loss: tensor(1.0159)
Epoch: 22 Loss: tensor(1.0158)
Epoch: 23 Loss: tensor(1.0157)
Epoch: 24 Loss: tensor(1.0156)
Epoch: 25 Loss: tensor(1.0156)
Epoch: 26 Loss: tensor(1.0155)
Epoch: 27 Loss: tensor(1.0148)
Epoch: 28 Loss: tensor(1.0122)
Epoch: 29 Loss: tensor(1.0118)
Epoch: 30 Loss: tensor(1.0095)
Epoch: 31 Loss: tensor(1.0091)
Epoch: 32 Loss: te

## Testing the SAE


In [19]:
test_loss=0
s=0. 
for id_user in range(nb_users):
  input= Variable(training_data[id_user]).unsqueeze(0)
  target= Variable(test_data[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0) > 0:
    output= sae(input)
    target.require_grad= False
    output[target==0]=0
    loss= criterion(output, target)
    mean_corrector= nb_movies/float(torch.sum(target.data>0)+1e-10)
    test_loss+= np.sqrt(loss.data*mean_corrector)
    s+=1. 
print("Loss: "+str(test_loss/s))

Loss: tensor(0.9525)
