### Recommender Systems using Autoencoder
-- predicting the rating of a movies on the scale of 5 not visited by user. 

In [42]:
import pandas as pd 
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.parallel
import torch.utils.data
from torch.autograd import Variable

### Importing Data 

In [43]:
movies = pd.read_csv('DataSet/ml-1m/movies.dat', sep = '::', header = None, engine= 'python', 
                     encoding='latin-1')
user = pd.read_csv('DataSet/ml-1m/users.dat', sep = '::', header = None, engine= 'python', 
                     encoding='latin-1')
rating = pd.read_csv('DataSet/ml-1m/ratings.dat', sep = '::', header = None, engine= 'python', 
                     encoding='latin-1')
train = pd.read_csv('DataSet/ml-100k/u1.base', sep= '\t')
test = pd.read_csv('DataSet/ml-100k/u1.test', sep= '\t')

### Data Preprocessing 

In [44]:
train = np.array(train, dtype='int')
test = np.array(test, dtype='int')

Number of users and movies 

In [45]:
nb_users = int(max(max(train[:, 0]), max(test[:,0])))
nb_movies = int(max(max(train[:, 1]), max(test[:,1])))


In [46]:
def convert(data):
    new_df = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:, 1][data[:, 0] == id_users]
        id_rating = data[:, 2][data[:, 0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies-1] = id_rating
        new_df.append(list(ratings))
    return new_df

In [47]:
train = convert(train)
test = convert(test)
train = torch.FloatTensor(train)
test = torch.FloatTensor(test)

### Building Autoencoder

--we are making stacked Autoencoder 

In [48]:
# inheritance from a parent class 
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()
    
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x

In [49]:
ae = SAE() 

In [50]:
# loss function
criterion = nn.MSELoss()

In [51]:
# optimizer
optimizer = optim.RMSprop(ae.parameters(), lr=0.01, weight_decay= 0.5)

### Training

In [52]:
epoch = 500
for i in range(0, epoch):
    train_loss = 0
    cnt = 0.
    for id_user in range(nb_users):
        input = Variable(train[id_user]).unsqueeze(0) # for creating a batch(2D) of 1 input vector and change it dimension 
        target = input.clone()
        if torch.sum(target.data > 0) > 0: # considering only users that rated atleast one movie
            output = ae.forward(input)
            target.requires_grad = False # don't compute the gradient wrt target
            output[target == 0] = 0
            loss = criterion(output , target)
            mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10) # only consider rated movies(viewed by user)
            loss.backward() # backward decide the direction of step to which the weights are update 
            train_loss += np.sqrt(loss.data*mean_corrector)
            cnt += 1.
            optimizer.step() # decide the value by which the update happens 
    print(f" Epoch : {i+1} || Loss : {train_loss/cnt}") 

 Epoch : 1 || Loss : 1.7708677053451538
 Epoch : 2 || Loss : 1.0965454578399658
 Epoch : 3 || Loss : 1.053270697593689
 Epoch : 4 || Loss : 1.0383684635162354
 Epoch : 5 || Loss : 1.0308600664138794
 Epoch : 6 || Loss : 1.02657151222229
 Epoch : 7 || Loss : 1.0235885381698608
 Epoch : 8 || Loss : 1.0220330953598022
 Epoch : 9 || Loss : 1.0206735134124756
 Epoch : 10 || Loss : 1.0194190740585327
 Epoch : 11 || Loss : 1.018898367881775
 Epoch : 12 || Loss : 1.0183336734771729
 Epoch : 13 || Loss : 1.0178663730621338
 Epoch : 14 || Loss : 1.0175273418426514
 Epoch : 15 || Loss : 1.0173407793045044
 Epoch : 16 || Loss : 1.0169599056243896
 Epoch : 17 || Loss : 1.0168157815933228
 Epoch : 18 || Loss : 1.0161656141281128
 Epoch : 19 || Loss : 1.016269326210022
 Epoch : 20 || Loss : 1.0159651041030884
 Epoch : 21 || Loss : 1.016068458557129
 Epoch : 22 || Loss : 1.0158530473709106
 Epoch : 23 || Loss : 1.0159090757369995
 Epoch : 24 || Loss : 1.015783429145813
 Epoch : 25 || Loss : 1.01539790

### Testing

In [54]:
test_loss = 0
cnt  = 0
for id_user in range(nb_users):
    input = Variable(train[id_user]).unsqueeze(0)
    target = Variable(test[id_user]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0: # considering only users that rated atleast one movie
        output = ae.forward(input)
        target.requires_grad = False # don't compute the gradient wrt target
        output[target == 0] = 0
        loss = criterion(output , target)
        mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data*mean_corrector)
        cnt += 1.
print(f"Loss : {test_loss/cnt}")     

Loss : 0.9549281597137451
