[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/alok8663/Deep_Learning/blob/main/Unsupervised_Deep_Learning/AutoEncoders.ipynb)


#Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import variable

## Importing the dataset


In [None]:
movies=pd.read_csv('/content/drive/MyDrive/P16-AutoEncoders/AutoEncoders/ml-1m/ml-1m/movies.dat',sep='::',header=None,engine='python',encoding='latin-1')
users=pd.read_csv('/content/drive/MyDrive/P16-AutoEncoders/AutoEncoders/ml-1m/ml-1m/users.dat',sep='::',header=None,engine='python',encoding='latin-1')
ratings=pd.read_csv('/content/drive/MyDrive/P16-AutoEncoders/AutoEncoders/ml-1m/ml-1m/ratings.dat',sep='::',header=None,engine='python',encoding='latin-1')

In [None]:
pd.DataFrame(movies)

In [None]:
pd.DataFrame(users)

In [None]:
pd.DataFrame(ratings)

## Preparing the training set and the test set


In [None]:
training_set=pd.read_csv('/content/drive/MyDrive/P16-AutoEncoders/AutoEncoders/ml-100k/ml-100k/u1.base',delimiter='\t')
training_set=np.array(training_set,dtype='int')
test_set=pd.read_csv('/content/drive/MyDrive/P16-AutoEncoders/AutoEncoders/ml-100k/ml-100k/u1.test',delimiter='\t')
test_set=np.array(test_set,dtype='int')

In [None]:
print(pd.DataFrame(training_set))

In [None]:
print(pd.DataFrame(test_set))

## Getting the number of users and movies


In [None]:
nb_users=int(max(max(training_set[:,0]),max(test_set[:,0])))
nb_movies=int(max(max(training_set[:,1]),max(test_set[:,1])))

In [None]:
print(nb_users)

In [None]:
print(nb_movies)

## Converting the data into an array with users in lines and movies in columns


In [None]:
def convert(data):
  new_data=[]
  for id_users in range(1,nb_users+1):
    id_movies=data[:,1][data[:,0]==id_users]
    id_ratings=data[:,2][data[:,0]==id_users]
    ratings=np.zeros(nb_movies)
    ratings[id_movies-1]=id_ratings
    new_data.append(list(ratings))
  return new_data
training_set=convert(training_set)
test_set=convert(test_set)

## Converting the data into Torch tensors


In [None]:
training_set=torch.FloatTensor(training_set)
test_set=torch.FloatTensor(test_set)

## Creating the architecture of the Neural Network


In [None]:
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

# Training the SAE


In [15]:
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.0
    for id_user in range(nb_users):
        input = training_set[id_user].unsqueeze(0)
        target = input.clone().detach()
        if torch.sum(target > 0) > 0:
            output = sae(input)
            output[target == 0] = 0
            loss = criterion(output, target)
            mean_corrector = nb_movies / float(torch.sum(target > 0) + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.item() * mean_corrector)
            s += 1.0
        optimizer.step()
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss / s))


epoch: 1 loss: 1.7713223356454868
epoch: 2 loss: 1.0966304928315376
epoch: 3 loss: 1.0535944978208638
epoch: 4 loss: 1.0384002321124979
epoch: 5 loss: 1.0309284818712234
epoch: 6 loss: 1.0263672901386272
epoch: 7 loss: 1.0237764360146901
epoch: 8 loss: 1.0218855034994003
epoch: 9 loss: 1.0208011875316014
epoch: 10 loss: 1.0196094550490975
epoch: 11 loss: 1.018809610438656
epoch: 12 loss: 1.0183963817306936
epoch: 13 loss: 1.0177505257752046
epoch: 14 loss: 1.0174010313866118
epoch: 15 loss: 1.0172639032233477
epoch: 16 loss: 1.016886197304884
epoch: 17 loss: 1.0165491029277773
epoch: 18 loss: 1.0163684886184365
epoch: 19 loss: 1.016356834800117
epoch: 20 loss: 1.0159727307975073
epoch: 21 loss: 1.0161603975969502
epoch: 22 loss: 1.0159111838081905
epoch: 23 loss: 1.0160097242021031
epoch: 24 loss: 1.015989761434032
epoch: 25 loss: 1.015707202161073
epoch: 26 loss: 1.0153073331566524
epoch: 27 loss: 1.0152640724748483
epoch: 28 loss: 1.015080468922573
epoch: 29 loss: 1.0129257474202653


# Testing the SAE



In [17]:
test_loss = 0
s = 0.0
for id_user in range(nb_users):
    input = training_set[id_user].unsqueeze(0)
    target = test_set[id_user].unsqueeze(0).clone().detach()
    if torch.sum(target > 0) > 0:
        output = sae(input)
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies / float(torch.sum(target > 0) + 1e-10)
        test_loss += np.sqrt(loss.item() * mean_corrector)
        s += 1.0
print('test loss: ' + str(test_loss / s))


test loss: 0.9518670224530947
