# AutoEncoders
## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
## Importing the dataset
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

In [3]:
## Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [4]:
## Getting the number of users and movies
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

In [5]:
## Converting the data into an array with users in lines and movies in columns
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data
training_set = convert(training_set)
test_set = convert(test_set)

In [6]:
## Converting the data into Torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [7]:
## Creating the architecture of the Neural Network
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()#nn module methodlarını kullanmak için
        #fc full connection
        self.fc1 = nn.Linear(nb_movies, 20) #feature number of movies 20 first hidden layer farklı koyabilirsin
        self.fc2 = nn.Linear(20, 10) #20 hiddenden 10 hiddene geliyor 2.hidden layer
        self.fc3 = nn.Linear(10, 20) #stacked autoencoders hidden layer 3.hidden layer decoding e geçiyoruz
        self.fc4 = nn.Linear(20, nb_movies) #20 den number of movies kadar çıktıya input vector kadar yani
        self.activation = nn.Sigmoid()
    def forward(self, x):
        x = self.activation(self.fc1(x)) #fc1 e uyguladık x burada vector or left on fully connection encoded 
        x = self.activation(self.fc2(x)) #fc2 ye uyguladık
        x = self.activation(self.fc3(x)) #fc3 e uyguladık
        x = self.fc4(x) #reconstructed output vector için activasyon fonk kullanmıyoruz
        return x
sae = SAE() #parantez içine bişe tanımlamadık çünkü class içinde tanımladık
criterion = nn.MSELoss() 
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

In [8]:
# Eğitim
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        input = training_set[id_user].unsqueeze(0)  # Variable kaldırıldı
        target = input.clone()
        if torch.sum(target > 0) > 0:  # target.data yerine doğrudan target
            output = sae(input)
            target.requires_grad_(False)  # require_grad yerine requires_grad_
            output[target == 0] = 0
            loss = criterion(output, target)
            mean_corrector = nb_movies / float(torch.sum(target > 0) + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.item() * mean_corrector)  # loss.data[0] yerine loss.item()
            s += 1.
            optimizer.step()
    print(f'epoch: {epoch}, loss: {train_loss/s}')

epoch: 1, loss: 1.7716632528717389
epoch: 2, loss: 1.09666233936325
epoch: 3, loss: 1.0534029033191739
epoch: 4, loss: 1.0383492509876635
epoch: 5, loss: 1.0306713919418444
epoch: 6, loss: 1.0264345399096706
epoch: 7, loss: 1.0238181472001553
epoch: 8, loss: 1.022003711900394
epoch: 9, loss: 1.0208191949156702
epoch: 10, loss: 1.019689263164721
epoch: 11, loss: 1.0188434326821711
epoch: 12, loss: 1.0183907293428283
epoch: 13, loss: 1.0177371669908757
epoch: 14, loss: 1.017373927537887
epoch: 15, loss: 1.017208351649657
epoch: 16, loss: 1.016749642237119
epoch: 17, loss: 1.0167955341424215
epoch: 18, loss: 1.0163705649547654
epoch: 19, loss: 1.016370230842194
epoch: 20, loss: 1.0161142656354274
epoch: 21, loss: 1.016131707423949
epoch: 22, loss: 1.015870928117219
epoch: 23, loss: 1.0156605692006393
epoch: 24, loss: 1.0155340236739125
epoch: 25, loss: 1.0156701097799163
epoch: 26, loss: 1.015424710216633
epoch: 27, loss: 1.0152304593487083
epoch: 28, loss: 1.014950406440314
epoch: 29, lo

In [9]:
# Test
test_loss = 0
s = 0.
for id_user in range(nb_users):
    input = training_set[id_user].unsqueeze(0)  # Variable kaldırıldı
    target = test_set[id_user].unsqueeze(0)
    if torch.sum(target > 0) > 0:
        output = sae(input)
        target.requires_grad_(False)
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies / float(torch.sum(target > 0) + 1e-10)
        test_loss += np.sqrt(loss.item() * mean_corrector)  # loss.data yerine loss.item()
        s += 1.
print(f'test loss: {test_loss/s}')

test loss: 0.9495550151221323
