In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [0]:
movies=pd.read_csv('movies.dat',sep='::', header=None, engine='python',encoding='latin-1')

In [15]:
movies.head()

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [16]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3883 entries, 0 to 3882
Data columns (total 3 columns):
0    3883 non-null int64
1    3883 non-null object
2    3883 non-null object
dtypes: int64(1), object(2)
memory usage: 91.1+ KB


In [0]:
users=pd.read_csv('users.dat',sep='::',header=None,engine='python',encoding='latin-1')

In [18]:
users.head()

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [0]:
ratings=pd.read_csv('ratings.dat',sep='::',header=None,engine='python',encoding='latin-1')

In [20]:
ratings.head()

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [0]:
training_set=pd.read_csv('u1.base', delimiter='\t')

In [23]:
training_set.head()

Unnamed: 0,1,1.1,5,874965758
0,1,2,3,876893171
1,1,3,4,878542960
2,1,4,3,876893119
3,1,5,3,889751712
4,1,7,4,875071561


In [0]:
training_set=np.array(training_set, dtype= 'int')

In [0]:
test_set=pd.read_csv('u1.test', delimiter='\t')

In [0]:
test_set=np.array(test_set, dtype= 'int')

In [0]:
#getting total users and movies
nb_users=int(max(max(training_set[:,0]),max(test_set[:,0])))
nb_movies=int(max(max(training_set[:,1]),max(test_set[:,1])))

In [31]:
nb_users


943

In [32]:
nb_movies

1682

In [0]:
#converting training_set and test_set to list
#users in lines and features in columns
def convert(data):
    new_data=[]
    for id_users in range(1,nb_users+1):
        id_movies=data[:,1][data[:,0]==id_users]
        id_ratings= data[:,2][data[:,0]==id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies-1]= id_ratings
        new_data.append(list(ratings))
    return new_data
training_set= convert(training_set)
test_set= convert(test_set)

In [0]:
##converting to torch tensors
training_set= torch.FloatTensor(training_set)
test_set= torch.FloatTensor(test_set)

In [0]:
#constructing architecture of auto encoder
#using inheritence
class SAE(nn.Module):
  def __init__(self, ):
    super(SAE, self).__init__()
    self.fc1= nn.Linear(nb_movies , 25)
    self.fc2= nn.Linear(25, 12)
    self.fc3= nn.Linear(12, 25)
    self.fc4= nn.Linear(25, nb_movies)
    self.activation = nn.Sigmoid()
  def forward(self, x):
    x=self.activation(self.fc1(x))
    x=self.activation(self.fc2(x))
    x=self.activation(self.fc3(x))
    x=self.fc4(x)    #last layer no activation
    return x
sae= SAE()
criterion= nn.MSELoss()
optimizer= optim.RMSprop(sae.parameters(), lr=0.01, weight_decay= 0.5)

In [44]:
#Training
n_epochs=225
for epoch in range(1,n_epochs+1):
  train_loss=0
  s=0.    #no. of users rating at least 1 movie
  for id_user in range(nb_users):
    input= Variable(training_set[id_user]).unsqueeze(0)  #create batch as torch doesn't take single dim input vector
    target = input.clone()
    if torch.sum(target.data>0) > 0:
      output= sae.forward(input)
      target.require_grad= False
      output[target==0] = 0   #not counting unrated movies
      loss= criterion(output, target)
      mean_corrector= nb_movies/float(torch.sum(target.data >0) + 1e-10)
      loss.backward()      #direction
      train_loss+= np.sqrt(loss.data[0]*mean_corrector)
      s+=1.
      optimizer.step()        #intensity of updation
  print ('epoch: '+ str(epoch)+ ' '+ str(train_loss/s))
    
  

epoch: 1 0.9727046899674473
epoch: 2 0.9798302252696824
epoch: 3 0.9693332119458502
epoch: 4 0.9744958729370341
epoch: 5 0.9689651236982545
epoch: 6 0.9704900535953759
epoch: 7 0.9642330438186607
epoch: 8 0.9665403902358554
epoch: 9 0.9620947239056835
epoch: 10 0.9648763961643179
epoch: 11 0.9618907861487668
epoch: 12 0.962982212119985
epoch: 13 0.9585611061040789
epoch: 14 0.9644789963256566
epoch: 15 0.9596381759953139
epoch: 16 0.9661265019771612
epoch: 17 0.9580698872074519
epoch: 18 0.9608729091551164
epoch: 19 0.9576318810379613
epoch: 20 0.9586348819071934
epoch: 21 0.9544755943877478
epoch: 22 0.9549098385139864
epoch: 23 0.9522146541353239
epoch: 24 0.9528976601585006
epoch: 25 0.9494414399725603
epoch: 26 0.951180978503307
epoch: 27 0.9498363914531317
epoch: 28 0.9551807656158404
epoch: 29 0.9471474668228739
epoch: 30 0.9565654798803198
epoch: 31 0.9547278932151197
epoch: 32 0.9542470946622663
epoch: 33 0.9531122756088505
epoch: 34 0.9504487840552988
epoch: 35 0.9483424060136

In [45]:
test_loss=0
s=0.    #no. of users rating at least 1 movie
for id_user in range(nb_users):
    input= Variable(training_set[id_user]).unsqueeze(0)  #create batch as torch doesn't take single dim input vector
    target = Variable(test_set[id_user])
    if torch.sum(target.data>0) > 0:
      output= sae.forward(input)
      target.require_grad= False
      output[target==0] = 0   #not counting unrated movies
      loss= criterion(output, target)
      mean_corrector= nb_movies/float(torch.sum(target.data >0) + 1e-10)
      test_loss+= np.sqrt(loss.data[0]*mean_corrector)
      s+=1.
      optimizer.step()        #intensity of updation
print ('test_loss: '+ str(test_loss/s))


test_loss: 0.9415147210832776
