# Boltzmann Machine

## Downloading the dataset

### ML-100K

In [66]:
# !wget "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
# !unzip ml-100k.zip
# !ls

### ML-1M

In [67]:
# !wget "http://files.grouplens.org/datasets/movielens/ml-1m.zip"
# !unzip ml-1m.zip
# !ls

## Importing the libraries

In [77]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torch; torch.backends.mps.is_available()#check if mps is available

True

## Importing the dataset


In [78]:
# The MovieLens 1M files are loaded for reference only; they are not used below.
# Read options shared by the three files:
#   sep='::'            -> a comma cannot be the separator because movie names contain commas
#   header=None         -> the files have no column-name row
#   engine='python'     -> makes sure the dataset is imported correctly with this separator
#   encoding='latin-1'  -> some files contain special characters that UTF-8 cannot decode
ml_1m_read_options = dict(sep = '::', header = None, engine = 'python', encoding = 'latin-1')
movies = pd.read_csv('ml-1m/movies.dat', **ml_1m_read_options)
users = pd.read_csv('ml-1m/users.dat', **ml_1m_read_options)
ratings = pd.read_csv('ml-1m/ratings.dat', **ml_1m_read_options)

## Preparing the training set and the test set


In [79]:
# Load the u1 train/test split of MovieLens 100K (tab-separated files).
# header=None is essential: the files have no column-name row, and without it
# pandas would use the first rating as the header and silently drop it from the data.
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t', header = None) # a DataFrame
# Convert the DataFrame to an integer NumPy array so it can be sliced by column index
# (column 0 = user id, column 1 = movie id, column 2 = rating, as used further down).
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t', header = None)
test_set = np.array(test_set, dtype = 'int')

## Getting the number of users and movies


In [80]:
# The matrix dimensions are the highest ids found in either split:
# column 0 carries the user ids, column 1 the movie ids.
# Both results are cast to plain Python ints; a non-integer value would raise an error later on.
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))

## Converting the data into an array with users in lines and movies in columns


In [81]:
# Build two user x movie matrices, one for the training set and one for the test set.
# Both have the same number of users and the same number of movies;
# a movie the user did not rate is stored as 0.
# The result is a list of lists (one inner list of ratings per user) because that is
# the form handed to torch afterwards.
def convert(data, n_users=None, n_movies=None):
  """Turn a table of ratings into one dense list of ratings per user.

  Args:
    data: 2-D integer NumPy array where column 0 is the 1-based user id,
      column 1 the 1-based movie id and column 2 the rating.
    n_users: number of rows to produce (users 1..n_users). Defaults to the
      notebook-level ``nb_users`` when omitted.
    n_movies: length of each row. Defaults to the notebook-level ``nb_movies``
      when omitted.

  Returns:
    A list with ``n_users`` entries; entry ``u - 1`` is a list of ``n_movies``
    floats holding the ratings of user ``u``, with 0 for movies not rated.
  """
  # Fall back to the notebook globals so existing convert(data) calls keep working.
  if n_users is None:
    n_users = nb_users
  if n_movies is None:
    n_movies = nb_movies
  new_data = []
  for id_users in range(1, n_users + 1): # upper bound is exclusive, +1 keeps the last user
    rated_by_user = data[:, 0] == id_users # row mask, computed once per user
    ratings = np.zeros(n_movies) # start from "nothing rated"
    # ids start at 1 while array positions start at 0, hence the -1
    ratings[data[rated_by_user, 1] - 1] = data[rated_by_user, 2]
    new_data.append(list(ratings))
  return new_data
# Both splits go through the same conversion; the list-of-lists form is what
# gets passed to torch in the next step.
training_set, test_set = [convert(split) for split in (training_set, test_set)]

## Converting the data into Torch tensors


In [87]:
# Convert both splits into torch tensors.
# A tensor is a multi-dimensional matrix holding a single data type;
# torch.FloatTensor receives one argument here, the list of lists built above.
training_set, test_set = map(torch.FloatTensor, (training_set, test_set))

## Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)


In [89]:
# From this point on everything is RBM-specific.
# The RBM output is binary (1 = liked, 0 = not liked), so the input is recoded
# in place to match:
#   0 (not rated)   -> -1
#   1 or 2          ->  0 (not liked)
#   3 and above     ->  1 (liked)
# The 0 -> -1 step must run first, otherwise the zeros created by the second
# step could not be told apart from "not rated".
for rating_matrix in (training_set, test_set):
  rating_matrix[rating_matrix == 0] = -1
  rating_matrix[(rating_matrix == 1) | (rating_matrix == 2)] = 0
  rating_matrix[rating_matrix >= 3] = 1

## Creating the architecture of the Neural Network


In [92]:
# A class keeps the model's parameters and the operations on them in one place.
class RBM():
  """Restricted Boltzmann Machine with Bernoulli-sampled hidden and visible nodes.

  Attributes:
    W: weight matrix of shape (nh, nv).
    a: bias used for the probability of the hidden nodes given the visible
      nodes, shape (1, nh).
    b: bias used for the probability of the visible nodes given the hidden
      nodes, shape (1, nv).
  """

  def __init__(self, nv, nh):
    """Initialise all parameters from a standard normal distribution.

    Args:
      nv: number of visible nodes.
      nh: number of hidden nodes.
    """
    # Weights: mean 0, standard deviation 1.
    self.W = torch.randn(nh, nv)
    # The biases carry a leading dimension of 1 so they line up with a batch of rows.
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)

  def sample_h(self, x):
    """Sample the hidden nodes given a batch of visible nodes.

    Args:
      x: 2-D tensor of visible values, one row per example, nv columns.

    Returns:
      A tuple ``(p_h_given_v, h)``: the activation probability of every hidden
      node for every row, and a 0/1 sample drawn from those probabilities.
    """
    # Linear part: visible values times the weights ...
    weighted_input = x.mm(self.W.t())
    # ... plus the hidden bias, repeated for every row of the mini-batch.
    pre_activation = weighted_input + self.a.expand_as(weighted_input)
    # The sigmoid turns the activation into a probability of the node being on.
    p_h_given_v = torch.sigmoid(pre_activation)
    # Bernoulli sampling yields 1 (activated) or 0 (not activated) per node.
    hidden_sample = torch.bernoulli(p_h_given_v)
    return p_h_given_v, hidden_sample

  def sample_v(self, y):
    """Sample the visible nodes given a batch of hidden nodes.

    Args:
      y: 2-D tensor of hidden values, one row per example, nh columns.

    Returns:
      A tuple ``(p_v_given_h, v)``: the activation probability of every visible
      node for every row, and a 0/1 sample drawn from those probabilities.
    """
    weighted_input = y.mm(self.W)
    pre_activation = weighted_input + self.b.expand_as(weighted_input)
    p_v_given_h = torch.sigmoid(pre_activation)
    visible_sample = torch.bernoulli(p_v_given_h)
    return p_v_given_h, visible_sample

  def train(self, v0, vk, ph0, phk):
    """Update W, b and a in place from the start and end of a sampling chain.

    Args:
      v0: visible values at the start of the chain.
      vk: visible values after k sampling steps.
      ph0: hidden probabilities computed from ``v0``.
      phk: hidden probabilities computed from ``vk``.
    """
    start_term = v0.t().mm(ph0)
    end_term = vk.t().mm(phk)
    # The products are (nv, nh); transpose to match W's (nh, nv) layout.
    self.W += (start_term - end_term).t()
    # Differences are summed over the batch dimension.
    self.b += (v0 - vk).sum(0)
    self.a += (ph0 - phk).sum(0)
# Model configuration.
# The RBM initialisation (torch.randn) and every Bernoulli draw made afterwards use
# torch's global random generator; seeding it here makes a fresh run repeatable.
SEED = 0
torch.manual_seed(SEED)
nv = len(training_set[0]) # one visible node per entry of a user's rating row
nh = 100 # number of hidden nodes
batch_size = 100 # number of users per training update
rbm = RBM(nv, nh)

## Training the RBM


In [15]:
nb_epoch = 10
cd_steps = 10 # sampling steps (hidden -> visible round trips) per update
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0. # number of batches seen, used to average the loss
  # Walk over ALL users. The previous bound (nb_users - batch_size) never reached the
  # trailing users, and dropped a whole batch when nb_users was a multiple of
  # batch_size. Slicing clips the last batch, so a shorter final batch is simply used.
  for id_user in range(0, nb_users, batch_size):
    v0 = training_set[id_user : id_user + batch_size] # ratings as observed
    vk = v0 # start of the chain; rebound to a new tensor on the first sampling step
    unrated = v0 < 0 # entries recoded to -1 (no rating)
    rated = v0 >= 0
    ph0,_ = rbm.sample_h(v0)
    for k in range(cd_steps):
      _,hk = rbm.sample_h(vk)
      _,vk = rbm.sample_v(hk)
      vk[unrated] = v0[unrated] # keep unrated entries frozen at -1
    phk,_ = rbm.sample_h(vk)
    rbm.train(v0, vk, ph0, phk)
    # Mean absolute difference between observed and sampled values, rated entries only.
    train_loss += torch.mean(torch.abs(v0[rated] - vk[rated]))
    s += 1.
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))


epoch: 1 loss: tensor(0.3414)
epoch: 2 loss: tensor(0.2318)
epoch: 3 loss: tensor(0.2548)
epoch: 4 loss: tensor(0.2443)
epoch: 5 loss: tensor(0.2508)
epoch: 6 loss: tensor(0.2453)
epoch: 7 loss: tensor(0.2516)
epoch: 8 loss: tensor(0.2468)
epoch: 9 loss: tensor(0.2500)
epoch: 10 loss: tensor(0.2475)


## Testing the RBM


In [16]:
# Evaluate one user at a time: the user's training row is the input, and the
# reconstruction is compared with the test row on the entries that have a test rating.
test_loss = 0
s = 0. # number of users that contributed to the loss
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    has_rating = vt >= 0
    if not has_rating.any():
        continue # nothing to compare against for this user
    # A single hidden -> visible round trip produces the prediction.
    _,h = rbm.sample_h(v)
    _,v = rbm.sample_v(h)
    test_loss += (vt[has_rating] - v[has_rating]).abs().mean()
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2465)
