#Boltzmann Machine

##Downloading the dataset

###ML-100K

In [1]:
!wget "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
!unzip ml-100k.zip
!ls

--2021-08-28 14:30:48--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2021-08-28 14:30:49 (24.4 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base    

###ML-1M

In [2]:
!wget "http://files.grouplens.org/datasets/movielens/ml-1m.zip"
!unzip ml-1m.zip
!ls

--2021-08-28 14:30:49--  http://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‘ml-1m.zip’


2021-08-28 14:30:50 (32.7 MB/s) - ‘ml-1m.zip’ saved [5917549/5917549]

Archive:  ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         
ml-100k  ml-100k.zip  ml-1m  ml-1m.zip	sample_data


##Importing the libraries

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

## Importing the dataset


In [4]:
# We won't be using this dataset.
# import the dataset from ml-1m (large dataset)
movies = pd.read_csv("ml-1m/movies.dat", sep="::", header=None, engine="python", encoding="latin-1")
#columns: id_movies, movie_names, movie_genre
users = pd.read_csv("ml-1m/users.dat", sep="::", header=None, engine="python", encoding="latin-1")
#users column: id_user, gender, age, job code
ratings = pd.read_csv("ml-1m/ratings.dat", sep="::", header=None, engine="python", encoding="latin-1")
#ratings column: id_user, id_movie, ratings, timestamps

## Preparing the training set and the test set


In [5]:
training_set = pd.read_csv("ml-100k/u1.base", delimiter="\t",) #contains user, id_movie, rating, and timestamp
training_set = np.array(training_set, dtype="int") #torch accepts numpy arrays
test_set = pd.read_csv("ml-100k/u1.base", delimiter="\t",) #contains user, id_movie, rating, and timestamp
test_set = np.array(test_set, dtype="int") #torch accepts numpy arrays

## Getting the number of users and movies


In [6]:
#maximum number of users and movies
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

## Converting the data into an array with users in lines and movies in columns


In [7]:
#users in rows, movies in columns and ratings in cells
# needed for pytorch, create a list of lists
def convert(data):
    new_data=[]
    for id_users in range(1, nb_users+1):
        id_movies = data[:, 1] [data[: ,0] == id_users]
        id_ratings = data[:,2][data[:,0]==id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies-1]=id_ratings #movies not rated are filled with zero
        new_data.append(list(ratings))
    return new_data

training_set = convert(training_set)
test_set = convert(test_set)

## Converting the data into Torch tensors


In [8]:
#needed for ML and DL, better to build tensor arrays (more efficient that numpy arrays)
#FloatTensor expects a list of lists
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

## Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)


In [9]:
#replace all zero ratings to -1 (no rating)
training_set[training_set ==0]= -1
training_set[training_set ==1]= 0
training_set[training_set ==2]= 0
training_set[training_set >=0]= 1

test_set[test_set ==0]= -1
test_set[test_set ==1]= 0
test_set[test_set ==2]= 0
test_set[test_set >=0]= 1

## Creating the architecture of the Neural Network


In [10]:
#probabilistic graphical model (use classes)
#class should define hidden nodes, weights of prob of visible nodes given the hidden nodes,
#bias, bias of visible nodes given the hidden nodes(Created 4 functions)
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv) #prob of visible nodes given the hidden nodes
        self.a = torch.randn(1, nh) #bias of prob of hidden nodes given the visible nodes
        self.b = torch.randn(1, nv) #bias of prob of visible nodes given the hidden nodes
#created an additional fake dimension to the biases using 1 because pytorch cant accept a single 
#input vector of 1D as argument(1=  fake batch)

#sampling the hidden nodes according to the prob p(h) given v. This is the same as the 
#Sigmoid Activation Function******
#we need the sample h function  because during the trainingm we'll need to approximate
#the log likelihood gradient using Gibb's Sampling
    def sample_h(self, x):  
#x = given visible neurons
#prob of h given v is the Sigmoid Activation Function applied to wx+a (a = bias of hidden node)
        wx = torch.mm(x, self.W.t())    
#transpose cos hidden nodes, mm for pytorch tensor product, activation=wx+a(hidden)
        activation = wx + self.a.expand_as(wx)  
#expand cos each input vector will not be treated individually but inside batches
#we have mini batches from self.a. To make sure that the bias(self.a) is applied to each line
#of the mini batch[our dimensions in self.a=torch.randn(1,nh)], we add the function expand_as()
        p_h_given_v = torch.sigmoid(activation)
#using a bernoulli RBM because we are predicting a binary outcome
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    
#sampling the visible nodes given the hidden nodes
    def sample_v(self,y):
        wy = torch.mm(y, self.W) #no transpose
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)

#creating CONTRASTIVE DIVERGENCE in train
#CD is about approximating the log-likelihood gradient. RBM is an energy-based model and can
#be seen as a Probability Graphical Model.
#GOAL is to MINIMISE the energy (Energy_Based_Model) or MAXIMISE the the Log-Likelihood 
#function (Prob Graphical Model). In both cases we compute the Gradient. CD comes with Gibbs Sampling
#GS consists of creating a Gibbs Chain in k-steps by sampling k-times the hidden nodes and visible nodes
#Algorithm in Page 25 of the article(An introduction to RBMs)[codes 8-10]
    def train(self, v0, vk, ph0, phk):
#v0=input vector containing the ratings of all movies by one user (loop for all users)
#vk=visible nodes obtained after k-sampling[visible-hidden-visible (round trip)]
#ph0=vector of prob that at the first iteration, the hidden nodes equal one(1) given the values of v0
#phk=prob of hidden nodes after k-sampling given the values of the visible nodes
#line 8
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t() #hidden so transpose
#line 9
        self.b += torch.sum((v0-vk), 0) #0 in order to keep the dimension
#line 10
        self.a += torch.sum((ph0-phk), 0)
#we can add more parameters like learning rate to improve and tune the model

#creating an object of our RBM class
nv = len(training_set[0])
nh = 100 #tunable
batch_size = 100 #tunable (total train data = 943)
rbm = RBM(nv, nh)

## Training the RBM


In [11]:
nb_epochs = 10 #tunable
for epoch in range(1, nb_epochs +1):
    train_loss = 0 #using Simple Difference in Abs Value (others RMSE)
    s = 0. #need a counter to normalise the train loss
    for id_user in range(0, nb_users-batch_size, batch_size): 
        vk = training_set[id_user:id_user+batch_size] #here, vk is the input at the start 
#vk=input vector going into the Gibbs chain whcih will be updated at each round trip
        v0 = training_set[id_user:id_user+batch_size]
#v0=targets(real), ratings given already
        ph0,_ = rbm.sample_h(v0) #v0=visible nodes
#ph0=initial prob, from the sample_h, we return only the first result
#for loop for k steps of CD for Gibbs Chain[MCMC technique]
        for k in range(10):
            _,hk = rbm.sample_h(vk) #hk=hidden nodes at kth step of CD
            _,vk = rbm.sample_v(hk) #first update of the visible node after 1st sampling
            vk[v0<0] = v0[v0<0] #exclude cells with no ratings [-1]
#compute for phk before applying train function
        phk,_ = rbm.sample_h(vk) #last sample of visible nodes after 10th step
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0>=0]-vk[v0>=0]))
# USING RMSE for training
        #train_loss += np.sqrt(torch.mean((v0[v0>=0]-vk[v0>0])**2))
        s += 1.
    print('epoch: ' + str(epoch) + ' loss: '+ str(train_loss/s))

nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(0, nb_users - batch_size, batch_size):
    vk = training_set[id_user : id_user + batch_size]
    v0 = training_set[id_user : id_user + batch_size]
    ph0,_ = rbm.sample_h(v0)
    for k in range(10):
      _,hk = rbm.sample_h(vk)
      _,vk = rbm.sample_v(hk)
      vk[v0<0] = v0[v0<0]
    phk,_ = rbm.sample_h(vk)
    rbm.train(v0, vk, ph0, phk)
    train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
    s += 1.
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

epoch: 1 loss: tensor(0.2082)
epoch: 2 loss: tensor(0.0103)
epoch: 3 loss: tensor(0.0032)
epoch: 4 loss: tensor(0.0020)
epoch: 5 loss: tensor(0.0013)
epoch: 6 loss: tensor(0.0010)
epoch: 7 loss: tensor(0.0009)
epoch: 8 loss: tensor(0.0008)
epoch: 9 loss: tensor(0.0007)
epoch: 10 loss: tensor(0.0006)
epoch: 1 loss: tensor(0.0007)
epoch: 2 loss: tensor(0.0005)
epoch: 3 loss: tensor(0.0004)
epoch: 4 loss: tensor(0.0006)
epoch: 5 loss: tensor(0.0004)
epoch: 6 loss: tensor(0.0005)
epoch: 7 loss: tensor(0.0003)
epoch: 8 loss: tensor(0.0005)
epoch: 9 loss: tensor(0.0005)
epoch: 10 loss: tensor(0.0005)


## Testing the RBM


In [12]:
#testing the rbm on test data
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1] #inputs
    vt = test_set[id_user:id_user+1] #targets
    if len(vt[vt>=0])>0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0]-v[vt>=0]))  
# USING RMSE for testing
        #test_loss += np.sqrt(torch.mean((vt[vt>=0]-v[vt>0])**2))
        s +=1
print('test loss: ' +str(test_loss/s))

# Test Loss Value should be approximately equal to the Train Loss Value

test loss: tensor(0.0004)
