In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
cd drive/My Drive/AtoZ/Deep_Learning_A_Z/Boltzmann_Machines

/content/drive/My Drive/AtoZ/Deep_Learning_A_Z/Boltzmann_Machines


In [0]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [0]:
#Importing dataset
movies = pd.read_csv('ml-1m/movies.dat', sep='::', header=None, engine='python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep='::', header=None, engine='python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header=None, engine='python', encoding = 'latin-1')

In [0]:
#prepare training set and test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t', header=None)
training_set = np.array(training_set, dtype='int')


test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t', header=None)
test_set = np.array(test_set, dtype='int')

In [0]:
#insert number of users and movies
combined = np.append(training_set, test_set, axis=0)
nb_users = len(np.unique(combined[:,0])) #943
nb_movies = len(np.unique(combined[:,1])) #1682

In [0]:
def convert(data):
  new_data=[]
  for id_users in range(1, nb_users + 1):
    id_movies = data[:,1][data[:,0]==id_users]
    #print(id_movies)
    id_ratings = data[:, 2][data[:,0]==id_users]
    #print(id_ratings)
    ratings = np.zeros(nb_movies)
    ratings[id_movies-1] = id_ratings
    #print(ratings)
    new_data.append(list(ratings))
  return new_data

In [0]:
training_set = convert(training_set)
test_set = convert(test_set)


In [0]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [0]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [11]:
training_set

tensor([[ 1.,  1.,  1.,  ..., -1., -1., -1.],
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1.,  1., -1.,  ..., -1., -1., -1.]])

In [0]:
class RBM():
  def __init__(self, nv, nh):
    self.W = torch.randn(nh,nv) #Vector of weights
    self.a = torch.randn(1,nh) #bias of the hidden nodes
    self.b = torch.randn(1,nv) #bias of the visible nodes
  
  def sample_h(self,x): #x - visible neurons
    wx = torch.mm(x, self.W.t())
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation)
    return p_h_given_v, torch.bernoulli(p_h_given_v)
    
  def sample_v(self,y): #y - hidden neurons
    wy = torch.mm(y, self.W)
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h, torch.bernoulli(p_v_given_h)
  
  def train(self, v0, vk, ph0, phk):
    self.W += (torch.mm(v0.t(),ph0) - torch.mm(vk.t(),phk)).t()
    self.b += torch.sum((v0 - vk),0)
    self.a += torch.sum((ph0 - phk),0)

In [0]:
nv = len(training_set[0])
nh = 100
batch_size = 100

rbm = RBM(nv,nh)

In [14]:
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(0, nb_users - batch_size, batch_size):
    vk = training_set[id_user: id_user+batch_size]
    v0 = training_set[id_user: id_user+batch_size]
    ph0,_ = rbm.sample_h(v0)
    for k in range(10):
      _,hk = rbm.sample_h(vk)
      _,vk = rbm.sample_v(hk)
      vk[v0<0] = v0[v0<0]  
      
      #if id_user == 0:
        #print(vk)
    phk,_ = rbm.sample_h(vk)
    rbm.train(v0, vk, ph0, phk)
    train_loss += torch.mean(torch.abs(v0[v0>=0]-vk[v0>=0]))
    s+=1.
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

epoch: 1 loss: tensor(0.3468)
epoch: 2 loss: tensor(0.2510)
epoch: 3 loss: tensor(0.2478)
epoch: 4 loss: tensor(0.2476)
epoch: 5 loss: tensor(0.2487)
epoch: 6 loss: tensor(0.2477)
epoch: 7 loss: tensor(0.2542)
epoch: 8 loss: tensor(0.2454)
epoch: 9 loss: tensor(0.2449)
epoch: 10 loss: tensor(0.2455)


In [15]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
  v = training_set[id_user: id_user+1]
  vt = test_set[id_user: id_user+1]
  if len(vt[vt>=0]) > 0:
    _,h = rbm.sample_h(v)
    _,v = rbm.sample_v(h)
    test_loss += torch.mean(torch.abs(vt[vt>=0]-v[vt>=0]))
  if id_user == 1:
    print("User : ")
    print(vt)
    print("Prediction : ")
    print(v)
  s+=1.
print('test loss: '+str(test_loss/s))

User : 
tensor([[-1., -1., -1.,  ..., -1., -1., -1.]])
Prediction : 
tensor([[1., 0., 0.,  ..., 0., 1., 0.]])
test loss: tensor(0.1247)
