### RBM
recommender system based on Restricted Boltzmann Machine architecture implemented with python.
Model predicts binary value wheter user will like (1) or not like (0) a movie. To generate test data data_preprocessing notebook shall be exected first.

In [3]:
import numpy as np

In [5]:
training_data_original = np.loadtxt('training_set_flat.csv', delimiter=',')

In [155]:
np.shape(training_data_original)

(6040, 3952)

In [8]:
test_data_original = np.loadtxt('test_set_flat.csv', delimiter=',')

In [156]:
np.shape(test_data_original)

(6040, 3952)

In [9]:
def convert_to_binary(data_set):
    data_set[data_set == 2] = 0
    data_set[data_set >= 3] = 1
    return data_set

In [10]:
training_data = convert_to_binary(training_data_original)
training_data[0]

array([ 1., -1., -1., ..., -1., -1., -1.])

In [11]:
test_data = convert_to_binary(test_data_original)
test_data[0]

array([ 1., -1., -1., ..., -1., -1., -1.])

In [160]:
class RBM():
    def __init__(self, number_visible_nodes, number_hidden_nodes):
        # Weights W (visible*hidden) => (rows*columns)
        self.W = np.random.randn(number_visible_nodes, number_hidden_nodes)
        # print('W shape: '+ str(np.shape(self.W)))
        # print('W [0]: '+ str(self.W[0]))
        # hidden_bias (1*hidden )
        self.hidden_bias = np.random.randn(1, number_hidden_nodes)
        # print('hidden_bias shape: '+ str(np.shape(self.hidden_bias)))
        # print('hidden_bias [0]: '+ str(self.hidden_bias[0]))
        # visible_bias (1*visible)
        self.visible_bias = np.random.randn(1, number_visible_nodes)
        # print('visible_bias shape: '+ str(np.shape(self.visible_bias)))
        # print('visible_bias [0]: '+ str(self.visible_bias[0]))
        
    def sample_hidden_nodes(self, visible_nodes):
        # visible_nodes (batch_size*v_nodes), W (v_nodes*h_nodes), hidden_nodes (batch_size*h_nodes)
        hidden_nodes = np.dot(visible_nodes, self.W) 
        # print('hidden_nodes shape: '+ str(np.shape(hidden_nodes)))
        # weights plus biases: hidden_nodes (batch_size, h_nodes) + hidden_bias (1, h_nodes)
        activation_hidden = hidden_nodes + self.hidden_bias # add biases to hidden nodes values
        # print('activation_hidden shape: '+ str(np.shape(activation_hidden)))
        
        # apply sigmoid function to get hidden nodes values between 0 and 1, p_hidden_given_visible (batch_size*h_nodes)
        p_hidden_given_visible = 1/(1 + np.exp(-activation_hidden))
        # print('p_hidden_given_visible shape: '+ str(np.shape(p_hidden_given_visible)))
        
        rows = p_hidden_given_visible.shape[0]
        columns = p_hidden_given_visible.shape[1]
        # convert probabilities to 0/1 with respoct to bernouli distribution
        hidden_given_visible = np.random.binomial(1, p_hidden_given_visible, size=[rows,columns])
        # print('hidden_given_visible shape: '+ str(np.shape(hidden_given_visible)))
        # print('hidden_given_visible[0]: '+ str(hidden_given_visible[0]))
        
        return p_hidden_given_visible, hidden_given_visible
        
    def sample_visible_nodes(self, hidden_nodes):
        # hidden_nodes (batch_size*h_nodes), np.transpose(W) (h_nodes*v_nodes), visible_nodes (batch_size*v_nodes)
        visible_nodes = np.dot(hidden_nodes, np.transpose(self.W))
        
        activation_visible = visible_nodes + self.visible_bias
        
        p_visible_given_hidden = 1/(1 + np.exp(-activation_visible))
        
        rows = p_visible_given_hidden.shape[0]
        columns = p_visible_given_hidden.shape[1]
        visible_given_hidden = np.random.binomial(1, p_visible_given_hidden, size=[rows,columns])
        # print('visible_given_hidden shape: '+ str(np.shape(visible_given_hidden)))
        # print('visible_given_hidden[0]: '+ str(visible_given_hidden[0]))
        
        return p_visible_given_hidden, visible_given_hidden
    
    def train(self, original_v, current_v, p_original_h, p_current_h, learning_rate):
        
        # np.transpose(original_v) (v_nodes*batch_size), p_original_v (batch_size*h_nodes), diff_original (v_nodes*h_nodes)
        dot_original = np.dot(np.transpose(original_v), p_original_h)
        # print('dot_original shape: '+ str(np.shape(dot_original)))
        # diff_current (v_nodes*h_nodes)
        dot_current = np.dot(np.transpose(current_v), p_current_h)
        # print('dot_current shape: '+ str(np.shape(dot_current)))
        
        # print('initial W shape: '+ str(np.shape(self.W)))
        self.W +=  learning_rate * (dot_original - dot_current)
        # print('updated W shape: '+ str(np.shape(self.W)))
        
        self.visible_bias += np.sum(original_v - current_v)
        # print('updated visible_bias shape: '+ str(np.shape(self.visible_bias)))
        self.hidden_bias += np.sum(p_original_h - p_current_h)
        # print('updated hidden_bias shape: '+ str(np.shape(self.hidden_bias)))

In [164]:
number_visible_nodes = len(training_data[0])
number_hidden_nodes = 100
batch_size = 250
learning_rate= 0.1
nb_epoch = 5
rbm = RBM(number_visible_nodes, number_hidden_nodes)

In [165]:
for epoch in range(nb_epoch):
    train_loss = 0
    s = 0
    for n_user in range(0, len(training_data), batch_size):
        # visible nodes are the actual ratings
        current_v = training_data[n_user:n_user+batch_size] # this one will be updated => reconstructed nodes
        original_v = training_data[n_user:n_user+batch_size] # this is used to calculate error of reconstructed nodes
        # hidden nodes after 1 sampling (before learning), with values that are between 0 and 1 (probability of being activated)
        p_original_h,_ = rbm.sample_hidden_nodes(original_v)
        
        # Markov chain Monte Carlo (MCMC) algorithm
        for k in range(5):
            # run forward - sample hidden nodes
            _,current_h = rbm.sample_hidden_nodes(current_v)
            # run backward - reconstruct visible nodes
            _,current_v = rbm.sample_visible_nodes(current_h)
            
            # revert to -1 values that were not rated originally
            current_v[original_v<0] = original_v[original_v<0]
            # print('current_v[0]: '+ str(current_v[0]))
            # print('original_v[0]: '+ str(original_v[0]))
            
        # run forward, get predicted probabilities of hidden node to be activated (between 0 and 1)
        p_current_h,_ = rbm.sample_hidden_nodes(current_v)
        
        # train the model - reduce difference between generated and original visible nodes
        rbm.train(original_v, current_v, p_original_h, p_current_h, learning_rate)
        # calculate diff between original and recretaed node values for current batch
        diff_original_current = np.mean(np.absolute(original_v[original_v>=0] - current_v[original_v>=0]))
        # update train loss per epoch
        train_loss += diff_original_current
        s += 1
        #print('epoch: '+str(epoch +1)+' batch: ' + str(s) +' current loss: '+str(diff_original_current))
    #print('current_v[0]: '+ str(current_v[0]))
    #print('original_v[0]: '+ str(original_v[0]))
    print('epoch: '+str(epoch +1)+' loss: '+str(train_loss/s))

epoch: 1 loss: 0.18295419937161095
epoch: 2 loss: 0.2022736752272624
epoch: 3 loss: 0.16836269168209206
epoch: 4 loss: 0.20274841976666752
epoch: 5 loss: 0.20108287342494016


In [166]:
test_loss = 0
for user in range(len(training_data)):
    v_nodes_train = training_data[user:user+1]
    v_nodes_test = test_data[user:user+1]
    # create hidden nodes based on visible nodes for given user from training set
    _,hidden_nodes = rbm.sample_hidden_nodes(v_nodes_train)
    # reconstruct visible nodes based on calculated hidden nodes
    _,reconstructed_visible_nodes = rbm.sample_visible_nodes(hidden_nodes)
    # compare if model guessed correctly if user will like the movies based on train set data
    # (some movies rated by same user are in train set, other are in test set)
    # calculate loss for each user and devide by number of users
    test_loss += np.mean(np.absolute(v_nodes_test[v_nodes_test>=0] - reconstructed_visible_nodes[v_nodes_test>=0]))
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: 0.09149395531496649
