# Restricted Bolztman Machines (RBM)
Recommendations based on Deep Learning.

In [1]:
import os
import csv
import sys
import re

from surprise import Dataset
from surprise import Reader

from collections import defaultdict
import numpy as np

In [2]:
# define some paths to download data
ratingsPath = "../data/ml-latest-small/ratings.csv"
moviesPath = "../data/ml-latest-small/movies.csv"

# define reader instance to download data
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
# download dataset to path
ratingsDataset = Dataset.load_from_file(ratingsPath, reader=reader)

# now parse movies dataset
movieID_to_name = {}
name_to_movieID = {}
with open(moviesPath, newline='', encoding='ISO-8859-1') as csvfile:
    movieReader = csv.reader(csvfile)
    next(movieReader)
    for row in movieReader:
        movieID = int(row[0])
        movieName = row[1]
        movieID_to_name[movieID] = movieName
        name_to_movieID[movieName] = movieID

In [3]:
# get user ratings from ratingsDataset based on user
def getUserRatings(user):
    userRatings = []
    hitUser = False
    with open(ratingsPath, newline='') as csvfile:
        ratingReader = csv.reader(csvfile)
        next(ratingReader)
        for row in ratingReader:
            userID = int(row[0])
            if (user == userID):
                movieID = int(row[1])
                rating = float(row[2])
                userRatings.append((movieID, rating))
                hitUser = True
            if (hitUser and (user != userID)):
                break

    return userRatings

In [4]:
# now it is important to get popularity ranks to get some metrics
ratings = defaultdict(int)
rankings = defaultdict(int)
with open(ratingsPath, newline='') as csvfile:
    ratingReader = csv.reader(csvfile)
    next(ratingReader)
    for row in ratingReader:
        movieID = int(row[1])
        ratings[movieID] += 1
    rank = 1
    for movieID, ratingCount in sorted(ratings.items(), key=lambda x: x[1], reverse=True):
        rankings[movieID] = rank
        rank +=1

## Define the RBM arquitecture

In [5]:
import numpy as np
import tensorflow as tf

In [6]:
print("Tensorflow version: ", tf.__version__)

Tensorflow version:  2.17.0


In [7]:
class RBM(object):
    def __init__(
            self,
            visibleDimensions,
            epochs=20,
            hiddenDimensions=50,
            ratingValues=10,
            learningRate=0.001,
            batchSize=100
            ):
        
       self.visibleDimensions = visibleDimensions
       self.epochs = epochs
       self.hiddenDimensions = hiddenDimensions
       self.ratingValues = ratingValues
       self.learningRate = learningRate
       self.batchSize = batchSize

    def Train(self, X):
        # init weights as done in original paper
        maxWeight = -4.0 * np.sqrt(6.0/(self.hiddenDimensions + self.visibleDimensions))
        self.weights = tf.Variable(
            tf.random.uniform([self.visibleDimensions, self.hiddenDimensions],minval=-maxWeight, maxval=maxWeight),
            tf.float32,
            name='weights'
        )
        self.hiddenBias = tf.Variable(
            tf.zeros([self.hiddenDimensions]),
            tf.float32,
            name='hiddenBias'
        )
        self.visibleBias = tf.Variable(
            tf.zeros([self.visibleDimensions]),
            tf.float32,
            name='visibleBias'
        )

        for epoch in range(self.epochs):
            trX = np.array(X)
            for i in range(0, trX.shape[0], self.batchSize):
                epochX = trX[i:i+self.batchSize]
                self.MakeGraph(epochX)
            print("Trained epoch ", epoch)
    
    def GetRecommendations(self, inputUser):
        feed = self.MakeHidden(inputUser)
        rec = self.MakeVisible(feed)
        return rec[0]
    
    def MakeGraph(self, inputUser):
        """
        Performs Gibbs sampling for contrastive divergence. The paper it is assuime k=1 of iterating
        over the forward pass multiple times since it seems to work just fine
        """
        # ----------- Forward pass ----------- #
        # get tensor of hidden probabilities
        hProb0 = tf.nn.sigmoid(tf.matmul(inputUser, self.weights) + self.hiddenBias)
        # sample from all distributions
        hSample = tf.nn.relu(tf.sign(hProb0 - tf.random.uniform(tf.shape(hProb0))))
        # Stitch it toguether
        forward = tf.matmul(tf.transpose(inputUser), hSample)

        # ----------- Backward pass ----------- #
        # reconstruct visible layer given hidden layer sample
        v = tf.matmul(hSample, tf.transpose(self.weights)) + self.visibleBias
        # build up mask for hidden ratings
        vMask = tf.sign(inputUser)
        VMask3D = tf.reshape(vMask, [tf.shape(v)[0], -1, self.ratingValues]) # reshape of individual ratings
        VMask3D = tf.reduce_max(VMask3D, axis=[2], keepdims=True) # ensure 1 for exiting ratings and 0 for missing ones

        # extract rating vectors for each individual
        v = tf.reshape(v, [tf.shape(v)[0], -1, self.ratingValues])
        vProb = tf.nn.softmax(v*VMask3D)
        vProb = tf.reshape(vProb, [tf.shape(v)[0], -1])
        hProb1 = tf.nn.sigmoid(tf.matmul(vProb, self.weights) + self.hiddenBias)
        backward = tf.matmul(tf.transpose(vProb), hProb1)

        # ----------- Run passes ----------- #
        weighupdate = self.weights.assign_add(self.learningRate * (forward - backward))
        hiddenBiasUpdate = self.hiddenBias.assign_add(self.learningRate * tf.reduce_mean(hProb0 - hProb1, 0))
        visibleBiasUpdate = self.visibleBias.assign_add(self.learningRate * tf.reduce_mean(inputUser - vProb, 0))

        self.update = [weighupdate, hiddenBiasUpdate, visibleBiasUpdate]

    def MakeHidden(self, inputUser):
        hidden = tf.nn.sigmoid(tf.matmul(inputUser, self.weights) + self.hiddenBias)
        self.MakeGraph(inputUser)
        return hidden
    
    def MakeVisible(self, feed):
        visible = tf.nn.sigmoid(tf.matmul(feed, tf.transpose(self.weights)) + self.visibleBias)
        return visible

### Define RBM wrapper with surprise

In [8]:
from surprise import AlgoBase
from surprise import PredictionImpossible

In [9]:
class RBMAlgorithm(AlgoBase):
    def __init__(self, epochs=20, hiddenDim=100, learningRate=0.001, batchSize=100, sim_options={}):
        AlgoBase.__init__(self)
        self.epochs = epochs
        self.hiddenDim = hiddenDim
        self.learningRate = learningRate
        self.batchSize = batchSize

    def softmax(self, x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)

        numUsers = trainset.n_users
        numItems = trainset.n_items

        trainingMatrix = np.zeros([numUsers, numItems, 10], dtype=np.float32)
        for (uid, iid, rating) in trainset.all_ratings():
            adjustRating = int(float(rating)*2.0) - 1
            trainingMatrix[int(uid), int(iid), adjustRating] = 1
        
        trainingMatrix = np.reshape(trainingMatrix, [trainingMatrix.shape[0], -1])

        # create RBM with (n_items * rating_values) visible nodes
        rbm = RBM(
            trainingMatrix.shape[1],
            hiddenDimensions=self.hiddenDim,
            learningRate=self.learningRate,
            batchSize=self.batchSize,
            epochs=self.epochs
        )
        rbm.Train(trainingMatrix)
        
        self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
        for uiid in range(trainset.n_users):
            if (uiid % 50 == 0):
                print("Processing user ", uiid)
            recs = rbm.GetRecommendations([trainingMatrix[uiid]])
            recs = np.reshape(recs, [numItems, 10])
            # take a normalized rating using it as epectation (prediction)
            for itemID, rec in enumerate(recs):
                normalized = self.softmax(rec)
                rating = np.average(np.arange(10), weights=normalized)
                self.predictedRatings[uiid, itemID] = (rating + 1) * 0.5
        
        return self
    
    def estimate(self, u, i):
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unknown')
        
        rating = self.predictedRatings[u, i]
        if (rating < 0.001):
            raise PredictionImpossible('No valid prediction exists')
        
        return rating


## Get some recomendations based on RBM

In [10]:
import sys
sys.path.append('..')
from Framework.EvaluationData import EvaluationData
from Framework.RecommenderMetrics import RecommenderMetrics
import random

In [11]:
np.random.seed(0)
random.seed(0)

In [12]:
# define evaluation data
# get evaluation data
evaluationData = EvaluationData(ratingsDataset, rankings)
# define RBM instance
rbm = RBMAlgorithm(epochs=20)
rbm.fit(ratingsDataset.build_full_trainset())
predictions = rbm.test(evaluationData.GetTestSet())
print("RMSE SVD", RecommenderMetrics.RMSE(predictions))
print("MAE SVD", RecommenderMetrics.MAE(predictions))
print()

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Trained epoch  0
Trained epoch  1
Trained epoch  2
Trained epoch  3
Trained epoch  4
Trained epoch  5
Trained epoch  6
Trained epoch  7
Trained epoch  8
Trained epoch  9
Trained epoch  10
Trained epoch  11
Trained epoch  12
Trained epoch  13
Trained epoch  14
Trained epoch  15
Trained epoch  16
Trained epoch  17
Trained epoch  18
Trained epoch  19
Processing user  0
Processing user  50
Processing user  100
Processing user  150
Processing user  200
Processing user  250
Processing user  300
Processing user  350
Processing user  400
Processing user  450
Processing user  500
Processing user  550
Processing user  600
RMSE SVD 1.150206709882802
MAE SVD 0.956557487078616



In [13]:
# funtion to ger movie name based on movie ID
def getMovieName(movieID):
    if movieID in movieID_to_name:
        return movieID_to_name[movieID]
    else:
        return ""

In [16]:
# see recomendations
# let's see some recommendations
testSubject = 85
k = 10

trainSet = evaluationData.GetFullTrainSet()
testSet = evaluationData.GetAntiTestSetForUser(testSubject)

predictions = rbm.test(testSet)
recommendations = []
for userID, movieID, actualRating, EstimatedRating, _ in predictions:
    intMovieID = int(movieID)
    recommendations.append((intMovieID, EstimatedRating))

recommendations.sort(key=lambda x: x[1], reverse=True)

print("#"*10, "RBM recommendations", "#"*10)
for ratings in recommendations[:k]:
    print(getMovieName(ratings[0]), ratings[1])
print()

########## RBM recommendations ##########
Lives of Others, The (Das leben der Anderen) (2006) 3.2998502
Limitless (2011) 3.2895017
Logan (2017) 3.2853334
Maltese Falcon, The (1941) 3.2836244
Hustler, The (1961) 3.2824562
Captain Phillips (2013) 3.2796102
Boot, Das (Boat, The) (1981) 3.278512
Harold and Maude (1971) 3.2742562
Untitled Spider-Man Reboot (2017) 3.2712727
Touch of Evil (1958) 3.2711432



## Tunning RBM

In [17]:
from surprise.model_selection import GridSearchCV

In [18]:
# define a param grid
param_grid = {'hiddenDim': [20, 10], 'learningRate': [0.1, 0.01]}
gs = GridSearchCV(RBMAlgorithm, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(ratingsDataset)

Trained epoch  0
Trained epoch  1
Trained epoch  2
Trained epoch  3
Trained epoch  4
Trained epoch  5
Trained epoch  6
Trained epoch  7
Trained epoch  8
Trained epoch  9
Trained epoch  10
Trained epoch  11
Trained epoch  12
Trained epoch  13
Trained epoch  14
Trained epoch  15
Trained epoch  16
Trained epoch  17
Trained epoch  18
Trained epoch  19
Processing user  0
Processing user  50
Processing user  100
Processing user  150
Processing user  200
Processing user  250
Processing user  300
Processing user  350
Processing user  400
Processing user  450
Processing user  500
Processing user  550
Processing user  600
Trained epoch  0
Trained epoch  1
Trained epoch  2
Trained epoch  3
Trained epoch  4
Trained epoch  5
Trained epoch  6
Trained epoch  7
Trained epoch  8
Trained epoch  9
Trained epoch  10
Trained epoch  11
Trained epoch  12
Trained epoch  13
Trained epoch  14
Trained epoch  15
Trained epoch  16
Trained epoch  17
Trained epoch  18
Trained epoch  19
Processing user  0
Processing 

In [19]:
# now explore best metrics and params
print("Best RMSE score attained: ", gs.best_score['rmse'])
print(gs.best_params['rmse'])

Best RMSE score attained:  1.1342662605735316
{'hiddenDim': 10, 'learningRate': 0.1}
