<a href="https://colab.research.google.com/github/ivoryRabbit/RecSys/blob/master/RBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import glob
import numpy as np
import pandas as pd
from typing import Callable, Tuple, List

from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow.data import Dataset
from tensorflow.keras.utils import get_file
import zipfile

In [3]:
def load_data(data_size: str) -> pd.DataFrame:
    ''' Load a MovieLens ratings table, downloading the archive when needed.

    The ratings file is looked up relative to the current working directory;
    if it is not there, the matching zip archive is fetched from GroupLens
    and extracted into the current working directory.

    Args:
        data_size: one of '1m', '10m', '20m' or '25m'.

    Returns:
        The ratings as a DataFrame. The '1m' and '10m' files have no header
        row, so their columns are named userId, movieId, rating, timestamp;
        the '20m' and '25m' CSV files are read with their own header row.

    Raises:
        ValueError: if ``data_size`` is not one of the supported sizes.
    '''
    # data_size -> (archive file name, ratings path inside the archive)
    sources = {
        '1m': ('ml-1m.zip', 'ml-1m/ratings.dat'),
        '10m': ('ml-10m.zip', 'ml-10M100K/ratings.dat'),
        '20m': ('ml-20m.zip', 'ml-20m/ratings.csv'),
        '25m': ('ml-25m.zip', 'ml-25m/ratings.csv'),
    }
    if data_size not in sources:
        # Fail early with a clear message instead of hitting unbound locals below.
        raise ValueError(
            f'unknown data_size {data_size!r}; expected one of {sorted(sources)}'
        )
    fname, data = sources[data_size]

    if not glob.glob(data):
        origin = f'http://files.grouplens.org/datasets/movielens/{fname}'
        file = get_file(fname, origin)
        # Context manager guarantees the archive handle is closed after extraction.
        with zipfile.ZipFile(file, 'r') as zip_ref:
            zip_ref.extractall()

    col_names = ['userId', 'movieId', 'rating', 'timestamp']
    if data_size in ['20m', '25m']:
        ratings = pd.read_csv(data, engine = 'python')
    else:
        # The .dat files separate fields with '::'. Only `sep` is passed:
        # `delimiter` is an alias of `sep`, and supplying both is an error
        # in current pandas.
        ratings = pd.read_csv(data, sep = '::', names = col_names, engine = 'python')
    print(ratings.shape)
    return ratings

In [4]:
# Load the MovieLens-1M ratings and preview the first rows.
ratings = load_data(data_size = '1m')
ratings.head()

Downloading data from http://files.grouplens.org/datasets/movielens/ml-1m.zip
(1000209, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [5]:
# Number of distinct users and distinct movies present in the ratings table.
n_user, n_item = (ratings[column].nunique() for column in ('userId', 'movieId'))

print('# of user: ', n_user)
print('# of item: ', n_item)

# of user:  6040
# of item:  3706


In [6]:
# Build the user x movie rating matrix: one row per userId, one column per movieId.
user_rating = ratings.pivot(index = 'userId', columns = 'movieId', values = 'rating')

# Missing (user, movie) pairs become 0; dividing by 5.0 rescales the ratings.
norm_user_rating = user_rating.fillna(0).div(5.0)

# Dense float32 array that is fed to the model.
X = norm_user_rating.to_numpy(dtype = np.float32)

In [7]:
# Hold out 20% of the rows of X for evaluation. A fixed seed makes the
# split (and therefore the reported test error) repeatable across runs.
X_train, X_test = train_test_split(X, test_size = 0.2, random_state = 42)

In [10]:
class RBM(tf.keras.Model):
    ''' Restricted Boltzmann Machine trained with contrastive divergence.

    Parameters are a weight matrix ``W`` of shape (n_visible, n_hidden), a
    visible bias ``B_v`` and a hidden bias ``B_h``. Updates are applied by
    hand (momentum + weight decay), not through a Keras optimizer.

    Args:
        n_visible: number of visible units.
        n_hidden: number of hidden units.
        learning_rate: step size; divided by the batch size at every step.
        momentum: coefficient applied to the previous update.
        gamma: decay coefficient applied to W, B_v and B_h in every update.
        n_cd_step: number of Gibbs steps used to build the negative phase.
    '''

    def __init__(
        self, 
        n_visible,
        n_hidden,
        learning_rate = 1e-2,
        momentum = 0.9,
        gamma = 1e-2,
        n_cd_step = 1
    ):
        super().__init__()

        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.gamma = gamma
        self.n_cd_step = n_cd_step

        self.W = tf.Variable(tf.random.normal(
            shape = (self.n_visible, self.n_hidden),
            mean = 0.0, 
            stddev = 1.0 / self.n_visible
        ))
        self.B_v = tf.Variable(tf.zeros(shape = (self.n_visible, )))
        self.B_h = tf.Variable(tf.zeros(shape = (self.n_hidden, )))

        # Create the momentum buffers up front so train_one_step can be
        # called directly, without going through train() first.
        self.init_moment()

    def init_moment(self):
        ''' Reset the momentum buffers (previous updates) to zero. '''
        self.dW = tf.zeros(shape = tf.shape(self.W))
        self.dB_v = tf.zeros(shape = tf.shape(self.B_v))
        self.dB_h = tf.zeros(shape = tf.shape(self.B_h))

    def get_hidden_prob(self, v):
        ''' Return sigmoid(v @ W + B_h) for a batch of visible vectors. '''
        logit_h = tf.matmul(v, self.W) + self.B_h
        return tf.nn.sigmoid(logit_h)
    
    def get_visible_prob(self, h):
        ''' Return sigmoid(h @ W^T + B_v) for a batch of hidden vectors. '''
        logit_v = tf.matmul(h, tf.transpose(self.W)) + self.B_v
        return tf.nn.sigmoid(logit_v)

    def get_white_noise(self, shape):
        ''' Return Gaussian noise of the given shape (mean 0, stddev 1e-2). '''
        return tf.random.normal(shape, 0.0, 1e-2)
    
    def get_hidden_sample(self, v, std = 1.0):
        ''' Return a noisy hidden activation for visible batch ``v``.

        This is the sigmoid of the hidden logits perturbed by Gaussian noise
        scaled by ``std``; the result is continuous in (0, 1), not binary.
        '''
        logit_h = tf.matmul(v, self.W) + self.B_h
        noise = self.get_white_noise(tf.shape(logit_h))
        return tf.nn.sigmoid(logit_h + std * noise)

    def get_visible_sample(self, h, std = 1.0):
        ''' Return a noisy visible activation for hidden batch ``h``.

        Mirror of get_hidden_sample: sigmoid of the visible logits perturbed
        by Gaussian noise scaled by ``std``.
        '''
        logit_v = tf.matmul(h, tf.transpose(self.W)) + self.B_v
        noise = self.get_white_noise(tf.shape(logit_v))
        return tf.nn.sigmoid(logit_v + std * noise)

    def energy(self, v, h):
        ''' Return the batch mean of E(v, h) = -v.W.h - B_v.v - B_h.h.

        ``v`` and ``h`` are 2-D batches whose rows are paired.
        '''
        e1 = -tf.einsum('ni,ij,nj->n', v, self.W, h)
        e2 = -tf.einsum('i,ni->n', self.B_v, v)
        e3 = -tf.einsum('j,nj->n', self.B_h, h)
        return tf.reduce_mean(e1 + e2 + e3)

    def free_energy(self, v):
        ''' Return the per-row free energy of ``v`` as a NumPy array.

        F(v) = -B_v.v - sum_j softplus((v @ W + B_h)_j)
        '''
        e1 = -tf.einsum('i,ni->n', self.B_v, v)
        # softplus(x) == log(1 + exp(x)), but does not overflow for large x.
        e2 = tf.nn.softplus(tf.matmul(v, self.W) + self.B_h)
        e2 = -tf.math.reduce_sum(e2, axis = -1)
        return (e1 + e2).numpy()

    def get_grad_energy(self, v, h):
        ''' Return batch-summed statistics (v^T h, sum of v, sum of h).

        With the energy defined in ``energy``, these are the negative
        gradients of the summed energy w.r.t. W, B_v and B_h.
        '''
        dW_dE = tf.einsum('ni,nj->ij', v, h)
        dB_v_dE = tf.math.reduce_sum(v, axis = 0)
        dB_h_dE = tf.math.reduce_sum(h, axis = 0)
        return (dW_dE, dB_v_dE, dB_h_dE)

    def gibbs_sampling(self, v):
        ''' Run ``n_cd_step`` visible -> hidden -> visible passes from ``v``. '''
        neg_v = tf.identity(v)
        for _ in range(self.n_cd_step):
            neg_h = self.get_hidden_sample(neg_v)
            neg_v = self.get_visible_sample(neg_h)
        return neg_v

    def train_one_step(self, pos_v): # Contrastive Divergence
        ''' Apply one contrastive-divergence update for batch ``pos_v``.

        Returns:
            Root of the mean squared difference between the free energies of
            the data batch and of its reconstruction, computed after the
            parameter update.
        '''
        # Positive phase uses the data; negative phase uses the Gibbs chain.
        pos_h = self.get_hidden_prob(pos_v)
        neg_v = self.gibbs_sampling(pos_v)
        neg_h = self.get_hidden_prob(neg_v)

        pos_W, pos_B_v, pos_B_h = self.get_grad_energy(pos_v, pos_h)
        neg_W, neg_B_v, neg_B_h = self.get_grad_energy(neg_v, neg_h)

        # Statistics are summed over the batch, so scale the step by 1/batch.
        lr = self.learning_rate / pos_v.shape[0]
        self.dW = self.momentum * self.dW \
            + lr * (pos_W - neg_W - self.gamma * self.W)
        self.dB_v = self.momentum * self.dB_v \
            + lr * (pos_B_v - neg_B_v - self.gamma * self.B_v)
        self.dB_h = self.momentum * self.dB_h \
            + lr * (pos_B_h - neg_B_h - self.gamma * self.B_h)

        # update in place; the attributes keep pointing at the original
        # tf.Variable objects instead of being rebound to assign_add's result
        self.W.assign_add(self.dW)
        self.B_v.assign_add(self.dB_v)
        self.B_h.assign_add(self.dB_h)

        score = tf.keras.losses.mse(
            self.free_energy(pos_v),
            self.free_energy(neg_v)
        )
        return tf.math.sqrt(score).numpy()

    def train(self, X, batch_size, epochs, shuffle_buffer = None):
        ''' Train on ``X`` for ``epochs`` passes, logging every 10th epoch.

        Args:
            X: training rows; sliced along the first axis into examples.
            batch_size: number of rows per update.
            epochs: number of passes over ``X``.
            shuffle_buffer: buffer size passed to ``Dataset.shuffle``. When
                None, the number of rows in ``X`` is used so every epoch is
                a full shuffle; pass a smaller value to bound memory use.
        '''
        if shuffle_buffer is None:
            # Dataset.shuffle needs a positive buffer size.
            shuffle_buffer = max(len(X), 1)

        X_train = Dataset.from_tensor_slices(X)
        # Stays NaN if X yields no batches, so the log line below still works.
        score = float('nan')
        for epoch in range(epochs):
            X_batch = X_train.shuffle(shuffle_buffer).batch(batch_size)
            
            # Momentum is restarted from zero at the beginning of every epoch.
            self.init_moment()
            for batch in X_batch:
                score = self.train_one_step(batch)

            if (epoch+1) % 10 == 0:
                # Reports the score of the last batch of the epoch.
                print(f'epoch: {epoch+1:>3}, score: {score:.3f}')

    def transform(self, v):
        ''' Return hidden probabilities for ``v`` as a NumPy array. '''
        return self.get_hidden_prob(v).numpy()

    def predict(self, v):
        ''' Return the noise-free reconstruction of ``v`` as a NumPy array. '''
        h = self.get_hidden_prob(v)
        return self.get_visible_prob(h).numpy()

In [11]:
# One visible unit per movie; 512 hidden units.
n_visible, n_hidden = n_item, 512
learning_rate = 1e-2

rbm = RBM(
    n_visible = n_visible,
    n_hidden = n_hidden,
    learning_rate = learning_rate,
    gamma = 0.01,
)

In [12]:
%%time
# Training schedule.
# NOTE(review): a batch size of 258 is unusual; possibly 256 was intended — confirm.
batch_size, epochs = 258, 500

rbm.train(X_train, batch_size = batch_size, epochs = epochs)

epoch:  10, score: 142.751
epoch:  20, score: 69.630
epoch:  30, score: 54.482
epoch:  40, score: 42.146
epoch:  50, score: 35.503
epoch:  60, score: 37.950
epoch:  70, score: 35.729
epoch:  80, score: 29.421
epoch:  90, score: 33.277
epoch: 100, score: 30.596
epoch: 110, score: 29.257
epoch: 120, score: 28.852
epoch: 130, score: 25.651
epoch: 140, score: 25.961
epoch: 150, score: 24.625
epoch: 160, score: 26.502
epoch: 170, score: 23.915
epoch: 180, score: 25.666
epoch: 190, score: 23.853
epoch: 200, score: 24.223
epoch: 210, score: 23.260
epoch: 220, score: 24.113
epoch: 230, score: 22.170
epoch: 240, score: 23.823
epoch: 250, score: 22.133
epoch: 260, score: 21.480
epoch: 270, score: 21.456
epoch: 280, score: 24.668
epoch: 290, score: 22.699
epoch: 300, score: 22.640
epoch: 310, score: 22.977
epoch: 320, score: 22.807
epoch: 330, score: 21.336
epoch: 340, score: 21.671
epoch: 350, score: 22.385
epoch: 360, score: 21.095
epoch: 370, score: 21.977
epoch: 380, score: 20.217
epoch: 390,

In [21]:
# Reconstruct the held-out rows with the trained model.
pred = rbm.predict(X_test)

# RMSE is computed per row (over all columns) and then averaged across rows.
rmse = np.sqrt(np.square(X_test - pred).mean(axis = -1)).mean()
print(rmse)

0.085405126
