In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import islice

## Initial datasets

In [3]:
movies_df = pd.read_csv('./ml-latest-small/movies.csv')
print(movies_df)
len(movies_df)

      movieId                                      title  \
0           1                           Toy Story (1995)   
1           2                             Jumanji (1995)   
2           3                    Grumpier Old Men (1995)   
3           4                   Waiting to Exhale (1995)   
4           5         Father of the Bride Part II (1995)   
...       ...                                        ...   
9737   193581  Black Butler: Book of the Atlantic (2017)   
9738   193583               No Game No Life: Zero (2017)   
9739   193585                               Flint (2017)   
9740   193587        Bungo Stray Dogs: Dead Apple (2018)   
9741   193609        Andrew Dice Clay: Dice Rules (1991)   

                                           genres  
0     Adventure|Animation|Children|Comedy|Fantasy  
1                      Adventure|Children|Fantasy  
2                                  Comedy|Romance  
3                            Comedy|Drama|Romance  
4                  

9742

In [4]:
ratings_df = pd.read_csv('./ml-latest-small/ratings.csv')
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [5]:
# userId as rows and movieId as columns
user_rating_df = ratings_df.pivot(index='userId', columns='movieId', values='rating')

# Normalizing
norm_user_rating_df = user_rating_df.fillna(0) / 5.0
trX = norm_user_rating_df.values
print(len(trX))

610


array([[0.8, 0. , 0.8, ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0.8, 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [6]:
hiddenUnits = 20 # need to test tbh
visibleUnits = len(user_rating_df.columns)
vb = tf.Variable(tf.zeros([visibleUnits]), tf.float32) # visible layers are the movies
hb = tf.Variable(tf.zeros([hiddenUnits]), tf.float32) # number of features/hidden layers
W = tf.Variable(tf.zeros([visibleUnits, hiddenUnits]), tf.float32)

v0 = tf.zeros([visibleUnits], tf.float32)
tf.matmul([v0], W)

<tf.Tensor: shape=(1, 20), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]], dtype=float32)>

In [7]:
def hidden_layer(v0_state, W, hb):
    h0_prob = tf.nn.sigmoid(tf.matmul([v0_state], W) + hb) # the stochastic update rule
    h0_state = tf.nn.relu(tf.sign(h0_prob - tf.random.uniform(tf.shape(h0_prob))))
    return h0_state

h0 = hidden_layer(v0, W, hb)

def reconstruction(h0, W, vb):
    v1_prob = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(W)) + vb) 
    v1_state = tf.nn.relu(tf.sign(v1_prob - tf.random.uniform(tf.shape(v1_prob)))) 
    return v1_state[0]

v1 = reconstruction(h0, W, vb)

In [8]:
def error(v0_state, v1_state):
    return tf.reduce_mean(tf.square(v0_state - v1_state))

err = tf.reduce_mean(tf.square(v0 - v1))
print("error" , err.numpy())

error 0.4975319


In [9]:
epochs = 5
batchsize = 100
errors = []
weights = []
K=1
alpha = 0.1

In [None]:
#creating datasets
train_ds = tf.data.Dataset.from_tensor_slices((np.float32(trX))).batch(batchsize)

for epoch in range(epochs):
    batch_number = 0
    for batch_x in train_ds:

        for i_sample in range(len(batch_x)):           
            for k in range(K):
                # positive phase
                v0_state = batch_x[i_sample]
                h0_state = hidden_layer(v0_state, W, hb)

                # negative phase
                v1_state = reconstruction(h0_state, W, vb)
                h1_state = hidden_layer(v1_state, W, hb)

                delta_W = tf.matmul(tf.transpose([v0_state]), h0_state) - tf.matmul(tf.transpose([v1_state]), h1_state)
                W = W + alpha * delta_W

                vb = vb + alpha * tf.reduce_mean(v0_state - v1_state, 0)
                hb = hb + alpha * tf.reduce_mean(h0_state - h1_state, 0) 

                v0_state = v1_state

            if i_sample == len(batch_x)-1:
                err = error(batch_x[i_sample], v1_state)
                errors.append(err)
                weights.append(W)
                print(f"Epoch: {epoch + 1}, batch #: {batch_number} of {int(len(trX) / batchsize)}, sample #: {i_sample}, reconstruction error: {err}")

        batch_number += 1

plt.plot(errors)
plt.ylabel('Error')
plt.xlabel('Epoch')
plt.show()

Epoch: 1, batch #: 0 of 6, sample #: 99, reconstruction error: 0.023741258308291435
Epoch: 1, batch #: 1 of 6, sample #: 99, reconstruction error: 0.03888215124607086
Epoch: 1, batch #: 2 of 6, sample #: 99, reconstruction error: 0.005447346717119217
Epoch: 1, batch #: 3 of 6, sample #: 99, reconstruction error: 0.004874536767601967


## Testing

In [17]:
user_id = 400

# Reshaping for 2D
inputUser = trX[user_id].reshape(1, -1)

inputUser = tf.convert_to_tensor(trX[user_id],"float32")

v0test = tf.zeros([visibleUnits], tf.float32)
h0test = tf.nn.sigmoid(tf.matmul([inputUser], W) + hb)

v1test = tf.nn.sigmoid(tf.matmul(h0test, tf.transpose(W)) + vb)

rec = v1test

tf.maximum(rec,1)

scored_movies_df = movies_df[movies_df['movieId'].isin(user_rating_df.columns)]
scored_movies_df = scored_movies_df.assign(recommendation = rec[0])

user_movies_df = ratings_df[ratings_df['userId'] == user_id]
merged_df = scored_movies_df.merge(user_movies_df, on='movieId', how='outer')
merged_df.sort_values(["recommendation"], ascending=False).head(20)

Unnamed: 0,movieId,title,genres,recommendation,userId,rating,timestamp
3563,4886,"Monsters, Inc. (2001)",Adventure|Animation|Children|Comedy|Fantasy,0.955603,,,
8677,122918,Guardians of the Galaxy 2 (2017),Action|Adventure|Sci-Fi,0.9357,,,
7022,68954,Up (2009),Adventure|Animation|Children|Drama,0.930065,,,
7449,81845,"King's Speech, The (2010)",Drama,0.922048,,,
6755,60069,WALL·E (2008),Adventure|Animation|Children|Romance|Sci-Fi,0.91727,,,
6388,50872,Ratatouille (2007),Animation|Children|Drama,0.875055,,,
7195,72998,Avatar (2009),Action|Adventure|Sci-Fi|IMAX,0.833704,,,
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War,0.828228,400.0,4.5,1498870000.0
7285,76093,How to Train Your Dragon (2010),Adventure|Animation|Children|Fantasy|IMAX,0.80244,400.0,4.5,1498870000.0
3189,4306,Shrek (2001),Adventure|Animation|Children|Comedy|Fantasy|Ro...,0.778695,,,
