In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import Variable

In [None]:
# Load the two input tables from the current working directory.
# movie.csv  -> columns movieId, title, genres
# rating.csv -> columns userId, movieId, rating, timestamp
movie = pd.read_csv("movie.csv")
rating = pd.read_csv("rating.csv")

In [None]:
# Preview the first rows of the movie table.
movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Print a concise summary of the movie table (column dtypes, non-null counts).
# `info` is a method: without the call parentheses the cell only displays the
# bound-method repr instead of the summary.
movie.info()

<bound method DataFrame.info of        movieId                               title  \
0            1                    Toy Story (1995)   
1            2                      Jumanji (1995)   
2            3             Grumpier Old Men (1995)   
3            4            Waiting to Exhale (1995)   
4            5  Father of the Bride Part II (1995)   
...        ...                                 ...   
27273   131254        Kein Bund für's Leben (2007)   
27274   131256       Feuer, Eis & Dosenbier (2002)   
27275   131258                  The Pirates (2014)   
27276   131260                 Rentun Ruusu (2001)   
27277   131262                    Innocence (2014)   

                                            genres  
0      Adventure|Animation|Children|Comedy|Fantasy  
1                       Adventure|Children|Fantasy  
2                                   Comedy|Romance  
3                             Comedy|Drama|Romance  
4                                           Comedy  
.

In [None]:
# Drop the genres column, leaving movieId and title.
# Re-assigning (rather than inplace=True) together with errors='ignore' keeps
# the cell idempotent: re-running it no longer raises a KeyError.
movie = movie.drop(columns=['genres'], errors='ignore')

In [None]:
# Confirm that only movieId and title remain.
movie.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [None]:
# List the columns available in the ratings table.
rating.columns

Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')

In [None]:
# Keep only the columns needed downstream: userId, movieId and rating.
# Re-assigning (rather than inplace=True) together with errors='ignore' keeps
# the cell idempotent: re-running it no longer raises a KeyError.
rating = rating.drop(columns=['timestamp'], errors='ignore')
rating.head()

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [None]:
# Attach every rating to its movie. The join key and join type are spelled out
# so the result does not silently depend on which columns the two frames
# happen to share.
df = pd.merge(movie, rating, on='movieId', how='inner')

In [None]:
# The merged DataFrame is too big to process in full (it overflows), so only
# the first 1M rows are kept.
# NOTE(review): the rows appear to be ordered by movie (see df.head()), so this
# slice keeps the leading movies only; it is not a random sample.
df = df.head(1_000_000)

In [None]:
# Reshape the long (userId, movieId, rating) table into a user x movie matrix:
# one row per user, one column per movie, NaN where a user has no rating.
user_rating = df.pivot(values='rating', columns='movieId', index='userId')

In [None]:
# Normalise the ratings: missing entries become 0 and every rating is divided
# by 5 (assumes a 0-5 rating scale, so values land in [0, 1]).
# NOTE(review): the resulting array is presumably float64 (pandas default),
# while the TensorFlow variables below are float32 — confirm a cast happens
# before the data is fed to the model.
norm_user_rating = user_rating.fillna(0).div(5.0)
trX = norm_user_rating.to_numpy()
trX[:5]

array([[0. , 0.7, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0.7, 0. , 0. , 0.7, 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.7, 0. , 0. , 0.7, 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0.7, 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. ],
       [0. , 0. , 0.8, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. ,

In [None]:
# Preview the merged (movie + rating) table.
df.head()

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),3,4.0
1,1,Toy Story (1995),6,5.0
2,1,Toy Story (1995),8,4.0
3,1,Toy Story (1995),10,4.0
4,1,Toy Story (1995),11,4.5


In [None]:
# Model dimensions: one visible unit per movie column of the user x movie
# matrix, and a fixed number of hidden units (the features to be learned).
hiddenUnits = 20
visibleUnits = len(user_rating.columns)

# All parameters start at zero and are float32.
# The dtype belongs to tf.zeros: when passed as the second positional argument
# of tf.Variable it is interpreted as `trainable`, not as a dtype.
vb = tf.Variable(tf.zeros([visibleUnits], tf.float32))  # visible bias, one entry per movie
hb = tf.Variable(tf.zeros([hiddenUnits], tf.float32))  # hidden bias, one entry per hidden unit
w = tf.Variable(tf.zeros([visibleUnits, hiddenUnits], tf.float32))  # visible-to-hidden weights

In [None]:
# An all-zero visible vector used as a dummy input.
v0 = tf.zeros([visibleUnits], tf.float32)

# Sanity check: promote the single visible vector to a 1-row matrix and
# multiply it by w; the result has one row with one entry per hidden unit.
tf.matmul(tf.expand_dims(v0, axis=0), w)

<tf.Tensor: shape=(1, 20), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]], dtype=float32)>

In [None]:
#phase 1: input processing 

def hidden_layer(v0_state, w, hb):
    """Sample binary hidden states for one visible vector.

    Args:
        v0_state: a visible vector; it is wrapped into a one-element batch
            before the matrix product.
        w: weight matrix multiplied on the right of the visible row.
        hb: hidden bias added to the pre-activations.

    Returns:
        A tensor of 0./1. samples with the same shape as the hidden
        probabilities. The probabilities themselves are not returned.
    """
    pre_activation = tf.matmul([v0_state], w) + hb
    h0_prob = tf.nn.sigmoid(pre_activation)

    # Bernoulli sampling: a unit is set to 1. when its probability exceeds a
    # fresh uniform draw, and to 0. otherwise.
    noise = tf.random.uniform(tf.shape(h0_prob))
    return tf.nn.relu(tf.sign(h0_prob - noise))

# Run the sampler on the all-zero input and show the first 15 hidden states.
# With all-zero weights and biases every hidden probability is sigmoid(0) = 0.5,
# so the printed states are random and differ between runs (no seed is set).
h0 = hidden_layer(v0, w, hb)
print("first 15 hidden states: ", h0[0, :15])

first 15 hidden states:  tf.Tensor([0. 0. 1. 0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0.], shape=(15,), dtype=float32)


In [None]:
def reconstructed_output(h0_state, w, vb):
    """Sample a binary visible vector back from hidden states.

    Args:
        h0_state: hidden states as a matrix (rows of hidden units).
        w: weight matrix; its transpose maps hidden units back to visible units.
        vb: visible bias added to the pre-activations.

    Returns:
        The first row of the sampled 0./1. visible states.
    """
    pre_activation = tf.matmul(h0_state, tf.transpose(w)) + vb
    v1_prob = tf.nn.sigmoid(pre_activation)

    # Same Bernoulli sampling scheme as for the hidden layer.
    noise = tf.random.uniform(tf.shape(v1_prob))
    sampled = tf.nn.relu(tf.sign(v1_prob - noise))

    # Only row 0 is returned, so a multi-row input yields just its first reconstruction.
    return sampled[0]

# Reconstruct a visible vector from the sampled hidden states and compare
# shapes: h0 keeps its leading batch axis, while v1 is a flat vector like v0
# because reconstructed_output returns only the first row.
v1 = reconstructed_output(h0, w, vb)
print("hidden state shape: ", h0.shape)
print("v0 state shape: ", v0.shape)
print("v1 state shape: ", v1.shape)

hidden state shape:  (1, 20)
v0 state shape:  (146,)
v1 state shape:  (146,)


In [None]:
# Training configuration. The training loop itself is not written yet.
epochs = 15      # number of epochs
batchsize = 100  # batch size
alpha = 0.1      # presumably the learning rate — TODO confirm against the training loop
K = 1            # presumably the number of Gibbs / contrastive-divergence steps — TODO confirm

# Bookkeeping containers, presumably filled during training — TODO confirm.
errors = []
weights = []

In [None]:
# CONTINUE...