The code for this recommender system was adapted from: https://www.youtube.com/watch?v=V2h3IOBDvrA&feature=youtu.be&list=PLfYUBJiXbdtS2UQRzyrxmyVHoGW0gmLSM&t=6147

In [14]:
import tensorflow as tf
import numpy as np
import pandas as pd
import sys
from tf_print import tf_print

from tensorflow.keras.layers import ( Input, Embedding, Flatten, Dot, Add, Concatenate, 
        Dense, Dropout, Activation, Multiply )
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.activations import sigmoid
from tensorflow.keras.initializers import RandomUniform
from tensorflow.metrics import mean_squared_error

In [15]:
# Read the ratings table from the local MovieLens CSV and preview the first rows.
ratings = pd.read_csv(
    'ml-100k/ratings.csv',
    sep=',',
    encoding='latin-1',
)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [16]:
# Re-index raw user ids to contiguous integers 0..n-1 (order of first appearance)
# so they can be used directly as rows of an Embedding table.
u_unique = ratings.userId.unique()
user2Id = dict(zip(u_unique, range(len(u_unique))))
ratings.userId = ratings.userId.map(user2Id)

# Same re-indexing for movie ids.
m_unique = ratings.movieId.unique()
movie2Id = dict(zip(m_unique, range(len(m_unique))))
ratings.movieId = ratings.movieId.map(movie2Id)

In [17]:
# Number of distinct users and movies; these size the embedding tables below.
n_users, n_movies = ratings.userId.nunique(), ratings.movieId.nunique()
(n_users, n_movies)

(610, 9724)

In [18]:
# Width of each user / movie embedding vector (passed as the Embedding output size).
n_factors = 50

In [19]:
# Seed NumPy's global RNG so the random train/validation mask drawn with
# np.random.rand below is reproducible across kernel restarts.
# NOTE: this must be a *call*. Assigning (`np.random.seed = 42`) replaces the
# seeding function with an int, leaves the RNG unseeded, and breaks
# np.random.seed for the rest of the session (restart the kernel if that ran).
np.random.seed(42)

In [20]:
# Boolean mask over the rating rows: True (train) where a uniform draw is below 0.8,
# i.e. roughly 80% of rows; the remaining rows form the validation set.
msk = np.less(np.random.rand(len(ratings)), 0.8)

In [21]:
# Split the ratings: rows selected by the mask train the model, the rest validate it.
trn, val = ratings[msk], ratings[np.logical_not(msk)]

# Embeddings Model

In [25]:
# User branch: a single integer id per sample, looked up in an
# (n_users x n_factors) embedding table with a light L2 penalty.
user_in = Input(shape=(1,), dtype='int64', name='user_input')
u = Embedding(
    n_users,
    n_factors,
    input_length=1,
    embeddings_regularizer=l2(1e-5),
)(user_in)

# Movie branch: same construction over the (n_movies x n_factors) table.
movie_in = Input(shape=(1,), dtype='int64', name='movie_input')
m = Embedding(
    n_movies,
    n_factors,
    input_length=1,
    embeddings_regularizer=l2(1e-5),
)(movie_in)

In [26]:
# Predicted rating = dot product of the user and movie embedding vectors,
# flattened to one value per sample.
x = Dot(axes=-1)([u, m])
x = Flatten()(x)

model = tf.keras.Model(
    inputs=[user_in, movie_in],
    outputs=x,
)
model.compile(
    optimizer=Adam(lr=0.005),
    loss='mse',
)

In [27]:
# Train the dot-product model for 8 epochs, reporting loss on the held-out split.
model.fit(
    x=[trn.userId, trn.movieId],
    y=trn.rating,
    batch_size=64,
    epochs=8,
    validation_data=([val.userId, val.movieId], val.rating),
)

Train on 80608 samples, validate on 20228 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0xb2bcbcac8>

# Bias with Embeddings Model

In [28]:
def embedding_input(name, n_in, n_out, reg):
    """Create an integer id input and the embedding lookup applied to it.

    Args:
        name: Name given to the Input layer.
        n_in: Number of rows in the embedding table (distinct ids).
        n_out: Size of each embedding vector.
        reg: L2 regularization factor applied to the embedding weights.

    Returns:
        A tuple ``(input_tensor, embedded_tensor)``.
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    lookup = Embedding(
        n_in,
        n_out,
        input_length=1,
        embeddings_regularizer=l2(reg),
    )
    return id_input, lookup(id_input)

In [29]:
# Fresh inputs/embeddings for the bias model, with L2 factor 0.1 on the embeddings.
user_in, u = embedding_input(name='user_input', n_in=n_users, n_out=n_factors, reg=0.1)
movie_in, m = embedding_input(name='movie_input', n_in=n_movies, n_out=n_factors, reg=0.1)

In [30]:
def create_bias(inp, n_in):
    """Build a per-id scalar bias term for ``inp``.

    Args:
        inp: Id input tensor to look the bias up for.
        n_in: Number of distinct ids (rows of the bias table).

    Returns:
        The flattened output of a width-1 embedding applied to ``inp``.
    """
    bias_lookup = Embedding(n_in, 1, input_length=1)(inp)
    return Flatten()(bias_lookup)

In [31]:
# One learned scalar bias per user and per movie.
ub = create_bias(inp=user_in, n_in=n_users)
mb = create_bias(inp=movie_in, n_in=n_movies)

In [32]:
# Predicted rating = <user vector, movie vector> + user bias + movie bias.
x = Dot(axes=-1)([u, m])
x = Flatten()(x)
x = Add()([x, ub])
x = Add()([x, mb])

model = tf.keras.Model(
    inputs=[user_in, movie_in],
    outputs=x,
)
model.compile(
    optimizer=Adam(lr=0.005),
    loss='mse',
)

In [33]:
# Train the bias model for 6 epochs, reporting loss on the held-out split.
model.fit(
    x=[trn.userId, trn.movieId],
    y=trn.rating,
    batch_size=64,
    epochs=6,
    validation_data=([val.userId, val.movieId], val.rating),
)

Train on 80608 samples, validate on 20228 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0xb2d6f3080>

# Neural Net

In [34]:
# Fresh inputs/embeddings for the neural-net model, with L2 factor 1e-5.
user_in, u = embedding_input(name='user_input', n_in=n_users, n_out=n_factors, reg=1e-5)
movie_in, m = embedding_input(name='movie_input', n_in=n_movies, n_out=n_factors, reg=1e-5)

In [35]:
# Instead of a dot product, feed the concatenated user and movie embeddings
# through one hidden ReLU layer with dropout, then a single linear output unit.
x = Concatenate()([u, m])
x = Flatten()(x)
x = Dense(units=100, activation='relu', use_bias=True)(x)
x = Dropout(rate=0.75)(x)
x = Dense(1)(x)

model = tf.keras.Model(
    inputs=[user_in, movie_in],
    outputs=x,
)
model.compile(
    optimizer=Adam(lr=0.001),
    loss='mse',
)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [36]:
# Train the neural-net model for 4 epochs, reporting loss on the held-out split.
model.fit(
    x=[trn.userId, trn.movieId],
    y=trn.rating,
    batch_size=64,
    epochs=4,
    validation_data=([val.userId, val.movieId], val.rating),
)

Train on 80608 samples, validate on 20228 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0xb2ecd7ef0>

# Evaluation Metrics

In [38]:
# Print the square root of 0.7836 via the tf_print helper imported from tf_print.
# NOTE(review): 0.7836 is hard-coded — presumably a validation MSE copied by hand
# from a training run above (which would make this an RMSE); confirm, and
# recompute it from the fit history whenever the models are retrained.
tf_print(tf.math.sqrt(0.7836))

0.8852118
