A Colloborative Filtering (MF) implementation in Keras. 

Problem structure is that of Movie Lens Recommender System.


1) http://www.fenris.org/2016/03/07/index-html
2) https://github.com/bradleypallen/keras-movielens-cf

In [35]:
%matplotlib inline
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from surprise import Dataset

from keras.layers import Embedding, Reshape, Merge, Dropout, Dense
from keras.models import Sequential


# define the base CF model
class CFModelDef(Sequential):

    def __init__(self, n_users, m_items, k_factors, **kwargs):
        user_nw = Sequential()
        user_nw.add(Embedding(n_users, k_factors, input_length=1))
        user_nw.add(Reshape((k_factors,)))
        item_nw = Sequential()
        item_nw.add(Embedding(m_items, k_factors, input_length=1))
        item_nw.add(Reshape((k_factors,)))
        super(CFModelDef, self).__init__(**kwargs)
        self.add(Merge([user_nw, item_nw], mode='dot', dot_axes=1))

    def rate(self, user_id, item_id):
        return self.predict([np.array([user_id]), np.array([item_id])])[0][0]

In [2]:
# get a list of (user_id,item_id,rating, time_stamp)
data = Dataset.load_builtin('ml-100k')
# convert into into a pd data frame
df = pd.DataFrame(data.raw_ratings, columns=['userid', 'movieid','rating','timestamp'])
# create unique id for user and movie
df['userid'] = pd.to_numeric(df['userid'])
df['movieid'] = pd.to_numeric(df['movieid'])
df['user_emb_id'] = df['userid'] - 1
df['movie_emb_id'] = df['movieid'] - 1
df['rating'] = pd.to_numeric(df['rating'])
df.head()

Unnamed: 0,userid,movieid,rating,timestamp,user_emb_id,movie_emb_id
0,196,242,3.0,881250949,195,241
1,186,302,3.0,891717742,185,301
2,22,377,1.0,878887116,21,376
3,244,51,2.0,880606923,243,50
4,166,346,1.0,886397596,165,345


In [3]:
K_FACTORS = 100
RNG_SEED = 1446557

max_userid = df['userid'].drop_duplicates().max()
max_movieid = df['movieid'].drop_duplicates().max()
print(str(len(df))+ ' ratings loaded.')
print('# of users: ' + str(max_userid))
print('# of items: ' + str(max_movieid))
max_emb_userid = df['user_emb_id'].drop_duplicates().max()
max_emb_movieid = df['movie_emb_id'].drop_duplicates().max()
print('# of users: ' + str(max_emb_userid))
print('# of items: ' + str(max_emb_movieid))
print(df['movieid'].max())

100000 ratings loaded.
# of users: 943
# of items: 1682
# of users: 942
# of items: 1681
1682


In [4]:
shuffled_df = df.sample(frac=1., random_state=RNG_SEED)
Users = shuffled_df['user_emb_id'].values
print('Users shape ='+ str(Users.shape))
Movies = shuffled_df['movie_emb_id'].values
print('Movies shape ='+ str(Movies.shape))
Ratings = shuffled_df['rating'].values
print('Ratings shape ='+str(df.shape))
print(Users[:5])
print(Movies[:5])
print(Ratings[:5])

Users shape =(100000,)
Movies shape =(100000,)
Ratings shape =(100000, 6)
[304 902 928 902  57]
[ 732 1069  134   88  432]
[ 3.  4.  5.  4.  5.]


In [5]:
# baseline
from surprise import SVD
from surprise import evaluate, print_perf
# We'll use the famous SVD algorithm.
data.split(n_folds=3)
algo = SVD()
# Evaluate performances of our algorithm on the dataset.
perf = evaluate(algo, data, measures=['RMSE', 'MAE'])
print_perf(perf)

Evaluating RMSE, MAE of algorithm SVD.

------------
Fold 1
RMSE: 0.9516
MAE:  0.7505
------------
Fold 2
RMSE: 0.9415
MAE:  0.7426
------------
Fold 3
RMSE: 0.9445
MAE:  0.7448
------------
------------
Mean RMSE: 0.9459
Mean MAE : 0.7460
------------
------------
        Fold 1  Fold 2  Fold 3  Mean    
RMSE    0.9516  0.9415  0.9445  0.9459  
MAE     0.7505  0.7426  0.7448  0.7460  


In [6]:
# define a Sequential model from scratch
n_users = max_userid
k_factors = 100
m_items = max_movieid

user_nw = Sequential()
user_nw.add(Embedding(n_users, k_factors, input_length=1))
user_nw.add(Reshape((k_factors,)))

item_nw = Sequential()
item_nw.add(Embedding(m_items, k_factors, input_length=1))
item_nw.add(Reshape((k_factors,)))

merged = Merge([user_nw, item_nw], mode='dot', dot_axes=1)

CF_model = Sequential()
CF_model.add(merged)
CF_model.compile(loss='mse', optimizer='adam',metrics=['mae'])

  


In [36]:
# fit a pre-defined model
CFmodel = CFModelDef(n_users, m_items,k_factors)
CFmodel.compile(loss='mse',optimizer='adam')
CFmodel.fit([Users, Movies], Ratings, epochs=5,verbose=2)
CFmodel.rate(196,242)



Epoch 1/5
 - 11s - loss: 5.2853
Epoch 2/5
 - 10s - loss: 0.9325
Epoch 3/5
 - 10s - loss: 0.8184
Epoch 4/5
 - 10s - loss: 0.7260
Epoch 5/5
 - 10s - loss: 0.6219


2.4884138

In [11]:
# fit custom model
CF_model.fit([Users, Movies], Ratings, epochs=5,verbose=2)
CF_model.predict([np.array([196]), np.array([242])])[0][0]

Epoch 1/5
 - 11s - loss: 5.3402 - mean_absolute_error: 1.8023
Epoch 2/5
 - 10s - loss: 0.9374 - mean_absolute_error: 0.7605
Epoch 3/5
 - 10s - loss: 0.8175 - mean_absolute_error: 0.7117
Epoch 4/5
 - 10s - loss: 0.7186 - mean_absolute_error: 0.6653
Epoch 5/5
 - 10s - loss: 0.6142 - mean_absolute_error: 0.6144


2.3698106

In [12]:
# save a model into JSON
# serialize model to JSON
from keras.models import model_from_json

model_json = CF_model.to_json()
with open("CF_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
CF_model.save_weights("CF_model.h5")
print("Saved model to disk")



    
    
 # load json and create model
json_file = open('CF_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("CF_model.h5")
print("Loaded model from disk")

Saved model to disk
Loaded model from disk


  return cls(**config)


In [37]:
#loaded_model.compile(loss='mse', optimizer='adam',metrics=['mae'])
#loaded_model.summary()
#loaded_model.fit([Users, Movies], Ratings, epochs=5,verbose=2,batch_size=20)
loaded_model.predict([np.array([196]), np.array([242])])[0][0]
model_json = CFmodel.to_json()
with open("CFModel.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
CFmodel.save_weights("CFModel.h5")
print("Saved model to disk")

Saved model to disk


In [17]:
 # load json and create model
json_file = open('CFModel.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("CFModel.h5")
print("Loaded model from disk")

ValueError: Unknown layer: CFModel

In [38]:
CFmodel.save('CFModel_01.h5')  # creates a HDF5 file 'my_model.h5'

In [51]:
from keras.models import load_model
M2 = load_model('CFModel_01.h5',custom_objects={'CFModelDef':CFmodel})

TypeError: __init__() missing 3 required positional arguments: 'n_users', 'm_items', and 'k_factors'