In [4]:
# residual learning from MF and NN 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.utils import shuffle 

from keras.models import Model 
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.layers import Dropout, BatchNormalization, Activation, Dot
from keras.regularizers import l2
from keras.optimizers import SGD, Adam

In [5]:
df = pd.read_csv("./data/edited_ratings.csv")

N = df.userId.max() + 1 # number of users 
M = df.movie_idx.max() + 1 # number of movies 

# split in train and test 
df = shuffle(df) 
cutoff = int(0.8*len(df))
df_train = df.iloc[:cutoff] 
df_test = df.iloc[cutoff:] 

# init vars 
K = 10 # latent dimensionality 
mu = df_train.rating.mean() 
epoch = 25
reg = 0 # regularization penalty

# keras model 
u = Input(shape=(1, ))
m = Input(shape=(1, ))
u_embedding = Embedding(N, K)(u) # (N, 1, K) 
m_embedding = Embedding(M, K)(m) # (N, 1, K) 

# main branch MF
u_bias = Embedding(N, 1)(u) # (N, 1, 1) 
m_bias = Embedding(M, 1)(m) # (N, 1, 1) 
x = Dot(axes=2)([u_embedding, m_embedding]) # (N, 1, 1) 
x = Add()([x, u_bias, m_bias])
x = Flatten()(x) # (N, 1) 

# side branch Deep NN 
u_embedding = Flatten()(u_embedding) # (N, K) 
m_embedding = Flatten()(m_embedding) # (N, K) 
y = Concatenate()([u_embedding, m_embedding]) # (N, 2K) 
y = Dense(400)(y) 
y = Activation('elu')(y) 
# y = Dropout(0.5)(y) 
y = Dense(1)(y)

# merge the 2 models using add layer 
x = Add()([x, y])

NameError: name 'Add' is not defined

In [None]:
model = Model(inputs=[u, m], outputs= x)
model.compile(
    loss='mse', 
    optimizer=SGD(lr=0.01, momentum=0.9)
    metrics=['mse']
)

r = model.fit(x = [df_train.userId.values, df_train.movie_idx.values],
               y= df_train.rating.values - mu,
               epochs=epoch, 
               batch_size=128, 
               validation_data=(
                [df_test.userId.values, df_test.movie_idx.values], 
                df_test.rating.values - mu 
               ))

In [None]:
# plot losses 
plt.plot(r.history['loss'], label="train loss")
plt.plot(r.history['val_loss'], label='test loss')
plt.legend()
plt.show()

# plot mse 
plt.plot(r.history['mean_squared_error'], label='train mse')
plt.plot(r.history['val_mean_squared_error'], label="test mse")
plt.legend()
plt.show()