In [1]:
from __future__ import print_function, division
from builtins import range, input

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.layers import Dropout, BatchNormalization, Activation
from keras.regularizers import l2
from keras.optimizers import SGD, Adam

In [2]:
# load in the data
df = pd.read_csv('./cleaned_nf_dataset.csv')

N = df.Cust_Id.max() + 1 # number of users
M = df.Movie_Id.max() + 1 # number of movies

# split into train and test
df = shuffle(df)
cutoff = int(0.8*len(df))
df_train = df.iloc[:cutoff]
df_test = df.iloc[cutoff:]

In [3]:
# initialize variables
K = 10 # latent dimensionality
mu = df_train.Rating.mean()
epochs = 15
# reg = 0.0001 # regularization penalty

In [4]:
# keras model
u = Input(shape=(1,))
m = Input(shape=(1,))
u_embedding = Embedding(N, K)(u) # (N, 1, K)
m_embedding = Embedding(M, K)(m) # (N, 1, K)
u_embedding = Flatten()(u_embedding) # (N, K)
m_embedding = Flatten()(m_embedding) # (N, K)
x = Concatenate()([u_embedding, m_embedding]) # (N, 2K)

In [None]:
# the neural network
x = Dense(400)(x)
x = Activation('relu')(x)
x = Dense(1)(x)

model = Model(inputs=[u, m], outputs=x)
model.compile(
  loss='mse',
  # optimizer='adam',
  # optimizer=Adam(lr=0.01),
  optimizer=SGD(lr=0.08, momentum=0.9),
  metrics=['mse'],
)

r = model.fit(
  x=[df_train.Cust_Id.values, df_train.Movie_Id.values],
  y=df_train.Rating.values - mu,
  epochs=epochs,
  batch_size=128,
  validation_data=(
    [df_test.Cust_Id.values, df_test.Movie_Id.values],
    df_test.Rating.values - mu
  )
)

Epoch 1/15
Epoch 2/15
  7722/108360 [=>............................] - ETA: 11:00 - loss: nan - mse: nan

In [None]:
# plot losses
plt.plot(r.history['loss'], label="train loss")
plt.plot(r.history['val_loss'], label="test loss")
plt.legend()
plt.show()

In [None]:
# plot mse
plt.plot(r.history['mse'], label="train mse")
plt.plot(r.history['val_mse'], label="test mse")
plt.legend()
plt.show()