In [121]:
import tensorflow as tf
import numpy as np
import pandas as pd

from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Add
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

ratings = pd.read_csv('ml-100k/ratings.csv', sep=',',encoding='latin-1')

In [44]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [3]:
n_users = ratings.userId.nunique()
n_movies = ratings.movieId.nunique()
n_users, n_movies

(610, 9724)

In [99]:
movieIds = ratings.movieId.unique()
dictOfMovies = { movieIds[i] : i  for i in range(0, len(movieIds) ) }
actMovies = []
for movieId in ratings.movieId:
    actMovies.append(int(dictOfMovies[movieId]))
actMovies = np.array(actMovies)

In [100]:
n_factors = 50

In [101]:
np.random.seed = 42

In [102]:
msk = np.random.rand(len(ratings)) < 0.8

In [107]:
trn = ratings[msk]
val = ratings[~msk]
trnMovies = actMovies[msk]
valMovies = actMovies[~msk]
len(trnMovies)

80728

# Dot Product Matrix Factorization

In [112]:
user_in = Input(shape=(1,), dtype='int64', name='user_input')
u = Embedding(n_users + 1, n_factors, input_length=1, embeddings_regularizer=l2(1e-5))(user_in)
movie_in = Input(shape=(1,), dtype='int64', name='movie_input')
m = Embedding(n_movies, n_factors, input_length=1, embeddings_regularizer=l2(1e-5))(movie_in)

In [113]:
x = Dot(axes=-1)([u,m])
x = Flatten()(x)
model = tf.keras.Model(inputs=[user_in, movie_in], outputs=x)
model.compile(optimizer=Adam(lr=0.001), loss='mse')

In [115]:
model.fit([trn.userId, trnMovies], trn.rating, batch_size=64, epochs=6,
          validation_data=([val.userId, valMovies], val.rating))

Train on 80728 samples, validate on 20108 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0xb37eb2c88>

# Bias Model

In [128]:
def embedding_input(name, n_in, n_out, reg):
    inp = Input(shape=(1,), dtype='int64', name=name)
    return inp, Embedding(n_in, n_out, input_length=1, embeddings_regularizer=l2(reg))(user_in)

In [129]:
user_in, u = embedding_input('user_input', n_users + 1, n_factors, 1e-5)
movie_in, m = embedding_input('movie_input', n_movies, n_factors, 1e-5)

In [130]:
def create_bias(inp, n_in):
    return Flatten()(Embedding(n_in, 1, input_length=1)(inp))

In [131]:
ub = create_bias(user_in, n_users + 1)
mb = create_bias(movie_in, n_movies)

In [132]:
x = Dot(axes=-1)([u,m])
x = Flatten()(x)
x = Add()([x, ub])
x = Add()([x, mb])
model = tf.keras.Model(inputs=[user_in, movie_in], outputs=x)
model.compile(optimizer=Adam(lr=0.001), loss='mse')

ValueError: Graph disconnected: cannot obtain value for tensor Tensor("user_in:0", shape=(?, 1), dtype=int64) at layer "user_in". The following previous layers were accessed without issue: ['user_input']