In [1]:
import pandas as pd
import numpy as np

# Build the working sample in a single chain: read the ratings CSV, discard
# rows with missing values, keep the first 10,000 of what remains, then
# rescale `rating` by the largest rating in that sample (top rating -> 1.0).
results = (
    pd.read_csv("data/ml-25m/ratings.csv")
    .dropna()
    .head(10000)
    .assign(rating = lambda frame: frame['rating'] / frame['rating'].max())
)

results

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,1.0,1147880044
1,1,306,0.7,1147868817
2,1,307,1.0,1147868828
3,1,665,1.0,1147878820
4,1,899,0.7,1147868510
...,...,...,...,...
9995,75,736,0.8,1537207939
9996,75,778,0.6,1537208100
9997,75,783,0.6,1537348896
9998,75,805,0.7,1537348872


In [2]:
from tensorflow.keras.layers import StringLookup, Embedding, Flatten, ReLU, Dot
from tensorflow.keras import Input, Model
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.activations import sigmoid
from tensorflow.keras import regularizers

# Matrix-factorisation style recommender: each user and each movie gets a
# K-dimensional embedding, and the predicted (rescaled) rating is the sigmoid
# of the dot product of the two embeddings.

# Seed the random number generators through Keras so repeated runs start from
# the same state.
set_random_seed(42)
# Width of every user / movie embedding vector (passed as the Embedding output size).
K = 2
# Strength of the L2 penalty applied to the user and movie embedding tables.
# NOTE(review): the logged training loss flattens at ~0.0907 from epoch 8 on.
# With lambda = 0.1 the penalty may be pulling the embeddings toward zero
# (a zero dot product yields sigmoid = 0.5 for every pair) — confirm by
# inspecting the predictions before trusting these values.
user_regularization_lambda = 0.1
movie_regularization_lambda = 0.1

# --- User branch -----------------------------------------------------------
# One string user ID per example.
user = Input(shape = (1,), dtype = 'string', name = 'user')
# Map each string ID to an integer index; the vocabulary is the set of distinct
# user IDs present in `results`.
user_id_to_int = StringLookup(vocabulary = results.userId.astype(str).unique(), name = 'user_id_to_int')
user_ints = user_id_to_int(user)
# The table is sized with vocabulary_size() so that it covers every index the
# lookup layer can emit (including any it reserves beyond the supplied IDs).
user_vector = Embedding(user_id_to_int.vocabulary_size(), K, name = 'user_encoder', embeddings_regularizer = regularizers.L2(user_regularization_lambda))(user_ints)
# Collapse the length-1 sequence axis so each example is a flat K-vector.
user_vector_flat = Flatten(name = 'user_vector')(user_vector)

# --- Movie branch (mirrors the user branch) --------------------------------
movies = Input(shape = (1,), dtype = 'string', name = 'movie')
movie_id_to_int = StringLookup(vocabulary = results.movieId.astype(str).unique(), name = 'movie_id_to_int')
movie_ints = movie_id_to_int(movies)
movie_vector = Embedding(movie_id_to_int.vocabulary_size(), K, name = 'movie_encoder', embeddings_regularizer = regularizers.L2(movie_regularization_lambda))(movie_ints)
movie_vector_flat = Flatten(name = 'movie_vector')(movie_vector)

# Per-example dot product of the user vector and the movie vector (axis 1 of each).
dot_product = Dot(axes = (1, 1), name = 'dot_product')([user_vector_flat, movie_vector_flat])
# Squash the score into (0, 1); the ratings were divided by their maximum when
# `results` was built, so the targets are at most 1.0.
outputs = sigmoid(dot_product)

# Inputs are ordered [user, movie]; data passed to fit/predict must follow the same order.
model = Model([user, movies], outputs)
model.summary()

In [3]:
# Train with plain SGD on mean squared error between predicted and rescaled ratings.
model.compile(loss = "mse", optimizer = "sgd")

# IDs are cast to strings because both model inputs are declared with dtype
# 'string'; the list order (user, movie) matches the order given to Model(...).
history = model.fit(
    x = [results['userId'].astype(str), results['movieId'].astype(str)],
    y = results['rating'],
    epochs = 10,
    verbose = 1,
)

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 952us/step - loss: 0.5164
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 970us/step - loss: 0.2123
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1254
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 981us/step - loss: 0.1006
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0935
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0915
Epoch 7/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0909
Epoch 8/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956us/step - loss: 0.0907
Epoch 9/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 964us/step - loss: 0.0907
Epoch 10/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 810