In [1]:
import sys
sys.path.append("../src/")
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from config import *

In [2]:
pd.options.display.max_columns=999
pd.options.display.max_rows=999

In [3]:
df_train = pd.read_parquet("../data/processed/df_rating_movie_train.parquet")
df_val = pd.read_parquet("../data/processed/df_rating_movie_val.parquet")

In [4]:
df_train.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
5355991,34760,4963,4.0,1008626350,Ocean's Eleven (2001),Crime|Thriller
16979199,110106,1183,1.5,1301889569,"English Patient, The (1996)",Drama|Romance|War
14781790,95711,4571,2.0,1009726295,Bill & Ted's Excellent Adventure (1989),Adventure|Comedy|Sci-Fi
5159343,33523,1411,4.0,1039513864,Hamlet (1996),Crime|Drama|Romance
18503349,119883,33660,4.0,1353051590,Cinderella Man (2005),Drama|Romance


In [5]:
df_train.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
title         object
genres        object
dtype: object

In [6]:
for col in ["userId", "movieId", "rating"]:
    df_train[col] = df_train[col].astype(np.float32)
    df_val[col] = df_val[col].astype(np.float32)

In [7]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [8]:
class NeuralCollaborativeFiltering(keras.Model):
    def __init__(self, num_unique_users, num_unique_movies, embedding_size, min_rating, max_rating):
        super(NeuralCollaborativeFiltering, self).__init__()
        self.num_unique_users = num_unique_users
        self.num_unique_movies = num_unique_movies
        self.embedding_size = embedding_size
        self.min_rating = min_rating
        self.max_rating = max_rating

        self.users_embedding = layers.Embedding(num_unique_users, embedding_size, embeddings_initializer="he_normal",
                                               embeddings_regularizer=keras.regularizers.l2(1e-6))
        self.users_bias = layers.Embedding(num_unique_users, 1)

        self.movies_embedding = layers.Embedding(num_unique_movies, embedding_size, embeddings_initializer="he_normal",
                                                embeddings_regularizer=keras.regularizers.l2(1e-6))
        self.movies_bias = layers.Embedding(num_unique_movies, 1)

    def call(self, inputs):
        users_emb = self.users_embedding(inputs[0])
        users_bias = self.users_bias(inputs[0])
        movies_emb = self.movies_embedding(inputs[1])
        movies_bias = self.movies_bias(inputs[1])
        dot_product_users_movies = tf.tensordot(users_emb, movies_emb, 2)
        x = dot_product_users_movies + users_bias + movies_bias
        x = tf.nn.sigmoid(x)
        x = layers.Lambda(lambda x: x * (self.max_rating - self.min_rating) + self.min_rating)
        return x

In [9]:
model = NeuralCollaborativeFiltering(num_unique_users=len(set(list(df_train.userId.unique()) + list(df_val.userId.unique()))),
                                    num_unique_movies=len(set(list(df_train.movieId.unique()) + list(df_val.movieId.unique()))),
                                    embedding_size=EMBEDDING_SIZE, min_rating=min(df_train.rating.min(), df_val.rating.min()),
                                    max_rating=max(df_train.rating.max(), df_val.rating.max()))

In [10]:
model.compile(
    loss=tf.keras.losses.MeanSquaredError(), optimizer=keras.optimizers.Adam(lr=0.001)
)

In [11]:
model.fit(x=[df_train.userId.values, df_train.movieId.values], y=df_train.rating.values, batch_size=64, epochs=5, verbose=1,    
          validation_data=([df_val.userId.values, df_val.movieId.values], df_val.rating.values))

Epoch 1/5


AttributeError: in user code:

    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:201 __call__
        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    /Users/anton/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:616 match_dtype_and_rank
        if ((y_t.dtype.is_floating and y_p.dtype.is_floating) or

    AttributeError: 'str' object has no attribute 'is_floating'
