**REF:** The following implementation is adapted from and inspired by the Keras collaborative-filtering example:
https://keras.io/examples/structured_data/collaborative_filtering_movielens/
Further details are provided in the associated report.


In [None]:
#Imports
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tabulate import tabulate
import matplotlib.pyplot as plt
import pandas as pd

## **Processing Final Dataset**

In [None]:
# Defining Functions
def preprocess(rating_df):
  """Encode raw user/book ids as contiguous integer indices and cast ratings.

  The work is done on a copy, so the frame passed in by the caller is left
  untouched and the call can be repeated safely.

  Args:
    rating_df: DataFrame with "user_id", "book_id" and "rating" columns.

  Returns:
    A tuple ``(rating_df, user_encode, book_encode, t_book_encode,
    min_rating, max_rating)`` where

    * ``rating_df`` is a copy of the input with added "user" and "book"
      index columns and "rating" cast to float32,
    * ``user_encode`` maps raw user id -> index (order of first appearance),
    * ``book_encode`` maps raw book id -> index (order of first appearance),
    * ``t_book_encode`` is the inverse of ``book_encode`` (index -> book id),
    * ``min_rating`` / ``max_rating`` are the smallest / largest rating.
  """
  # Copy first: adding columns to the caller's frame is a hidden side effect.
  rating_df = rating_df.copy()

  user_ids = rating_df["user_id"].unique().tolist()
  book_ids = rating_df["book_id"].unique().tolist()
  user_encode = {x: i for i, x in enumerate(user_ids)}
  book_encode = {x: i for i, x in enumerate(book_ids)}
  t_book_encode = {i: x for i, x in enumerate(book_ids)}

  rating_df["user"] = rating_df["user_id"].map(user_encode)
  rating_df["book"] = rating_df["book_id"].map(book_encode)
  rating_df["rating"] = rating_df["rating"].to_numpy(dtype=np.float32)

  # Series.min()/max() are vectorised and skip NaN, unlike the Python builtins.
  min_rating = rating_df["rating"].min()
  max_rating = rating_df["rating"].max()
  return rating_df, user_encode, book_encode, t_book_encode, min_rating, max_rating

In [None]:
# Load the raw ratings and encode the user/book ids.
# The frame that was just read is passed to preprocess(); the previous code
# passed an undefined name `df`, which fails on a fresh kernel.
ratings_raw = pd.read_csv("goodreads_ratings.csv")
rating_df, user_encode, book_encode, t_book_encode, min_rating, max_rating = preprocess(ratings_raw)

num_users = len(user_encode)
num_books = len(t_book_encode)
num_ratings = len(rating_df)

#This code segment was taken from https://stackoverflow.com/questions/38708621/how-to-calculate-percentage-of-sparsity-for-a-numpy-array-matrix
# Sparsity = percentage of cells in the user x book matrix that are zero
# after missing (user, book) pairs have been filled with 0.
user_item_df = rating_df.pivot(index='user_id', columns='book_id', values='rating').fillna(0)
non_zero = np.count_nonzero(user_item_df)
total_val = np.prod(user_item_df.shape)  # np.product was removed in NumPy 2.0
sparsity = ((total_val - non_zero) / total_val)*100

print(f"# Users: {num_users}")
print(f"# Books: {num_books}")
print(f"# Ratings: {num_ratings}")
print(f"Data sparsity: {sparsity}")

# Users: 228
# Movies: 59139
# Ratings: 100000
Data sparsity: 99.67277508267024


## **Model Training & Evaluation**

In [None]:
# Shuffle into a NEW frame: re-assigning rating_df would re-shuffle the already
# shuffled rows every time this cell is re-run and silently change the split.
shuffled_df = rating_df.sample(frac=1, random_state=42)
x = shuffled_df[["user", "book"]].values

# Normalize rating targets between 0 and 1
rating_range = max_rating - min_rating
if rating_range == 0:
    # All ratings identical: the division below would only yield NaN/inf targets.
    raise ValueError("Cannot normalize ratings: min_rating equals max_rating")
# Vectorised arithmetic instead of a per-row Python lambda.
y = ((shuffled_df["rating"] - min_rating) / rating_range).values

# Training on 90% of the data and validating on 10%.
train_indices = int(0.9 * len(shuffled_df))
x_train, x_val = x[:train_indices], x[train_indices:]
y_train, y_val = y[:train_indices], y[train_indices:]

In [None]:
class NCF(keras.Model):
    """Embedding dot-product recommender over (user, book) index pairs.

    For every input row the model returns
    ``sigmoid(dot(user_embedding, book_embedding) + user_bias + book_bias)``.

    Args:
        num_users: Number of rows in the user embedding tables.
        num_books: Number of rows in the book embedding tables.
        embedding_size: Width of the user and book embedding vectors.
        **kwargs: Forwarded unchanged to ``keras.Model``.
    """

    def __init__(self, num_users, num_books, embedding_size, **kwargs):
        super().__init__(**kwargs)

        # Kept so get_config() can report how this model was constructed.
        self.num_users = num_users
        self.num_books = num_books
        self.embedding_size = embedding_size

        # Define all the layers of the model in the constructor
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.book_embedding = layers.Embedding(
            num_books,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.book_bias = layers.Embedding(num_books, 1)
        self.dot = layers.Dot(axes=-1)
        self.sigmoid = layers.Activation('sigmoid')

    def call(self, inputs):
        """Score each (user, book) pair.

        Args:
            inputs: 2-D integer tensor/array; column 0 holds user indices and
                column 1 holds book indices.

        Returns:
            The sigmoid-activated score for every input row.
        """
        # Use the defined layers in the call method
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        book_vector = self.book_embedding(inputs[:, 1])
        book_bias = self.book_bias(inputs[:, 1])

        dot_user_book = self.dot([user_vector, book_vector])
        # Plain tensor addition: avoids building a fresh Add layer on each call.
        x = dot_user_book + user_bias + book_bias
        return self.sigmoid(x)

    def get_config(self):
        """Return the constructor arguments needed to re-create this model."""
        return {
            "num_users": self.num_users,
            "num_books": self.num_books,
            "embedding_size": self.embedding_size,
        }

In [None]:
# Training hyper-parameters, gathered in one place so they are easy to tune
EMBEDDING_SIZE = 50
LEARNING_RATE = 0.001
BATCH_SIZE = 64
EPOCHS = 40
SEED = 42

# Seed TensorFlow's global random generator so repeated runs are comparable
# (NOTE(review): full determinism may also depend on the TF/Keras version and hardware).
tf.random.set_seed(SEED)

# Define the model
model = NCF(num_users, num_books, EMBEDDING_SIZE)

# Use this segment for RMSE Evaluation
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# Use this segment for Precision and Recall Evaluation
#model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.Recall(), tf.keras.metrics.Precision()])

# Train the model based on data split.
# The returned History is kept so per-epoch loss/metrics can be inspected or
# plotted later, and so the cell no longer echoes a bare History repr.
history = model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    validation_data=(x_val, y_val),
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f9ecdc3c280>

In [None]:
# Persist the trained model to disk under a named destination
MODEL_PATH = "ncf-model"
model.save(MODEL_PATH)