In [9]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import pandas as pd
import numpy as np
import time
import pickle
from sklearn.model_selection import train_test_split
from recommenders.evaluation.python_evaluation import (
    map, ndcg_at_k, precision_at_k, recall_at_k
)
from keras.saving import register_keras_serializable

print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.18.0


In [10]:
TOP_K = 10
EPOCHS = 5
BATCH_SIZE = 256

In [11]:
ratings = pd.read_csv('M:/Movie-Recommendation-Engine/notebooks/NCF Local/NCF/ml-1m_dataset.csv')
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

user_mapping = {user_id: idx for idx, user_id in enumerate(train_data['userID'].unique())}
item_mapping = {item_id: idx for idx, item_id in enumerate(train_data['itemID'].unique())}

train_data['userID'] = train_data['userID'].map(user_mapping)
train_data['itemID'] = train_data['itemID'].map(item_mapping)

test_data['userID'] = test_data['userID'].map(user_mapping)
test_data['itemID'] = test_data['itemID'].map(item_mapping)

test_data['userID'].fillna(0, inplace=True)
test_data['itemID'].fillna(0, inplace=True)

test_data['userID'] = test_data['userID'].astype(int)
test_data['itemID'] = test_data['itemID'].astype(int)

n_users = len(user_mapping)
n_items = len(item_mapping)

print(f"Users: {n_users}, Items: {n_items}")

Users: 6040, Items: 3683


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data['userID'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data['itemID'].fillna(0, inplace=True)


In [13]:
@register_keras_serializable(package="Custom", name="NeuralCollaborativeFiltering")
class NeuralCollaborativeFiltering(Model):
    def __init__(self, n_users, n_items, embedding_dim=8):
        super(NeuralCollaborativeFiltering, self).__init__()
        self.user_embedding = layers.Embedding(n_users, embedding_dim, input_length=1)
        self.item_embedding = layers.Embedding(n_items, embedding_dim, input_length=1)
        self.dense_layers = tf.keras.Sequential([
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(32, activation='relu'),
            layers.Dense(1, activation='sigmoid')
        ])

    def call(self, inputs):
        user_input, item_input = inputs
        user_emb = self.user_embedding(user_input)
        item_emb = self.item_embedding(item_input)
        concatenated = tf.concat([user_emb, item_emb], axis=-1)
        return self.dense_layers(concatenated)


model = NeuralCollaborativeFiltering(n_users=n_users, n_items=n_items)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print("Model defined successfully.")

Model defined successfully.




In [14]:
if __name__ == "__main__":    
    user_input = train_data['userID'].values
    item_input = train_data['itemID'].values
    labels = train_data['rating'].values > 3.5

    start_time = time.time()

    model.fit([user_input, item_input], labels, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Total training time: {elapsed_time:.2f} seconds")

    with open('user_mapping.pkl', 'wb') as f:
        pickle.dump(user_mapping, f)

    with open('item_mapping.pkl', 'wb') as f:
        pickle.dump(item_mapping, f)

    user_input_test = test_data['userID'].values
    item_input_test = test_data['itemID'].values
    predictions = model.predict([user_input_test, item_input_test])

    test_data['prediction'] = predictions

    eval_map = map(test_data, test_data, col_prediction='prediction', k=TOP_K)
    eval_ndcg = ndcg_at_k(test_data, test_data, col_prediction='prediction', k=TOP_K)
    eval_precision = precision_at_k(test_data, test_data, col_prediction='prediction', k=TOP_K)
    eval_recall = recall_at_k(test_data, test_data, col_prediction='prediction', k=TOP_K)

    print(
        f"MAP: {eval_map:.6f}\n"
        f"NDCG: {eval_ndcg:.6f}\n"
        f"Precision@K: {eval_precision:.6f}\n"
        f"Recall@K: {eval_recall:.6f}"
    )

    model.save('M:/Movie-Recommendation-Engine/models/ncf_model1.keras')
    print("Model saved as 'ncf_model1.keras'")

Epoch 1/5
[1m2501/2501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6831 - loss: 0.5865 - val_accuracy: 0.7257 - val_loss: 0.5402
Epoch 2/5
[1m2501/2501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7313 - loss: 0.5297 - val_accuracy: 0.7298 - val_loss: 0.5342
Epoch 3/5
[1m2501/2501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7371 - loss: 0.5196 - val_accuracy: 0.7333 - val_loss: 0.5290
Epoch 4/5
[1m2501/2501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7453 - loss: 0.5088 - val_accuracy: 0.7353 - val_loss: 0.5262
Epoch 5/5
[1m2501/2501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7513 - loss: 0.5000 - val_accuracy: 0.7359 - val_loss: 0.5259
Total training time: 15.56 seconds
[1m6252/6252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 804us/step
MAP: 0.573335
NDCG: 1.000274
Precision@K: 0.888688
Recall@K: 0.573335
M