### Download training and test data

In [1]:
import tensorflow as tf

# Fetch the sample CSVs by URL; each call yields the path that later cells read from.
training_samples_file_path = tf.keras.utils.get_file(
    fname="trainingSamples.csv",
    origin=("https://raw.githubusercontent.com/dream-365/SparrowRecSys/master/src/main/"
            "resources/webroot/sampledata/trainingSamples.csv"),
)

test_samples_file_path = tf.keras.utils.get_file(
    fname="testSamples.csv",
    origin=("https://raw.githubusercontent.com/dream-365/SparrowRecSys/master/src/main/"
            "resources/webroot/sampledata/testSamples.csv"),
)


2024-02-26 09:20:17.201729: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-26 09:20:17.205299: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-26 09:20:17.246297: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-26 09:20:17.246329: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-26 09:20:17.247511: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

Downloading data from https://raw.githubusercontent.com/dream-365/SparrowRecSys/master/src/main/resources/webroot/sampledata/trainingSamples.csv
Downloading data from https://raw.githubusercontent.com/dream-365/SparrowRecSys/master/src/main/resources/webroot/sampledata/testSamples.csv


### Training data preview

In [2]:
import pandas as pd
# Display the first five rows of the training CSV to inspect the available columns.
pd.read_csv(training_samples_file_path).head(n=5)

Unnamed: 0,movieId,userId,rating,timestamp,label,releaseYear,movieGenre1,movieGenre2,movieGenre3,movieRatingCount,...,userRatingCount,userAvgReleaseYear,userReleaseYearStddev,userAvgRating,userRatingStddev,userGenre1,userGenre2,userGenre3,userGenre4,userGenre5
0,1,15555,3.0,900953740,0,1995,Adventure,Animation,Children,10759,...,92,1992,8.98,3.86,0.74,Drama,Comedy,Thriller,Action,Crime
1,1,25912,3.5,1111631768,1,1995,Adventure,Animation,Children,10759,...,21,1988,14.09,3.48,1.28,Action,Comedy,Romance,Adventure,Thriller
2,1,29912,3.0,866820360,0,1995,Adventure,Animation,Children,10759,...,4,1995,0.5,3.0,0.0,,,,,
3,10,17686,0.5,1195555011,0,1995,Action,Adventure,Thriller,6330,...,35,1992,8.35,2.97,1.48,Comedy,Drama,Adventure,Action,Thriller
4,104,20158,4.0,1155357691,1,1996,Comedy,,,3954,...,81,1991,8.7,3.6,0.72,Thriller,Drama,Action,Crime,Adventure


In [3]:
# load sample as tf dataset
def get_dataset(file_path):
    """Read a samples CSV into a batched ``tf.data`` dataset.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The dataset built by ``tf.data.experimental.make_csv_dataset``: one
        pass over the file (``num_epochs=1``) in batches of 12 rows, with the
        ``label`` column separated out as the label.
    """
    csv_options = dict(
        batch_size=12,
        label_name='label',
        na_value="0",
        num_epochs=1,
        ignore_errors=True,
    )
    return tf.data.experimental.make_csv_dataset(file_path, **csv_options)

# build one dataset per downloaded CSV file (training first, then test)
train_dataset, test_dataset = map(
    get_dataset,
    (training_samples_file_path, test_samples_file_path),
)

Instructions for updating:
Use `tf.data.Dataset.ignore_errors` instead.


In [4]:
# categorical id features, keyed by the CSV column they are read from
movie_col = tf.feature_column.categorical_column_with_identity(key='movieId', num_buckets=1001)
user_col = tf.feature_column.categorical_column_with_identity(key='userId', num_buckets=30001)

# 10-dimensional embedding on top of each id feature
movie_emb_col = tf.feature_column.embedding_column(categorical_column=movie_col, dimension=10)
user_emb_col = tf.feature_column.embedding_column(categorical_column=user_col, dimension=10)

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.


In [5]:
# one scalar int32 Keras input per id feature, keyed by feature name
inputs = {
    feature_name: tf.keras.layers.Input(name=feature_name, shape=(), dtype='int32')
    for feature_name in ('movieId', 'userId')
}

In [6]:
# neural cf model arch two. only embedding in each tower, then MLP as the interaction layers
def neural_cf_model_1(feature_inputs, item_feature_columns, user_feature_columns, hidden_units):
    """Build a NeuralCF model that uses an MLP as the interaction function.

    The item and user feature columns are each converted to a dense vector,
    the two vectors are concatenated, and the result is passed through the
    hidden layers before a single sigmoid unit.

    Args:
        feature_inputs: Keras inputs fed to both ``DenseFeatures`` layers and
            used as the inputs of the returned model.
        item_feature_columns: Feature columns that form the item tower.
        user_feature_columns: Feature columns that form the user tower.
        hidden_units: Iterable of layer widths; one ReLU ``Dense`` layer is
            stacked per entry, in order.

    Returns:
        A ``tf.keras.Model`` mapping ``feature_inputs`` to one sigmoid output.
    """
    layers = tf.keras.layers
    item_vector = layers.DenseFeatures(item_feature_columns)(feature_inputs)
    user_vector = layers.DenseFeatures(user_feature_columns)(feature_inputs)

    hidden = layers.Concatenate()([item_vector, user_vector])
    for width in hidden_units:
        hidden = layers.Dense(units=width, activation='relu')(hidden)

    score = layers.Dense(units=1, activation='sigmoid')(hidden)
    return tf.keras.Model(inputs=feature_inputs, outputs=score)

In [7]:
# neural cf model arch one. embedding+MLP in each tower, then dot product layer as the output
def neural_cf_model_2(feature_inputs, item_feature_columns, user_feature_columns, hidden_units):
    """Build a two-tower NeuralCF model joined by a dot product.

    Each tower converts its feature columns to a dense vector and runs it
    through its own stack of ReLU layers. The tower outputs are combined with
    a dot product, which then feeds a single sigmoid unit.

    Args:
        feature_inputs: Keras inputs fed to both ``DenseFeatures`` layers and
            used as the inputs of the returned model.
        item_feature_columns: Feature columns that form the item tower.
        user_feature_columns: Feature columns that form the user tower.
        hidden_units: Iterable of layer widths applied, in order, inside each
            tower (the towers do not share layers).

    Returns:
        A ``tf.keras.Model`` mapping ``feature_inputs`` to one sigmoid output.
    """
    layers = tf.keras.layers

    def build_tower(feature_columns):
        # Dense feature vector followed by this tower's own hidden layers.
        tower = layers.DenseFeatures(feature_columns)(feature_inputs)
        for width in hidden_units:
            tower = layers.Dense(units=width, activation='relu')(tower)
        return tower

    # Item tower is built before the user tower, as in the layer order above.
    item_vector = build_tower(item_feature_columns)
    user_vector = build_tower(user_feature_columns)

    similarity = layers.Dot(axes=1)([item_vector, user_vector])
    score = layers.Dense(units=1, activation='sigmoid')(similarity)

    return tf.keras.Model(inputs=feature_inputs, outputs=score)

In [8]:
# neural cf model architecture
model = neural_cf_model_1(
    feature_inputs=inputs,
    item_feature_columns=[movie_emb_col],
    user_feature_columns=[user_emb_col],
    hidden_units=[10, 10],
)

# binary classification setup: cross-entropy loss, Adam, accuracy plus ROC and PR AUC
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(curve='ROC'),
        tf.keras.metrics.AUC(curve='PR'),
    ],
)

In [9]:
# fit for five passes over the training dataset
model.fit(x=train_dataset, epochs=5)

# score the test dataset; the four values are unpacked as loss, accuracy, ROC AUC, PR AUC
test_loss, test_accuracy, test_roc_auc, test_pr_auc = model.evaluate(x=test_dataset)
print('\n\nTest Loss {}, Test Accuracy {}, Test ROC AUC {}, Test PR AUC {}'.format(
    test_loss, test_accuracy, test_roc_auc, test_pr_auc))

Epoch 1/5


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Test Loss 0.6829220652580261, Test Accuracy 0.6670231819152832, Test ROC AUC 0.7208721041679382, Test PR AUC 0.7484422922134399


In [10]:
# print some predict results
# make_csv_dataset shuffles by default and reshuffles on every iteration, so
# predicting on `test_dataset` and then reading labels from a second pass
# pairs each prediction with the label of an unrelated row. Pull ONE batch and
# take both the predictions and the labels from it so they stay aligned.
sample_features, sample_labels = next(iter(test_dataset))

# Wrap the already-materialised batch in a one-element dataset so predict()
# receives it through the same dataset path used by fit()/evaluate().
sample_batch = tf.data.Dataset.from_tensors((sample_features, sample_labels))
predictions = model.predict(sample_batch)

for prediction, goodRating in zip(predictions, sample_labels):
    print("Predicted good rating: {:.2%}".format(prediction[0]),
          " | Actual rating label: ",
          ("Good Rating" if bool(goodRating) else "Bad Rating"))

Predicted good rating: 90.41%  | Actual rating label:  Good Rating
Predicted good rating: 30.63%  | Actual rating label:  Good Rating
Predicted good rating: 10.21%  | Actual rating label:  Good Rating
Predicted good rating: 33.85%  | Actual rating label:  Good Rating
Predicted good rating: 53.25%  | Actual rating label:  Good Rating
Predicted good rating: 55.84%  | Actual rating label:  Bad Rating
Predicted good rating: 61.51%  | Actual rating label:  Good Rating
Predicted good rating: 78.06%  | Actual rating label:  Good Rating
Predicted good rating: 93.02%  | Actual rating label:  Good Rating
Predicted good rating: 21.47%  | Actual rating label:  Good Rating
Predicted good rating: 59.68%  | Actual rating label:  Good Rating
Predicted good rating: 64.44%  | Actual rating label:  Good Rating


In [14]:
# persist the trained model under model/NeuralCF
model.save(filepath="model/NeuralCF")

INFO:tensorflow:Assets written to: model/NeuralCF/assets


INFO:tensorflow:Assets written to: model/NeuralCF/assets
