In [1]:
import tensorflow as tf
tf.__version__

'2.4.0'

In [2]:
# SparrowRecSys\NewCode\JavaPart\src\main\resources\webroot\sampledata
# H:\MachineLearningPractice\SparrowRecSys\NewCode\JavaPart\src\main\resources\webroot\sampledata

# Load the training data

In [3]:
# load sample as tf dataset
def get_dataset(file_path, batch_size=12, shuffle=True):
    """Load a sample CSV file as a batched ``tf.data.Dataset``.

    The CSV is read with ``tf.data.experimental.make_csv_dataset``; the
    ``label`` column is split off as the target, so the dataset yields
    ``(features, label)`` pairs where ``features`` is keyed by column name.

    Args:
        file_path: Path (or file pattern) of the CSV file to read.
        batch_size: Number of rows per batch. Defaults to 12, the value this
            notebook originally hard-coded.
        shuffle: Whether rows are shuffled. Defaults to True, which is
            ``make_csv_dataset``'s own default. Pass False when a stable row
            order is needed, e.g. to line predictions up with labels.

    Returns:
        The dataset produced by ``make_csv_dataset``, limited to a single
        pass over the file (``num_epochs=1``).
    """
    return tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=batch_size,
        label_name='label',
        # Cells whose text is exactly "0" are also recognised as missing values.
        na_value="0",
        # One pass only; Keras' own `epochs` argument controls repetition.
        num_epochs=1,
        shuffle=shuffle,
        # Skip records that fail to parse instead of aborting the pipeline.
        ignore_errors=True)

# Both sample files live in the same directory. Keeping that directory in one
# place means the notebook can be pointed at another checkout by editing a
# single line instead of every path literal.
SAMPLE_DATA_DIR = r"D:\MachineLearningPractice\SparrowRecSys\NewCode\JavaPart\src\main\resources\webroot\sampledata"

# The joined strings are exactly the paths that were previously spelled out in full.
train_dataset = get_dataset(SAMPLE_DATA_DIR + "\\trainingSamples.csv")
test_dataset = get_dataset(SAMPLE_DATA_DIR + "\\testSamples.csv")

# Get the training data ready

In [5]:
# Movie ID feature: the integer movieId is used directly as a category index
# (num_buckets=1001, i.e. ids 0..1000) and mapped to a 10-dimensional embedding.
movie_col = tf.feature_column.categorical_column_with_identity(
    key='movieId',
    num_buckets=1001,
)
movie_emb_col = tf.feature_column.embedding_column(
    categorical_column=movie_col,
    dimension=10,
)

In [6]:
# User ID feature: the integer userId is used directly as a category index
# (num_buckets=30001, i.e. ids 0..30000) and mapped to a 10-dimensional embedding.
user_col = tf.feature_column.categorical_column_with_identity(
    key='userId',
    num_buckets=30001,
)
user_emb_col = tf.feature_column.embedding_column(
    categorical_column=user_col,
    dimension=10,
)

In [7]:
# Keras input placeholders, keyed by the feature name each one carries.
# Every input is declared as one int32 scalar per example (shape=()).
inputs = {
    feature_name: tf.keras.layers.Input(name=feature_name, shape=(), dtype='int32')
    for feature_name in ('movieId', 'userId')
}

# Model

In [8]:
# neural cf model architecture 1 (neural_cf_model_1): embedding only in each tower, then an MLP over the concatenated embeddings as the interaction layers
def neural_cf_model_1(feature_inputs, item_feature_columns, user_feature_columns, hidden_units):
    """Build a NeuralCF model whose interaction is learned by an MLP.

    Each tower is only a ``DenseFeatures`` layer over its feature columns.
    The two tower outputs are concatenated, passed through one ReLU ``Dense``
    layer per entry of ``hidden_units``, and finished with a single sigmoid
    unit.

    Args:
        feature_inputs: Keras inputs fed to both ``DenseFeatures`` layers and
            used as the model's inputs.
        item_feature_columns: Feature columns for the item tower.
        user_feature_columns: Feature columns for the user tower.
        hidden_units: Iterable of layer widths for the interaction MLP.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``feature_inputs`` to the
        sigmoid output.
    """
    layers = tf.keras.layers

    # Towers: item first, then user (same layer-creation order as before).
    item_embedding = layers.DenseFeatures(item_feature_columns)(feature_inputs)
    user_embedding = layers.DenseFeatures(user_feature_columns)(feature_inputs)

    # Interaction MLP stacked on top of the concatenated tower outputs.
    hidden = layers.concatenate([item_embedding, user_embedding])
    for width in hidden_units:
        hidden = layers.Dense(width, activation='relu')(hidden)

    probability = layers.Dense(1, activation='sigmoid')(hidden)
    return tf.keras.Model(feature_inputs, probability)

In [9]:
# neural cf model architecture 2 (neural_cf_model_2): embedding + MLP in each tower, then a dot product layer combines the two towers
def neural_cf_model_2(feature_inputs, item_feature_columns, user_feature_columns, hidden_units):
    """Build a two-tower NeuralCF model combined by a dot product.

    Each tower is a ``DenseFeatures`` layer followed by one ReLU ``Dense``
    layer per entry of ``hidden_units``. The tower outputs are combined with a
    dot product, which is then passed through a single sigmoid unit so the
    model emits a value in [0, 1].

    Args:
        feature_inputs: Keras inputs fed to both ``DenseFeatures`` layers and
            used as the model's inputs.
        item_feature_columns: Feature columns for the item tower.
        user_feature_columns: Feature columns for the user tower.
        hidden_units: Iterable of layer widths, applied to both towers.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``feature_inputs`` to the
        sigmoid output.
    """
    item_tower = tf.keras.layers.DenseFeatures(item_feature_columns)(feature_inputs)
    for num_nodes in hidden_units:
        item_tower = tf.keras.layers.Dense(num_nodes, activation='relu')(item_tower)

    user_tower = tf.keras.layers.DenseFeatures(user_feature_columns)(feature_inputs)
    for num_nodes in hidden_units:
        user_tower = tf.keras.layers.Dense(num_nodes, activation='relu')(user_tower)

    # The raw dot product of two ReLU outputs is non-negative and unbounded, so
    # it is not a valid probability. This notebook compiles its model with
    # binary cross-entropy and AUC metrics, both of which need predictions in
    # [0, 1]; a one-unit sigmoid layer rescales the similarity into that range.
    similarity = tf.keras.layers.Dot(axes=1)([item_tower, user_tower])
    output = tf.keras.layers.Dense(1, activation='sigmoid')(similarity)

    neural_cf_model = tf.keras.Model(feature_inputs, output)
    return neural_cf_model

In [10]:
# Instantiate the model: movie embedding as the item tower, user embedding as
# the user tower, and two hidden layers of 10 units each.
model = neural_cf_model_1(
    feature_inputs=inputs,
    item_feature_columns=[movie_emb_col],
    user_feature_columns=[user_emb_col],
    hidden_units=[10, 10],
)

In [11]:
# compile the model, set loss function, optimizer and evaluation metrics
# The two AUC metrics get explicit names. Left unnamed, Keras auto-generates
# names such as "auc" / "auc_1", which do not say which curve is which and
# shift every time this cell is re-run in the same kernel.
# The metric order (accuracy, ROC AUC, PR AUC) is unchanged, so positional
# unpacking of model.evaluate() results keeps working.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(curve='ROC', name='roc_auc'),
        tf.keras.metrics.AUC(curve='PR', name='pr_auc'),
    ])

In [12]:
# Train for 5 epochs over the training dataset. The call stays the cell's last
# expression, so the notebook displays the returned History object.
model.fit(x=train_dataset, epochs=5)

Epoch 1/5


  [n for n in tensors.keys() if n not in ref_input_names])


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x248ad4bdd68>

# Test the model

In [13]:
# Evaluate on the test dataset. The values come back in the order: loss, then
# the metrics in the order they were passed to model.compile().
evaluation_results = model.evaluate(test_dataset)
test_loss, test_accuracy, test_roc_auc, test_pr_auc = evaluation_results

report_template = '\n\nTest Loss {}, Test Accuracy {}, Test ROC AUC {}, Test PR AUC {}'
print(report_template.format(test_loss, test_accuracy, test_roc_auc, test_pr_auc))

  [n for n in tensors.keys() if n not in ref_input_names])




Test Loss 0.660546600818634, Test Accuracy 0.6784313917160034, Test ROC AUC 0.7306731343269348, Test PR AUC 0.7577245235443115


In [14]:
# print some predict results
# The predictions and the labels must come from the SAME batch.
# make_csv_dataset shuffles by default, so predicting over test_dataset and
# then iterating it again for labels pairs each prediction with an unrelated
# row. Pull one batch and use its features and labels together.
sample_features, sample_labels = next(iter(test_dataset))
predictions = model.predict_on_batch(sample_features)
for prediction, goodRating in zip(predictions[:12], sample_labels[:12]):
    print("Predicted good rating: {:.2%}".format(prediction[0]),
          " | Actual rating label: ",
          ("Good Rating" if bool(goodRating) else "Bad Rating"))

Predicted good rating: 10.23%  | Actual rating label:  Good Rating
Predicted good rating: 13.59%  | Actual rating label:  Good Rating
Predicted good rating: 81.58%  | Actual rating label:  Good Rating
Predicted good rating: 94.27%  | Actual rating label:  Good Rating
Predicted good rating: 52.33%  | Actual rating label:  Bad Rating
Predicted good rating: 27.52%  | Actual rating label:  Good Rating
Predicted good rating: 14.31%  | Actual rating label:  Good Rating
Predicted good rating: 83.36%  | Actual rating label:  Good Rating
Predicted good rating: 96.42%  | Actual rating label:  Good Rating
Predicted good rating: 88.39%  | Actual rating label:  Bad Rating
Predicted good rating: 19.30%  | Actual rating label:  Bad Rating
Predicted good rating: 85.58%  | Actual rating label:  Good Rating


# Save the model

In [15]:
# Directory the trained model is written to.
# NOTE(review): the trailing "001" is presumably a model-version folder — confirm.
model_export_dir = r"D:\MachineLearningPractice\SparrowRecSys\NewCode\JavaPart\src\main\resources\webroot\modeldata\neuralcf\001"

# save_format, signatures and options were previously passed as None, which is
# also their default, so they are simply omitted here.
tf.keras.models.save_model(
    model,
    model_export_dir,
    overwrite=True,
    include_optimizer=True,
)

INFO:tensorflow:Assets written to: D:\MachineLearningPractice\SparrowRecSys\NewCode\JavaPart\src\main\resources\webroot\modeldata\neuralcf\001\assets


-----------------------------------------------