In [10]:
import sys
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages


In [11]:

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_chrono_split


In [12]:
from recommenders.evaluation.python_evaluation import (
    ndcg_at_k, precision_at_k, recall_at_k
)

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.9.18 (main, Sep 11 2023, 13:41:44) 
[GCC 11.2.0]
Pandas version: 1.5.3
Tensorflow version: 2.16.1


In [13]:
# top k items to recommend
TOP_K = 10

# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'

# Model parameters
EPOCHS = 50
BATCH_SIZE = 256

SEED = 42

In [14]:
df = movielens.load_pandas_df(
    size=MOVIELENS_DATA_SIZE,
    header=["userID", "itemID", "rating", "timestamp"]
)

INFO:recommenders.datasets.download_utils:Downloading https://files.grouplens.org/datasets/movielens/ml-100k.zip
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 4.81k/4.81k [00:01<00:00, 2.42kKB/s]


In [16]:
train, test = python_chrono_split(df, 0.75)

In [17]:
test = test[test["userID"].isin(train["userID"].unique())]
test = test[test["itemID"].isin(train["itemID"].unique())]

In [18]:
train_file = "./train.csv"
test_file = "./test.csv"
train.to_csv(train_file, index=False)
test.to_csv(test_file, index=False)

In [19]:
data = NCFDataset(train_file=train_file, test_file=test_file, seed=SEED)

INFO:recommenders.models.ncf.dataset:Indexing ./train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing ./test.csv ...
INFO:recommenders.models.ncf.dataset:Creating full leave-one-out test file ./test_full.csv ...
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 943/943 [00:09<00:00, 94.70it/s]
INFO:recommenders.models.ncf.dataset:Indexing ./test_full.csv ...


In [24]:
model = NCF (
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

2024-03-09 11:36:43.936587: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-09 11:36:43.948309: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-03-09 11:36:43.990628: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled


In [25]:
with Timer() as train_time:
    model.fit(data)

print("Took {} seconds for training.".format(train_time))

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [1.45s]: train_loss = 0.259587 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 20 [1.36s]: train_loss = 0.246305 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 30 [1.46s]: train_loss = 0.239579 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 40 [1.37s]: train_loss = 0.234690 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 50 [1.38s]: train_loss = 0.230666 


Took 70.0582 seconds for training.


In [26]:
with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        user = [user] * len(item) 
        users.extend(user)
        items.extend(item)
        preds.extend(list(model.predict(user, item, is_list=True)))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

print("Took {} seconds for prediction.".format(test_time))

Took 2.4859 seconds for prediction.


In [28]:
all_predictions[all_predictions["userID"]==1]

Unnamed: 0,userID,itemID,timestamp,prediction
74992,1,286,,0.717208
74993,1,258,,0.818400
74994,1,305,,0.059840
74995,1,307,,0.259305
74996,1,288,,0.889635
...,...,...,...,...
76380,1,1592,,0.000307
76381,1,1676,,0.005482
76382,1,907,,0.000347
76383,1,1681,,0.003493


In [30]:
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)

print("NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

NDCG:	0.200148
Precision@K:	0.178155
Recall@K:	0.098838


In [32]:
?model.predict

[0;31mSignature:[0m [0mmodel[0m[0;34m.[0m[0mpredict[0m[0;34m([0m[0muser_input[0m[0;34m,[0m [0mitem_input[0m[0;34m,[0m [0mis_list[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Predict function of this trained model

Args:
    user_input (list or element of list): userID or userID list
    item_input (list or element of list): itemID or itemID list
    is_list (bool): if true, the input is list type
        noting that list-wise type prediction is faster than element-wise's.

Returns:
    list or float: A list of predicted rating or predicted rating score.
[0;31mFile:[0m      /opt/conda/envs/rec/lib/python3.9/site-packages/recommenders/models/ncf/ncf_singlenode.py
[0;31mType:[0m      method

In [37]:
?model.

[0;31mType:[0m        OrderedDict
[0;31mString form:[0m OrderedDict([(1, 0), (2, 1), (3, 2), (4, 3), (5, 4), (6, 5), (7, 6), (8, 7), (9, 8), (10, 9), (11 <...>  (936, 935), (937, 936), (938, 937), (939, 938), (940, 939), (941, 940), (942, 941), (943, 942)])
[0;31mLength:[0m      943
[0;31mFile:[0m        /opt/conda/envs/rec/lib/python3.9/collections/__init__.py
[0;31mDocstring:[0m   Dictionary that remembers insertion order

In [38]:
1

1