# Import libraries

In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader

from dataset import NCFDataset
from recom_ncf import NCFRecommender
from evaluation import Evaluation
from helpers.index_manager import IndexManager
from helpers.splitter import Splitter

# Loading data

In [2]:
# Name of the normalised-timestamp column; reused when building the datasets
# and the evaluator further down.
time_feature = 'z_timestamp'
interaction_columns = ['user_id', 'item_id', 'rating_imp', 'timestamp', time_feature]

# Parse only the columns we need (usecols) rather than loading the whole CSV and
# discarding the rest. usecols ignores element order, so the trailing selection
# pins the column order to the one listed above.
df = pd.read_csv('../data/interaction-clean.csv', usecols=interaction_columns)[interaction_columns]
df.head()

Unnamed: 0,user_id,item_id,rating_imp,timestamp,z_timestamp
0,76561197960432447,10,1,1738278781,0.559647
1,76561198071230926,10,1,1736206418,0.535447
2,76561198206216352,10,1,1738041574,0.556877
3,76561198110801124,10,1,1738015332,0.55657
4,76561199813732773,10,1,1737853720,0.554683


# Indexing data

In [3]:
# Replace raw user/item identifiers with contiguous 0-based indices (see the
# reported index ranges in the cell output).
index_manager = IndexManager()
index_manager.fit(df_interaction=df)
# NOTE: inplace=True overwrites the raw IDs inside `df`, so the frame shown by the
# loading cell is stale from here on.
# NOTE(review): re-running only this cell would fit on already-indexed IDs —
# confirm that is harmless, or always re-run from the loading cell.
index_manager.transform_interactions(df, inplace=True)
df.head()

Indexed 836887 users and 69001 items
User index range: 0-836886
Item index range: 0-69000


Unnamed: 0,user_id,item_id,rating_imp,timestamp,z_timestamp
0,0,0,1,1738278781,0.559647
1,1,0,1,1736206418,0.535447
2,2,0,1,1738041574,0.556877
3,3,0,1,1738015332,0.55657
4,4,0,1,1737853720,0.554683


# Creating datasets

In [4]:
# Leave-k-out split: per the split log, one interaction per user is held out for
# validation and one for testing; users with too few interactions stay in train.
splitter = Splitter(df)
df_train, df_val, df_test = splitter.leave_k_out_split()

train_dataset = NCFDataset(df_train, time_feature=time_feature)
val_dataset = NCFDataset(df_val, time_feature=time_feature)

# Loader settings common to both splits; only the shuffling differs.
loader_kwargs = dict(
    batch_size=16384,
    num_workers=4,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory=True,
)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
eval_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

Splitting data with leave-2-out strategy (1 for validation, 1 for testing)
Total users: 836887
Interactions per user: min=1, max=1035, avg=1.4
Note: 706515 users have fewer than 2 interactions.
These users will be placed entirely in the training set.
Split complete: 1156226 total interactions
Train set: 895482 interactions (77.4%)
Validation set: 130372 interactions (11.3%)
Test set: 130372 interactions (11.3%)


# Training model

In [5]:
# Seed PyTorch's global RNG before the model is built so that a fresh
# "Restart & Run All" repeats the run, as far as the randomness involved
# (weight initialisation, dropout, training shuffle order) draws from that
# generator. NOTE(review): GPU kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters forwarded verbatim to NCFRecommender.
params = {
    'factors': 8,
    'mlp_user_item_dim': 64,
    'mlp_time_dim': 16,
    'learning_rate': 0.0005,
    'epochs': 20,
    'optimizer': 'adagrad',
    'dropout': 0.2,
    'weight_decay': 1e-05,
    'loss_fn': 'bce',
}

# The model is sized from the full indexed user/item sets, not just the
# users/items that ended up in the training split.
unique_users = index_manager.get_indexed_users()
unique_items = index_manager.get_indexed_items()

model = NCFRecommender(unique_users, unique_items, **params)
model.fit(train_dataloader, eval_dataloader)

All weights initialized with Gaussian distribution (mean=0, std=0.01)
Epoch 1/20
Train loss: 0.691783, Validation loss: 0.691056
Epoch 2/20
Train loss: 0.690399, Validation loss: 0.690042
Epoch 3/20
Train loss: 0.689448, Validation loss: 0.689219
Epoch 4/20
Train loss: 0.688627, Validation loss: 0.688471
Epoch 5/20
Train loss: 0.687854, Validation loss: 0.687743
Epoch 6/20
Train loss: 0.687075, Validation loss: 0.686980
Epoch 7/20
Train loss: 0.686237, Validation loss: 0.686146
Epoch 8/20
Train loss: 0.685293, Validation loss: 0.685192
Epoch 9/20
Train loss: 0.684184, Validation loss: 0.684059
Epoch 10/20
Train loss: 0.682785, Validation loss: 0.682529
Epoch 11/20
Train loss: 0.680809, Validation loss: 0.680409
Epoch 12/20
Train loss: 0.678031, Validation loss: 0.677435
Epoch 13/20
Train loss: 0.674239, Validation loss: 0.673486
Epoch 14/20
Train loss: 0.669206, Validation loss: 0.668381
Epoch 15/20
Train loss: 0.662798, Validation loss: 0.661973
Epoch 16/20
Train loss: 0.654810, Valid

# Evaluating model

In [6]:
# Score the trained model on the held-out test interactions, passing the same
# time feature that was used to build the training/validation datasets.
evaluator = Evaluation(recommender=model, test_data=df_test, time_feature=time_feature)
# The result is a dict of metric name -> value; it is displayed in the next cell.
metrics = evaluator.evaluate()

Starting evaluation preparation...
Creating ground truth sets...
Ground truth created for 130372 users with an average of 1.0 items each
Ground truth creation completed in 1.97 seconds
Extracting timestamps for each user...
Timestamp extraction completed in 0.12 seconds
Generating predictions for 130372 users...
Processing predictions for 130372 users and 69001 items
Processing 1 of 130372 users... (0.00%)
Memory usage: 0.1591796875 . Increased user batch size from 128 to 649
Memory usage: 0.1591796875 . Increased item batch size from 1024 to 5195
Processing 129 of 130372 users... (0.10%)
Memory usage: 1.014404296875 . Reduced item batch size from 5195 to 2597
Processing 778 of 130372 users... (0.60%)
Memory usage: 0.55322265625 . Increased user batch size from 649 to 947
Memory usage: 0.55322265625 . Increased item batch size from 2597 to 3791
Processing 1427 of 130372 users... (1.09%)
Memory usage: 1.084716796875 . Reduced item batch size from 3791 to 1895
Processing 2374 of 130372 u

In [7]:
# Show the top-10 ranking metrics computed on the test split.
# Hit Ratio@10 and Recall@10 coincide here, presumably because the evaluation log
# reports an average of 1.0 ground-truth items per user.
metrics

{'Hit Ratio@10': 0.0423250391188292,
 'NDCG@10': 0.019633985413472964,
 'Recall@10': 0.0423250391188292}