# Import libraries

In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from dataset import NCFDataset
from evaluation import Evaluation
from helpers.index_manager import IndexManager
from recom_ncf import NCFRecommender

# Loading data

In [2]:
# Only these three columns are used by the rest of the notebook. Passing them as
# `usecols` lets pandas skip parsing every other column in the CSV; the trailing
# selection keeps the columns in this exact order regardless of the file's layout.
interaction_columns = ['user_id', 'item_id', 'rating_imp']
df = pd.read_csv('../data/interaction-clean.csv', usecols=interaction_columns)[interaction_columns]
df.head()

Unnamed: 0,user_id,item_id,rating_imp
0,76561197960432447,10,1
1,76561198071230926,10,1
2,76561198206216352,10,1
3,76561198110801124,10,1
4,76561199813732773,10,1


# Indexing data

In [3]:
# Fit the project's IndexManager on the interaction frame, then rewrite the
# frame's IDs with it. Per the recorded output, user_id/item_id become
# zero-based indices (users 0-836886, items 0-69000).
index_manager = IndexManager()
index_manager.fit(df_interaction=df)
# inplace=True: `df` itself is modified (there is no reassignment), so the raw
# IDs loaded in the previous cell are no longer available in `df` after this call.
# NOTE(review): re-running this cell without reloading `df` would presumably fit
# on already-indexed IDs — confirm how IndexManager handles that.
index_manager.transform_interactions(df, inplace=True)
df.head()

Indexed 836887 users and 69001 items
User index range: 0-836886
Item index range: 0-69000


Unnamed: 0,user_id,item_id,rating_imp
0,0,0,1
1,1,0,1
2,2,0,1
3,3,0,1
4,4,0,1


# Creating datasets

In [4]:
# Hold out 10% of all interactions for testing, then carve a validation set of
# the same size out of the remainder: (0.1 / 0.9) of the remaining 90% equals
# 10% of the total, leaving roughly 80% for training.
df_train_val, df_test = train_test_split(df, test_size=0.1, random_state=42)
df_train, df_val = train_test_split(
    df_train_val,
    test_size=(0.1 / 0.9),
    random_state=42,
)

train_dataset, val_dataset = NCFDataset(df_train), NCFDataset(df_val)

# Loader settings shared by both splits; only `shuffle` differs between them.
loader_kwargs = dict(
    batch_size=16384,
    num_workers=4,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory=True,
)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
eval_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# Training model

In [5]:
# Seed PyTorch's global RNG before the model is built and trained, so that the
# shuffled training order (train_dataloader uses shuffle=True with no explicit
# generator) is repeatable on a fresh Restart & Run All.
# NOTE(review): presumably this also fixes NCFRecommender's weight initialization;
# if the recommender or dataset draw from numpy/random as well, seed those too — confirm.
torch.manual_seed(42)

# Hyperparameters, forwarded to NCFRecommender as keyword arguments.
params = {
    'factors': 8,
    'mlp_user_item_dim': 64,
    'learning_rate': 0.001,
    'epochs': 10,
    'optimizer': 'adam',
    'dropout': 0.0,
    'weight_decay': 1e-05,
    'loss_fn': 'bce',
}

# Indexed users/items as reported by the fitted index manager.
unique_users = index_manager.get_indexed_users()
unique_items = index_manager.get_indexed_items()

model = NCFRecommender(unique_users, unique_items, **params)
# The second loader wraps the validation split; the recorded output reports a
# train and a validation loss per epoch.
model.fit(train_dataloader, eval_dataloader)

Epoch 1/10
Train loss: 0.632678, Validation loss: 0.500334
Epoch 2/10
Train loss: 0.486505, Validation loss: 0.482340
Epoch 3/10
Train loss: 0.477919, Validation loss: 0.476741
Epoch 4/10
Train loss: 0.472169, Validation loss: 0.471945
Epoch 5/10
Train loss: 0.467222, Validation loss: 0.467762
Epoch 6/10
Train loss: 0.462657, Validation loss: 0.463949
Epoch 7/10
Train loss: 0.458133, Validation loss: 0.460253
Epoch 8/10
Train loss: 0.453593, Validation loss: 0.456590
Epoch 9/10
Train loss: 0.449159, Validation loss: 0.453338
Epoch 10/10
Train loss: 0.444825, Validation loss: 0.449919
Training completed!


# Evaluating model

In [6]:
# Evaluate the trained recommender on the held-out test split (df_test) and keep
# the returned metrics for display in the next cell.
evaluator = Evaluation(recommender=model, test_data=df_test)
# NOTE(review): per the recorded output, evaluate() prints one progress line per
# user batch (206 batches here), which floods the cell output — consider
# quieting it inside Evaluation if that class supports it.
metrics = evaluator.evaluate()

Starting evaluation preparation...
Creating ground truth sets...
Ground truth created for 105036 users with an average of 1.1 items each
Ground truth creation completed in 1.95 seconds
Generating predictions for 105036 users...
Processing predictions for 105036 users and 69001 items
Processing user batch 1/206 (512 users)
Processing user batch 2/206 (512 users)
Processing user batch 3/206 (512 users)
Processing user batch 4/206 (512 users)
Processing user batch 5/206 (512 users)
Processing user batch 6/206 (512 users)
Processing user batch 7/206 (512 users)
Processing user batch 8/206 (512 users)
Processing user batch 9/206 (512 users)
Processing user batch 10/206 (512 users)
Processing user batch 11/206 (512 users)
Processing user batch 12/206 (512 users)
Processing user batch 13/206 (512 users)
Processing user batch 14/206 (512 users)
Processing user batch 15/206 (512 users)
Processing user batch 16/206 (512 users)
Processing user batch 17/206 (512 users)
Processing user batch 18/206

In [7]:
# Display the metrics returned by evaluator.evaluate(); the recorded output
# contains Hit Ratio@10, NDCG@10 and Recall@10.
metrics

{'Hit Ratio@10': 0.01253855820861419,
 'NDCG@10': 0.00722760860678418,
 'Recall@10': 0.0122151090147777}