# Import libraries

In [1]:
import os

import pandas as pd
from torch.utils.data import DataLoader

from dataset import NCFDataset
from recom_ncf import NCFRecommender
from evaluation import Evaluation
from helpers.index_manager import IndexManager
from helpers.dataloader_custom_functions import collate_fn, worker_init_fn
from helpers.cache import CacheType
from helpers.mem_map_dataloader import MemMapDataLoader

# Loading data

In [2]:
# Read the train / validation / test CSV splits and restore the IndexManager
# state from disk. Every input lives in the same directory, so that directory
# is named once here instead of being repeated in each path.
DATA_DIR = '../data'

df_train = pd.read_csv(f'{DATA_DIR}/train-leave2.csv')
df_val = pd.read_csv(f'{DATA_DIR}/val-leave2.csv')
df_test = pd.read_csv(f'{DATA_DIR}/test-leave2.csv')

# NOTE(review): the .pkl extension suggests a pickle file - presumably it should
# only ever be loaded from a trusted location; confirm against IndexManager.load.
index_manager = IndexManager()
index_manager.load(f'{DATA_DIR}/index.pkl')

# Creating datasets

In [3]:
# Image-feature loader that is later handed to both datasets and to the model.
# The feature directory is machine-specific, so it can be overridden with the
# IMAGE_FEATURES_DIR environment variable; when the variable is unset the
# original location is used, so existing setups behave exactly as before.
IMAGE_FEATURES_DIR = os.environ.get('IMAGE_FEATURES_DIR', 'D:/image-features')

image_dataloader = MemMapDataLoader(
    file_dir=IMAGE_FEATURES_DIR,
    index_manager=index_manager,
    cache_type=CacheType.UNLIMITED,
)

In [4]:
# Keyword arguments shared by the training and validation DataLoaders.
dataloader_params = dict(
    batch_size=2**16,
    num_workers=4,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory=True,
    pin_memory_device='cuda',
    collate_fn=collate_fn,
    worker_init_fn=worker_init_fn,
)

# Both datasets are given the same image_dataloader instance.
train_dataset = NCFDataset(df_train, image_dataloader=image_dataloader)
val_dataset = NCFDataset(df_val, image_dataloader=image_dataloader)

# Only the training loader shuffles; the validation loader keeps dataset order.
train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    **dataloader_params
)
eval_dataloader = DataLoader(
    val_dataset,
    shuffle=False,
    **dataloader_params
)

# Training model

In [5]:
# Values the index manager holds for the user and item columns.
unique_users = index_manager.get_indexed_values('user_id')
unique_items = index_manager.get_indexed_values('item_id')

# Hyperparameters forwarded to NCFRecommender as keyword arguments.
params = dict(
    factors=8,
    mlp_user_item_dim=128,
    learning_rate=0.005,
    epochs=5,
    optimizer='adagrad',
    dropout=0.0,
    weight_decay=0.0001,
    loss_fn='mse',
    image_dim=128,
)

# Build the recommender, then fit it with the training and validation loaders.
model = NCFRecommender(
    unique_users,
    unique_items,
    image_dataloader=image_dataloader,
    **params
)
model.fit(train_dataloader, eval_dataloader)

Epoch 1/5
Train loss: 0.209850, Validation loss: 0.225900
Epoch 2/5
Train loss: 0.160078, Validation loss: 0.228184
Epoch 3/5
Train loss: 0.160029, Validation loss: 0.228242
Epoch 4/5
Train loss: 0.160011, Validation loss: 0.228350
Early stopping triggered after 4 epochs
Training completed!


# Evaluating model

In [6]:
# Evaluate the fitted recommender on the test split and keep the resulting
# metrics for display in the next cell.
evaluator = Evaluation(
    recommender=model,
    test_data=df_test,
)
metrics = evaluator.evaluate()

Creating ground truth sets...
Generating predictions...
Processing 1 of 121682 users... (0.00%)
Memory usage: 0.671630859375 . Increasing batch size with increasing rate of 1.1
Increased user batch size from 256 to 282
Increased item batch size from 2048 to 2253
Processing 257 of 121682 users... (0.21%)
Memory usage: 0.6494140625 . Increasing batch size with increasing rate of 1.1
Increased user batch size from 282 to 310
Increased item batch size from 2253 to 2478
Processing 539 of 121682 users... (0.44%)
Memory usage: 0.756103515625 . Increasing batch size with increasing rate of 1.1
Increased user batch size from 310 to 341
Increased item batch size from 2478 to 2726
Processing 849 of 121682 users... (0.70%)
Processing 1190 of 121682 users... (0.98%)
Processing 1531 of 121682 users... (1.26%)
Processing 1872 of 121682 users... (1.54%)
Processing 2213 of 121682 users... (1.82%)
Processing 2554 of 121682 users... (2.10%)
Processing 2895 of 121682 users... (2.38%)
Processing 3236 of 12

In [7]:
# Display the metrics dictionary returned by evaluator.evaluate().
# NOTE(review): the recorded Hit Ratio@10 / Recall@10 (~9e-05) are very low, and
# the validation loss logged during training rises from the first epoch onward -
# presumably the model or the evaluation pipeline needs investigating; TODO confirm.
metrics

{'Hit Ratio@10': 9.03995660820828e-05,
 'NDCG@10': 2.8392796702563603e-05,
 'Recall@10': 9.03995660820828e-05}