In [1]:
import numpy as np
import pandas as pd
import torch
from time import time
from data import TargetData

# Execution device shared by the model and the evaluators in the cells below.
# The notebook appears to be pinned to the CPU on purpose; FORCE_CPU makes that
# override an explicit, named switch instead of auto-detecting CUDA and then
# silently discarding the result with a second assignment.
FORCE_CPU = True  # set to False to use the GPU whenever one is available

# Kept as a plain string (not a torch.device) so that, with FORCE_CPU enabled,
# the value handed to the later cells is exactly 'cpu'.
device = "cuda" if torch.cuda.is_available() and not FORCE_CPU else "cpu"

In [2]:
# Load the sampled test interactions (per the author's note: a random sample),
# rename the id columns to the uid / mid naming that `data.test` also uses,
# and tag every row with a constant rating of 1.
test_data = (
    pd.read_csv("evaluations/nfcf/test_userPages.csv")
    .rename(columns={'user_id': 'uid', 'like_id': 'mid'})
    .assign(rating=1)
)

test_data

Unnamed: 0,uid,mid,rating
0,1,425,1
1,4,48,1
2,4,1337,1
3,7,313,1
4,8,490,1
...,...,...,...
9943,6038,1616,1
9944,6039,990,1
9945,6039,1572,1
9946,6039,3050,1


In [3]:
# Build the TargetData container imported from the project's `data` module
# (its implementation is not visible in this notebook).
data = TargetData()  # consists of latest / last item for each user

# Display the held-out test frame; the rendered output has uid, mid and rating columns.
# NOTE(review): the rendered uids run up to 6039 while data.num_users prints 4920
# in the next cell — confirm whether ids are re-indexed before reaching the model.
data.test

Unnamed: 0,uid,mid,rating
25,1,25,1
66,2,66,1
232,3,207,1
235,4,208,1
258,5,222,1
...,...,...,...
998205,6036,1048,1
998959,6037,294,1
999133,6038,1528,1
999166,6039,420,1


In [4]:
# Number of users reported by the dataset; the same attribute is passed to NCF3
# in the following cell, where it sizes the user embedding tables.
print(data.num_users)

4920


In [5]:
from models import NCF3

# Build the network from the dataset's user / item counts so that the embedding
# tables match the checkpoint restored below.
# NOTE(review): the positional arguments 128, 4, 1 are taken to be the embedding
# size, the number of MLP layers and the dropout rate (the printed model shows
# 128-wide GMF embeddings, four Linear layers and Dropout(p=1)) — confirm
# against the NCF3 signature.
ncf = NCF3(data.num_users, data.num_movies, 128, 4, 1, device).to(device)

# Restore the trained weights. map_location remaps the stored tensors onto
# `device`, so a checkpoint written on a GPU also loads on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
# (A commented-out alternative pointed at "saved_models/preTrained_NCF".)
ncf.load_state_dict(torch.load("saved_models/NCF", map_location=torch.device(device)))

# Switch to inference mode before evaluating: the network contains Dropout
# layers, which keep zeroing activations while the module is in its default
# training mode. The model already lives on `device` (see the constructor
# line), so no further .to(device) is needed; eval() returns the module
# itself, so the cell still ends by displaying the model summary.
ncf.eval()

NCF3(
  (embed_user_GMF): Embedding(4920, 128)
  (embed_item_GMF): Embedding(3416, 128)
  (embed_user_MLP): Embedding(4920, 1024)
  (embed_item_MLP): Embedding(3416, 1024)
  (MLP_layers): Sequential(
    (0): Dropout(p=1, inplace=False)
    (1): Linear(in_features=2048, out_features=1024, bias=True)
    (2): ReLU()
    (3): Dropout(p=1, inplace=False)
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): ReLU()
    (6): Dropout(p=1, inplace=False)
    (7): Linear(in_features=512, out_features=256, bias=True)
    (8): ReLU()
    (9): Dropout(p=1, inplace=False)
    (10): Linear(in_features=256, out_features=128, bias=True)
    (11): ReLU()
  )
  (predict_layer): Linear(in_features=256, out_features=1, bias=True)
  (out_act): Sigmoid()
)

In [6]:
from evaluators import eval_model

# Time one evaluation pass of the loaded model over `data`.
t1 = time()
hr2, ndcg2 = eval_model(
    ncf,
    data,
    num_users=data.num_users,
    device=device,
)
t2 = time()

# Report the wall-clock duration truncated to whole seconds ...
elapsed_seconds = int(t2 - t1)
print(f'{elapsed_seconds} seconds')

# ... followed by both metrics rounded to two decimals.
hr2_rounded, ndcg2_rounded = round(hr2, 2), round(ndcg2, 2)
print(f'hr: {hr2_rounded}, ndcg: {ndcg2_rounded}')

36 seconds
hr: 0.94, ndcg: 0.94


In [12]:
from evaluators import evaluate_model

# NOTE: synchronous CUDA kernel launches (useful when debugging device-side
# errors) are enabled through the CUDA_LAUNCH_BLOCKING *environment variable*,
# which has to be set before CUDA is initialised (e.g. in the shell that starts
# Jupyter). Assigning a Python variable of that name has no effect on CUDA, so
# no such assignment is made here.

# Time the sampled-negatives evaluation: uid / mid pairs from
# data.complete_test are handed over as a NumPy array (.values).
t1 = time()
avg_HR_preTrain, avg_NDCG_preTrain = evaluate_model(
    ncf,
    data.complete_test[['uid',  'mid']].values,
    top_k=10,
    random_samples=100,
    num_items=data.num_movies,
    device=device
)
t2 = time()

# Wall-clock duration, truncated to whole seconds.
print(f'{int(t2 - t1)} seconds')
# Both results support .round() and indexing; the recorded output shows one
# value per position (10 entries). First the full arrays, then the last entry.
# NOTE(review): presumably one entry per cut-off up to top_k — confirm against
# evaluate_model.
print(f'hr: {avg_HR_preTrain.round(2)}, ndcg:{avg_NDCG_preTrain.round(2)}')
print(f'hr: {round(avg_HR_preTrain[-1], 2)}, ndcg:{round(avg_NDCG_preTrain[-1], 2)}')

39 seconds
hr: [0.   0.94 0.94 0.94 0.94 0.94 0.94 0.94 0.94 0.94], ndcg:[0.   0.94 0.94 0.94 0.94 0.94 0.94 0.94 0.94 0.94]
hr: 0.94, ndcg:0.94


In [None]:
from dump.evaluate import Evaluate

# Evaluator built from the dataset, the sampled test interactions and the device.
evaluator = Evaluate(
    data=data,
    testing_data=test_data,
    device=device,
)

# Time a single evaluation call on the loaded model.
t1 = time()
hr, ndcg = evaluator(model=ncf)
t2 = time()

# Report the wall-clock duration truncated to whole seconds ...
elapsed_seconds = int(t2 - t1)
print(f'{elapsed_seconds} seconds')

# ... followed by both metrics rounded to two decimals.
hr_rounded, ndcg_rounded = round(hr, 2), round(ndcg, 2)
print(f'hr: {hr_rounded}, ndcg: {ndcg_rounded}')

In [10]:
# Summary statistics for the user / item id columns of the held-out test frame.
# NOTE(review): the saved output reports 6040 rows and a max uid of 6040, whereas
# data.num_users printed 4920 earlier, and this cell's execution count (10)
# precedes that of cells placed above it — the output may be stale; re-run the
# notebook top to bottom to confirm.
data.test[['uid',  'mid']].describe()

Unnamed: 0,uid,mid
count,6040.0,6040.0
mean,3020.5,937.987914
std,1743.742145,787.384617
min,1.0,0.0
25%,1510.75,268.0
50%,3020.5,729.0
75%,4530.25,1396.0
max,6040.0,3400.0
