In [1]:
import faiss
import numpy as np
import pandas as pd
from datetime import datetime,timedelta

In [2]:
# Load the pickled interaction tables and keep only the three columns selected here.
# NOTE: pd.read_pickle can execute arbitrary code; only load trusted files.
train_df = pd.read_pickle('data/train_df.pkl')[["item_no","user_no","time"]]
test_df = pd.read_pickle('data/test_df.pkl')[["item_no","user_no","time"]]

# Test rows grouped by user, so each user's rows can be fetched with get_group().
test_df_userno_group = test_df.groupby("user_no")

In [3]:
def load_m4():
    """Load the four saved ``*_m4.npy`` arrays.

    The files are read with ``np.load`` from paths relative to the current
    working directory.

    Returns:
        tuple: ``(userid_vectors, userid_index, itemid_vectors, itemid_index)``,
        in that order.
    """
    file_names = (
        "userid_vectors_m4.npy",
        "userid_index_m4.npy",
        "itemid_vectors_m4.npy",
        "itemid_index_m4.npy",
    )
    # Read in the same order as the returned tuple.
    return tuple(np.load(file_name) for file_name in file_names)

In [4]:
def load_m5():
    """Load the four saved ``*_M5.npy`` arrays.

    The files are read with ``np.load`` from paths relative to the current
    working directory.

    Returns:
        tuple: ``(userid_vectors, userid_index, itemid_vectors, itemid_index)``,
        in that order.
    """
    file_names = (
        "userid_vectors_M5.npy",
        "userid_index_M5.npy",
        "itemid_vectors_M5.npy",
        "itemid_index_M5.npy",
    )
    # Read in the same order as the returned tuple.
    return tuple(np.load(file_name) for file_name in file_names)

In [5]:
def load_fullsoftmax():
    """Load the four saved ``*_fullsoftmax.npy`` arrays.

    The files are read with ``np.load`` from paths relative to the current
    working directory.

    Returns:
        tuple: ``(userid_vectors, userid_index, itemid_vectors, itemid_index)``,
        in that order.
    """
    file_names = (
        "userid_vectors_fullsoftmax.npy",
        "userid_index_fullsoftmax.npy",
        "itemid_vectors_fullsoftmax.npy",
        "itemid_index_fullsoftmax.npy",
    )
    # Read in the same order as the returned tuple.
    return tuple(np.load(file_name) for file_name in file_names)

In [6]:
def evluate(dim):
    """Print Recall@k for inner-product retrieval over the currently loaded vectors.

    This function takes its data from notebook-level globals rather than
    arguments: ``userid_vectors``, ``userid_index``, ``itemid_vectors``,
    ``itemid_index`` and ``test_df_userno_group``. Rebind the first four
    (e.g. via one of the ``load_*`` helpers) before calling.

    An exact inner-product FAISS index is built over ``itemid_vectors`` and
    queried once with every row of ``userid_vectors``. For each cutoff ``k``
    the printed value is::

        (# of a user's test rows whose item_no is in that user's top-k) summed over users
        ---------------------------------------------------------------------------------
                         (# of test rows) summed over the same users

    Repeated ``item_no`` values in a user's test rows are counted once per row.

    Args:
        dim: Vector dimensionality handed to ``faiss.IndexFlatIP``; expected to
            match the column count of ``itemid_vectors`` and ``userid_vectors``.

    Raises:
        KeyError: if a user in ``userid_index`` has no rows in
            ``test_df_userno_group`` (raised by ``get_group``).
    """
    cutoffs = [1,5,10,50,100,300,500,1000]

    index=faiss.IndexFlatIP(dim)
    index.add(itemid_vectors)
    # Search once at the deepest cutoff; shallower cutoffs are prefixes of it.
    _, I = index.search(userid_vectors, max(cutoffs))

    # The ground-truth items of a user do not depend on k, so fetch them once
    # instead of repeating the group lookup for every cutoff.
    labels_per_user = [
        test_df_userno_group.get_group(user_no).item_no.tolist()
        for user_no in userid_index
    ]
    item_sum = sum(len(user_item_label) for user_item_label in labels_per_user)

    for k in cutoffs:
        recall_sum = 0
        for i, user_item_label in enumerate(labels_per_user):
            neighbour_rows = I[i][:k]
            # FAISS pads with -1 when the index holds fewer vectors than were
            # requested; without this filter -1 would select the LAST item.
            neighbour_rows = neighbour_rows[neighbour_rows >= 0]
            user_item_topK = set(itemid_index[neighbour_rows].tolist())
            recall_sum += sum(1 for item_no in user_item_label if item_no in user_item_topK)
        # No labels at all (e.g. empty userid_index): report nan instead of crashing.
        recall = recall_sum/item_sum if item_sum else float("nan")
        print("Recall@" + str(k) + " = "+ str(round(recall,4)))

## POP

In [7]:
# Execute the external script DPACL/Methods_POP.py; the "Methods POP:" Recall@k
# lines in this cell's output are printed by that script.
%run DPACL/Methods_POP.py 

Methods POP:
Recall@1 = 0.0001
Recall@5 = 0.0012
Recall@10 = 0.0019
Recall@50 = 0.0064
Recall@100 = 0.0115


## POP_RealTime

In [8]:
# Execute the external script DPACL/Methods_POP_RealTime.py; the Recall@k lines
# in this cell's output are printed by that script.
%run DPACL/Methods_POP_RealTime.py 

Methods_POP_RealTime:
Recall@1 = 0.001
Recall@5 = 0.004
Recall@10 = 0.0063
Recall@50 = 0.0194
Recall@100 = 0.0308


## DeepU2I_FullSoftmax

In [9]:
# Rebind the notebook-level globals that evluate() reads to the
# "fullsoftmax" arrays, then score them with a 128-dimensional index.
(
    userid_vectors,
    userid_index,
    itemid_vectors,
    itemid_index,
) = load_fullsoftmax()
evluate(128)

Recall@1 = 0.0225
Recall@5 = 0.0736
Recall@10 = 0.1068
Recall@50 = 0.1856
Recall@100 = 0.2167
Recall@300 = 0.264
Recall@500 = 0.2869
Recall@1000 = 0.3222


## M4

In [10]:
# Rebind the notebook-level globals that evluate() reads to the
# "m4" arrays, then score them with a 128-dimensional index.
(
    userid_vectors,
    userid_index,
    itemid_vectors,
    itemid_index,
) = load_m4()
evluate(128)

Recall@1 = 0.0272
Recall@5 = 0.0762
Recall@10 = 0.108
Recall@50 = 0.1876
Recall@100 = 0.2201
Recall@300 = 0.2629
Recall@500 = 0.2838
Recall@1000 = 0.3176


## M5 

In [11]:
# Rebind the notebook-level globals that evluate() reads to the
# "M5" arrays, then score them with a 128-dimensional index.
(
    userid_vectors,
    userid_index,
    itemid_vectors,
    itemid_index,
) = load_m5()
evluate(128)

Recall@1 = 0.0327
Recall@5 = 0.0908
Recall@10 = 0.1273
Recall@50 = 0.2037
Recall@100 = 0.2285
Recall@300 = 0.2665
Recall@500 = 0.2877
Recall@1000 = 0.3216


## M0(M4+test_dy_30day)

In [12]:
# Start from the "m4" arrays (reloaded here, so the cell can be re-run safely).
(
    userid_vectors,
    userid_index,
    itemid_vectors,
    itemid_index,
) = load_m4()

# Number of train_df rows per item_no.
item_df = train_df[["item_no"]]
item_num_dict = item_df["item_no"].value_counts().to_dict()

# One extra value per item: log(train count + 1) / 20; items absent from
# train_df get log(1) / 20 = 0.
adjust_vector = np.array(
    [np.log(item_num_dict.get(item_no,0)+1)/20 for item_no in itemid_index]
)

# Append a constant 1 to every user vector and the extra value to every item
# vector, so the inner product becomes <user, item> + log(count + 1) / 20.
userid_vectors = np.column_stack(
    (userid_vectors, np.ones(userid_vectors.shape[0]))
).astype(np.float32)
itemid_vectors = np.column_stack(
    (itemid_vectors, adjust_vector)
).astype(np.float32)

# 129 = the 128 columns used in the plain cells + the one appended column.
evluate(129)

Recall@1 = 0.0484
Recall@5 = 0.1163
Recall@10 = 0.1501
Recall@50 = 0.2268
Recall@100 = 0.2589
Recall@300 = 0.3202
Recall@500 = 0.355
Recall@1000 = 0.4114


## M1(M5+test_dy_30day)

In [13]:
# Start from the "M5" arrays (reloaded here, so the cell can be re-run safely).
(
    userid_vectors,
    userid_index,
    itemid_vectors,
    itemid_index,
) = load_m5()

# Number of train_df rows per item_no.
item_df = train_df[["item_no"]]
item_num_dict = item_df["item_no"].value_counts().to_dict()

# One extra value per item: log(train count + 1) / 20; items absent from
# train_df get log(1) / 20 = 0.
adjust_vector = np.array(
    [np.log(item_num_dict.get(item_no,0)+1)/20 for item_no in itemid_index]
)

# Append a constant 1 to every user vector and the extra value to every item
# vector, so the inner product becomes <user, item> + log(count + 1) / 20.
userid_vectors = np.column_stack(
    (userid_vectors, np.ones(userid_vectors.shape[0]))
).astype(np.float32)
itemid_vectors = np.column_stack(
    (itemid_vectors, adjust_vector)
).astype(np.float32)

# 129 = the 128 columns used in the plain cells + the one appended column.
evluate(129)

Recall@1 = 0.0547
Recall@5 = 0.123
Recall@10 = 0.1571
Recall@50 = 0.2301
Recall@100 = 0.2624
Recall@300 = 0.324
Recall@500 = 0.3593
Recall@1000 = 0.4163


## M2(M4+test_dy_24hour)

In [14]:
# Execute the external script DPACL/Evaluate_DeepU2I_M2.py; it prints the
# "cal hour: 0..23" progress lines and the Recall@k table shown in the output.
%run DPACL/Evaluate_DeepU2I_M2.py

cal hour: 0
cal hour: 1
cal hour: 2
cal hour: 3
cal hour: 4
cal hour: 5
cal hour: 6
cal hour: 7
cal hour: 8
cal hour: 9
cal hour: 10
cal hour: 11
cal hour: 12
cal hour: 13
cal hour: 14
cal hour: 15
cal hour: 16
cal hour: 17
cal hour: 18
cal hour: 19
cal hour: 20
cal hour: 21
cal hour: 22
cal hour: 23
Recall@1 = 0.0322
Recall@5 = 0.0886
Recall@10 = 0.12
Recall@50 = 0.2043
Recall@100 = 0.239
Recall@300 = 0.2971
Recall@500 = 0.3279
Recall@1000 = 0.3757


## M3(M5+test_dy_24hour)

In [15]:
# Execute the external script DPACL/Evaluate_DeepU2I_M3.py; it prints the
# "cal hour: 0..23" progress lines and the Recall@k table shown in the output.
%run DPACL/Evaluate_DeepU2I_M3.py

cal hour: 0
cal hour: 1
cal hour: 2
cal hour: 3
cal hour: 4
cal hour: 5
cal hour: 6
cal hour: 7
cal hour: 8
cal hour: 9
cal hour: 10
cal hour: 11
cal hour: 12
cal hour: 13
cal hour: 14
cal hour: 15
cal hour: 16
cal hour: 17
cal hour: 18
cal hour: 19
cal hour: 20
cal hour: 21
cal hour: 22
cal hour: 23
Recall@1 = 0.0397
Recall@5 = 0.0998
Recall@10 = 0.135
Recall@50 = 0.2136
Recall@100 = 0.2447
Recall@300 = 0.2991
Recall@500 = 0.3291
Recall@1000 = 0.3794
