In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
import pdb
from sklearn.metrics import roc_auc_score
np.random.seed(2020)
torch.manual_seed(2020)
import pandas as pd
from dataset import load_data
from matrix_factorization import MF, MF_N_IPS, MF_N_DR_JL, MF_N_MRDR_JL

from utils import gini_index, ndcg_func, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU,recall_func, precision_func
mse_func = lambda x,y: np.mean((x-y)**2)
acc_func = lambda x,y: np.sum(x == y) / len(x)

dataset_name = "kuai"

In [None]:
if dataset_name == "kuai":
    rdf_train = np.array(pd.read_table("./data/kuai/user.txt", header = None, sep = ','))     
    rdf_test = np.array(pd.read_table("./data/kuai/random.txt", header = None, sep = ','))
    rdf_train_new = np.c_[rdf_train, np.ones(rdf_train.shape[0])]
    rdf_test_new = np.c_[rdf_test, np.zeros(rdf_test.shape[0])]
    rdf = np.r_[rdf_train_new, rdf_test_new]
    
    rdf = rdf[np.argsort(rdf[:, 0])]
    c = rdf.copy()
    for i in range(rdf.shape[0]):
        if i == 0:
            c[:, 0][i] = i
            temp = rdf[:, 0][0]
        else:
            if c[:, 0][i] == temp:
                c[:, 0][i] = c[:, 0][i-1]
            else:
                c[:, 0][i] = c[:, 0][i-1] + 1
            temp = rdf[:, 0][i]
    
    c = c[np.argsort(c[:, 1])]
    d = c.copy()
    for i in range(rdf.shape[0]):
        if i == 0:
            d[:, 1][i] = i
            temp = c[:, 1][0]
        else:
            if d[:, 1][i] == temp:
                d[:, 1][i] = d[:, 1][i-1]
            else:
                d[:, 1][i] = d[:, 1][i-1] + 1
            temp = c[:, 1][i]

    y_train = d[:, 2][d[:, 3] == 1]
    y_test = d[:, 2][d[:, 3] == 0]
    x_train = d[:, :2][d[:, 3] == 1]
    x_test = d[:, :2][d[:, 3] == 0]
    
    num_user = x_train[:,0].max() + 1
    num_item = x_train[:,1].max() + 1

y_train = binarize(y_train, 2)
y_test = binarize(y_test, 2)
num_user = int(num_user)
num_item = int(num_item)
    
print("# user: {}, # item: {}".format(num_user, num_item))
print(sum(y_train)/len(y_train))

In [None]:
x_train = np.int64(x_train)
y_train = np.int64(y_train)

In [None]:
"MF naive"
mf = MF(num_user, num_item, batch_size=2048)
mf.cuda()
mf.fit(x_train, y_train, 
    lr=0.01,
    lamb=5e-5,
    tol=1e-5)
test_pred = mf.predict(x_test)
mse_mf = mse_func(y_test, test_pred)
auc_mf = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf, x_test, y_test, top_k_list = [20, 50])
recall_res = recall_func(mf, x_test, y_test, top_k_list = [20, 50])

print("***"*5 + "[MF]" + "***"*5)
print("[MF] test mse:", mse_mf)
print("[MF] test auc:", auc_mf)
print("[MF] ndcg@20:{:.6f}, ndcg@50:{:.6f}".format(
        np.mean(ndcg_res["ndcg_20"]), np.mean(ndcg_res["ndcg_50"])))
print("[MF] recall@20:{:.6f}, recall@50:{:.6f}".format(
        np.mean(recall_res["recall_20"]), np.mean(recall_res["recall_50"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF]" + "***"*5)

In [None]:
"MF N IPS"
mf_interference_ips = MF_N_IPS(num_user, num_item, low = 0.05, up = 0.95, c = 10)
mf_interference_ips.cuda()

ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_interference_ips.propensity_model.fit(x_train, lr = 0.01, thr = 0.9, batch_size = 2048, lamb = 1e-5)

mf_interference_ips.fit(x_train, y_train, y_ips, thr = 0.8, g_value = [0],
    lr=0.01,
    g = 200,
    h = 100,
    batch_size=2048,
    lamb1 = 5e-5,
    lamb2 = 5e-5,
    tol=1e-5,
    verbose=False)
test_pred = mf_interference_ips.predict(x_test)
mse_mfips = mse_func(y_test, test_pred)
auc_mfips = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_interference_ips, x_test, y_test, top_k_list = [20, 50])
recall_res = recall_func(mf_interference_ips, x_test, y_test, top_k_list = [20, 50])
precision_res = precision_func(mf_interference_ips, x_test, y_test, top_k_list = [20, 50])

print("***"*5 + "[MF-Interference-IPS]" + "***"*5)
print("[MF-Interference-IPS] test mse:", mse_func(y_test, test_pred))
print("[MF-Interference-IPS] test auc:", auc_mfips)
print("[MF-Interference-IPS] ndcg@20:{:.6f}, ndcg@50:{:.6f}".format(
        np.mean(ndcg_res["ndcg_20"]), np.mean(ndcg_res["ndcg_50"])))
print("[MF-Interference-IPS] recall@20:{:.6f}, recall@50:{:.6f}".format(
        np.mean(recall_res["recall_20"]), np.mean(recall_res["recall_50"])))
print("[MF-Interference-IPS] precision@20:{:.6f}, precision@50:{:.6f}".format(
        np.mean(precision_res["precision_20"]), np.mean(precision_res["precision_50"])))    
print('f1@50', np.mean(recall_res["recall_50"]) * np.mean(precision_res["precision_50"])/
     (np.mean(recall_res["recall_50"]) + np.mean(precision_res["precision_50"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-Interference-IPS]" + "***"*5)

In [33]:
"MF N DR JL"
mf_interference_dr_jl = MF_N_DR_JL(num_user, num_item, low = 0.05, up = 0.95, c = 10)
mf_interference_dr_jl.cuda()

ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_interference_dr_jl.propensity_model.fit(x_train, lr = 0.01, thr = 1, lamb = 1e-5, batch_size = 2048)

mf_interference_dr_jl.fit(x_train, y_train, y_ips, g_value = [0],
    lr=0.01,
    g = 200,
    h = 200,
    G = 2,
    batch_size=2048,
    lamb1 = 1e-5,
    lamb2 = 1e-5,
    tol=1e-5,
    verbose=False)
test_pred = mf_interference_dr_jl.predict(x_test)
mse_mfdrjl = mse_func(y_test, test_pred)
auc_mfdrjl = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_interference_dr_jl, x_test, y_test, top_k_list = [20, 50])
recall_res = recall_func(mf_interference_dr_jl, x_test, y_test, top_k_list = [20, 50])
precision_res = precision_func(mf_interference_dr_jl, x_test, y_test, top_k_list = [20, 50])

print("***"*5 + "[MF-Interference-IPS]" + "***"*5)
print("[MF-Interference-IPS] test mse:", mse_func(y_test, test_pred))
print("[MF-Interference-IPS] test auc:", auc_mfdrjl)
print("[MF-Interference-IPS] ndcg@20:{:.6f}, ndcg@50:{:.6f}".format(
        np.mean(ndcg_res["ndcg_20"]), np.mean(ndcg_res["ndcg_50"])))
print("[MF-Interference-IPS] recall@20:{:.6f}, recall@50:{:.6f}".format(
        np.mean(recall_res["recall_20"]), np.mean(recall_res["recall_50"])))
print("[MF-Interference-IPS] precision@20:{:.6f}, precision@50:{:.6f}".format(
        np.mean(precision_res["precision_20"]), np.mean(precision_res["precision_50"])))    
print('f1@50', np.mean(recall_res["recall_50"]) * np.mean(precision_res["precision_50"])/
     (np.mean(recall_res["recall_50"]) + np.mean(precision_res["precision_50"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-Interference-IPS]" + "***"*5)

[MF-Interference-IPS] epoch:18, xent:125.20612573623657
[MF-N-DR-JL] epoch:105, xent:10009488.09375
***************[MF-Interference-IPS]***************
[MF-Interference-IPS] test mse: 0.06218944475425952
[MF-Interference-IPS] test auc: 0.7791225770306165
[MF-Interference-IPS] ndcg@20:0.465605, ndcg@50:0.543906
[MF-Interference-IPS] recall@20:0.442665, recall@50:0.679644
[MF-Interference-IPS] precision@20:0.089157, precision@50:0.063019
f1@50 0.05767160548177845
Num User: 1411
Gini index: 0.8282210368823074
Global utility: 0.04847625797306875
***************[MF-Interference-IPS]***************


In [None]:
"MF N MRDR JL"
mf_interference_mrdr_jl = MF_N_MRDR_JL(num_user, num_item, low = 0.05, up = 0.95, c = 10)
mf_interference_mrdr_jl.cuda()

ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_interference_mrdr_jl.propensity_model.fit(x_train, lr = 0.01, thr = 1, lamb = 1e-5, batch_size = 2048)

mf_interference_mrdr_jl.fit(x_train, y_train, y_ips, g_value = [0],
    lr=0.01,
    g = 200,
    h = 200,
    G = 1,
    batch_size=2048,
    lamb1 = 1e-5,
    lamb2 = 1e-5,
    tol=1e-5,
    verbose=False)
test_pred = mf_interference_mrdr_jl.predict(x_test)
mse_mfmrdrjl = mse_func(y_test, test_pred)
auc_mfmrdrjl = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_interference_mrdr_jl, x_test, y_test, top_k_list = [20, 50])
recall_res = recall_func(mf_interference_mrdr_jl, x_test, y_test, top_k_list = [20, 50])
precision_res = precision_func(mf_interference_mrdr_jl, x_test, y_test, top_k_list = [20, 50])

print("***"*5 + "[MF-Interference-IPS]" + "***"*5)
print("[MF-Interference-IPS] test mse:", mse_func(y_test, test_pred))
print("[MF-Interference-IPS] test auc:", auc_mfmrdrjl)
print("[MF-Interference-IPS] ndcg@20:{:.6f}, ndcg@50:{:.6f}".format(
        np.mean(ndcg_res["ndcg_20"]), np.mean(ndcg_res["ndcg_50"])))
print("[MF-Interference-IPS] recall@20:{:.6f}, recall@50:{:.6f}".format(
        np.mean(recall_res["recall_20"]), np.mean(recall_res["recall_50"])))
print("[MF-Interference-IPS] precision@20:{:.6f}, precision@50:{:.6f}".format(
        np.mean(precision_res["precision_20"]), np.mean(precision_res["precision_50"])))    
print('f1@50', np.mean(recall_res["recall_50"]) * np.mean(precision_res["precision_50"])/
     (np.mean(recall_res["recall_50"]) + np.mean(precision_res["precision_50"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-Interference-IPS]" + "***"*5)