In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
from sklearn.metrics import roc_auc_score
# Seed NumPy's and PyTorch's global random generators with one shared value.
SEED = 2020
np.random.seed(SEED)
torch.manual_seed(SEED)
import pdb

from dataset import load_data
from matrix_factorization import MF, MF_N_IPS, MF_N_DR_JL, MF_N_MRDR_JL

from utils import gini_index, ndcg_func, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU,recall_func, precision_func
def mse_func(x, y):
    """Return the mean of the element-wise squared differences between ``x`` and ``y``."""
    squared_error = (x - y) ** 2
    return np.mean(squared_error)


def acc_func(x, y):
    """Return the number of positions where ``x == y`` divided by ``len(x)``."""
    num_equal = np.sum(x == y)
    return num_equal / len(x)


# Selects which dataset branch the loading cell takes.
dataset_name = "yahoo"

In [None]:
# Load the train/test samples for the configured dataset and derive the
# number of users and items that the models below are constructed with.
if dataset_name == "coat":
    # For "coat", load_data returns two rating matrices that are converted
    # into (x, y) samples; the matrix dimensions give the user/item counts.
    train_mat, test_mat = load_data("coat")
    x_train, y_train = rating_mat_to_sample(train_mat)
    x_test, y_test = rating_mat_to_sample(test_mat)
    num_user = train_mat.shape[0]
    num_item = train_mat.shape[1]

elif dataset_name == "yahoo":
    # For "yahoo", load_data returns the samples directly; only the training
    # part is shuffled.
    x_train, y_train, x_test, y_test = load_data("yahoo")
    x_train, y_train = shuffle(x_train, y_train)
    # Counts are (largest id seen in the training pairs) + 1.
    # Assumes column 0 holds user ids and column 1 item ids, both zero-based
    # -- TODO confirm against load_data.
    num_user = x_train[:,0].max() + 1
    num_item = x_train[:,1].max() + 1

else:
    # Fail here with a clear message instead of a NameError on num_user below.
    raise ValueError(
        "Unsupported dataset_name: {!r}; expected 'coat' or 'yahoo'".format(dataset_name))

print("# user: {}, # item: {}".format(num_user, num_item))
# binarize
y_train = binarize(y_train)
y_test = binarize(y_test)

In [None]:
"MF naive"
# Naive matrix-factorization baseline: fit on the training samples, score the
# test samples, then report MSE, AUC, NDCG@K and Recall@K.
mf = MF(num_user, num_item, batch_size=2048)
mf.cuda()
mf.fit(x_train, y_train, lr=0.05, lamb=1e-4, tol=1e-5)

# Point-wise metrics on the test predictions.
test_pred = mf.predict(x_test)
mse_mf = mse_func(y_test, test_pred)
auc_mf = roc_auc_score(y_test, test_pred)

# Ranking metrics; the helpers return dicts keyed by "<metric>_<K>".
ndcg_res = ndcg_func(mf, x_test, y_test)
recall_res = recall_func(mf, x_test, y_test)

mf_banner = "{0}[MF]{0}".format("***" * 5)
print(mf_banner)
print("[MF] test mse:", mse_mf)
print("[MF] test auc:", auc_mf)

mf_ndcg_5 = np.mean(ndcg_res["ndcg_5"])
mf_ndcg_10 = np.mean(ndcg_res["ndcg_10"])
print("[MF] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(mf_ndcg_5, mf_ndcg_10))

mf_recall_5 = np.mean(recall_res["recall_5"])
mf_recall_10 = np.mean(recall_res["recall_10"])
print("[MF] recall@5:{:.6f}, recall@10:{:.6f}".format(mf_recall_5, mf_recall_10))

# The Gini values are computed but not printed in this cell.
user_wise_ctr = get_user_wise_ctr(x_test, y_test, test_pred)
gi, gu = gini_index(user_wise_ctr)
print(mf_banner)

In [None]:
"MF N IPS"
# MF_N_IPS experiment: fit the propensity model, fit the rating model, then
# report MSE, AUC, NDCG@K, Recall@K, Precision@K and F1@5 on the test samples.
mf_interference_ips = MF_N_IPS(num_user, num_item, low = 0.05, up = 0.95, c = 3)
mf_interference_ips.cuda()

# Labels of a random 5% subset of the test samples, handed to fit() as y_ips.
# NOTE(review): presumably serves as the small unbiased sample the estimator
# needs -- confirm in MF_N_IPS.fit.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_interference_ips.propensity_model.fit(x_train, lr = 0.01, thr = 0.9, batch_size = 2048, lamb = 1e-3)

mf_interference_ips.fit(x_train, y_train, y_ips, thr = 0.8, g_value = [0],
    lr=0.01,
    g = 300,
    h = 3000,
    batch_size=2048,
    lamb1 = 1e-4,
    lamb2 = 1e-4,
    tol=1e-5,
    verbose=False)
test_pred = mf_interference_ips.predict(x_test)
mse_mfips = mse_func(y_test, test_pred)
auc_mfips = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_interference_ips, x_test, y_test)
recall_res = recall_func(mf_interference_ips, x_test, y_test)
precision_res = precision_func(mf_interference_ips, x_test, y_test)

print("***"*5 + "[MF-Interference-IPS]" + "***"*5)
# Reuse the MSE computed above instead of evaluating it a second time.
print("[MF-Interference-IPS] test mse:", mse_mfips)
print("[MF-Interference-IPS] test auc:", auc_mfips)
print("[MF-Interference-IPS] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
print("[MF-Interference-IPS] recall@5:{:.6f}, recall@10:{:.6f}".format(
        np.mean(recall_res["recall_5"]), np.mean(recall_res["recall_10"])))
print("[MF-Interference-IPS] precision@5:{:.6f}, precision@10:{:.6f}".format(
        np.mean(precision_res["precision_5"]), np.mean(precision_res["precision_10"])))
# F1 is the harmonic mean 2*P*R / (P + R); the factor 2 was previously missing,
# so the printed value was half the real F1. Report 0.0 when P + R == 0.
recall_at_5 = np.mean(recall_res["recall_5"])
precision_at_5 = np.mean(precision_res["precision_5"])
f1_denominator = recall_at_5 + precision_at_5
f1_at_5 = 2 * recall_at_5 * precision_at_5 / f1_denominator if f1_denominator > 0 else 0.0
print('f1@5', f1_at_5)
# The Gini values are computed but not printed in this cell.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-Interference-IPS]" + "***"*5)

In [None]:
"MF N DR JL"
# MF_N_DR_JL experiment: fit the propensity model, fit the rating model, then
# report MSE, AUC, NDCG@K, Recall@K, Precision@K and F1@5 on the test samples.
mf_interference_dr_jl = MF_N_DR_JL(num_user, num_item, low = 0.05, up = 0.95, c = 3)
mf_interference_dr_jl.cuda()

# Labels of a random 5% subset of the test samples, handed to fit() as y_ips.
# NOTE(review): presumably serves as the small unbiased sample the estimator
# needs -- confirm in MF_N_DR_JL.fit.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_interference_dr_jl.propensity_model.fit(x_train, thr = 0.9, lamb = 1e-5, batch_size = 2048)

mf_interference_dr_jl.fit(x_train, y_train, y_ips, g_value = [0],
    lr=0.01,
    g = 300,
    h = 3000,
    G = 2,
    batch_size=2048,
    lamb1 = 5e-5,
    lamb2 = 5e-5,
    tol=1e-5,
    verbose=False)
test_pred = mf_interference_dr_jl.predict(x_test)
mse_mfdrjl = mse_func(y_test, test_pred)
auc_mfdrjl = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_interference_dr_jl, x_test, y_test)
recall_res = recall_func(mf_interference_dr_jl, x_test, y_test)
precision_res = precision_func(mf_interference_dr_jl, x_test, y_test)

print("***"*5 + "[MF-Interference-DR-JL]" + "***"*5)
# Reuse the MSE computed above instead of evaluating it a second time.
print("[MF-Interference-DR-JL] test mse:", mse_mfdrjl)
print("[MF-Interference-DR-JL] test auc:", auc_mfdrjl)
print("[MF-Interference-DR-JL] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
print("[MF-Interference-DR-JL] recall@5:{:.6f}, recall@10:{:.6f}".format(
        np.mean(recall_res["recall_5"]), np.mean(recall_res["recall_10"])))
print("[MF-Interference-DR-JL] precision@5:{:.6f}, precision@10:{:.6f}".format(
        np.mean(precision_res["precision_5"]), np.mean(precision_res["precision_10"])))
# F1 is the harmonic mean 2*P*R / (P + R); the factor 2 was previously missing,
# so the printed value was half the real F1. Report 0.0 when P + R == 0.
recall_at_5 = np.mean(recall_res["recall_5"])
precision_at_5 = np.mean(precision_res["precision_5"])
f1_denominator = recall_at_5 + precision_at_5
f1_at_5 = 2 * recall_at_5 * precision_at_5 / f1_denominator if f1_denominator > 0 else 0.0
print('f1@5', f1_at_5)
# The Gini values are computed but not printed in this cell.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-Interference-DR-JL]" + "***"*5)

In [None]:
"MF N MRDR JL"
# MF_N_MRDR_JL experiment: fit the propensity model, fit the rating model, then
# report MSE, AUC, NDCG@K, Recall@K, Precision@K and F1@5 on the test samples.
mf_interference_mrdr_jl = MF_N_MRDR_JL(num_user, num_item, low = 0.05, up = 0.95, c = 3)
mf_interference_mrdr_jl.cuda()

# Labels of a random 5% subset of the test samples, handed to fit() as y_ips.
# NOTE(review): presumably serves as the small unbiased sample the estimator
# needs -- confirm in MF_N_MRDR_JL.fit.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_interference_mrdr_jl.propensity_model.fit(x_train, thr = 1, lamb = 1e-5, batch_size = 2048)

mf_interference_mrdr_jl.fit(x_train, y_train, y_ips, g_value = [0],
    lr=0.01,
    g = 300,
    h = 3000,
    G = 2,
    batch_size=2048,
    lamb1 = 5e-5,
    lamb2 = 5e-5,
    tol=1e-5,
    verbose=False)
test_pred = mf_interference_mrdr_jl.predict(x_test)
mse_mfmrdrjl = mse_func(y_test, test_pred)
auc_mfmrdrjl = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_interference_mrdr_jl, x_test, y_test)
recall_res = recall_func(mf_interference_mrdr_jl, x_test, y_test)
precision_res = precision_func(mf_interference_mrdr_jl, x_test, y_test)

print("***"*5 + "[MF-Interference-MRDR-JL]" + "***"*5)
# Reuse the MSE computed above instead of evaluating it a second time.
print("[MF-Interference-MRDR-JL] test mse:", mse_mfmrdrjl)
print("[MF-Interference-MRDR-JL] test auc:", auc_mfmrdrjl)
print("[MF-Interference-MRDR-JL] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
print("[MF-Interference-MRDR-JL] recall@5:{:.6f}, recall@10:{:.6f}".format(
        np.mean(recall_res["recall_5"]), np.mean(recall_res["recall_10"])))
print("[MF-Interference-MRDR-JL] precision@5:{:.6f}, precision@10:{:.6f}".format(
        np.mean(precision_res["precision_5"]), np.mean(precision_res["precision_10"])))
# F1 is the harmonic mean 2*P*R / (P + R); the factor 2 was previously missing,
# so the printed value was half the real F1. Report 0.0 when P + R == 0.
recall_at_5 = np.mean(recall_res["recall_5"])
precision_at_5 = np.mean(precision_res["precision_5"])
f1_denominator = recall_at_5 + precision_at_5
f1_at_5 = 2 * recall_at_5 * precision_at_5 / f1_denominator if f1_denominator > 0 else 0.0
print('f1@5', f1_at_5)
# The Gini values are computed but not printed in this cell.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-Interference-MRDR-JL]" + "***"*5)