In [2]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
from sklearn.metrics import roc_auc_score
import random
import pandas as pd
from dataset import load_data
from Cali_MR_Model import MF_Cali_MR

from utils import ndcg_func, binarize, shuffle, recall_func, precision_func
def mse_func(x, y):
    """Return the mean of the squared element-wise difference between x and y."""
    diff = x - y
    return np.mean(diff ** 2)


def acc_func(x, y):
    """Return the fraction of positions at which x and y compare equal."""
    num_matches = np.sum(x == y)
    return num_matches / len(x)


# Name of the dataset this notebook runs on.
dataset_name = "yahoo"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch (CPU and CUDA) with `seed`.

    The seeders are invoked in a fixed order so repeated calls with the same
    seed leave every generator in the same state.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Fix the seed once up front so the cells below start from a known RNG state.
set_seed(2024)

In [4]:
# Load the dataset selected by `dataset_name` (defined with the imports) rather
# than repeating the literal, so changing the configuration actually takes effect.
x_train, y_train, x_test, y_test = load_data(dataset_name)
x_train, y_train = shuffle(x_train, y_train)

# Column 0 of x_train is treated as the user id and column 1 as the item id;
# the counts are taken as (largest id + 1).
# NOTE(review): this presumably sizes the model's embedding tables — confirm in MF_Cali_MR.
num_user = x_train[:, 0].max() + 1
num_item = x_train[:, 1].max() + 1

print("# user: {}, # item: {}".format(num_user, num_item))

# Convert the labels with the project's `binarize` helper before training/evaluation.
y_train = binarize(y_train)
y_test = binarize(y_test)

===>Load from yahoo data set<===
[train] num data: 311704
[test]  num data: 54000
# user: 15401, # item: 1001


In [None]:
# Training set: 15,400 users, all 1,000 items, 311,704 ratings.
# Test set: the first 5,400 users, 10 random items each, 54,000 ratings.

In [7]:
def _f1_score(precision, recall):
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    denom = precision + recall
    if denom == 0:
        # Avoid 0/0 -> nan (plus a RuntimeWarning) on a degenerate evaluation.
        return 0.0
    return 2 * (precision * recall) / denom


# Re-seed so this cell is reproducible regardless of what ran before it.
set_seed(2024)

# Enumerate every (user, item) index pair: stacking a num_user x num_item frame
# and resetting the index yields one row per cell; the first two columns are
# the row (user) and column (item) indices.
all_data = pd.DataFrame(np.zeros((num_user, num_item))).stack().reset_index()
all_data = all_data.values[:, :2]
print(all_data.shape)
print(all_data)

# Unlabeled pairs = all pairs minus the pairs present in x_train.
# NOTE(review): this set difference over ~num_user*num_item tuples is slow and
# memory-hungry, and the row order of unlabeled_x follows set iteration order.
# A vectorised replacement would change that order (and possibly the reported
# numbers if fit() samples by position), so it is deliberately left as-is here.
unlabeled_x = np.array(list(set(map(tuple, all_data)) - set(map(tuple, x_train))), dtype=int)

mf_cali_mr = MF_Cali_MR(num_user, num_item)
mf_cali_mr.cuda()  # requires a CUDA-capable device

# Stage 1: propensity computation on the observed pairs.
mf_cali_mr._compute_IPS(x_train, lr=0.05, lamb=5e-3, gamma=1, batch_size_prop=1049088, tol=1e-5)

# Stage 2: fit the model on the labeled and unlabeled pairs.
mf_cali_mr.fit(
    x_train, y_train, unlabeled_x,
    batch_size=8192,
    lr1=0.05,
    lamb1=1e-6,
    lr2=0.05,
    lamb2=5e-6,
    lr3=0.05,
    lamb3=5e-6,
    prop_clip=0.01,
    gamma=5,
    G=3,
    tol=1e-5,
    verbose=False,
)

# Evaluate on the held-out test pairs.
test_pred = mf_cali_mr.predict(x_test)
mse_mfcali_mr = mse_func(y_test, test_pred)
auc_mfcali_mr = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_cali_mr, x_test, y_test)
recall_res = recall_func(mf_cali_mr, x_test, y_test)
precision_res = precision_func(mf_cali_mr, x_test, y_test)

# Average each ranking metric once and reuse the values below.
ndcg_5 = np.mean(ndcg_res["ndcg_5"])
ndcg_10 = np.mean(ndcg_res["ndcg_10"])
recall_5 = np.mean(recall_res["recall_5"])
recall_10 = np.mean(recall_res["recall_10"])
precision_5 = np.mean(precision_res["precision_5"])
precision_10 = np.mean(precision_res["precision_10"])

print("***"*5 + "[MF-Cali-MR]" + "***"*5)
print("[MF-Cali-MR] test mse:", mse_mfcali_mr)
print("[MF-Cali-MR] test auc:", auc_mfcali_mr)
print("[MF-Cali-MR] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(ndcg_5, ndcg_10))
print("[MF-Cali-MR] recall@5:{:.6f}, recall@10:{:.6f}".format(recall_5, recall_10))
print("[MF-Cali-MR] precision@5:{:.6f}, precision@10:{:.6f}".format(precision_5, precision_10))
# F1 is computed from the averaged precision and recall, as in the original cell.
print("[MF-Cali-MR] f1@5:{:.6f}, f1@10:{:.6f}".format(
        _f1_score(precision_5, recall_5),
        _f1_score(precision_10, recall_10)))

(15416401, 2)
[[0.00e+00 0.00e+00]
 [0.00e+00 1.00e+00]
 [0.00e+00 2.00e+00]
 ...
 [1.54e+04 9.98e+02]
 [1.54e+04 9.99e+02]
 [1.54e+04 1.00e+03]]
[PS] epoch:19, xent:729.8358807563782
[MF-MR] epoch:60, xent:3.4754952415823936
***************[MF-Cali-MR]***************
[MF-Cali-MR] test mse: 0.2686779836715469
[MF-Cali-MR] test auc: 0.7031546402545443
[MF-Cali-MR] ndcg@5:0.675884, ndcg@10:0.785900
[MF-Cali-MR] recall@5:0.446212, recall@10:0.726296
[MF-Cali-MR] precision@5:0.271889, precision@10:0.231833
[MF-Cali-MR] f1@5:0.337891, f1@10:0.351476
