In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
from sklearn.metrics import roc_auc_score
import random
import pandas as pd
from dataset import load_data
from Cali_MR_Model import MF_Cali_MR

from utils import ndcg_func, rating_mat_to_sample, binarize, recall_func, precision_func
def mse_func(x, y):
    """Return the mean of the element-wise squared differences between ``x`` and ``y``."""
    return np.mean((x - y) ** 2)


def acc_func(x, y):
    """Return the number of positions where ``x == y`` divided by ``len(x)``."""
    return np.sum(x == y) / len(x)


# Name of the dataset this notebook works with.
dataset_name = "coat"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Args:
        seed: Value passed unchanged to every seeding function below.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)


set_seed(2024)

In [3]:
# Load the train/test rating matrices for the dataset chosen in the setup cell.
# Using `dataset_name` (instead of repeating the literal "coat") keeps that
# setting as the single place where the dataset is selected.
train_mat, test_mat = load_data(dataset_name)

# Convert each rating matrix into samples; x_* hold the (user, item) index
# pairs and y_* the corresponding ratings.
x_train, y_train = rating_mat_to_sample(train_mat)
x_test, y_test = rating_mat_to_sample(test_mat)

# Rows of the rating matrix are users, columns are items.
num_user = train_mat.shape[0]
num_item = train_mat.shape[1]

print("# user: {}, # item: {}".format(num_user, num_item))
# binarize
# NOTE(review): presumably maps the raw ratings to 0/1 labels; confirm the
# threshold in utils.binarize.
y_train = binarize(y_train)
y_test = binarize(y_test)

===>Load from coat data set<===
[train] rating ratio: 0.080000
[test]  rating ratio: 0.053333
# user: 290, # item: 300


In [8]:
# 290 users, 300 items
# Training set: each user rated 24 items chosen according to their own
# preferences, giving 290 * 24 = 6960 samples.
# Test set: each user rated 16 randomly selected items, giving
# 290 * 16 = 4640 samples.
x_train

array([[  0,  72],
       [  0, 136],
       [  0, 150],
       ...,
       [289, 144],
       [289, 282],
       [289, 294]], dtype=int64)

In [5]:
# MF-Cali-MR experiment: fit on the training ratings, then report test metrics.
# NOTE(review): the next cell repeats this experiment verbatim; a shared
# function would avoid the copy.
set_seed(2024)

# Stacking an all-zero (num_user x num_item) frame and resetting its index
# enumerates every (user, item) pair; the first two columns are those indices.
all_data = (
    pd.DataFrame(np.zeros((num_user, num_item)))
    .stack()
    .reset_index()
    .values[:, :2]
)
print(all_data.shape)
print(all_data)

# Unlabeled pairs = every (user, item) pair that does not appear in x_train.
unlabeled_x = np.array(
    list(set(map(tuple, all_data)) - set(map(tuple, x_train))),
    dtype=int,
)

mf_cali_mr = MF_Cali_MR(num_user, num_item)
mf_cali_mr.cuda()

# NOTE(review): presumably trains the propensity model (the saved output logs
# a "[PS]" line at this step); confirm in Cali_MR_Model.
mf_cali_mr._compute_IPS(
    x_train,
    lr=0.05,
    lamb=5e-3,
    gamma=2,
    batch_size_prop=2048,
    tol=1e-5,
)

mf_cali_mr.fit(
    x_train,
    y_train,
    unlabeled_x,
    batch_size=128,
    lr1=0.05,
    lamb1=1e-4,
    lr2=0.05,
    lamb2=5e-5,
    lr3=0.05,
    lamb3=5e-5,
    gamma=8,
    G=3,
    tol=1e-5,
    verbose=False,
)

# Evaluate on the test samples.
test_pred = mf_cali_mr.predict(x_test)
mse_mfcali_mr = mse_func(y_test, test_pred)
auc_mfcali_mr = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_cali_mr, x_test, y_test)
recall_res = recall_func(mf_cali_mr, x_test, y_test)
precision_res = precision_func(mf_cali_mr, x_test, y_test)

# Average each metric once so the report lines below stay short.
ndcg_at_5 = np.mean(ndcg_res["ndcg_5"])
ndcg_at_10 = np.mean(ndcg_res["ndcg_10"])
recall_at_5 = np.mean(recall_res["recall_5"])
recall_at_10 = np.mean(recall_res["recall_10"])
precision_at_5 = np.mean(precision_res["precision_5"])
precision_at_10 = np.mean(precision_res["precision_10"])

# F1 is the harmonic mean of the averaged precision and recall.
f1_at_5 = 2 * (precision_at_5 * recall_at_5) / (precision_at_5 + recall_at_5)
f1_at_10 = 2 * (precision_at_10 * recall_at_10) / (precision_at_10 + recall_at_10)

banner = "***" * 5
print(banner + "[MF-Cali-MR]" + banner)
print("[MF-Cali-MR] test mse:", mse_mfcali_mr)
print("[MF-Cali-MR] test auc:", auc_mfcali_mr)
print("[MF-Cali-MR] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(ndcg_at_5, ndcg_at_10))
print("[MF-Cali-MR] recall@5:{:.6f}, recall@10:{:.6f}".format(recall_at_5, recall_at_10))
print("[MF-Cali-MR] precision@5:{:.6f}, precision@10:{:.6f}".format(precision_at_5, precision_at_10))
print("[MF-Cali-MR] f1@5:{:.6f}, f1@10:{:.6f}".format(f1_at_5, f1_at_10))

(87000, 2)
[[  0.   0.]
 [  0.   1.]
 [  0.   2.]
 ...
 [289. 297.]
 [289. 298.]
 [289. 299.]]
[PS] epoch:15, xent:86.39530432224274
[MF-MR] epoch:38, xent:4.206998221576214
***************[MF-Cali-MR]***************
[MF-Cali-MR] test mse: 0.2773858549875212
[MF-Cali-MR] test auc: 0.6642905087464108
[MF-Cali-MR] ndcg@5:0.595649, ndcg@10:0.664967
[MF-Cali-MR] recall@5:0.427175, recall@10:0.700108
[MF-Cali-MR] precision@5:0.494483, precision@10:0.438276
[MF-Cali-MR] f1@5:0.458371, f1@10:0.539081


In [6]:
# Re-run of the MF-Cali-MR experiment with the same seed and hyperparameters
# as the previous cell.
# NOTE(review): in the saved outputs the two runs disagree ([MF-MR] stops at
# epoch 38 in the previous cell vs. 28 here), so some source of randomness is
# apparently not controlled by set_seed - TODO investigate.
set_seed(2024)

# Stacking an all-zero (num_user x num_item) frame and resetting its index
# enumerates every (user, item) pair; the first two columns are those indices.
all_data = (
    pd.DataFrame(np.zeros((num_user, num_item)))
    .stack()
    .reset_index()
    .values[:, :2]
)
print(all_data.shape)
print(all_data)

# Unlabeled pairs = every (user, item) pair that does not appear in x_train.
unlabeled_x = np.array(
    list(set(map(tuple, all_data)) - set(map(tuple, x_train))),
    dtype=int,
)

mf_cali_mr = MF_Cali_MR(num_user, num_item)
mf_cali_mr.cuda()

# NOTE(review): presumably trains the propensity model (the saved output logs
# a "[PS]" line at this step); confirm in Cali_MR_Model.
mf_cali_mr._compute_IPS(
    x_train,
    lr=0.05,
    lamb=5e-3,
    gamma=2,
    batch_size_prop=2048,
    tol=1e-5,
)

mf_cali_mr.fit(
    x_train,
    y_train,
    unlabeled_x,
    batch_size=128,
    lr1=0.05,
    lamb1=1e-4,
    lr2=0.05,
    lamb2=5e-5,
    lr3=0.05,
    lamb3=5e-5,
    gamma=8,
    G=3,
    tol=1e-5,
    verbose=False,
)

# Evaluate on the test samples.
test_pred = mf_cali_mr.predict(x_test)
mse_mfcali_mr = mse_func(y_test, test_pred)
auc_mfcali_mr = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_cali_mr, x_test, y_test)
recall_res = recall_func(mf_cali_mr, x_test, y_test)
precision_res = precision_func(mf_cali_mr, x_test, y_test)

# Average each metric once so the report lines below stay short.
ndcg_at_5 = np.mean(ndcg_res["ndcg_5"])
ndcg_at_10 = np.mean(ndcg_res["ndcg_10"])
recall_at_5 = np.mean(recall_res["recall_5"])
recall_at_10 = np.mean(recall_res["recall_10"])
precision_at_5 = np.mean(precision_res["precision_5"])
precision_at_10 = np.mean(precision_res["precision_10"])

# F1 is the harmonic mean of the averaged precision and recall.
f1_at_5 = 2 * (precision_at_5 * recall_at_5) / (precision_at_5 + recall_at_5)
f1_at_10 = 2 * (precision_at_10 * recall_at_10) / (precision_at_10 + recall_at_10)

banner = "***" * 5
print(banner + "[MF-Cali-MR]" + banner)
print("[MF-Cali-MR] test mse:", mse_mfcali_mr)
print("[MF-Cali-MR] test auc:", auc_mfcali_mr)
print("[MF-Cali-MR] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(ndcg_at_5, ndcg_at_10))
print("[MF-Cali-MR] recall@5:{:.6f}, recall@10:{:.6f}".format(recall_at_5, recall_at_10))
print("[MF-Cali-MR] precision@5:{:.6f}, precision@10:{:.6f}".format(precision_at_5, precision_at_10))
print("[MF-Cali-MR] f1@5:{:.6f}, f1@10:{:.6f}".format(f1_at_5, f1_at_10))

(87000, 2)
[[  0.   0.]
 [  0.   1.]
 [  0.   2.]
 ...
 [289. 297.]
 [289. 298.]
 [289. 299.]]
[PS] epoch:15, xent:86.39530432224274
[MF-MR] epoch:28, xent:7.046097449958324
***************[MF-Cali-MR]***************
[MF-Cali-MR] test mse: 0.21531039026505272
[MF-Cali-MR] test auc: 0.7383472372693536
[MF-Cali-MR] ndcg@5:0.650074, ndcg@10:0.710519
[MF-Cali-MR] recall@5:0.452770, recall@10:0.720345
[MF-Cali-MR] precision@5:0.528276, precision@10:0.454138
[MF-Cali-MR] f1@5:0.487618, f1@10:0.557072
