In [1]:
# Shell escape: print the NVIDIA driver / GPU status table (devices, memory use,
# utilisation) so the hardware state is recorded in the notebook output.
!nvidia-smi

Tue Nov 26 17:56:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A40                      On | 00000000:35:00.0 Off |                    0 |
|  0%   47C    P0               78W / 300W|   1728MiB / 46068MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A40                      On | 00000000:36:00.0 Off |  

In [2]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
from sklearn.metrics import roc_auc_score
import random
import pandas as pd
from Cali_MR_Model import MF_Cali_MR

from utils import ndcg_func, binarize, recall_func, precision_func
def mse_func(x, y):
    """Return the mean of the squared element-wise difference between ``x`` and ``y``."""
    return np.mean((x - y) ** 2)


def acc_func(x, y):
    """Return the number of positions where ``x == y`` divided by ``len(x)``."""
    return np.sum(x == y) / len(x)


dataset_name = "kuai"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and CUDA) generators with ``seed``."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Call order: Python RNG, NumPy RNG, torch CPU, torch current GPU, torch all GPUs.
    for seed_generator in seeders:
        seed_generator(seed)


set_seed(2024)

In [4]:
def _dense_rank_sorted(sorted_ids):
    """Map an already-sorted 1-D array of ids to consecutive ranks 0, 1, 2, ...

    Neighbouring equal ids share a rank; the rank grows by one every time the
    value changes from one position to the next. The input must already be
    sorted, otherwise equal ids that are not adjacent receive different ranks.
    """
    if sorted_ids.shape[0] == 0:
        return sorted_ids.copy()
    changed = sorted_ids[1:] != sorted_ids[:-1]
    return np.concatenate(([0], np.cumsum(changed)))


# Read the two comma-separated interaction files (no header row) for the
# dataset selected by `dataset_name`.
# NOTE(review): the code below treats column 0 as the user id, column 1 as the
# item id and column 2 as the rating, and expects column 3 to be the flag
# appended here -- i.e. it assumes each file has exactly three columns; confirm.
data_dir = "./data/{}".format(dataset_name)
rdf_train = pd.read_csv("{}/user.txt".format(data_dir), header=None).to_numpy()
rdf_test = pd.read_csv("{}/random.txt".format(data_dir), header=None).to_numpy()

# Append a split flag (1 = train rows, 0 = test rows) so both files can be
# re-indexed together and separated again afterwards.
rdf_train_new = np.c_[rdf_train, np.ones(rdf_train.shape[0])]
rdf_test_new = np.c_[rdf_test, np.zeros(rdf_test.shape[0])]
rdf = np.r_[rdf_train_new, rdf_test_new]

# Re-index column 0 to consecutive integers: sort by it, then rank the values.
rdf = rdf[np.argsort(rdf[:, 0])]
c = rdf.copy()
c[:, 0] = _dense_rank_sorted(rdf[:, 0])

# Same for column 1, working on the rows re-sorted by that column.
c = c[np.argsort(c[:, 1])]
d = c.copy()
d[:, 1] = _dense_rank_sorted(c[:, 1])

# Split back into train / test using the flag column.
is_train = d[:, 3] == 1
is_test = d[:, 3] == 0
y_train = d[:, 2][is_train]
y_test = d[:, 2][is_test]
x_train = d[:, :2][is_train]
x_test = d[:, :2][is_test]

# Ids were re-indexed over the union of train and test rows, so the sizes are
# taken over all rows: an id that occurs only in the test rows would otherwise
# be >= the reported count. When every test id also occurs in training this
# gives the same numbers as using x_train alone.
num_user = int(d[:, 0].max()) + 1
num_item = int(d[:, 1].max()) + 1

# Convert ratings to binary labels with the project helper (second argument 2;
# see utils.binarize for the exact rule).
y_train = binarize(y_train, 2)
y_test = binarize(y_test, 2)

print("# user: {}, # item: {}".format(num_user, num_item))

# user: 1411, # item: 3327


In [5]:
# Use the second GPU (index 1) when more than one CUDA device is visible; on a
# host that exposes a single GPU fall back to index 0 instead of failing with
# an invalid-device error.
gpu_index = 1 if torch.cuda.device_count() > 1 else 0
torch.cuda.set_device(gpu_index)

In [6]:
def _f1_from(precision, recall):
    """Combine a precision and a recall value as 2 * (p * r) / (p + r)."""
    return 2 * (precision * recall) / (precision + recall)


# Re-seed immediately before model construction and training.
set_seed(2024)

# Enumerate every (user, item) index pair: stacking a num_user x num_item frame
# and resetting the index leaves the row / column labels in the first two columns.
all_data = pd.DataFrame(np.zeros((num_user, num_item))).stack().reset_index().values[:, :2]
print(all_data.shape)
print(all_data)

# Pairs that do not occur in x_train form the unlabeled set.
candidate_pairs = set(map(tuple, all_data))
observed_pairs = set(map(tuple, x_train))
unlabeled_x = np.array(list(candidate_pairs - observed_pairs), dtype=int)

mf_cali_mr = MF_Cali_MR(num_user, num_item)
mf_cali_mr.cuda()

# Hyper-parameters for the propensity step and for the main fit.
ips_kwargs = {
    "lr": 0.05,
    "lamb": 1e-5,
    "gamma": 2,
    "batch_size_prop": 65536,
    "tol": 1e-5,
}
fit_kwargs = {
    "batch_size": 8196,
    "stab": 1e-5,
    "lr1": 0.05,
    "lamb1": 1e-6,
    "lr2": 0.05,
    "lamb2": 5e-6,
    "lr3": 0.05,
    "lamb3": 1e-6,
    "gamma": 5,
    "G": 3,
    "tol": 1e-5,
    "verbose": False,
}

mf_cali_mr._compute_IPS(x_train, **ips_kwargs)
mf_cali_mr.fit(x_train, y_train, unlabeled_x, **fit_kwargs)

# Point-wise metrics on the test rows.
test_pred = mf_cali_mr.predict(x_test)
mse_mfcali_mr = mse_func(y_test, test_pred)
auc_mfcali_mr = roc_auc_score(y_test, test_pred)

# Ranking metrics at cut-offs 20 and 50.
ranking_inputs = (mf_cali_mr, x_test, y_test)
ndcg_res = ndcg_func(*ranking_inputs, top_k_list=[20, 50])
recall_res = recall_func(*ranking_inputs, top_k_list=[20, 50])
precision_res = precision_func(*ranking_inputs, top_k_list=[20, 50])

# Average each metric once and reuse the means for both the report and F1.
mean_ndcg_20 = np.mean(ndcg_res["ndcg_20"])
mean_ndcg_50 = np.mean(ndcg_res["ndcg_50"])
mean_recall_20 = np.mean(recall_res["recall_20"])
mean_recall_50 = np.mean(recall_res["recall_50"])
mean_precision_20 = np.mean(precision_res["precision_20"])
mean_precision_50 = np.mean(precision_res["precision_50"])

banner = "***" * 5
print(banner + "[MF-Cali-MR]" + banner)
print("[MF-Cali-MR] test mse:", mse_mfcali_mr)
print("[MF-Cali-MR] test auc:", auc_mfcali_mr)
print("[MF-Cali-MR] ndcg@20:{:.6f}, ndcg@50:{:.6f}".format(mean_ndcg_20, mean_ndcg_50))
print("[MF-Cali-MR] recall@20:{:.6f}, recall@50:{:.6f}".format(mean_recall_20, mean_recall_50))
print("[MF-Cali-MR] precision@20:{:.6f}, precision@50:{:.6f}".format(
    mean_precision_20, mean_precision_50))
print("[MF-Cali-MR] f1@20:{:.6f}, f1@50:{:.6f}".format(
    _f1_from(mean_precision_20, mean_recall_20),
    _f1_from(mean_precision_50, mean_recall_50)))


(4694397, 2)
[[0.000e+00 0.000e+00]
 [0.000e+00 1.000e+00]
 [0.000e+00 2.000e+00]
 ...
 [1.410e+03 3.324e+03]
 [1.410e+03 3.325e+03]
 [1.410e+03 3.326e+03]]
[PS] epoch:14, xent:507.53363859653473
[MF-MR] epoch:33, xent:2.2896972745656967
***************[MF-Cali-MR]***************
[MF-Cali-MR] test mse: 0.09357830071352316
[MF-Cali-MR] test auc: 0.7978901154125856
[MF-Cali-MR] ndcg@20:0.521847, ndcg@50:0.588744
[MF-Cali-MR] recall@20:0.452208, recall@50:0.655270
[MF-Cali-MR] precision@20:0.095571, precision@50:0.061828
[MF-Cali-MR] f1@20:0.157793, f1@50:0.112995
