In [29]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
import random
import copy
import pandas as pd
from utils import *
import pickle
from arg_parser import *

In [30]:
def boolean_string(s):
    """Translate the exact text 'True' or 'False' into the matching bool.

    Used below as the ``type=`` converter of the ``--is_shadow`` option.

    Args:
        s: Candidate value; only the strings 'True' and 'False' are accepted.

    Returns:
        True for 'True', False for 'False'.

    Raises:
        ValueError: If ``s`` is anything other than those two strings.
    """
    accepted = {'True': True, 'False': False}
    if s in accepted:
        return accepted[s]
    raise ValueError('Not a valid boolean string')

# Experiment configuration, declared as command-line options.
parser = argparse.ArgumentParser()

# One (flag, add_argument keyword options) entry per option; they are
# registered with the parser in exactly the order listed here.
_ARGUMENT_SPECS = [
    ('--dataset', dict(default='ciao')),
    ('--model_name', dict(default="DESIGN")),

    ('--gpu_id', dict(default=0, type=int)),
    # training hyper_parameter
    ('--batch_size', dict(default=1024, type=int)),
    ('--learning_rate', dict(default=0.001, type=float)),
    ('--num_epoch', dict(default=200, type=int)),
    ('--hop', dict(default=2, type=int)),  # 3
    ('--hidden', dict(default=64, type=int)),
    ('--dropout', dict(default=0.5, type=float)),
    ('--neg', dict(default=1, type=int)),
    # ('--split', dict(default=0.8, type=float)),
    ('--std', dict(default=0.1, type=float)),
    ('--decay', dict(default=1e-4, type=float)),

    # IDGL hyper_parameter
    ('--graph_learn_hidden_size', dict(default=70, type=int)),
    ('--graph_learn_top_k_S', dict(default=30, type=int)),
    ('--graph_learn_epsilon', dict(default=0, type=float)),
    ('--graph_skip_conn', dict(default=0.8, type=float)),
    ('--graph_learn_num_pers', dict(default=4, type=int)),
    ('--metric_type', dict(default='weighted_cosine', type=str)),

    # ssl hyper_parameter
    ('--ssl_temp', dict(default=0.2, type=float)),
    ('--ssl_reg', dict(default=1e-6, type=float)),  # 0.1/0.2
    ('--ssl_ratio', dict(default=0.1, type=float)),
    ('--ssl_aug_type', dict(default='ed', type=str)),

    # recon hyper_parameter
    ('--recon_reg', dict(default=0.2, type=float)),
    ('--recon_drop', dict(default=0.8, type=float)),

    # kl hyper_parameter
    ('--kl_reg', dict(default=1, type=float)),

    # test
    ('--mtd', dict(default='UI', type=str)),
    ('--is_shadow', dict(type=boolean_string, default=False)),
    ('--seed', dict(default=42, type=int)),
]

for _flag, _options in _ARGUMENT_SPECS:
    parser.add_argument(_flag, **_options)

# parse_known_args returns (namespace, leftover argv); only the namespace is
# kept, so unrecognised command-line entries are ignored instead of being an
# error.
args = parser.parse_known_args()[0]

# Resolve the path of the raw dataset pickle for the selected --dataset and
# store it on args.data_dir.
pref = '../raw dataset/'

# train target rec model
# Known dataset name -> pickle file name inside '<pref><dataset>/'.
_DATASET_FILES = {
    'ciao': 'ciao20230314.pkl',
    'flickr': 'flickr20241204.pkl',
    'yelp': 'yelp_small.pkl',
}

if args.is_shadow:
    # No data file is configured for the shadow setting in this notebook.
    # Fail with an explicit message instead of a NameError on `data_name`.
    raise NotImplementedError(
        'No raw dataset file is configured for --is_shadow True'
    )
if args.dataset not in _DATASET_FILES:
    raise ValueError(
        'Unknown dataset {!r}; expected one of {}'.format(
            args.dataset, sorted(_DATASET_FILES)
        )
    )

data_name = _DATASET_FILES[args.dataset]
args.data_dir = pref + args.dataset + '/' + data_name


In [35]:
import pickle

def calculate_jaccard_similarity(user_items, user1, user2, topk=30):
    """Jaccard overlap between the first ``topk`` items of two users.

    Args:
        user_items: Mapping (or other indexable) from a user key to a
            sliceable sequence of hashable items.
        user1: Key of the first user in ``user_items``.
        user2: Key of the second user in ``user_items``.
        topk: Number of leading items of each sequence to compare.

    Returns:
        ``|A & B| / |A | B|`` for the two truncated item sets, or 0 when
        both sets are empty.
    """
    first_items = set(user_items[user1][:topk])
    second_items = set(user_items[user2][:topk])
    combined = first_items | second_items
    if not combined:
        # Both users have no items: define the similarity as 0.
        return 0
    shared = first_items & second_items
    return len(shared) / len(combined)


def calculate_cosine_similarity(user_items, user1, user2, topk=30):
    """Set-based cosine similarity between the first ``topk`` items of two users.

    Treating each truncated item list as a binary vector, the cosine is
    ``|A & B| / sqrt(|A| * |B|)``. The previous implementation divided by
    ``|A| * |B|`` without the square root, so identical sets scored
    ``1 / |A|`` instead of 1 and every score was squeezed towards 0.

    Args:
        user_items: Mapping (or other indexable) from a user key to a
            sliceable sequence of hashable items.
        user1: Key of the first user in ``user_items``.
        user2: Key of the second user in ``user_items``.
        topk: Number of leading items of each sequence to compare.

    Returns:
        A float in [0, 1]; 0.0 when either user has no items.
    """
    set1 = set(user_items[user1][:topk])
    set2 = set(user_items[user2][:topk])
    # Product of the two vector norms: sqrt(|A|) * sqrt(|B|).
    norm_product = (len(set1) * len(set2)) ** 0.5
    if norm_product == 0:
        # At least one user has no items; cosine is undefined, report 0.
        return 0.0
    return len(set1 & set2) / norm_product


from torch.utils.data import Dataset, DataLoader

# Select the evaluation inputs: a CSV of labelled user pairs and a pickled
# per-user object that the similarity functions index by user id.
args.dataset = 'ciao'
# args.dataset = 'flickr'
# vars(args) is the namespace's own attribute dict, so keys written to
# `config` are also visible as attributes on `args`.
config = vars(args)
test_file = './social_mia/DESIGN-ciao-final.pth/mia_test_shadow_0.1_ciao_DESIGN-ciao-final.pth-top30-pp.pkl.csv'
# test_file = './social_mia/DESIGN-flickr-final.pth/mia_test_shadow_0.1_flickr_DESIGN-flickr-final.pth-top30-pp.pkl.csv'
config['test_path'] = test_file

# NOTE(review): the pair CSV above lives under 'DESIGN-ciao-final.pth' while
# this pickle lives under 'DiffNet-ciao-final.pth' -- confirm that mixing the
# two models is intended.
data_dir = './social_mia/DiffNet-ciao-final.pth/ciao_DiffNet-ciao-final.pth-top30-pp.pkl'
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle (it was previously left open).
with open(data_dir, 'rb') as data_file:
    ui_rec_dict = pickle.load(data_file)
test_raw_data = pd.read_csv(config['test_path'])
print(test_raw_data.head())

   user1  user2  y
0   3904   2158  1
1   2683   3377  1
2    203    282  1
3    151    875  1
4   7101   5234  0


In [36]:
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

# Score every labelled user pair with Jaccard similarity and report the AUC
# of those scores against the labels in column `y`.
test_yhat, test_y = [], []
print(len(test_raw_data))
# `total` lets tqdm render an actual progress bar / ETA; itertuples() is a
# generator with no length, so without it only a running count is shown.
for row in tqdm(test_raw_data.itertuples(), total=len(test_raw_data)):
    test_yhat.append(calculate_jaccard_similarity(ui_rec_dict, row.user1, row.user2))
    test_y.append(row.y)

record = roc_auc_score(test_y, test_yhat)

print('jaccard auc', record)

113606


113606it [00:00, 162258.30it/s]

jaccard auc 0.6172832783063888





In [37]:
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

# Score every labelled user pair with cosine similarity and report the AUC
# of those scores against the labels in column `y`.
test_yhat, test_y = [], []
print(len(test_raw_data))
# `total` lets tqdm render an actual progress bar / ETA; itertuples() is a
# generator with no length, so without it only a running count is shown.
for row in tqdm(test_raw_data.itertuples(), total=len(test_raw_data)):
    test_yhat.append(calculate_cosine_similarity(ui_rec_dict, row.user1, row.user2))
    test_y.append(row.y)

record = roc_auc_score(test_y, test_yhat)

print('cosine auc', record)

113606


113606it [00:00, 230662.56it/s]

cosine auc 0.6172832783063888





In [None]:
# Threshold sweep over the scores currently held in `test_yhat`.
# NOTE: `test_y` / `test_yhat` come from whichever scoring cell was executed
# last, so the numbers printed here depend on notebook execution order.
record = roc_auc_score(test_y, test_yhat)

print('auc', record)

# Decision thresholds to try, from strict to very permissive.
thres = [0.5, 0.4, 0.3, 0.2, 0.1, 0.01, 0.001, 0.0001, 0.00001]
for t in thres:
    # A pair is predicted positive only when its score strictly exceeds t.
    test_yhat_pred = [1 if x > t else 0 for x in test_yhat]
    recall = recall_score(test_y, test_yhat_pred)
    precision = precision_score(test_y, test_yhat_pred)
    f1 = f1_score(test_y, test_yhat_pred)
    # Include the threshold so each output line can be attributed to it.
    print(f'threshold: {t}, recall: {recall}, precision: {precision}, f1: {f1}')

In [27]:
# Quick check of fixed-point formatting: the same value shown with two and
# with four decimal places.
res = 0.5555
template = '{:.2f}, {:.4f}'
print(template.format(res, res))

0.56, 0.5555
