In [1]:
#Pytorch
import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
#Python Libs
import numpy as np
from time import time
import datetime
#Implementations
from model.agree_re import VARGR_NCF, NCF
from config_CA import Config
from dataset_Meetup import GDataset
from batch_test import *

In [2]:
# Number of initial epochs trained without the KL term (beta = 0).
WU_point = 20
def training(model, train_loader, epoch_id, config):
    """Train `model` for one epoch and print the mean batch loss.

    Args:
        model: module called as ``model(groups, items, is_training=True)`` and
            returning a ``(prediction, dkl)`` pair.
        train_loader: iterable of ``(g, pi_ni)`` batches; column 0 of ``pi_ni``
            is used as the positive item and column 1 as the negative item.
        epoch_id: epoch index; the KL weight is 0.0 while ``epoch_id < WU_point``
            and 1.0 afterwards.
        config: object whose ``lr`` is indexable; only ``lr[0]`` is used.

    Notes:
        A fresh Adam optimizer is created on every call, so optimizer state
        does not carry over from one call to the next.
    """
    t1 = time()
    model.train()
    lr = config.lr[0]
    # KL warm-up: the KL term only contributes once the warm-up epochs are over.
    beta = 0.0 if epoch_id < WU_point else 1.0
    optimizer = optim.Adam(model.parameters(), lr=lr)

    eps = 1e-7  # keeps both log() arguments away from zero
    losses = []

    # The loader is consumed exactly once per call; materialising it up front
    # just to look at the last batch would cost a full extra pass over the data.
    for g, pi_ni in train_loader:
        pos_item_input = pi_ni[:, 0]
        neg_item_input = pi_ni[:, 1]

        model.zero_grad()

        pos_prediction, dkl = model(g, pos_item_input, is_training=True)
        neg_prediction, _ = model(g, neg_item_input, is_training=True)

        # Binary cross-entropy on (positive, negative) pairs plus the weighted KL term.
        loss = torch.mean(-torch.log(pos_prediction + eps) - torch.log(1.0 - neg_prediction + eps)) + beta * dkl

        if not torch.isfinite(loss):
            # Back-propagating an inf/NaN loss would poison every parameter,
            # so the batch is skipped and not recorded.
            continue

        losses.append(float(loss.item()))
        loss.backward()
        optimizer.step()

    # Avoid numpy's "mean of empty slice" warning when no batch was usable.
    mean_loss = np.mean(np.array(losses)) if losses else float('nan')
    print('Iteration %d,\tloss: [%.4f], time: [%.1fs]' % (epoch_id, mean_loss, time() - t1))


In [3]:
def evaluation(model, groups_to_test, Ks, trainRatings, num_items):
    """Evaluate `model` with `test()` and print one metrics row per cutoff.

    The model is switched to eval mode, the arguments are forwarded unchanged
    to `test` (see batch_test), and HR / NDCG / Recall are printed for every
    entry of `Ks`. The dictionary returned by `test` is returned as-is.
    """
    model.eval()
    started = time()
    metrics = test(model, groups_to_test, Ks, trainRatings, num_items)  # see batch_test

    print('\t Evaluation done [%.1f s]' % (time() - started))
    row_format = '\t\t @%d: HR = %.4f, NDCG = %.4f, Rec = %.4f'
    for pos in range(len(Ks)):
        row = (Ks[pos], metrics['hit_ratio'][pos], metrics['ndcg'][pos], metrics['recall'][pos])
        print(row_format % row)
    return metrics

In [4]:
# Experiment configuration object; later cells read its dataset paths, lr list,
# epoch count, batch size, embedding size, drop ratio, topK and num_negatives.
config = Config()

meetup.ca 80 256


In [5]:
# Build the dataset wrapper from the configured user/group rating files and the
# user-in-group file.
# NOTE(review): the trailing positional 1 is presumably the initial number of
# negatives per positive (it is overwritten via dataset.num_negatives before
# training) — confirm against GDataset.
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
print("num_groups: "+str(num_groups)+" num_users: "+str(num_users)+" num_items: "+str(num_items))
# Group -> members mapping handed to the model constructor in later cells.
gu_dict = dataset.gu_dict

num_groups: 607 num_users: 56878 num_items: 1490


In [6]:
# Load the warm-start and cold-start test splits; they live next to the group
# dataset with the suffixes ".test.rating_warm" and ".test.rating_cold".
test_warm = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_warm")
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
# Optional sanity evaluation of an already-constructed model (disabled):
#ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
#ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)

In [7]:
# Train VARGR_NCF and checkpoint it after every evaluated epoch, remembering the
# checkpoint with the best warm-start hit ratio at the second cutoff of config.topK.
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
best_checkpoint = -1.0      # best warm-start HR seen so far
best_weights_path = None    # path of the checkpoint that achieved it
for num_negatives in config.num_negatives:
    dataset.num_negatives = num_negatives
    print("AGREE: embedding size %d, run Iteration: %d, #neg: %d" %(config.embedding_size, config.epoch, num_negatives))
    # train the model
    now = datetime.datetime.now().strftime("%Y-%m-%d")

    for epoch in range(config.epoch):
        training(agree, dataset.get_group_dataloader(config.batch_size), epoch, config)

        # Evaluation only starts once the KL warm-up epochs are over.
        if epoch > WU_point:
            ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
            cur_checkpoint = ret['hit_ratio'][1]
            ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
            current_weights_path = 'weights/VarGrNCF_'+str(config.dataset)+"_"+str(config.embedding_size)+"_"+str(config.lr[0])+'_'+str(num_negatives)+'_'+str(epoch)
            torch.save(agree.state_dict(), current_weights_path)
            if best_checkpoint <= cur_checkpoint:
                # Record the new best score as well as its path; without this
                # update every later epoch would compare against -1.0 and the
                # "best" path would simply be the most recent checkpoint.
                best_checkpoint = cur_checkpoint
                best_weights_path = current_weights_path


AGREE: embedding size 32, run Iteration: 80, #neg: 4
Iteration 0,	loss: [1.1518], time: [9.4s]
Iteration 1,	loss: [0.8803], time: [8.3s]
Iteration 2,	loss: [0.6174], time: [8.3s]
Iteration 3,	loss: [0.3784], time: [8.3s]
Iteration 4,	loss: [0.2545], time: [8.2s]
Iteration 5,	loss: [0.1856], time: [8.3s]
Iteration 6,	loss: [0.1390], time: [8.7s]
Iteration 7,	loss: [0.1099], time: [8.3s]
Iteration 8,	loss: [0.0888], time: [8.2s]
Iteration 9,	loss: [0.0738], time: [8.3s]
Iteration 10,	loss: [0.0647], time: [8.5s]
Iteration 11,	loss: [0.0631], time: [8.3s]
Iteration 12,	loss: [0.0507], time: [8.2s]
Iteration 13,	loss: [0.0386], time: [8.1s]
Iteration 14,	loss: [0.0380], time: [8.1s]
Iteration 15,	loss: [0.0416], time: [8.2s]
Iteration 16,	loss: [0.0407], time: [8.2s]
Iteration 17,	loss: [0.0381], time: [8.1s]
Iteration 18,	loss: [0.0394], time: [8.2s]
Iteration 19,	loss: [0.0410], time: [8.1s]
Iteration 20,	loss: [73.1183], time: [8.1s]
Iteration 21,	loss: [40.4427], time: [8.1s]
	 Evaluat

KeyboardInterrupt: 

In [None]:
# Rebuild the model and restore the checkpoint selected by the training cell.
# NOTE(review): best_weights_path is still None if training was interrupted
# before any checkpoint was written; torch.load would then fail.
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load(best_weights_path))



In [None]:

# Report ranking metrics on every test split in turn; `ret` ends up holding the
# result of the last ("warm") evaluation.
for split_name, split_ratings in (
    ('total', dataset.group_testRatings),
    ('cold', test_cold),
    ('warm', test_warm),
):
    print(split_name)
    ret = evaluation(agree, split_ratings, config.topK[:2], dataset.group_trainRatings, dataset.num_items)

In [None]:
# Ablation: reload a fixed checkpoint and evaluate the warm split ten times.
# NOTE(review): repeating the evaluation only makes sense if the model's
# predictions are stochastic in eval mode — confirm against VARGR_NCF.
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))
for _ in range(10):
    print('warm')
    ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)  

In [None]:
# Restore the fixed checkpoint, then report metrics on every test split;
# `ret` ends up holding the result of the last ("warm") evaluation.
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))
for split_name, split_ratings in (
    ('total', dataset.group_testRatings),
    ('cold', test_cold),
    ('warm', test_warm),
):
    print(split_name)
    ret = evaluation(agree, split_ratings, config.topK[:2], dataset.group_trainRatings, dataset.num_items)

In [None]:
# Run the total / cold / warm evaluation three times in a row; `ret` ends up
# holding the result of the very last ("warm") evaluation.
for _ in range(3):
    for split_name, split_ratings in (
        ('total', dataset.group_testRatings),
        ('cold', test_cold),
        ('warm', test_warm),
    ):
        print(split_name)
        ret = evaluation(agree, split_ratings, config.topK[:2], dataset.group_trainRatings, dataset.num_items)

In [None]:
def utest_one_group(x, model, Ks, group_trainRatings, group_testRatings, num_items,thresholds, num_repeat, tor):
    """Evaluate one group, keeping only items whose predictive entropy is low enough.

    Args:
        x: id of the group to evaluate.
        model: callable ``model(group_ids, item_ids)`` returning a score tensor.
        Ks: cutoffs forwarded to ``ranklist_by_sorted`` / ``get_performance``.
        group_trainRatings: mapping group id -> training items; those items are
            excluded from the candidate set.
        group_testRatings: mapping group id -> held-out positive items.
        num_items: candidates are drawn from ``range(num_items)``.
        thresholds: entropy cutoffs; one result dict is produced per cutoff.
        num_repeat: number of forward passes averaged into the mean score.
        tor: mean score at or above which an item counts as predicted positive.

    Returns:
        A list (one entry per threshold) of the dicts returned by
        ``get_performance`` with an extra ``'acc'`` entry.
    """
    g = x  # a single group id

    try:
        training_items = group_trainRatings[g]
    except Exception:
        # Best effort: a group without training interactions is scored on all items.
        training_items = []

    group_pos_test = group_testRatings[g]
    positive_items = set(group_pos_test)

    all_items = set(range(num_items))
    test_items = list(all_items-set(training_items))
    one_group_batch = np.full(len(test_items),g)
    group_var = torch.LongTensor(one_group_batch)
    item_var = torch.LongTensor(test_items)

    # Average the scores (and second moments) over repeated forward passes.
    rating_mean = 0
    rating_variance = 0
    for _ in range(num_repeat):
        rating = model(group_var, item_var).detach().cpu()
        rating_mean += rating
        rating_variance += torch.square(rating)
    rating_mean = rating_mean / num_repeat
    # Only needed by the alternative uncertainty scores listed below.
    rating_variance = rating_variance/num_repeat - torch.square(rating_mean)

    # Binary entropy of the mean score is the uncertainty used for filtering.
    EPS = 1e-8
    entropy = - rating_mean * torch.log(rating_mean+EPS)- (1.0-rating_mean)*torch.log(1.0-rating_mean+EPS)
    # Alternative uncertainty scores that were tried:
    #entropy = 4*rating_mean*(1.0-rating_mean)
    #entropy = (rating_variance-rating_variance.min())/((rating_variance.max()-rating_variance.min()+1e-8))

    results = []
    for threshold in thresholds:
        indices = torch.where(entropy <=threshold)[0]

        utest_items = [test_items[i] for i in indices.tolist()]
        urating = torch.index_select(rating_mean, 0, indices)

        # torch.where yields positions inside the kept subset; translate them to
        # item ids before comparing with the ground-truth item ids.
        pos_urating = torch.where(urating >=tor)[0].tolist()
        predicted_pos_items = {utest_items[i] for i in pos_urating}

        urating = urating.cpu().numpy()
        n_kept = len(urating)
        pt = len(predicted_pos_items & positive_items)                 # true positives
        # Only positives that survived the entropy filter can be false negatives.
        false_neg = len(positive_items.intersection(utest_items)) - pt
        nt = n_kept - len(pos_urating) - false_neg                     # true negatives
        # No item may pass a strict threshold; report 0 instead of dividing by zero.
        acc = (pt+nt)/n_kept if n_kept > 0 else 0

        r, auc = ranklist_by_sorted(group_pos_test, utest_items, urating, Ks)
        ret = get_performance(group_pos_test, r, auc, Ks)
        ret['acc'] = acc
        results.append(ret)
    return results

def utest(model, groups_to_test, Ks, group_trainRatings, num_items, batch_test_flag = False, thresholds=[1.0,], num_repeat=1, tor=0.9):
    """Average the per-group results of `utest_one_group` over all test groups.

    Returns a dict keyed by threshold; each value holds the mean 'precision',
    'recall', 'ndcg' and 'hit_ratio' arrays (one slot per entry of `Ks`) and
    the mean 'auc' and 'acc' scalars. `batch_test_flag` is accepted but unused.
    """
    n_cutoffs = len(Ks)
    result = {}
    for threshold in thresholds:
        result[threshold] = {
            'precision': np.zeros(n_cutoffs),
            'recall': np.zeros(n_cutoffs),
            'ndcg': np.zeros(n_cutoffs),
            'hit_ratio': np.zeros(n_cutoffs),
            'auc': 0.,
            'acc': 0.,
        }

    metric_names = ('precision', 'recall', 'ndcg', 'hit_ratio', 'auc', 'acc')
    test_groups = groups_to_test
    n_test_groups = len(test_groups)
    for g_id in test_groups:
        per_threshold = utest_one_group(g_id, model, Ks, group_trainRatings, test_groups, num_items,
                                        thresholds=thresholds, num_repeat=num_repeat, tor=tor)
        # Each group contributes 1/n of its metrics to the running means.
        for slot, threshold in enumerate(thresholds):
            totals = result[threshold]
            for name in metric_names:
                totals[name] += per_threshold[slot][name]/n_test_groups

    return result

def uevaluation(model, groups_to_test, Ks, trainRatings, num_items, thresholds=[1.0,], num_repeat=1, tor = 0.9):
    """Run the uncertainty-filtered evaluation (`utest`) and print its metrics.

    The model is put in eval mode, all arguments are forwarded to `utest`, and
    for every threshold one row per cutoff in `Ks` is printed with HR, NDCG,
    Recall, Precision and the accuracy ('acc'). The dict returned by `utest`
    is returned unchanged.
    """
    model.eval()
    t2 = time()

    ret = utest(model, groups_to_test, Ks, trainRatings, num_items,thresholds=thresholds, num_repeat=num_repeat, tor=tor) #See batch_test for test()

    print('\t Evaluation done [%.1f s]' % (time() - t2))
    for threshold in thresholds:
        print('Threshold:', threshold)
        for i, k in enumerate(Ks):
            # The last column is the 'acc' entry, so it is labelled ACC (not AUC).
            print('\t\t @%d: HR = %.4f, NDCG = %.4f, Rec = %.4f, Precision = %.4f, ACC = %.4f' % (k, ret[threshold]['hit_ratio'][i], ret[threshold]['ndcg'][i], ret[threshold]['recall'][i], ret[threshold]['precision'][i], ret[threshold]['acc']))
    return ret

In [None]:
# Uncertainty-filtered evaluation of a fixed checkpoint on the cold and warm
# splits: entropy thresholds 0.1 ... 1.0, five forward passes per group.
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))
for tor in [0.5,]:  # score cut-off for counting an item as predicted positive
    print(tor)
    # Entropy thresholds passed to uevaluation.
    tttt = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    #print('total')
    #ret_total = uevaluation(agree, dataset.group_testRatings, config.topK[:2], dataset.group_trainRatings, dataset.num_items, thresholds=tttt, num_repeat=5, tor=tor)
    print('cold')
    ret_cold = uevaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items, thresholds=tttt, num_repeat=5, tor=tor)
    print('warm')
    ret_warm = uevaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items, thresholds=tttt, num_repeat=5, tor=tor)  

In [None]:
# Compare the latent distribution of cold-start and warm-start test groups:
# for each split, print the number of groups and the mean L2 norm of the
# per-group mu and sigma returned by get_z_dist().
# eval mode must be active *before* the forward pass that populates the
# latent distribution, otherwise that pass runs in whatever mode was left over.
agree.eval()
for split_ratings in (test_cold, test_warm):
    test_g = torch.LongTensor(list(split_ratings.keys()))
    print(test_g.shape)
    # Item ids are all zero; the forward pass is only run so that
    # get_z_dist() can be queried for these groups afterwards.
    test_v = torch.zeros(test_g.shape).long()
    _ = agree(test_g, test_v)
    mu, sigma = agree.get_z_dist()
    mu = torch.norm(mu, dim=1)
    sigma = torch.norm(sigma, dim=1)
    print(torch.mean(mu), torch.mean(sigma))

In [None]:
# Reload the dataset, the cold-start split and the fixed checkpoint, then run
# one forward pass over the cold-start groups so get_z_dist() can be queried.
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
# NOTE(review): gu_dict comes from the dataset object built in an earlier cell,
# not from the one created just above — confirm they are interchangeable.
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))

test_g_cold = torch.LongTensor(list(test_cold.keys()))
# "Warm" here means every group that has training ratings.
test_g_warm = torch.LongTensor(list(dataset.group_trainRatings.keys()))
# Item ids are all zero; only the group side matters for the latent distribution.
test_v = torch.zeros(test_g_cold.shape).long()
agree.eval()
_ = agree(test_g_cold, test_v)
mu, sigma = agree.get_z_dist()
# Standard deviation assigned to each learned group embedding when it is
# treated as a Gaussian. NOTE(review): the 32 presumably matches the
# embedding size — confirm.
q_std = np.sqrt(2/32)
def KL(mu, sigma, mup, sigmap):
    """Element-wise KL( N(mu, sigma^2) || N(mup, sigmap^2) ) for diagonal Gaussians.

    All four arguments are tensors that broadcast against each other; the
    result has the broadcast shape (callers sum it over the latent dimension).
    """
    inv_var_p = 1/sigmap**2
    log_ratio = 2*torch.log(sigmap) - 2*torch.log(sigma)
    variance_term = (sigma**2) * inv_var_p
    mean_term = inv_var_p * ((mu-mup)**2)
    return (log_ratio + variance_term + mean_term - 1) * 0.5

# Cold-start groups: for every cold group i, take the smallest KL between any
# training group's embedding (as a Gaussian with std q_std) and the cold
# group's latent distribution (mu[i], sigma[i]).
kl_loss_cold = []
for i, g in enumerate(test_g_cold.detach().numpy()):
    min_kl = 10000000.0  # sentinel upper bound; also caps the recorded value
    for gg in dataset.group_trainRatings:
        sg = agree.groupembeds(torch.LongTensor([gg]))
        kl = torch.sum(KL(sg.squeeze(0), (q_std*torch.ones(sg.shape)).squeeze(0).cuda(), mu[i], sigma[i])).item()
        if kl <min_kl:
            min_kl = kl

    kl_loss_cold.append(min_kl)
print(len(kl_loss_cold))
# Mean and standard error of the per-group minimum KL.
print(np.mean(kl_loss_cold), np.std(kl_loss_cold)/np.sqrt(len(kl_loss_cold)))

# Warm-start groups: recompute the latent distribution for the training groups.
test_v = torch.zeros(test_g_warm.shape).long()
agree.eval()
_ = agree(test_g_warm, test_v)
mu, sigma = agree.get_z_dist()

# For a warm group the KL is taken against its own embedding only, so there is
# no search; the min_kl comparison merely caps the value at the sentinel.
kl_loss_warm = []
for i, g in enumerate(test_g_warm.detach().numpy()):
    min_kl = 10000000.0
    sg = agree.groupembeds(torch.LongTensor([g]))
    kl = torch.sum(KL(sg.squeeze(0), (q_std*torch.ones(sg.shape)).squeeze(0).cuda(), mu[i], sigma[i])).item()
    if kl <min_kl:
        min_kl = kl

    kl_loss_warm.append(min_kl)
print(len(kl_loss_warm))
# Mean and standard error of the per-group KL.
print(np.mean(kl_loss_warm), np.std(kl_loss_warm)/np.sqrt(len(kl_loss_warm)))

In [None]:
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

# Box plot (log-scaled y axis) comparing the KL values of warm-start and
# cold-start groups collected in kl_loss_warm / kl_loss_cold.
font = {       'size'   : 11}
matplotlib.rc('font', **font)
fig, ax = plt.subplots(figsize=(2.5, 4))

# plot violin plot
# (disabled alternative kept as a bare string literal; it has no effect)
'''
axs[0].violinplot(kl_loss_warm,
                  showmeans=False,
                  showmedians=True)
axs[0].set_title('Violin plot')
axs[0].set_yscale('log')
'''
# plot box plot
bp = ax.boxplot([kl_loss_warm, kl_loss_cold], patch_artist=True, labels=['Warm-start', 'Cold-start'], widths=(0.45,0.45))
ax.set_title('VarGr_NCF')
ax.set_yscale('log')
# Colour the two boxes: pink for warm-start, light blue for cold-start.
for patch, color in zip(bp['boxes'], ['pink', 'lightblue']):
    patch.set_facecolor(color)

ax.yaxis.grid(True)
#ax.set_xticks(['Warm-start', 'Cold-start'])
ax.set_ylabel('KL loss', fontsize=14)
# Fixed y range; values outside [0.001, 50] are not shown.
ax.set_ylim([0.001, 50])

# (disabled tick-label snippet kept as a bare string literal; it has no effect)
'''
# add x-tick labels
plt.setp(axs, xticks=[y + 1 for y in range(len(all_data))],
         xticklabels=['x1', 'x2', 'x3', 'x4'])
'''
#plt.savefig('figures/KL_VarNCF_MeetupCA.pdf', bbox_inches="tight")
plt.show()

In [None]:
# Relate member overlap with known groups to latent-space distance.
# Synthetic groups are built from growing prefixes of each test group's member
# list; for every evaluated group the KL between the reference training group's
# embedding and the group's latent distribution is stored in `mse`, and the
# Jaccard similarity to that reference group in `jaccard`.
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))

# NOTE(review): agree.group_member_dict is mutated below; if it is the same
# object as the shared gu_dict, re-running this cell keeps appending groups.
new_group_list = []
for g in dataset.group_testRatings.keys():
    members = dataset.gu_dict[g]
    # Local name on purpose: the dataset-wide `num_users` must stay intact
    # because later cells pass it to the model constructor.
    n_members = len(members)
    step = int(n_members*0.1)
    if step > 1:
        # Prefixes holding roughly 10%, 20%, ... of the members.
        for t in range(step, n_members, step):
            new_group_id = len(agree.group_member_dict)
            new_group_list.append(new_group_id)
            agree.group_member_dict[new_group_id] = members[:t]


test_g = torch.LongTensor(list(dataset.group_testRatings.keys())+new_group_list)
test_v = torch.zeros(test_g.shape).long()
# Switch to eval mode before the forward pass: the model was just constructed
# and would otherwise run this pass in training mode.
agree.eval()
_ = agree(test_g, test_v)
mu, sigma = agree.get_z_dist()
jaccard = {}
mse = {}
mas_var = {}
most_similar_group = {}
for i, g in enumerate(test_g.detach().numpy()):
    if g in dataset.group_trainRatings:
        # Known group: compare against its own embedding.
        max_g, max_jac = g, 1.0
    else:
        # Unknown group: compare against the training group with the highest
        # member overlap (ties go to the group visited last).
        max_g = None
        max_jac = 0.0
        g_users = agree.group_member_dict[g]
        g_user_set = set(g_users)
        for gg in dataset.group_trainRatings:
            gg_user_set = set(agree.group_member_dict[gg])
            jac = len(g_user_set&gg_user_set)/len(g_user_set|gg_user_set)
            if jac >= max_jac:
                max_jac = jac
                max_g = gg
    sg = agree.groupembeds(torch.LongTensor([max_g]))
    mse[g] = torch.sum(KL(sg.squeeze(0), (q_std*torch.ones(sg.shape)).squeeze(0).cuda(), mu[i], sigma[i]))
    #mas_var[g] = torch.mean(sigma[i]**2)
    jaccard[g] = max_jac

In [None]:
# Standard deviation assigned to each learned group embedding when it is
# treated as a Gaussian. NOTE(review): the 32 presumably matches the
# embedding size — confirm.
q_std = np.sqrt(2/32)
def KL(mu, sigma, mup, sigmap):
    """Element-wise KL( N(mu, sigma^2) || N(mup, sigmap^2) ) for diagonal Gaussians.

    All four arguments are tensors that broadcast against each other; the
    result has the broadcast shape (callers sum it over the latent dimension).
    """
    inv_var_p = 1/sigmap**2
    log_ratio = 2*torch.log(sigmap) - 2*torch.log(sigma)
    variance_term = (sigma**2) * inv_var_p
    mean_term = inv_var_p * ((mu-mup)**2)
    return (log_ratio + variance_term + mean_term - 1) * 0.5
# For every evaluated group store (a) the KL between a reference training
# group's embedding (as a Gaussian with std q_std) and the group's latent
# distribution in `mse`, and (b) the Jaccard similarity to that reference
# group in `jaccard`. Relies on test_g, mu, sigma, mse and jaccard created by
# an earlier cell.
for i, g in enumerate(test_g.detach().numpy()):
    if g in dataset.group_trainRatings:
        # Known group: compare against its own embedding.
        sg = agree.groupembeds(torch.LongTensor([g]))
        mse[g] = torch.sum(KL(sg.squeeze(0), (q_std*torch.ones(sg.shape)).squeeze(0).cuda(), mu[i], sigma[i]))
        #mas_var[g] = torch.mean(sigma[i]**2)
        jaccard[g] = 1.0
    else:
        # Unknown group: pick the training group with the highest member
        # overlap (">=" means ties go to the group visited last).
        max_g = None
        max_jac = 0.0
        for gg in dataset.group_trainRatings:
            g_users = agree.group_member_dict[g]
            gg_users = agree.group_member_dict[gg]
            jac = len(set(g_users)&set(gg_users))/len(set(g_users)|set(gg_users))
            if jac >= max_jac:
                max_jac = jac
                max_g = gg
        sg = agree.groupembeds(torch.LongTensor([max_g]))
        mse[g] = torch.sum(KL(sg.squeeze(0), (q_std*torch.ones(sg.shape)).squeeze(0).cuda(), mu[i], sigma[i]))
        #mas_var[g] = torch.mean(sigma[i]**2)
        jaccard[g] = max_jac

In [None]:
# Bucket the per-group KL values by Jaccard similarity rounded to one decimal
# (bucket index = round(jaccard * 10)) and compute, per occupied bucket, the
# mean and the standard error of the mean.
avgs_mse = {}
avgs_var = {}
idices = set()
for g in jaccard:
    idx = round(jaccard[g]*10)
    idices.add(idx)
    if idx not in avgs_mse:
        avgs_mse[idx] = []
        avgs_var[idx] = []
    avgs_mse[idx].append(mse[g].item())
    #avgs_var[idx].append(mas_var[g].item())
print(idices)
# Iterate over the buckets that actually occur, in ascending order. Indexing
# with range(len(avgs_mse)) would raise KeyError as soon as a bucket is missing
# (e.g. buckets {0, 3, 10}).
bucket_ids = sorted(avgs_mse)
final_mse = [np.mean(avgs_mse[b]) for b in bucket_ids]
final_var = [np.std(avgs_mse[b])/np.sqrt(len(avgs_mse[b])) for b in bucket_ids]


In [None]:
# Per-bucket mean KL and its standard error, as computed by the previous cell.
print(final_mse, final_var)

In [None]:
#TSNE
from sklearn.manifold import TSNE
# Reload data and checkpoint, then collect the latent (mu, sigma) of the
# cold-start test groups followed by all training groups as numpy arrays.
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))

# The first cold_size rows of mu / sigma belong to cold-start groups, the rest
# to training groups.
cold_size = len(test_cold)
test_g = torch.LongTensor(list(test_cold.keys())+list(dataset.group_trainRatings.keys()))
# Item ids are all zero; the forward pass only serves to populate get_z_dist().
test_v = torch.zeros(test_g.shape).long()
agree.eval()
_ = agree(test_g, test_v)
mu, sigma = agree.get_z_dist()
mu = mu.detach().cpu().numpy()
sigma = sigma.detach().cpu().numpy()
print(mu.shape, sigma.shape)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cbook as cbook
# Scatter plot of the latent mean against the min-max normalised norm of the
# latent sigma, with cold-start ("new") and training ("existing") groups drawn
# as separate series.
print(cold_size)
print(sigma.shape)
sigma_embedded = np.linalg.norm(sigma, axis=1) #TSNE(n_components=1).fit_transform(sigma)
s_max = max(sigma_embedded)
s_min = min(sigma_embedded)
# Min-max normalise the sigma norms to [0, 1].
sigma_embedded = (sigma_embedded - s_min) / (s_max-s_min)
print(sigma_embedded.shape)
mu_embedded = TSNE(n_components=1).fit_transform(mu)
# Shapes are printed only after both arrays exist; printing them at the top of
# the cell raises NameError on a fresh kernel.
print(mu_embedded.shape, sigma_embedded.shape)
fig, ax = plt.subplots(figsize=(3, 3))
#print(sigma_embedded)
# NOTE(review): the x axis uses the first raw dimension of mu, not the t-SNE
# projection mu_embedded computed above, although the figure is saved as
# "tsne_*.pdf" — confirm which one is intended.
ax.scatter(mu[:cold_size, 0], sigma_embedded[:cold_size], s=10, label='new groups')
ax.scatter(mu[cold_size:, 0], sigma_embedded[cold_size:], s=10, label='existing groups')


#ax.scatter(mu_embedded[cold_size:, 0], mu_embedded[cold_size:, 1], s=sigma_embedded[cold_size:]*500, alpha=0.5)
#ax.scatter(mu_embedded[:cold_size, 0], mu_embedded[:cold_size, 1], s=sigma_embedded[:cold_size]*500, alpha=0.5)
#ax.legend(ncol=2, loc=(0.1, 1.1))
ax.set_xlabel(r'$\mu_z$', fontsize=15)
ax.set_ylabel(r'$\sigma_z$', fontsize=15)
#ax.set_title('Volume and percent change')

ax.grid(True)
fig.tight_layout()
plt.savefig('figures/tsne_cmfncf.pdf', bbox_inches="tight")
#plt.savefig('figures/tsne_legend.pdf', bbox_inches="tight")
plt.show()

In [None]:
def utest_one_group(x, model, Ks, group_trainRatings, group_testRatings, num_items,thresholds, num_repeat, tor):
    """Evaluate one group, keeping only items whose predictive entropy is low enough.

    Args:
        x: id of the group to evaluate.
        model: callable ``model(group_ids, item_ids)`` returning a score tensor.
        Ks: cutoffs forwarded to ``ranklist_by_sorted`` / ``get_performance``.
        group_trainRatings: mapping group id -> training items; those items are
            excluded from the candidate set.
        group_testRatings: mapping group id -> held-out positive items.
        num_items: candidates are drawn from ``range(num_items)``.
        thresholds: entropy cutoffs; one result dict is produced per cutoff.
        num_repeat: number of forward passes averaged into the mean score.
        tor: mean score at or above which an item counts as predicted positive.

    Returns:
        A list (one entry per threshold) of the dicts returned by
        ``get_performance`` with an extra ``'acc'`` entry.
    """
    g = x  # a single group id

    try:
        training_items = group_trainRatings[g]
    except Exception:
        # Best effort: a group without training interactions is scored on all items.
        training_items = []

    group_pos_test = group_testRatings[g]
    positive_items = set(group_pos_test)

    all_items = set(range(num_items))
    test_items = list(all_items-set(training_items))
    one_group_batch = np.full(len(test_items),g)
    group_var = torch.LongTensor(one_group_batch)
    item_var = torch.LongTensor(test_items)

    # Average the scores (and second moments) over repeated forward passes.
    rating_mean = 0
    rating_variance = 0
    for _ in range(num_repeat):
        rating = model(group_var, item_var).detach().cpu()
        rating_mean += rating
        rating_variance += torch.square(rating)
    rating_mean = rating_mean / num_repeat
    # Only needed by the alternative uncertainty scores listed below.
    rating_variance = rating_variance/num_repeat - torch.square(rating_mean)

    # Binary entropy of the mean score is the uncertainty used for filtering.
    EPS = 1e-8
    entropy = - rating_mean * torch.log(rating_mean+EPS)- (1.0-rating_mean)*torch.log(1.0-rating_mean+EPS)
    # Alternative uncertainty scores that were tried:
    #max_var = torch.max(rating_variance)+EPS
    #min_var = torch.min(rating_variance)+EPS
    #entropy = (rating_variance-min_var)/(max_var-min_var)
    #entropy = 4*rating_mean*(1.0-rating_mean)
    #entropy = (rating_variance-rating_variance.min())/((rating_variance.max()-rating_variance.min()+1e-8))

    results = []
    for threshold in thresholds:
        indices = torch.where(entropy <=threshold)[0]

        utest_items = [test_items[i] for i in indices.tolist()]
        urating = torch.index_select(rating_mean, 0, indices)

        # torch.where yields positions inside the kept subset; translate them to
        # item ids before comparing with the ground-truth item ids.
        pos_urating = torch.where(urating >=tor)[0].tolist()
        predicted_pos_items = {utest_items[i] for i in pos_urating}

        urating = urating.cpu().numpy()
        n_kept = len(urating)
        pt = len(predicted_pos_items & positive_items)                 # true positives
        # Only positives that survived the entropy filter can be false negatives.
        false_neg = len(positive_items.intersection(utest_items)) - pt
        nt = n_kept - len(pos_urating) - false_neg                     # true negatives
        # No item may pass a strict threshold; report 0 instead of dividing by zero.
        acc = 0
        if n_kept > 0:
            acc = (pt+nt)/n_kept

        r, auc = ranklist_by_sorted(group_pos_test, utest_items, urating, Ks)

        ret = get_performance(group_pos_test, r, auc, Ks)

        ret['acc'] = acc
        results.append(ret)
    return results

def utest(model, groups_to_test, Ks, group_trainRatings, num_items, batch_test_flag = False, thresholds=[1.0,], num_repeat=1, tor=0.9):
    """Average the per-group results of `utest_one_group` over all test groups.

    Args:
        model, Ks, group_trainRatings, num_items, thresholds, num_repeat, tor:
            forwarded to `utest_one_group`.
        groups_to_test: mapping of group id -> test items; iterated for the
            group ids and also passed on as the test ratings.
        batch_test_flag: accepted for compatibility; not used.

    Returns:
        Dict keyed by threshold. Each value holds the mean 'precision',
        'recall', 'ndcg' and 'hit_ratio' arrays (one slot per entry of `Ks`)
        and the mean 'auc' and 'acc' scalars.
    """
    result = {}
    for threshold in thresholds:
        result[threshold] = {'precision': np.zeros(len(Ks)), 'recall': np.zeros(len(Ks)), 'ndcg': np.zeros(len(Ks)),
                              'hit_ratio': np.zeros(len(Ks)), 'auc': 0., 'acc': 0.}

    test_groups = groups_to_test
    n_test_groups = len(test_groups)
    for g_id in test_groups:
        re = utest_one_group(g_id, model, Ks, group_trainRatings, test_groups, num_items,thresholds=thresholds, num_repeat=num_repeat, tor=tor)
        # Each group contributes 1/n of its metrics to the running means.
        # (No per-group printing here: it would emit one line per group and
        # threshold and drown the summary printed by the caller.)
        for i, threshold in enumerate(thresholds):
            result[threshold]['precision'] += re[i]['precision']/n_test_groups
            result[threshold]['recall'] += re[i]['recall']/n_test_groups
            result[threshold]['ndcg'] += re[i]['ndcg']/n_test_groups
            result[threshold]['hit_ratio'] += re[i]['hit_ratio']/n_test_groups
            result[threshold]['auc'] += re[i]['auc']/n_test_groups
            result[threshold]['acc'] += re[i]['acc']/n_test_groups

    return result

def uevaluation(model, groups_to_test, Ks, trainRatings, num_items, thresholds=[1.0,], num_repeat=1, tor = 0.9):
    """Run the uncertainty-filtered evaluation (`utest`) and print its metrics.

    Puts the model in eval mode, forwards every argument to `utest`, prints
    one block per threshold with one row per cutoff in `Ks` (HR, NDCG, Recall,
    Precision, ACC) and returns the dict produced by `utest` unchanged.
    """
    model.eval()
    started = time()

    ret = utest(
        model, groups_to_test, Ks, trainRatings, num_items,
        thresholds=thresholds, num_repeat=num_repeat, tor=tor,
    )  # see batch_test for test()

    print('\t Evaluation done [%.1f s]' % (time() - started))
    row_format = '\t\t @%d: HR = %.4f, NDCG = %.4f, Rec = %.4f, Precision = %.4f, ACC = %.4f'
    for threshold in thresholds:
        print('Threshold:', threshold)
        for pos, cutoff in enumerate(Ks):
            scores = ret[threshold]
            row = (
                cutoff,
                scores['hit_ratio'][pos],
                scores['ndcg'][pos],
                scores['recall'][pos],
                scores['precision'][pos],
                scores['acc'],
            )
            print(row_format % row)
    return ret

In [None]:
# Uncertainty-filtered evaluation of a fixed checkpoint on the cold and warm
# splits at cutoffs 5/10/20/50: entropy thresholds 0.1 ... 1.0, five forward
# passes per group.
agree = VARGR_NCF(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
agree.load_state_dict(torch.load('weights/VarGrNCF_meetup.ca_32_0.01_4_57'))
ret_cold = None
ret_warm = None
for tor in [0.5,]:  # score cut-off for counting an item as predicted positive
    print(tor)
    # Entropy thresholds passed to uevaluation.
    tttt = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    #print('total')
    #ret_total = uevaluation(agree, dataset.group_testRatings, config.topK[:2], dataset.group_trainRatings, dataset.num_items, thresholds=tttt, num_repeat=5, tor=tor)
    print('cold')
    ret_cold = uevaluation(agree, test_cold, [5, 10, 20, 50], dataset.group_trainRatings, dataset.num_items, thresholds=tttt, num_repeat=5, tor=tor)
    print('warm')
    ret_warm = uevaluation(agree, test_warm, [5, 10, 20, 50], dataset.group_trainRatings, dataset.num_items, thresholds=tttt, num_repeat=5, tor=tor)  

In [None]:
from sklearn.metrics import roc_auc_score
# Scratch check of roc_auc_score when y_true contains a single class.
# NOTE(review): ROC AUC is undefined with only one class present, so this call
# is expected to raise or warn depending on the scikit-learn version — confirm,
# and consider removing this cell from the final notebook.
roc_auc_score(y_true=[1,1,1], y_score=[0.1, 0.2, 0.3])