In [1]:
from model.agree_ori import AGREE
import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from time import time
from config_CA import Config
from utils.util import Helper
from dataset_Meetup import GDataset
import datetime
from batch_test import *

In [2]:
def training(model, train_loader, epoch_id, config):
    # user trainning
    learning_rates = config.lr
    lr = learning_rates[0]

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr)
    losses = []

    *_, last = train_loader

    for batch_id, (g, pi_ni) in enumerate(train_loader):
        # Data Load
        group_input = g
        pos_item_input = pi_ni[:, 0]
        neg_item_input = pi_ni[:, 1]

        pos_prediction = model(group_input, pos_item_input)
        neg_prediction = model(group_input, neg_item_input)

        # Zero_grad
        model.zero_grad()
        # Loss
        #loss = torch.mean((pos_prediction - neg_prediction -1) **2)
        eps = 1e-7
        loss = torch.mean(-torch.log(pos_prediction+eps)-torch.log(1.0-neg_prediction+eps))
        # record loss history
        #print("batch_id: " + str(batch_id) + " loss: " + str(loss.item()))
        if not torch.isinf(loss.data) and not torch.isnan(loss.data):
            losses.append(float(loss.item()))
        # Backward
        loss.backward()
        optimizer.step()
    print('Iteration %d, loss is [%.4f ]' % (epoch_id, np.mean(np.array(losses))))


In [3]:
def evaluation(model, groups_to_test, Ks, trainRatings, num_items):
    model.eval()
    ret = test(model, groups_to_test, Ks, trainRatings, num_items)
    return ret

In [4]:
config = Config()

meetup.ca 80 256


In [5]:
helper = Helper()

In [6]:
print(config.epoch)

80


In [7]:
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
print("num_groups: "+str(num_groups)+" num_users: "+str(num_users)+" num_items: "+str(num_items))

agree = AGREE(num_users, num_items, num_groups, config.embedding_size, dataset.gu_dict, config.drop_ratio).cuda()

for num_negatives in config.num_negatives:
    dataset.num_negatives = num_negatives
    print("AGREE: embedding size %d, run Iteration: %d, #neg: %d, NDCG@, HR@%d" %(config.embedding_size, config.epoch, num_negatives, config.topK[1]))
    # train the model
    now = datetime.datetime.now().strftime("%Y-%m-%d")

    for epoch in range(30+0*config.epoch):
        agree.train()
        t1 = time()
        training(agree, dataset.get_group_dataloader(config.batch_size), epoch, config)
        t2 = time()
        print("one epoch done: [%.1f s]" % (t2 - t1))
        # Evaluation

        ret = evaluation(agree, dataset.group_testRatings, config.topK, dataset.group_trainRatings, dataset.num_items)
        hr1 = ret['hit_ratio'][0]
        ndcg1 = ret['ndcg'][0]
        rec1 = ret['recall'][0]
        hr2 = ret['hit_ratio'][1]
        ndcg2 = ret['ndcg'][1]
        rec2 = ret['recall'][1]
        hr3 = ret['hit_ratio'][2]
        ndcg3 = ret['ndcg'][2]
        rec3 = ret['recall'][2]
        hr4 = ret['hit_ratio'][3]
        ndcg4 = ret['ndcg'][3]
        rec4 = ret['recall'][3]
        hr5 = ret['hit_ratio'][4]
        ndcg5 = ret['ndcg'][4]
        rec5 = ret['recall'][4]
        pre1 = ret['precision'][0]
        pre2 = ret['precision'][1]
        pre3 = ret['precision'][2]
        pre4 = ret['precision'][3]
        pre5 = ret['precision'][4]

        print('Group Iteration %d [%.1f s]: HR = %.4f, NDCG = %.4f, Rec = %.4f' % (
        epoch, time() - t2, hr2, ndcg2, rec2))
        '''
        f.write(str(hr1) + "," + str(hr2) + "," + str(hr3) + "," + str(hr4) + "," + str(hr5) + "," + \
                str(ndcg1) + "," + str(ndcg2) + "," + str(ndcg3) + "," + str(ndcg4) + "," + str(ndcg5) + "," + \
                str(rec1) + "," + str(rec2) + "," + str(rec3) + "," + str(rec4) + "," + str(rec5) + "," + \
                str(pre1) + "," + str(pre2) + "," + str(pre3) + "," + str(pre4) + "," + str(pre5) + "\n")
        '''
    #f.close()
    #f1.close()


num_groups: 607 num_users: 56878 num_items: 1490
AGREE: embedding size 32, run Iteration: 80, #neg: 4, NDCG@, HR@10
Iteration 0, loss is [1.1481 ]
one epoch done: [156.4 s]
Group Iteration 0 [533.5 s]: HR = 0.2126, NDCG = 0.0893, Rec = 0.1063
Iteration 1, loss is [0.9333 ]
one epoch done: [150.3 s]
Group Iteration 1 [524.9 s]: HR = 0.2184, NDCG = 0.1410, Rec = 0.1240
Iteration 2, loss is [0.7759 ]
one epoch done: [148.8 s]
Group Iteration 2 [499.6 s]: HR = 0.2126, NDCG = 0.1029, Rec = 0.1169
Iteration 3, loss is [0.6328 ]
one epoch done: [145.7 s]
Group Iteration 3 [523.2 s]: HR = 0.2759, NDCG = 0.1575, Rec = 0.1609
Iteration 4, loss is [0.5183 ]
one epoch done: [156.2 s]
Group Iteration 4 [507.3 s]: HR = 0.2816, NDCG = 0.1415, Rec = 0.1758
Iteration 5, loss is [0.4211 ]
one epoch done: [155.7 s]
Group Iteration 5 [513.0 s]: HR = 0.3218, NDCG = 0.1596, Rec = 0.2155
Iteration 6, loss is [0.3492 ]
one epoch done: [151.3 s]
Group Iteration 6 [517.4 s]: HR = 0.3506, NDCG = 0.1671, Rec = 0.

In [8]:
test_warm = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_warm")
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
cold_groups = []
for g in test_cold.keys():
    cold_groups.append(g)
cold_group_indicies = torch.LongTensor(cold_groups)
g_shape = agree.groupembeds.groupEmbedding.weight[cold_group_indicies].shape

In [10]:
def utest_one_group(x, model, Ks, group_trainRatings, group_testRatings, num_items, threshold, num_repeat):
    #rating = x[0]
    g = x #group 1개 들어옴

    try:
        training_items = group_trainRatings[g]
    except Exception:
        training_items = []

    group_pos_test = group_testRatings[g]

    all_items = set(range(num_items))

    test_items = list(all_items-set(training_items))
    one_group_batch = np.full(len(test_items),g)
    group_var = torch.LongTensor(one_group_batch)
    item_var = torch.LongTensor(test_items)
    
    rating_mean = 0
    rating_var = 0
    for _ in range(num_repeat):
        with torch.no_grad():
            model.groupembeds.groupEmbedding.weight[cold_group_indicies] = (torch.randn(g_shape)*5).cuda()
        rating = model(group_var, item_var).cpu()
        rating = rating.data.numpy()
        rating_mean += rating
        
    rating_mean = rating_mean/num_repeat
    eps = 1e-7
    rating_var = -rating_mean*np.log(rating_mean+eps)-(1-rating_mean)*np.log(1-rating_mean+eps)

    test_idx1 = np.where(rating_var<threshold)[0] #Total
    
    test_idx2 = np.where(rating_mean>=0.5)[0] 
    test_idx = list(set(test_idx1) & set(test_idx2)) #Correct
    TP= set(np.array(test_items)[test_idx]) & set(group_pos_test)

    return len(TP),  len(test_idx)

def utest(model, groups_to_test, Ks, group_trainRatings, num_items, threshold=0.0, num_repeat=20):


    #pool = multiprocessing.Pool(cores)

    #g_batch_size = BATCH_SIZE
    test_groups = groups_to_test
    n_test_groups = len(test_groups)
    #n_group_batchs = n_test_groups // g_batch_size + 1
    #print(n_group_batchs)
    counts = 0
    corrects =0 
    for g_id in test_groups:
        inter, total = utest_one_group(g_id, model, Ks, group_trainRatings, test_groups, num_items, threshold, num_repeat)
        corrects += inter
        counts += total
    if counts == 0:
        counts = 1
        corrects = 1
    return corrects/counts

In [11]:
def uevaluation(model, groups_to_test, Ks, trainRatings, num_items, threshold=0.0, num_repeat=10):
    model.eval()
    t2 = time()
    ret = utest(model, groups_to_test, Ks, trainRatings, num_items, threshold=threshold, num_repeat=num_repeat) #See batch_test

    print('\t Evaluation done [%.1f s]: Acc = %.4f' % (time() - t2, ret))

    return ret

In [12]:
tttt = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
for ttt in tttt:
    ret = uevaluation(agree, dataset.group_testRatings, config.topK[:2], dataset.group_trainRatings, dataset.num_items, threshold=ttt, num_repeat=10)

	 Evaluation done [5175.5 s]: Acc = 0.0664
	 Evaluation done [5177.1 s]: Acc = 0.0582
	 Evaluation done [5188.1 s]: Acc = 0.0546
	 Evaluation done [5127.0 s]: Acc = 0.0514
	 Evaluation done [5210.9 s]: Acc = 0.0496
	 Evaluation done [5123.7 s]: Acc = 0.0464
	 Evaluation done [5198.7 s]: Acc = 0.0388
	 Evaluation done [5146.0 s]: Acc = 0.0418
	 Evaluation done [5186.6 s]: Acc = 0.0393
	 Evaluation done [5154.7 s]: Acc = 0.0405


def evaluation(model, groups_to_test, Ks, trainRatings, num_items):
    model.eval()
    t2 = time()
    ret = test(model, groups_to_test, Ks, trainRatings, num_items) #See batch_test

    print('\t Evaluation done [%.1f s]' % (time() - t2))
    for i, k in enumerate(Ks):
        print('\t\t @%d: HR = %.4f, NDCG = %.4f, Rec = %.4f' % (k, ret['hit_ratio'][i], ret['ndcg'][i], ret['recall'][i]))
    return ret

ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)  
for _ in range(6):
    with torch.no_grad():
        agree.groupembeds.groupEmbedding.weight[cold_group_indicies] = 2*torch.randn(g_shape).cuda()
    print('total')
    ret = evaluation(agree, dataset.group_testRatings, config.topK[:2], dataset.group_trainRatings, dataset.num_items)

    print('cold')
    ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
    #print('warm')
    #ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)   