In [1]:
#Pytorch
import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
#Python Libs
import numpy as np
from time import time
import datetime
import gc
#Implementations
from model.soagree import SoAGREE
from config_soagree import Config
from dataset_Meetup import GDataset
from batch_test import *

In [2]:
def training(model, train_loader, epoch_id, config):
    # user trainning
    learning_rates = config.lr
    # learning rate decay
    lr = learning_rates[0]
    if epoch_id >= 20 and epoch_id < 50:
        lr = learning_rates[1]
    elif epoch_id >=50:
        lr = learning_rates[2]
    # lr decay
    # if epoch_id % 5 == 0:
    #     lr /= 2
    t1 = time()
    # optimizer
    optimizer = optim.Adam(model.parameters(), 0.005)

    losses = []

    *_, last = train_loader
    model.train()
    for batch_id, (g, pi_ni) in enumerate(train_loader):
        # Data Load
        group_input = g
        pos_item_input = pi_ni[:, 0]
        neg_item_input = pi_ni[:, 1]

        optimizer.zero_grad()
        model.zero_grad()
        
        pos_prediction = model(group_input, pos_item_input)
        neg_prediction = model(group_input, neg_item_input)

        # Loss
        loss = torch.mean((pos_prediction - neg_prediction -1) **2)
        # record loss history
        #print("batch_id: " + str(batch_id) + " loss: " + str(loss.item()))
        if not torch.isinf(loss.data) and not torch.isnan(loss.data):
            losses.append(float(loss.item()))
        # Backward
        loss.backward()
        optimizer.step()
        del group_input, pos_item_input, neg_item_input

    print('Iteration %d,\tloss: [%.4f], time: [%.1fs]' % (epoch_id, np.mean(np.array(losses)), time() - t1))
    gc.collect()

In [3]:
def evaluation(model, groups_to_test, Ks, trainRatings, num_items):
    model.eval()
    t2 = time()
    ret = test(model, groups_to_test, Ks, trainRatings, num_items) #See batch_test

    print('\t Evaluation done [%.1f s]' % (time() - t2))
    for i, k in enumerate(Ks):
        print('\t\t @%d: HR = %.4f, NDCG = %.4f, Rec = %.4f' % (k, ret['hit_ratio'][i], ret['ndcg'][i], ret['recall'][i]))
    return ret

In [4]:
config = Config()

douban 128 256


In [5]:
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
print("num_groups: "+str(num_groups)+" num_users: "+str(num_users)+" num_items: "+str(num_items))
gu_dict = dataset.gu_dict

num_groups: 92489 num_users: 63893 num_items: 15741


In [6]:
test_warm = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_warm")
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
#ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
#ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
valid = dataset.load_rating_file_as_dict(config.group_dataset + ".valid.rating")

def gen_user_follow_dict(path):
    g_m_d = {}
    with open(path, 'r') as f:
        line = f.readline().strip()
        while line != None and line != "":
            a = line.split(':')
            g = int(a[0])
            g_m_d[g] = []
            for m in a[1].split(' '):
                if m is None or m == '':
                    continue
                g_m_d[g].append(int(m))
            line = f.readline().strip()
    return g_m_d
u_f_d = gen_user_follow_dict(config.path+"userFollow.txt")

In [None]:
agree = SoAGREE(num_users, num_items, num_groups, num_users, config.embedding_size, gu_dict, None, config.drop_ratio).cuda()
best_checkpoint = -1.0
best_weights_path = None
for num_negatives in config.num_negatives:
    dataset.num_negatives = num_negatives
    print("AGREE: embedding size %d, run Iteration: %d, #neg: %d" %(config.embedding_size, config.epoch, num_negatives))
    # train the model
    now = datetime.datetime.now().strftime("%Y-%m-%d")

    for epoch in range(config.epoch): 
        training(agree, dataset.get_group_dataloader(config.batch_size), epoch, config)


        # Evaluation
        if epoch % 3 == 0:
            agree.eval()
            with torch.no_grad():
                #ret = evaluation(agree, dataset.group_testRatings, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
                ret = evaluation(agree, valid, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
                cur_checkpoint = ret['hit_ratio'][1]
                #ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
                #cur_checkpoint += ret['hit_ratio'][1]
                #cur_checkpoint = cur_checkpoint/2
                current_weights_path = 'weights/soagree_'+str(config.dataset)+"_"+str(config.embedding_size)+"_"+str(config.lr[0])+'_'+str(num_negatives)+'_'+str(epoch)
                torch.save(agree.state_dict(), current_weights_path)
                if best_checkpoint <= cur_checkpoint:
                    best_weights_path = current_weights_path
                    best_checkpoint = cur_checkpoint

AGREE: embedding size 32, run Iteration: 128, #neg: 4
Iteration 0,	loss: [0.7279], time: [1703.8s]
	 Evaluation done [6356.4 s]
		 @5: HR = 0.0000, NDCG = 0.0000, Rec = 0.0000
		 @10: HR = 0.0000, NDCG = 0.0000, Rec = 0.0000
Iteration 1,	loss: [0.3197], time: [1706.3s]
Iteration 2,	loss: [0.1516], time: [1705.6s]
Iteration 3,	loss: [0.0945], time: [1707.5s]
	 Evaluation done [6388.5 s]
		 @5: HR = 0.0042, NDCG = 0.0032, Rec = 0.0042
		 @10: HR = 0.0127, NDCG = 0.0058, Rec = 0.0127
Iteration 4,	loss: [0.0692], time: [1702.1s]
Iteration 5,	loss: [0.0572], time: [1703.5s]
Iteration 6,	loss: [0.0497], time: [1702.3s]
	 Evaluation done [6340.3 s]
		 @5: HR = 0.0106, NDCG = 0.0069, Rec = 0.0106
		 @10: HR = 0.0191, NDCG = 0.0096, Rec = 0.0191
Iteration 7,	loss: [0.0455], time: [1705.9s]
Iteration 8,	loss: [0.0427], time: [1705.6s]
Iteration 9,	loss: [0.0401], time: [1703.0s]
	 Evaluation done [6366.8 s]
		 @5: HR = 0.0042, NDCG = 0.0019, Rec = 0.0042
		 @10: HR = 0.0191, NDCG = 0.0067, Rec =

In [None]:
agree = SoAGREE(num_users, num_items, num_groups, num_users, config.embedding_size, gu_dict, None, config.drop_ratio).cuda()
agree.load_state_dict(torch.load(best_weights_path))
print(best_weights_path)

In [None]:
print('cold')
ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
print('warm')
ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)   