In [1]:
#Pytorch
import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
#Python Libs
import numpy as np
from time import time
import datetime
import gc
#Implementations
from model.mosan import MoSAN
from config_douban import Config
from dataset_Meetup import GDataset
from batch_test import *

In [2]:
def training(model, train_loader, epoch_id, config):
    # user trainning
    t1 = time()
    model.train()
    learning_rates = config.lr
    lr = learning_rates[0]

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

    losses = []

    *_, last = train_loader

    for batch_id, (g, pi_ni) in enumerate(train_loader):
        # Data Load
        group_input = g
        pos_item_input = pi_ni[:, 0]
        neg_item_input = pi_ni[:, 1]

        optimizer.zero_grad()
        model.zero_grad()

        pos_prediction = model(group_input, pos_item_input)
        neg_prediction = model(group_input, neg_item_input)
        #print(pos_prediction[0], neg_prediction[0])

        # Loss

        loss = torch.mean(-F.logsigmoid(pos_prediction-neg_prediction))
        

        
        # record loss history
        #print("batch_id: " + str(batch_id) + " loss: " + str(loss.item()))
        if not torch.isinf(loss.data) and not torch.isnan(loss.data):
            losses.append(float(loss.item()))
        # Backward
        loss.backward()
        optimizer.step()
        del group_input, pos_item_input, neg_item_input
    gc.collect()
    print('Iteration %d,\tloss: [%.4f], time: [%.1fs]' % (epoch_id, np.mean(np.array(losses)), time() - t1))


In [3]:
def valid_model(model, test_groups, Ks, group_trainRatings, num_items):
    valid_group = []
    valid_item = []
    for g in test_groups:
        for v in test_groups[g]:
            valid_group.append(g)
            valid_item.append(v)

            
    group_var = torch.LongTensor(valid_group).cuda()
    item_var = torch.LongTensor(valid_item).cuda()

    model.eval()
    rating = model(group_var, item_var)
    avg_rating = torch.mean(rating).item()
    return avg_rating

In [4]:
def evaluation(model, groups_to_test, Ks, trainRatings, num_items):
    model.eval()
    t2 = time()
    ret = test(model, groups_to_test, Ks, trainRatings, num_items) #See batch_test

    print('\t Evaluation done [%.1f s]' % (time() - t2))
    for i, k in enumerate(Ks):
        print('\t\t @%d: HR = %.4f, NDCG = %.4f, Rec = %.4f' % (k, ret['hit_ratio'][i], ret['ndcg'][i], ret['recall'][i]))
    return ret

In [5]:
config = Config()

douban 127 256 [0.0005, 1e-06, 5e-07]


In [6]:
dataset = GDataset(config.user_dataset, config.group_dataset, config.user_in_group_path, 1)
num_groups, num_users, num_items = dataset.num_groups, dataset.num_users, dataset.num_items
print("num_groups: "+str(num_groups)+" num_users: "+str(num_users)+" num_items: "+str(num_items))
gu_dict = dataset.gu_dict

num_groups: 92489 num_users: 63893 num_items: 15741


In [7]:
test_warm = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_warm")
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
valid = dataset.load_rating_file_as_dict(config.group_dataset + ".valid.rating")

In [8]:
agree = MoSAN(num_users, num_items, num_groups, config.embedding_size, gu_dict, 0.5).cuda()
best_checkpoint = -1.0
best_weights_path = None
for num_negatives in config.num_negatives:
    dataset.num_negatives = num_negatives
    print("MoSAN: embedding size %d, run Iteration: %d, #neg: %d" %(config.embedding_size, config.epoch, num_negatives))
    # train the model
    now = datetime.datetime.now().strftime("%Y-%m-%d")

    for epoch in range(config.epoch):
        training(agree, dataset.get_group_dataloader(config.batch_size), epoch, config)
        

        # Evaluation
        if epoch % 3 == 0:
            agree.eval()
            with torch.no_grad():
                t2 = time()
                ret = valid_model(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
                cur_checkpoint = ret
                print('\t Evaluation done [%.1f s]: %.3f' % (time() - t2,ret))
                #cur_checkpoint = ret['hit_ratio'][1]
                #ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)
                #cur_checkpoint += ret['hit_ratio'][1]
                #cur_checkpoint = cur_checkpoint/2
                current_weights_path = 'weights/MoSAN'+str(config.dataset)+"_"+str(config.embedding_size)+"_"+str(config.lr[0])+'_'+str(num_negatives)+'_'+str(epoch)
                torch.save(agree.state_dict(), current_weights_path)
                if best_checkpoint <= cur_checkpoint:
                    print('saved')
                    best_weights_path = current_weights_path
                    best_checkpoint = cur_checkpoint

MoSAN: embedding size 32, run Iteration: 127, #neg: 4
Iteration 0,	loss: [0.5273], time: [1970.1s]
	 Evaluation done [1.6 s]: 1.060
saved
Iteration 1,	loss: [0.3877], time: [1975.1s]
Iteration 2,	loss: [0.3374], time: [1978.7s]
Iteration 3,	loss: [0.3139], time: [1980.0s]
	 Evaluation done [1.5 s]: 2.158
saved
Iteration 4,	loss: [0.2991], time: [1984.9s]
Iteration 5,	loss: [0.2902], time: [1988.0s]
Iteration 6,	loss: [0.2836], time: [1983.4s]
	 Evaluation done [1.6 s]: 2.480
saved
Iteration 7,	loss: [0.2801], time: [1983.0s]
Iteration 8,	loss: [0.2755], time: [1983.2s]
Iteration 9,	loss: [0.2711], time: [1977.2s]
	 Evaluation done [1.5 s]: 2.657
saved
Iteration 10,	loss: [0.2705], time: [1972.7s]
Iteration 11,	loss: [0.2674], time: [1972.0s]
Iteration 12,	loss: [0.2661], time: [1982.7s]
	 Evaluation done [1.5 s]: 2.664
saved
Iteration 13,	loss: [0.2628], time: [1971.3s]
Iteration 14,	loss: [0.2635], time: [1974.9s]
Iteration 15,	loss: [0.2629], time: [1971.2s]
	 Evaluation done [1.5 s]

In [9]:
print(best_weights_path)
agree = MoSAN(num_users, num_items, num_groups, config.embedding_size, gu_dict, config.drop_ratio).cuda()
gc.collect()
agree.load_state_dict(torch.load(best_weights_path))
print('cold')
ret = evaluation(agree, test_cold, config.topK[:2], dataset.group_trainRatings, dataset.num_items)

ret = evaluation(agree, test_warm, config.topK[:2], dataset.group_trainRatings, dataset.num_items)


weights/MoSANdouban_32_0.0005_4_120
cold
	 Evaluation done [115661.8 s]
		 @5: HR = 0.0121, NDCG = 0.0072, Rec = 0.0121
		 @10: HR = 0.0227, NDCG = 0.0105, Rec = 0.0227
	 Evaluation done [22990.8 s]
		 @5: HR = 0.0143, NDCG = 0.0085, Rec = 0.0143
		 @10: HR = 0.0221, NDCG = 0.0109, Rec = 0.0221
