In [1]:
import argparse
import ast
import os
import time

import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn

import models.gaussian_diffusion as gd
from models.DNN import GDN
import evaluate_utils
import data_utils

In [2]:
import random
# One seed shared by every random number generator used in this notebook.
random_seed = 1

# Seed, in order: torch (CPU), torch (CUDA), NumPy, Python's `random`.
for _seed_rng in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed, random.seed):
    _seed_rng(random_seed)
del _seed_rng  # keep the loop helper out of the notebook namespace

# Request deterministic cuDNN behaviour.
torch.backends.cudnn.deterministic = True
def worker_init_fn(worker_id):
    """Seed NumPy for a DataLoader worker.

    The seed is the notebook-level ``random_seed`` offset by ``worker_id``,
    so every worker gets a distinct but repeatable NumPy stream.
    """
    per_worker_seed = random_seed + worker_id
    np.random.seed(per_worker_seed)
def seed_worker(worker_id):
    """Reseed NumPy and Python's ``random`` from torch's seed for this process.

    ``torch.initial_seed()`` is reduced modulo 2**32 because ``np.random.seed``
    only accepts 32-bit seeds. ``worker_id`` is accepted so the function can be
    passed as a DataLoader ``worker_init_fn``; it is not used in the seed itself.
    """
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    # Python's `random` is a separate generator; seed it too so code that
    # relies on it is reproducible as well.
    random.seed(worker_seed)

In [3]:
def _str2bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy. The empty string maps to False,
    matching what ``bool("")`` returned before.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)


parser = argparse.ArgumentParser()

# general experiment settings
parser.add_argument('--dataset', type=str, default='ml-1m', help='choose the dataset')
parser.add_argument('--data_path', type=str, default='../datasets/', help='load data path')
parser.add_argument('--lr', type=float, default=0.0001, help='learning rate')
parser.add_argument('--drop_out', type=float, default=0.1, help='dropout rate')
parser.add_argument('--weight_decay', type=float, default=0.0)
parser.add_argument('--batch_size', type=int, default=400)
parser.add_argument('--epochs', type=int, default=300, help='upper epoch limit')
parser.add_argument('--topN', type=str, default='[10, 20, 50, 100]')
parser.add_argument('--tst_w_val', action='store_true', help='test with validation')
parser.add_argument('--cuda', action='store_true', help='use CUDA')
parser.add_argument('--gpu', type=str, default='0', help='gpu card ID')
parser.add_argument('--save_path', type=str, default='./saved_models/', help='save model path')
parser.add_argument('--log_name', type=str, default='log', help='the log name')
parser.add_argument('--round', type=int, default=1, help='record the experiment')

parser.add_argument('--w_min', type=float, default=0.1, help='the minimum weight for interactions')
parser.add_argument('--w_max', type=float, default=1., help='the maximum weight for interactions')

# params for the model
parser.add_argument('--time_type', type=str, default='add', help='cat or add')
parser.add_argument('--graph_layers', type=int, default=1, help='the nums layer for the GNN')
parser.add_argument('--graph_views', type=int, default=1, help='the nums views for the GNN')
parser.add_argument('--mlp_hidden_dims', type=str, default='[1000]', help='the dims for the DNN')
parser.add_argument('--norm', type=_str2bool, default=True, help='Normalize the input or not')
parser.add_argument('--emb_size', type=int, default=10, help='timestep embedding size')

# params for diffusion
parser.add_argument('--sample_style', type=str, default='uniform', help='importance/uniform/fully')
parser.add_argument('--mean_type', type=str, default='x0', help='MeanType for diffusion: x0, eps')
parser.add_argument('--steps', type=int, default=2, help='diffusion steps')
parser.add_argument('--noise_schedule', type=str, default='linear-var', help='the schedule for noise generating')
parser.add_argument('--noise_scale', type=float, default=1.0, help='noise scale for noise generating')
parser.add_argument('--noise_min', type=float, default=0.0005, help='noise lower bound for noise generating')
parser.add_argument('--noise_max', type=float, default=0.005, help='noise upper bound for noise generating')
parser.add_argument('--sampling_noise', type=_str2bool, default=False, help='sampling with noise or not')
parser.add_argument('--sampling_steps', type=int, default=0, help='steps of the forward process during inference')
parser.add_argument('--reweight', type=_str2bool, default=True, help='assign different weight to different timestep or not')

# An empty argv is passed on purpose: inside a notebook sys.argv belongs to
# the kernel, so only the defaults above are used.
args = parser.parse_args([])
print("args:", args)

# Restrict the visible GPUs before the device is created.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
device = torch.device("cuda:0" if args.cuda else "cpu")

print("Starting time: ", time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))

args: Namespace(batch_size=400, cuda=False, data_path='../datasets/', dataset='ml-1m', drop_out=0.1, emb_size=10, epochs=300, gpu='0', graph_layers=1, graph_views=1, log_name='log', lr=0.0001, mean_type='x0', mlp_hidden_dims='[1000]', noise_max=0.005, noise_min=0.0005, noise_scale=1.0, noise_schedule='linear-var', norm=True, reweight=True, round=1, sample_style='uniform', sampling_noise=False, sampling_steps=0, save_path='./saved_models/', steps=2, time_type='add', topN='[10, 20, 50, 100]', tst_w_val=False, w_max=1.0, w_min=0.1, weight_decay=0.0)
Starting time:  2023-11-11 17:11:53


In [4]:
### DATA LOAD ###
train_path = os.path.join(args.data_path, args.dataset, 'train_list.npy')
valid_path = os.path.join(args.data_path, args.dataset, 'valid_list.npy')
test_path = os.path.join(args.data_path, args.dataset, 'test_list.npy')

train_data, train_data_ori, valid_y_data, test_y_data, n_user, n_item, g = data_utils.data_load(
    train_path, valid_path, test_path, args.w_min, args.w_max)

# Densify the training matrix once and reuse the tensor below instead of
# converting `train_data` a second time for the train+valid dataset.
train_tensor = torch.FloatTensor(train_data.A)
train_dataset = data_utils.DataDiffusion(train_tensor)
# Pinned host memory is only requested when CUDA is in use.
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, pin_memory=args.cuda, shuffle=True, worker_init_fn=worker_init_fn)
# Unshuffled pass over the same training rows, used as model input at evaluation time.
test_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False)

if args.tst_w_val:
    # Test-time input is the training interactions plus the validation interactions.
    tv_dataset = data_utils.DataDiffusion(train_tensor + torch.FloatTensor(valid_y_data.A))
    test_twv_loader = DataLoader(tv_dataset, batch_size=args.batch_size, shuffle=False)
# Interactions to mask out of the predictions when scoring on the test split.
mask_tv = train_data_ori + valid_y_data

print('data ready.')


### Build Gaussian Diffusion ###
if args.mean_type == 'x0':
    mean_type = gd.ModelMeanType.START_X
elif args.mean_type == 'eps':
    mean_type = gd.ModelMeanType.EPSILON
else:
    raise ValueError("Unimplemented mean type %s" % args.mean_type)

diffusion = gd.GaussianDiffusion(mean_type, args.noise_schedule, args.noise_scale,
                                 args.noise_min, args.noise_max, args.steps, device).to(device)

### Build MLP ###
# Parse the list literal once; literal_eval only accepts Python literals, so
# the command-line string cannot execute arbitrary code the way eval() could.
hidden_dims = ast.literal_eval(args.mlp_hidden_dims)
if hidden_dims:
    mlp_dims = [n_item] + list(hidden_dims) + [n_item]
else:
    mlp_dims = [n_item, n_item]
model = GDN(mlp_dims, args.emb_size, g, args.graph_layers, norm=args.norm, dropout=args.drop_out).to(device)

optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
print("models ready.")

# Report the total parameter count of the network plus the diffusion module.
mlp_num = sum(param.nelement() for param in model.parameters())
diff_num = sum(param.nelement() for param in diffusion.parameters())  # 0
param_num = mlp_num + diff_num
print("Number of all parameters:", param_num)

user num: 5949
item num: 2810
data ready.
models ready.
Number of all parameters: 5633920


In [5]:
def evaluate(data_loader, data_te, mask_his, topN):
    """Rank items for every user and score the rankings against held-out data.

    Uses the notebook-level ``model``, ``diffusion``, ``args`` and ``device``.
    The model is switched to eval mode and left there; the caller is
    responsible for calling ``model.train()`` again.

    Args:
        data_loader: yields batches of model inputs. Batches must arrive in
            row order (no shuffling) so that the n-th row seen lines up with
            the n-th row of ``mask_his`` and ``data_te``.
        data_te: 2-D matrix of held-out interactions; the non-zero columns of
            row ``i`` are the target items for row ``i``.
        mask_his: 2-D matrix of interactions to exclude; every non-zero
            position is set to ``-inf`` in the predictions before ranking.
        topN: sequence of cut-offs; the last entry is the number of items
            retrieved per row.

    Returns:
        Whatever ``evaluate_utils.computeTopNAccuracy`` returns for the
        collected targets and predictions.
    """
    model.eval()
    num_rows = mask_his.shape[0]

    # Column indices of the non-zero entries of each held-out row.
    target_items = [data_te[row, :].nonzero()[1].tolist() for row in range(num_rows)]

    predict_items = []
    # Track the row offset from the batches actually received rather than
    # from args.batch_size, so the mask stays aligned even if the loader was
    # built with a different batch size.
    row_start = 0
    with torch.no_grad():
        for batch in data_loader:
            row_end = row_start + len(batch)
            his_data = mask_his[row_start:row_end]
            row_start = row_end

            batch = batch.to(device)
            prediction = diffusion.p_sample(model, batch, args.sampling_steps, args.sampling_noise)
            # Exclude the masked interactions from the ranking.
            prediction[his_data.nonzero()] = -np.inf

            _, indices = torch.topk(prediction, topN[-1])
            predict_items.extend(indices.cpu().numpy().tolist())

    return evaluate_utils.computeTopNAccuracy(target_items, predict_items, topN)

In [6]:
### TRAINING ###
# Train with early stopping: evaluate every 5 epochs, keep the results of the
# epoch with the best validation recall@20, and stop once 20 epochs have
# passed without a new best.
best_recall, best_epoch = -100, 0
# Both stay None until the first evaluation, so the final report can tell
# "never evaluated" apart from real results instead of raising NameError.
best_results, best_test_results = None, None
print("Start training...")
# NOTE(review): the next two names are not referenced anywhere in this cell;
# kept in case other cells rely on them.
lr_adjust_times = 0
all_lr = [args.lr*i for i in [1, 0.1, 0.01]]
# Parse the cut-off list once instead of eval()-ing the string on every evaluation.
topN_list = ast.literal_eval(args.topN)

for epoch in range(1, args.epochs + 1):
    if epoch - best_epoch >= 20:
        print('-'*18)
        break

    model.train()
    start_time = time.time()

    batch_count = 0
    total_loss = 0.0

    for batch_idx, batch in enumerate(train_loader):
        batch = batch.to(device)
        batch_count += 1
        optimizer.zero_grad()
        losses = diffusion.training_losses(model, batch, args.sample_style, args.reweight)
        loss = losses["loss"].mean()
        # Accumulate a plain Python float; adding the tensor itself would keep
        # every batch's autograd history referenced through total_loss.
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    if epoch % 5 == 0:
        valid_results = evaluate(test_loader, valid_y_data, train_data, topN_list)
        if args.tst_w_val:
            test_results = evaluate(test_twv_loader, test_y_data, mask_tv, topN_list)
        else:
            test_results = evaluate(test_loader, test_y_data, mask_tv, topN_list)
        evaluate_utils.print_results(None, valid_results, test_results)

        if valid_results[1][1] > best_recall: # recall@20 as selection
            best_recall, best_epoch = valid_results[1][1], epoch
            best_results = valid_results
            best_test_results = test_results

    print("Runing Epoch {:03d} ".format(epoch) + 'train loss {:.4f}'.format(total_loss) + " costs " + time.strftime(
                        "%H: %M: %S", time.gmtime(time.time()-start_time)))
    print('---'*18)

print('==='*18)
print("End. Best Epoch {:03d} ".format(best_epoch))
if best_results is None:
    # Happens when training finished before the first evaluation (fewer than 5 epochs).
    print("No evaluation was run, so there are no best results to report.")
else:
    evaluate_utils.print_results(None, best_results, best_test_results)
print("End time: ", time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))

Start training...
Runing Epoch 001 train loss 5467.7392 costs 00: 00: 06
------------------------------------------------------
Runing Epoch 002 train loss 5069.3424 costs 00: 00: 04
------------------------------------------------------
Runing Epoch 003 train loss 4978.5873 costs 00: 00: 03
------------------------------------------------------
Runing Epoch 004 train loss 4912.9554 costs 00: 00: 03
------------------------------------------------------
[Valid]: Precision: 0.0641-0.0589-0.0485-0.0404 Recall: 0.0459-0.0826-0.163-0.2596 NDCG: 0.074-0.0821-0.1076-0.1423 MRR: 0.1521-0.1625-0.1683-0.17
[Test]: Precision: 0.0507-0.0437-0.0345-0.0275 Recall: 0.0678-0.1118-0.2103-0.3244 NDCG: 0.0712-0.0843-0.1174-0.1525 MRR: 0.1313-0.1406-0.1467-0.1487
Runing Epoch 005 train loss 4697.3982 costs 00: 00: 11
------------------------------------------------------
Runing Epoch 006 train loss 4645.5617 costs 00: 00: 03
------------------------------------------------------
Runing Epoch 007 train lo

Runing Epoch 051 train loss 3548.0584 costs 00: 00: 03
------------------------------------------------------
Runing Epoch 052 train loss 3495.2280 costs 00: 00: 03
------------------------------------------------------
Runing Epoch 053 train loss 3575.7855 costs 00: 00: 04
------------------------------------------------------
Runing Epoch 054 train loss 3506.2596 costs 00: 00: 03
------------------------------------------------------
[Valid]: Precision: 0.0675-0.0625-0.0544-0.047 Recall: 0.0678-0.1171-0.2287-0.354 NDCG: 0.0844-0.0993-0.1375-0.1826 MRR: 0.1623-0.1743-0.1816-0.1832
[Test]: Precision: 0.0686-0.0605-0.0479-0.0375 Recall: 0.1265-0.2039-0.3545-0.5028 NDCG: 0.1092-0.136-0.1875-0.2337 MRR: 0.1759-0.1879-0.1947-0.1961
Runing Epoch 055 train loss 3523.7544 costs 00: 00: 12
------------------------------------------------------
Runing Epoch 056 train loss 3471.8115 costs 00: 00: 04
------------------------------------------------------
Runing Epoch 057 train loss 3432.6178 cost