In [1]:
import time
import datetime
# Elapsed-time formatting helper
def format_time(elapsed):
    """Render a duration given in seconds as an ``h:mm:ss`` string.

    The value is rounded to the nearest whole second before it is handed to
    ``datetime.timedelta``, whose ``str()`` form is returned.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

# Smoke-test the formatter by timing an (essentially empty) interval.
start_time = time.time()
elapsed_seconds = time.time() - start_time
print("  Training epoch took: {:}".format(format_time(elapsed_seconds)))

  Training epoch took: 0:00:00


In [2]:
"""Training IGMC model on the MovieLens dataset."""

import os
import sys
import time
import glob
import random
import argparse
from shutil import copy
# from pyinstrument import Profiler

import numpy as np
import torch as th
import torch.nn as nn
import torch.optim as optim

from model import IGMC
from data import MovieLens
from dataset import MovieLensDataset, collate_movielens 
from utils import MetricLogger

# os.environ['TZ'] = 'Asia/Shanghai'
# time.tzset()

Using backend: pytorch


In [3]:
def evaluate(model, loader, device):
    """Return the RMSE of ``model`` over every batch yielded by ``loader``.

    Each batch is indexed as ``batch[0]`` (model input) and ``batch[1]``
    (target values); both are moved to ``device``. The model is switched to
    eval mode and the forward pass runs without gradient tracking. The summed
    squared error is divided by ``len(loader.dataset)`` before the square
    root is taken.
    """
    model.eval()
    squared_error_total = 0.
    with th.no_grad():
        for batch in loader:
            predictions = model(batch[0].to(device))
            targets = batch[1].to(device)
            residual = predictions - targets
            squared_error_total += (residual ** 2).sum().item()
    mean_squared_error = squared_error_total / len(loader.dataset)
    return np.sqrt(mean_squared_error)

def adj_rating_reg(model):
    """Sum, over ``model.convs``, the squared gaps between neighbouring weight slices.

    For every conv layer the basis weights are flattened to
    ``(num_bases, in_feat * out_feat)``, combined through ``w_comp`` and
    reshaped to ``(num_rels, in_feat, out_feat)``. The penalty is the sum of
    squared differences between consecutive slices along the first axis.
    Returns the integer ``0`` when ``model.convs`` is empty.
    """
    def _layer_penalty(conv):
        flat_bases = conv.weight.view(conv.num_bases, conv.in_feat * conv.out_feat)
        combined = th.matmul(conv.w_comp, flat_bases)
        per_slice = combined.view(conv.num_rels, conv.in_feat, conv.out_feat)
        neighbour_gap = per_slice[1:, :, :] - per_slice[:-1, :, :]
        return th.sum(neighbour_gap ** 2)

    return sum(_layer_penalty(conv) for conv in model.convs)

# @profile
def train_epoch(model, loss_fn, optimizer, arr_lambda, loader, device, log_interval):
    """Run one optimisation pass over ``loader`` and return the mean training loss.

    Per batch, the objective is ``loss_fn(preds, labels).mean()`` plus
    ``arr_lambda * adj_rating_reg(model)``. Every ``log_interval`` iterations a
    line is printed with the sample-weighted loss and MSE accumulated since
    the previous report, together with the mean per-iteration wall time
    measured since the start of the epoch.

    Returns:
        The sum over batches of ``loss * batch_size`` divided by
        ``len(loader.dataset)``.
    """
    model.train()

    running_epoch_loss = 0.
    window_loss = 0.
    window_sq_err = 0.
    window_samples = 0
    step_seconds = []

    for iter_idx, batch in enumerate(loader, start=1):
        tick = time.time()

        inputs = batch[0].to(device)
        labels = batch[1].to(device)
        preds = model(inputs)
        objective = loss_fn(preds, labels).mean() + arr_lambda * adj_rating_reg(model)

        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

        # Weight the scalar loss by batch size so that a smaller final batch
        # does not skew the per-sample averages.
        batch_size = preds.shape[0]
        weighted_loss = objective.item() * batch_size
        running_epoch_loss += weighted_loss
        window_loss += weighted_loss
        window_sq_err += ((preds - labels) ** 2).sum().item()
        window_samples += batch_size
        step_seconds.append(time.time() - tick)

        if iter_idx % log_interval != 0:
            continue

        print("Iter={}, loss={:.4f}, mse={:.4f}, time={:.4f}".format(
            iter_idx, window_loss/window_samples, window_sq_err/window_samples,
            np.average(step_seconds)))
        # Start a fresh reporting window; step_seconds keeps growing on purpose.
        window_loss = 0.
        window_sq_err = 0.
        window_samples = 0

    return running_epoch_loss / len(loader.dataset)

def train(args):
    """Build datasets, model and optimizer from ``args`` and run the training loop.

    Evaluation uses the test split when ``args.testing`` is true and the
    validation split otherwise. After every epoch the evaluation RMSE is
    printed and passed to the ``MetricLogger``; the best RMSE and its epoch
    are tracked. When training ends, a one-line summary is printed and
    appended to ``log.txt`` inside ``args.save_dir``.

    Args:
        args: configuration namespace. Reads ``data_name``, ``testing``,
            ``data_test_ratio``, ``data_valid_ratio``, ``hop``,
            ``sample_ratio``, ``max_nodes_per_hop``, ``batch_size``,
            ``num_workers``, ``edge_dropout``, ``device``, ``train_lr``,
            ``train_lr_decay_step``, ``train_lr_decay_factor``,
            ``arr_lambda``, ``train_epochs``, ``train_log_interval``,
            ``valid_log_interval`` and ``save_dir``. ``num_rgcn_bases``
            (fallback 4) and ``train_min_lr`` (fallback 0) are honoured when
            present.
    """
    ### prepare data and set model
    movielens = MovieLens(args.data_name, testing=args.testing,
                            test_ratio=args.data_test_ratio, valid_ratio=args.data_valid_ratio)
    # Only the rating pairs/values differ between the two evaluation modes;
    # the subgraphs are always extracted from the training graph.
    if args.testing:
        eval_pairs, eval_values = movielens.test_rating_pairs, movielens.test_rating_values
    else:
        eval_pairs, eval_values = movielens.valid_rating_pairs, movielens.valid_rating_values
    test_dataset = MovieLensDataset(
        eval_pairs, eval_values, movielens.train_graph,
        args.hop, args.sample_ratio, args.max_nodes_per_hop)
    train_dataset = MovieLensDataset(
        movielens.train_rating_pairs, movielens.train_rating_values, movielens.train_graph,
        args.hop, args.sample_ratio, args.max_nodes_per_hop)

    train_loader = th.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                            num_workers=args.num_workers, collate_fn=collate_movielens)
    test_loader = th.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False,
                            num_workers=args.num_workers, collate_fn=collate_movielens)

    in_feats = (args.hop+1)*2 #+ movielens.train_graph.ndata['refex'].shape[1]
    model = IGMC(in_feats=in_feats,
                 latent_dim=[32, 32, 32, 32],
                 num_relations=5, # movielens.num_rating,
                 # Honour the configured basis count; 4 was the previous hard-coded value.
                 num_bases=getattr(args, 'num_rgcn_bases', 4),
                 regression=True,
                 edge_dropout=args.edge_dropout,
                #  side_features=args.use_features,
                #  n_side_features=n_features,
                #  multiply_by=args.multiply_by
            ).to(args.device)
    loss_fn = nn.MSELoss().to(args.device)
    optimizer = optim.Adam(model.parameters(), lr=args.train_lr, weight_decay=0)
    print("Loading network finished ...\n")

    ### prepare the logger
    logger = MetricLogger(args.save_dir, args.valid_log_interval)

    best_epoch = 0
    best_rmse = np.inf
    min_lr = getattr(args, 'train_min_lr', 0.)
    print("Start training ...")
    for epoch_idx in range(1, args.train_epochs+1):
        print ('Epoch', epoch_idx)

        train_loss = train_epoch(model, loss_fn, optimizer, args.arr_lambda,
                                train_loader, args.device, args.train_log_interval)
        test_rmse = evaluate(model, test_loader, args.device)
        eval_info = {
            'epoch': epoch_idx,
            'train_loss': train_loss,
            'test_rmse': test_rmse,
        }
        # Format from the named values rather than relying on dict ordering.
        print('=== Epoch {}, train loss {:.6f}, test rmse {:.6f} ==='.format(
            epoch_idx, train_loss, test_rmse))

        if epoch_idx % args.train_lr_decay_step == 0:
            for param in optimizer.param_groups:
                # Never decay below train_min_lr, and never raise a learning
                # rate that already sits below that floor.
                lr_floor = min(min_lr, param['lr'])
                param['lr'] = max(args.train_lr_decay_factor * param['lr'], lr_floor)

        logger.log(eval_info, model, optimizer)
        if best_rmse > test_rmse:
            best_rmse = test_rmse
            best_epoch = epoch_idx
    eval_info = "Training ends. The best testing rmse is {:.6f} at epoch {}".format(best_rmse, best_epoch)
    print(eval_info)
    with open(os.path.join(args.save_dir, 'log.txt'), 'a') as f:
        # Terminate the line so later appends to log.txt do not run into it.
        f.write(eval_info + "\n")

In [4]:
def config():
    """Parse command-line options, prepare the run directory and return ``args``.

    Besides parsing, this function:
      * replaces ``args.device`` with a ``torch.device`` (CPU when the index
        is negative or CUDA is unavailable);
      * sets ``args.save_dir`` to a ``log/<data>_<appendix>_<mode>_<time>``
        directory and creates it if missing;
      * copies every ``*.py`` file in the current directory into ``save_dir``;
      * appends the launching command line to ``cmd_input.txt`` in ``save_dir``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description='IGMC')
    # general settings
    parser.add_argument('--testing', action='store_true', default=False,
                        help='if set, use testing mode which splits all ratings into train/test;\
                        otherwise, use validation model which splits all ratings into \
                        train/val/test and evaluate on val only')
    parser.add_argument('--device', default=0, type=int,
                        help='Running device. E.g `--device 0`, if using cpu, set `--device -1`')
    parser.add_argument('--seed', type=int, default=1234, metavar='S',
                        help='random seed (default: 1234)')
    parser.add_argument('--data_name', default='ml-100k', type=str,
                        help='The dataset name: ml-100k, ml-1m')
    parser.add_argument('--data_test_ratio', type=float, default=0.1) # for ml-100k the test ratio is 0.2
    parser.add_argument('--num_workers', type=int, default=8)
    parser.add_argument('--data_valid_ratio', type=float, default=0.2)
    # parser.add_argument('--ensemble', action='store_true', default=False,
    #                     help='if True, load a series of model checkpoints and ensemble the results')
    parser.add_argument('--train_log_interval', type=int, default=100)
    parser.add_argument('--valid_log_interval', type=int, default=10)
    parser.add_argument('--save_appendix', type=str, default='debug',
                        help='what to append to save-names when saving results')
    # subgraph extraction settings
    # type=int is required: without it a CLI value arrives as str and the
    # arithmetic on args.hop downstream raises TypeError.
    parser.add_argument('--hop', type=int, default=1, metavar='S',
                        help='enclosing subgraph hop number')
    parser.add_argument('--sample_ratio', type=float, default=1.0,
                        help='if < 1, subsample nodes per hop according to the ratio')
    parser.add_argument('--max_nodes_per_hop', type=int, default=200,
                        help='if > 0, upper bound the # nodes per hop by another subsampling')
    # parser.add_argument('--use_features', action='store_true', default=False,
    #                     help='whether to use node features (side information)')
    # edge dropout settings
    parser.add_argument('--edge_dropout', type=float, default=0.2,
                        help='if not 0, random drops edges from adjacency matrix with this prob')
    parser.add_argument('--force_undirected', action='store_true', default=False,
                        help='in edge dropout, force (x, y) and (y, x) to be dropped together')
    # optimization settings
    parser.add_argument('--train_lr', type=float, default=1e-3)
    parser.add_argument('--train_min_lr', type=float, default=1e-6)
    parser.add_argument('--train_lr_decay_factor', type=float, default=0.1)
    parser.add_argument('--train_lr_decay_step', type=int, default=50)
    parser.add_argument('--train_epochs', type=int, default=80)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--arr_lambda', type=float, default=0.001)
    parser.add_argument('--num_rgcn_bases', type=int, default=4)

    args = parser.parse_args()
    args.device = th.device(args.device) if args.device >= 0 and th.cuda.is_available() else th.device('cpu')

    ### set save_dir according to localtime and test mode
    # NOTE: '__file__' is a string literal here, so this resolves relative to
    # the current working directory rather than this module's location.
    file_dir = os.path.dirname(os.path.realpath('__file__'))
    val_test_appendix = 'testmode' if args.testing else 'valmode'
    local_time = time.strftime('%y%m%d%H%M', time.localtime())
    args.save_dir = os.path.join(
        file_dir, 'log/{}_{}_{}_{}'.format(
            args.data_name, args.save_appendix, val_test_appendix, local_time
        )
    )
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    print(args)

    # backup current .py files
    for f in glob.glob(r"*.py"):
        copy(f, args.save_dir)

    # save command line input
    cmd_input = 'python3 ' + ' '.join(sys.argv)
    with open(os.path.join(args.save_dir, 'cmd_input.txt'), 'a') as f:
        f.write(cmd_input)
        f.write("\n")
    print('Command line input: ' + cmd_input + ' is saved.')

    return args

In [5]:
# if __name__ == '__main__':
#     args = config()
#     random.seed(args.seed)
#     np.random.seed(args.seed)
#     th.manual_seed(args.seed)
#     if th.cuda.is_available():
#         th.cuda.manual_seed_all(args.seed)
#     train(args)

## 1. Config

In [6]:
import easydict

# Notebook stand-in for the argparse options defined in config().
# Each key appears exactly once: the earlier literal listed 'train_epochs'
# twice (80, then 1) and the later value silently won.
args = easydict.EasyDict({
    'data_name':             'ml-100k',
    'testing':               False,
    'device':                0,
    'seed':                  1234,
    'data_test_ratio':       0.1,
    'num_workers':           8,
    'data_valid_ratio':      0.2,
    'train_log_interval':    100,
    'valid_log_interval':    10,
    'save_appendix':         'debug',
    'hop':                   1,
    'sample_ratio':          1.0,
    'max_nodes_per_hop':     200,
    'edge_dropout':          0.2,
    'force_undirected':      False,
    'train_lr':              1e-3,
    'train_min_lr':          1e-6,
    'train_lr_decay_factor': 0.1,
    'train_lr_decay_step':   50,
    'train_epochs':          1,   # config() defaults to 80; kept at 1 for a quick notebook run
    'batch_size':            32,
    'arr_lambda':            0.001,
    'num_rgcn_bases':        4,
})

In [7]:
### derive a timestamped save_dir from dataset name, appendix and evaluation mode
file_dir = os.path.dirname(os.path.realpath('__file__'))
if args.testing:
    val_test_appendix = 'testmode'
else:
    val_test_appendix = 'valmode'
local_time = time.strftime('%y%m%d%H%M', time.localtime())
run_subdir = 'log/{}_{}_{}_{}'.format(
    args.data_name, args.save_appendix, val_test_appendix, local_time)
args.save_dir = os.path.join(file_dir, run_subdir)
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)
print(args)

# keep a copy of every .py file in the working directory next to the logs
for f in glob.glob(r"*.py"):
    copy(f, args.save_dir)

# record the launching command line (inside a notebook this is the kernel launcher)
cmd_input = 'python3 ' + ' '.join(sys.argv)
with open(os.path.join(args.save_dir, 'cmd_input.txt'), 'a') as f:
    f.write(cmd_input + "\n")
print('Command line input: ' + cmd_input + ' is saved.')

{'data_name': 'ml-100k', 'testing': False, 'device': 0, 'seed': 1234, 'data_test_ratio': 0.1, 'num_workers': 8, 'data_valid_ratio': 0.2, 'train_log_interval': 100, 'valid_log_interval': 10, 'save_appendix': 'debug', 'hop': 1, 'sample_ratio': 1.0, 'max_nodes_per_hop': 200, 'edge_dropout': 0.2, 'force_undirected': False, 'train_lr': 0.001, 'train_min_lr': 1e-06, 'train_lr_decay_factor': 0.1, 'train_lr_decay_step': 50, 'train_epochs': 1, 'batch_size': 32, 'arr_lambda': 0.001, 'num_rgcn_bases': 4, 'save_dir': 'C:\\Users\\user\\Jupyter_project\\keejun\\graph\\Motif-based-inductive-GNN-training-master\\log/ml-100k_debug_valmode_2111062256'}
Command line input: python3 C:\Users\user\anaconda3\envs\graph\lib\site-packages\ipykernel_launcher.py -f C:\Users\user\AppData\Roaming\jupyter\runtime\kernel-09563cc1-42d1-4be6-80ad-88c2428df41e.json is saved.


In [8]:
args

{'data_name': 'ml-100k',
 'testing': False,
 'device': 0,
 'seed': 1234,
 'data_test_ratio': 0.1,
 'num_workers': 8,
 'data_valid_ratio': 0.2,
 'train_log_interval': 100,
 'valid_log_interval': 10,
 'save_appendix': 'debug',
 'hop': 1,
 'sample_ratio': 1.0,
 'max_nodes_per_hop': 200,
 'edge_dropout': 0.2,
 'force_undirected': False,
 'train_lr': 0.001,
 'train_min_lr': 1e-06,
 'train_lr_decay_factor': 0.1,
 'train_lr_decay_step': 50,
 'train_epochs': 1,
 'batch_size': 32,
 'arr_lambda': 0.001,
 'num_rgcn_bases': 4,
 'save_dir': 'C:\\Users\\user\\Jupyter_project\\keejun\\graph\\Motif-based-inductive-GNN-training-master\\log/ml-100k_debug_valmode_2111062256'}

In [9]:
# Seed every RNG in use: Python, NumPy, PyTorch and (when available) all CUDA devices.
seed = args.seed
random.seed(seed)
np.random.seed(seed)
th.manual_seed(seed)
if th.cuda.is_available():
    th.cuda.manual_seed_all(seed)

In [10]:
# start_time = time.time()

# train(args)

# print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))

## 2. Train

In [11]:
### load the MovieLens data with the configured test / validation ratios
movielens = MovieLens(
    args.data_name,
    testing=args.testing,
    test_ratio=args.data_test_ratio,
    valid_ratio=args.data_valid_ratio,
)

Using official MovieLens dataset split u1.base/u1.test with 20% validation set size...
User features shape: (943, 23)
Item features shape: (1682, 18)
	Train rating pairs : 64000
	Valid rating pairs : 16000
	Test rating pairs  : 20000


In [12]:
args.testing

False

In [13]:
# Evaluate on the test ratings in testing mode, otherwise on the validation
# ratings; subgraphs are extracted from the training graph either way.
if args.testing:
    eval_pairs, eval_values = movielens.test_rating_pairs, movielens.test_rating_values
else:
    eval_pairs, eval_values = movielens.valid_rating_pairs, movielens.valid_rating_values
test_dataset = MovieLensDataset(
    eval_pairs, eval_values, movielens.train_graph,
    args.hop, args.sample_ratio, args.max_nodes_per_hop)

In [14]:
# Training dataset: built from the training ratings with the same subgraph settings.
train_dataset = MovieLensDataset(
    movielens.train_rating_pairs,
    movielens.train_rating_values,
    movielens.train_graph,
    args.hop,
    args.sample_ratio,
    args.max_nodes_per_hop,
)

In [15]:
# Both loaders share batch size, worker count and collate function;
# only the training loader shuffles.
loader_kwargs = dict(
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    collate_fn=collate_movielens,
)
train_loader = th.utils.data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = th.utils.data.DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [16]:
# Input width is derived from the hop count: (hop + 1) * 2.
in_feats = (args.hop+1)*2 #+ movielens.train_graph.ndata['refex'].shape[1]
model = IGMC(in_feats=in_feats,
             latent_dim=[32, 32, 32, 32],
             num_relations=5, # movielens.num_rating,
             # Take the basis count from the config instead of a hard-coded 4,
             # so changing args.num_rgcn_bases actually has an effect.
             num_bases=args.num_rgcn_bases,
             regression=True,
             edge_dropout=args.edge_dropout,
            #  side_features=args.use_features,
            #  n_side_features=n_features,
            #  multiply_by=args.multiply_by
        ).to(args.device)
loss_fn = nn.MSELoss().to(args.device)
optimizer = optim.Adam(model.parameters(), lr=args.train_lr, weight_decay=0)
print("Loading network finished ...\n")

Loading network finished ...



In [17]:
args.hop

1

In [18]:
in_feats

4

In [19]:
### set up the metric logger and the best-score trackers
logger = MetricLogger(args.save_dir, args.valid_log_interval)

# No epoch has been evaluated yet: start from an infinite RMSE so the first
# evaluated epoch always becomes the best one.
best_epoch, best_rmse = 0, np.inf
print("Start training ...")

Start training ...


In [20]:
start_time = time.time()

for epoch_idx in range(1, args.train_epochs+1):
    print ('Epoch', epoch_idx)

    train_loss = train_epoch(model, loss_fn, optimizer, args.arr_lambda,
                            train_loader, args.device, args.train_log_interval)
    test_rmse = evaluate(model, test_loader, args.device)
    eval_info = {
        'epoch': epoch_idx,
        'train_loss': train_loss,
        'test_rmse': test_rmse,
    }
    # Format from the named values rather than relying on dict ordering.
    print('=== Epoch {}, train loss {:.6f}, test rmse {:.6f} ==='.format(
        eval_info['epoch'], eval_info['train_loss'], eval_info['test_rmse']))

    if epoch_idx % args.train_lr_decay_step == 0:
        for param in optimizer.param_groups:
            # Never decay below args.train_min_lr, and never raise a learning
            # rate that already sits below that floor.
            lr_floor = min(args.train_min_lr, param['lr'])
            param['lr'] = max(args.train_lr_decay_factor * param['lr'], lr_floor)

    logger.log(eval_info, model, optimizer)
    # Track the lowest evaluation RMSE seen so far and the epoch it came from.
    if best_rmse > test_rmse:
        best_rmse = test_rmse
        best_epoch = epoch_idx

print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))

Epoch 1
Iter=100, loss=1.5105, mse=1.5091, time=0.0576
Iter=200, loss=1.1026, mse=1.1012, time=0.0494
Iter=300, loss=1.0748, mse=1.0735, time=0.0474
Iter=400, loss=1.0725, mse=1.0712, time=0.0469
Iter=500, loss=1.0373, mse=1.0361, time=0.0462
Iter=600, loss=1.0709, mse=1.0697, time=0.0458
Iter=700, loss=1.0266, mse=1.0254, time=0.0455
Iter=800, loss=1.0370, mse=1.0359, time=0.0453
Iter=900, loss=1.0319, mse=1.0309, time=0.0452
Iter=1000, loss=1.0186, mse=1.0176, time=0.0450
Iter=1100, loss=0.9805, mse=0.9795, time=0.0449
Iter=1200, loss=1.0387, mse=1.0377, time=0.0447
Iter=1300, loss=1.0677, mse=1.0667, time=0.0446
Iter=1400, loss=1.0453, mse=1.0444, time=0.0445
Iter=1500, loss=1.0146, mse=1.0136, time=0.0445
Iter=1600, loss=1.0182, mse=1.0173, time=0.0445
Iter=1700, loss=1.0209, mse=1.0199, time=0.0444
Iter=1800, loss=1.0090, mse=1.0081, time=0.0444
Iter=1900, loss=0.9962, mse=0.9953, time=0.0443
Iter=2000, loss=0.9997, mse=0.9988, time=0.0442
=== Epoch 1, train loss 1.058681, test rm

In [21]:
# Summarise the run and append the summary to log.txt in the run directory.
eval_info = "Training ends. The best testing rmse is {:.6f} at epoch {}".format(best_rmse, best_epoch)
print(eval_info)
with open(os.path.join(args.save_dir, 'log.txt'), 'a') as f:
    # The file is opened in append mode, so terminate the line; otherwise any
    # later append would continue on the same line as this summary.
    f.write(eval_info + "\n")

Training ends. The best testing rmse is 0.949829 at epoch 1


- IGMC의 users, items 확인하기

In [23]:
model.block

Graph(num_nodes=7178, num_edges=160050,
      ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'nlabel': Scheme(shape=(4,), dtype=torch.float32), 'x': Scheme(shape=(4,), dtype=torch.float32)}
      edata_schemes={'etype': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), 'edge_mask': Scheme(shape=(), dtype=torch.float32)})

In [29]:
model.block.ndata

{'_ID': tensor([ 463,    9,   12,  ..., 1819, 1983, 2205], device='cuda:0'), 'nlabel': tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0'), 'x': tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0')}

In [33]:
model.block.ndata['nlabel']

tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0')

In [32]:
model.block.ndata['nlabel'].shape

torch.Size([7178, 4])

In [34]:
model.block.ndata['nlabel'][:, 0]

tensor([1., 0., 0.,  ..., 0., 0., 0.], device='cuda:0')

In [27]:
model.block_x

tensor([[ 0.0336, -0.0045,  0.0624,  ..., -0.0243, -0.0064, -0.0604],
        [ 0.0254,  0.0257,  0.0426,  ...,  0.0041,  0.0395,  0.0285],
        [ 0.0158, -0.0024,  0.0317,  ...,  0.0129,  0.0010,  0.0245],
        ...,
        [ 0.0334,  0.0097,  0.0496,  ...,  0.0013, -0.0036,  0.1082],
        [ 0.0260,  0.0028,  0.0675,  ...,  0.0262, -0.0212,  0.0730],
        [ 0.0312,  0.0139,  0.0371,  ...,  0.0255,  0.0159,  0.0978]],
       device='cuda:0')

In [35]:
model.users.shape

torch.Size([7178])

In [33]:
model.items.shape

torch.Size([7178])

In [34]:
model.concat_states.shape

torch.Size([7178, 128])

In [35]:
model.concat_states

tensor([[-3.3687e-01,  9.8331e-01, -9.7956e-01,  ..., -3.6396e-02,
          2.4104e-02, -9.5912e-02],
        [-3.0464e-01, -6.2658e-02, -4.9929e-01,  ...,  1.0602e-02,
          6.0112e-02,  3.2951e-03],
        [-2.0967e-01,  3.7956e-01, -5.5484e-01,  ...,  1.2818e-02,
          2.0604e-02, -1.4120e-04],
        ...,
        [-1.4452e-01, -5.3499e-02, -1.1899e-01,  ...,  2.1447e-02,
          5.7941e-03,  1.1428e-01],
        [-9.7401e-02,  3.1842e-02,  1.4665e-01,  ...,  3.8342e-02,
         -2.0346e-02,  7.9878e-02],
        [ 2.6682e-03,  1.9309e-02, -1.2328e-01,  ...,  3.9916e-02,
          3.2358e-02,  9.4652e-02]], device='cuda:0')

In [37]:
model.concat_states[model.users]

tensor([[-0.3369,  0.9833, -0.9796,  ..., -0.0364,  0.0241, -0.0959],
        [ 0.9808,  1.0000, -0.9821,  ...,  0.0341, -0.3816, -0.2332],
        [-0.0617,  0.9998, -0.9972,  ..., -0.0597, -0.0965, -0.0768],
        ...,
        [-0.2799,  1.0000, -1.0000,  ..., -0.0302,  0.1381,  0.1117],
        [-0.0940,  0.9992, -0.9904,  ...,  0.0135, -0.0629, -0.0124],
        [-0.2346,  0.9743, -0.9402,  ..., -0.0601, -0.0506, -0.1147]],
       device='cuda:0')

In [38]:
model.concat_states[model.users].shape

torch.Size([32, 128])

In [41]:
model.concat_states[model.items].shape

torch.Size([32, 128])

In [43]:
# Select the rows of concat_states picked out by model.users and model.items,
# then join the two selections side by side along dimension 1.
user_states = model.concat_states[model.users]
item_states = model.concat_states[model.items]
concat = th.cat((user_states, item_states), dim=1)

In [45]:
# The user and item vectors joined into one tensor
concat.shape

torch.Size([32, 256])

### Train_epoch 함수 테스트

In [61]:
# Bind the arguments train_epoch() would receive to notebook-level names so
# its body can be stepped through inline. `model`, `loss_fn` and `optimizer`
# already exist under those exact names, so only the renamed ones are bound.
arr_lambda = args.arr_lambda
loader = train_loader
device = args.device
log_interval = args.train_log_interval

In [62]:
log_interval

100

In [64]:
# Inline copy of the train_epoch() body, run at notebook level so that the
# per-batch variables (inputs, labels, preds, loss) of the last iteration
# remain available for inspection in the following cells.
start_time = time.time()

model.train()

# Accumulators: epoch_loss spans the whole epoch; the iter_* values are reset
# after each progress report; iter_dur collects per-iteration wall times.
epoch_loss = 0.
iter_loss = 0.
iter_mse = 0.
iter_cnt = 0
iter_dur = []

# Train in units of subgraphs
for iter_idx, batch in enumerate(loader, start=1):
    t_start = time.time()

    inputs = batch[0].to(device)
    labels = batch[1].to(device)
    preds = model(inputs)
    # Objective = mean of loss_fn output + arr_lambda * adjacent-rating regularizer.
    loss = loss_fn(preds, labels).mean() + arr_lambda * adj_rating_reg(model)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Weight the scalar loss by the batch size so the averages are per sample.
    epoch_loss += loss.item() * preds.shape[0]
    iter_loss += loss.item() * preds.shape[0]
    iter_mse += ((preds - labels) ** 2).sum().item()
    iter_cnt += preds.shape[0]
    iter_dur.append(time.time() - t_start)

    # Report every log_interval iterations, then reset the window accumulators.
    if iter_idx % log_interval == 0:
        print("Iter={}, loss={:.4f}, mse={:.4f}, time={:.4f}".format(
            iter_idx, iter_loss/iter_cnt, iter_mse/iter_cnt, np.average(iter_dur)))
        iter_loss = 0.
        iter_mse = 0.
        iter_cnt = 0

# Mean sample-weighted loss over the whole dataset.
train_epoch_loss = epoch_loss / len(loader.dataset)

print("  Time took: {:}".format(format_time(time.time() - start_time)))

Iter=100, loss=1.0291, mse=1.0282, time=0.0557
Iter=200, loss=0.9530, mse=0.9521, time=0.0567
Iter=300, loss=0.9845, mse=0.9837, time=0.0542
Iter=400, loss=0.9838, mse=0.9830, time=0.0591
Iter=500, loss=1.0093, mse=1.0084, time=0.0615
Iter=600, loss=0.9993, mse=0.9985, time=0.0615
Iter=700, loss=1.0434, mse=1.0426, time=0.0613
Iter=800, loss=1.0067, mse=1.0059, time=0.0598
Iter=900, loss=1.0343, mse=1.0335, time=0.0581
Iter=1000, loss=0.9951, mse=0.9943, time=0.0571
Iter=1100, loss=1.0109, mse=1.0101, time=0.0570
Iter=1200, loss=0.9816, mse=0.9808, time=0.0570
Iter=1300, loss=1.0083, mse=1.0075, time=0.0563
Iter=1400, loss=1.0086, mse=1.0078, time=0.0557
Iter=1500, loss=0.9849, mse=0.9841, time=0.0566
Iter=1600, loss=0.9397, mse=0.9390, time=0.0571
Iter=1700, loss=1.0033, mse=1.0025, time=0.0567
Iter=1800, loss=0.9553, mse=0.9545, time=0.0567
Iter=1900, loss=0.9699, mse=0.9691, time=0.0563
Iter=2000, loss=0.9503, mse=0.9494, time=0.0558
  Time took: 0:02:03


In [66]:
inputs

Graph(num_nodes=6450, num_edges=139018,
      ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'nlabel': Scheme(shape=(4,), dtype=torch.float32), 'x': Scheme(shape=(4,), dtype=torch.float32)}
      edata_schemes={'etype': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), 'edge_mask': Scheme(shape=(), dtype=torch.float32)})

In [72]:
labels

tensor([1., 3., 2., 3., 0., 2., 1., 4., 4., 3., 4., 0., 1., 4., 1., 1., 3., 2.,
        2., 2., 3., 3., 2., 2., 3., 3., 1., 4., 3., 3., 4., 1.],
       device='cuda:0')

In [73]:
loss

tensor(1.3369, device='cuda:0', grad_fn=<AddBackward0>)

In [76]:
preds.shape[0]

32

In [81]:
train_epoch_loss

0.9925650261491538

In [75]:
preds

tensor([2.1965, 3.2606, 2.1261, 2.1432, 1.3766, 2.8473, 3.2015, 3.3880, 3.1641,
        3.2201, 2.5737, 4.1435, 2.3493, 3.9149, 1.9054, 1.8999, 3.1879, 2.9674,
        1.7518, 2.2147, 3.3358, 3.2158, 2.7466, 2.6548, 2.5784, 2.0909, 2.0695,
        3.3606, 2.8916, 4.1838, 4.0277, 2.7520], device='cuda:0',
       grad_fn=<MulBackward0>)

### Evaluate 함수 테스트

In [34]:
# Bind the arguments evaluate() would receive to notebook-level names.
# `model` already exists under that name, so only the renamed ones are bound.
loader = test_loader
device = args.device

In [57]:
# Inline copy of the evaluate() body, run at notebook level so that preds,
# labels and rmse stay available for inspection in the following cells.
start_time = time.time()

# Evaluate RMSE
model.eval()
mse = 0.
for batch in loader:
    # Forward pass without gradient tracking.
    with th.no_grad():
        preds = model(batch[0].to(device))
    labels = batch[1].to(device)
    # Accumulate the summed squared error of this batch.
    mse += ((preds - labels) ** 2).sum().item()
# Turn the running sum into a mean over the dataset, then take the square root.
mse /= len(loader.dataset)
rmse = np.sqrt(mse)

print("  Time took: {:}".format(format_time(time.time() - start_time)))

  Training epoch took: 0:00:25


In [54]:
preds

tensor([3.6446, 2.4995, 3.3400, 4.1142, 2.6222, 2.7281, 2.7435, 2.6823, 2.5650,
        3.0737, 2.6525, 1.7842, 2.4336, 2.8425, 2.9337, 3.1345, 3.2127, 2.4646,
        2.2066, 2.2617, 2.9193, 2.4082, 2.0911, 3.1148, 1.7831, 3.2895, 2.2131,
        2.8884, 2.1818, 3.5460, 2.3100, 2.8405], device='cuda:0')

In [46]:
labels

tensor([4., 2., 3., 4., 2., 2., 3., 1., 3., 3., 3., 2., 2., 3., 3., 3., 4., 3.,
        2., 1., 2., 3., 3., 3., 1., 4., 2., 3., 3., 4., 4., 3.],
       device='cuda:0')

In [58]:
rmse

0.9488357172065862