In [68]:
import time
import datetime
# Helper for displaying elapsed time
def format_time(elapsed):
    """Render a duration given in seconds as an ``h:mm:ss`` string.

    The value is rounded to the nearest whole second before it is formatted
    through ``datetime.timedelta``.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

# Quick check of format_time: measure a near-zero interval and print it.
start_time = time.time()
print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))

  Training epoch took: 0:00:00


In [69]:
"""Training IGMC model on the MovieLens dataset."""

import os
import sys
import time
import glob
import random
import argparse
from shutil import copy

import numpy as np
import torch as th
import torch.nn as nn
import torch.optim as optim

from model import IGMC
from data_rotten import RottenTomato
from dataset_rotten import RottenTomatoDataset, collate_rotten_tomato
from utils import MetricLogger

In [70]:
def evaluate(model, loader, device):
    """Return the RMSE of ``model`` over every batch served by ``loader``.

    Predictions and labels are both rescaled with ``(x + 1) / 2`` before the
    error is taken (original note: changed to the 0.5 level). Squared errors
    are summed across batches and divided by ``len(loader.dataset)``.
    """
    model.eval()
    sq_err_sum = 0.
    for batch in loader:
        graph, target = batch[0], batch[1]
        # Gradients are not needed for the forward pass during evaluation.
        with th.no_grad():
            scaled_pred = (model(graph.to(device)) + 1) / 2
        scaled_target = (target.to(device) + 1) / 2
        sq_err_sum += th.sum((scaled_pred - scaled_target) ** 2).item()
    return np.sqrt(sq_err_sum / len(loader.dataset))

def adj_rating_reg(model):
    """Sum the adjacent-relation weight penalty over every layer in ``model.convs``.

    For each conv layer the basis weights are combined through ``w_comp`` into
    one ``(in_feat, out_feat)`` matrix per relation; the squared differences
    between consecutive relations' matrices are then summed.
    """
    def _layer_penalty(conv):
        # Flatten each basis so the combination is a single matmul.
        flat_bases = conv.weight.view(conv.num_bases, conv.in_feat * conv.out_feat)
        per_relation = th.matmul(conv.w_comp, flat_bases).view(
            conv.num_rels, conv.in_feat, conv.out_feat)
        step = per_relation[1:] - per_relation[:-1]
        return th.sum(step ** 2)

    return sum(_layer_penalty(conv) for conv in model.convs)

# @profile
def train_epoch(model, loss_fn, optimizer, arr_lambda, loader, device, log_interval):
    """Run one optimization pass over ``loader`` and return the mean loss per sample.

    Each step minimizes ``loss_fn(preds, labels).mean()`` plus
    ``arr_lambda * adj_rating_reg(model)``. Every ``log_interval`` iterations
    the running loss and MSE since the previous report are printed, together
    with the average step time since the start of the epoch.

    Returns:
        Sum of (batch loss * batch size) divided by ``len(loader.dataset)``.
    """
    model.train()

    epoch_loss = 0.
    window_loss = 0.
    window_sq_err = 0.
    window_count = 0
    step_times = []

    for step, batch in enumerate(loader, start=1):
        tick = time.time()

        graphs = batch[0].to(device)
        targets = batch[1].to(device)
        outputs = model(graphs)
        objective = loss_fn(outputs, targets).mean() + arr_lambda * adj_rating_reg(model)

        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch average is per sample.
        n_samples = outputs.shape[0]
        weighted_loss = objective.item() * n_samples
        epoch_loss += weighted_loss
        window_loss += weighted_loss
        window_sq_err += ((outputs - targets) ** 2).sum().item()
        window_count += n_samples
        step_times.append(time.time() - tick)

        if step % log_interval != 0:
            continue

        print("Iter={}, loss={:.4f}, mse={:.4f}, time={:.4f}".format(
            step, window_loss/window_count, window_sq_err/window_count, np.average(step_times)))
        # Start a fresh reporting window (step times keep accumulating).
        window_loss = 0.
        window_sq_err = 0.
        window_count = 0

    return epoch_loss / len(loader.dataset)

def train(args):
    """Train IGMC on the RottenTomato data and record the best evaluation RMSE.

    Builds the datasets and loaders, the model, loss and optimizer, then runs
    ``args.train_epochs`` epochs of ``train_epoch`` followed by ``evaluate``.
    The learning rate is multiplied by ``args.train_lr_decay_factor`` every
    ``args.train_lr_decay_step`` epochs. A summary line is printed and appended
    to ``log.txt`` inside ``args.save_dir``.

    Args:
        args: namespace-like object carrying the settings read below
            (``testing``, ``data_test_ratio``, ``data_valid_ratio``, ``hop``,
            ``sample_ratio``, ``max_nodes_per_hop``, ``batch_size``,
            ``num_workers``, ``edge_dropout``, ``device``, ``train_lr``,
            ``train_lr_decay_step``, ``train_lr_decay_factor``,
            ``train_epochs``, ``train_log_interval``, ``valid_log_interval``,
            ``arr_lambda``, ``save_dir``). An optional ``data_path`` attribute
            overrides the default raw-data folder.
    """
    # This function used to call MovieLens / collate_movielens, which this file
    # never imports (NameError). It now uses the imported RottenTomato loader and
    # collate function, the same way the notebook's step-by-step cells do.
    data_path = getattr(args, 'data_path', './raw_data/rotten_tomato/')
    rotten_tomato = RottenTomato(data_path, testing=args.testing,
                                 test_ratio=args.data_test_ratio, valid_ratio=args.data_valid_ratio)

    # Evaluate on the test split in testing mode, otherwise on the validation split.
    if args.testing:
        eval_pairs = rotten_tomato.test_rating_pairs
        eval_values = rotten_tomato.test_rating_values
    else:
        eval_pairs = rotten_tomato.valid_rating_pairs
        eval_values = rotten_tomato.valid_rating_values
    test_dataset = RottenTomatoDataset(
        eval_pairs, eval_values, rotten_tomato.train_graph,
        args.hop, args.sample_ratio, args.max_nodes_per_hop)
    train_dataset = RottenTomatoDataset(
        rotten_tomato.train_rating_pairs, rotten_tomato.train_rating_values, rotten_tomato.train_graph,
        args.hop, args.sample_ratio, args.max_nodes_per_hop)

    train_loader = th.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                            num_workers=args.num_workers, collate_fn=collate_rotten_tomato)
    test_loader = th.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False,
                            num_workers=args.num_workers, collate_fn=collate_rotten_tomato)

    in_feats = (args.hop+1)*2 #+ rotten_tomato.train_graph.ndata['refex'].shape[1]
    model = IGMC(in_feats=in_feats,
                 latent_dim=[32, 32, 32, 32],
                 # 10 relations, matching the notebook's RottenTomato model cell
                 # (the MovieLens-era value was 5) -- TODO confirm against
                 # rotten_tomato.num_rating.
                 num_relations=10, # rotten_tomato.num_rating,
                 num_bases=4,
                 regression=True,
                 edge_dropout=args.edge_dropout,
                #  side_features=args.use_features,
                #  n_side_features=n_features,
                #  multiply_by=args.multiply_by
            ).to(args.device)
    loss_fn = nn.MSELoss().to(args.device)
    optimizer = optim.Adam(model.parameters(), lr=args.train_lr, weight_decay=0)
    print("Loading network finished ...\n")

    ### prepare the logger
    logger = MetricLogger(args.save_dir, args.valid_log_interval)

    best_epoch = 0
    best_rmse = np.inf
    ### declare the loss information
    print("Start training ...")
    for epoch_idx in range(1, args.train_epochs+1):
        print ('Epoch', epoch_idx)

        train_loss = train_epoch(model, loss_fn, optimizer, args.arr_lambda,
                                train_loader, args.device, args.train_log_interval)
        test_rmse = evaluate(model, test_loader, args.device)
        # Insertion order matters: the format call below unpacks the values positionally.
        eval_info = {
            'epoch': epoch_idx,
            'train_loss': train_loss,
            'test_rmse': test_rmse,
        }
        print('=== Epoch {}, train loss {:.6f}, test rmse {:.6f} ==='.format(*eval_info.values()))

        # Step decay of the learning rate on every param group.
        if epoch_idx % args.train_lr_decay_step == 0:
            for param in optimizer.param_groups:
                param['lr'] = args.train_lr_decay_factor * param['lr']

        logger.log(eval_info, model, optimizer)
        if best_rmse > test_rmse:
            best_rmse = test_rmse
            best_epoch = epoch_idx
    eval_info = "Training ends. The best testing rmse is {:.6f} at epoch {}".format(best_rmse, best_epoch)
    print(eval_info)
    with open(os.path.join(args.save_dir, 'log.txt'), 'a') as f:
        f.write(eval_info)

In [71]:
def config():
    """Parse command-line options, prepare the run's log directory and return the args.

    Reads options from ``sys.argv`` via argparse, resolves ``args.device`` to a
    ``torch.device``, creates ``args.save_dir`` under ``log/``, copies the
    ``*.py`` files of the current directory into it and appends the invoking
    command line to ``cmd_input.txt`` there.

    Returns:
        argparse.Namespace: parsed options with ``device`` and ``save_dir`` filled in.
    """
    parser = argparse.ArgumentParser(description='IGMC')
    # general settings
    parser.add_argument('--testing', action='store_true', default=False,
                        help='if set, use testing mode which splits all ratings into train/test;\
                        otherwise, use validation model which splits all ratings into \
                        train/val/test and evaluate on val only')
    # argparse passes string defaults through `type`, so '0' becomes the int 0.
    parser.add_argument('--device', default='0', type=int,
                        help='Running device. E.g `--device 0`, if using cpu, set `--device -1`')
    parser.add_argument('--seed', type=int, default=1234, metavar='S',
                        help='random seed (default: 1234)')
    parser.add_argument('--data_name', default='ml-100k', type=str,
                        help='The dataset name: ml-100k, ml-1m')
    parser.add_argument('--data_test_ratio', type=float, default=0.1) # for ml-100k the test ration is 0.2
    parser.add_argument('--num_workers', type=int, default=8)
    parser.add_argument('--data_valid_ratio', type=float, default=0.2)
    # parser.add_argument('--ensemble', action='store_true', default=False,
    #                     help='if True, load a series of model checkpoints and ensemble the results')
    parser.add_argument('--train_log_interval', type=int, default=100)
    parser.add_argument('--valid_log_interval', type=int, default=10)
    parser.add_argument('--save_appendix', type=str, default='debug',
                        help='what to append to save-names when saving results')
    # subgraph extraction settings
    # type=int added: without it a value given on the command line stays a str
    # and later arithmetic such as (args.hop+1)*2 raises TypeError.
    parser.add_argument('--hop', type=int, default=1, metavar='S',
                        help='enclosing subgraph hop number')
    parser.add_argument('--sample_ratio', type=float, default=1.0,
                        help='if < 1, subsample nodes per hop according to the ratio')
    parser.add_argument('--max_nodes_per_hop', type=int, default=200,
                        help='if > 0, upper bound the # nodes per hop by another subsampling')
    # parser.add_argument('--use_features', action='store_true', default=False,
    #                     help='whether to use node features (side information)')
    # edge dropout settings
    parser.add_argument('--edge_dropout', type=float, default=0.2,
                        help='if not 0, random drops edges from adjacency matrix with this prob')
    parser.add_argument('--force_undirected', action='store_true', default=False,
                        help='in edge dropout, force (x, y) and (y, x) to be dropped together')
    # optimization settings
    parser.add_argument('--train_lr', type=float, default=1e-3)
    parser.add_argument('--train_min_lr', type=float, default=1e-6)
    parser.add_argument('--train_lr_decay_factor', type=float, default=0.1)
    parser.add_argument('--train_lr_decay_step', type=int, default=50)
    parser.add_argument('--train_epochs', type=int, default=80)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--arr_lambda', type=float, default=0.001)
    parser.add_argument('--num_rgcn_bases', type=int, default=4)

    args = parser.parse_args()
    # Fall back to CPU when a negative index is given or CUDA is unavailable.
    args.device = th.device(args.device) if args.device >= 0 and th.cuda.is_available() else th.device('cpu')

    ### set save_dir according to localtime and test mode
    # NOTE: '__file__' is a string literal, so realpath resolves it against the
    # current working directory and file_dir is the working directory.
    file_dir = os.path.dirname(os.path.realpath('__file__'))
    val_test_appendix = 'testmode' if args.testing else 'valmode'
    local_time = time.strftime('%y%m%d%H%M', time.localtime())
    args.save_dir = os.path.join(
        file_dir, 'log/{}_{}_{}_{}'.format(
            args.data_name, args.save_appendix, val_test_appendix, local_time
        )
    )
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    print(args)

    # backup current .py files
    for f in glob.glob(r"*.py"):
        copy(f, args.save_dir)

    # save command line input
    cmd_input = 'python3 ' + ' '.join(sys.argv)
    with open(os.path.join(args.save_dir, 'cmd_input.txt'), 'a') as f:
        f.write(cmd_input)
        f.write("\n")
    print('Command line input: ' + cmd_input + ' is saved.')

    return args

In [72]:
# if __name__ == '__main__':
#     args = config()
#     random.seed(args.seed)
#     np.random.seed(args.seed)
#     th.manual_seed(args.seed)
#     if th.cuda.is_available():
#         th.cuda.manual_seed_all(args.seed)
#     train(args)

## 1. Config

In [87]:
import easydict

# Notebook stand-in for config(): the same settings as an attribute-style dict.
# 'train_epochs' used to appear twice in this literal (10, then 5). Python keeps
# the last value for a duplicated key, so the effective value was 5; only that
# entry is kept.
args = easydict.EasyDict({
    'data_name':            'rotten',
    'testing':              True,
    'device':               0,
    'seed':                 1234,
    'data_test_ratio':      0.1,
    'num_workers':          8,
    'data_valid_ratio':     0.2,
    'train_log_interval':   200,
    'valid_log_interval':   10,
    'save_appendix':        'debug',
    'hop':                  1,
    'sample_ratio':         1.0,
    'max_nodes_per_hop':    100,
    'edge_dropout':         0.2,
    'force_undirected':     False,
    'train_lr':             1e-3,
    'train_min_lr':         1e-6,
    'train_lr_decay_factor': 0.1,
    'train_lr_decay_step':  50,
    'train_epochs':         5,
    'batch_size':           32,
    'arr_lambda':           0.001,
    'num_rgcn_bases':       4,
})

In [88]:
### set save_dir according to localtime and test mode
# NOTE: '__file__' is a string literal here, so realpath resolves it against the
# current working directory and file_dir ends up being the working directory.
file_dir = os.path.dirname(os.path.realpath('__file__'))
val_test_appendix = 'testmode' if args.testing else 'valmode'
# Timestamp with minute resolution (yymmddHHMM) makes the directory name per-run.
local_time = time.strftime('%y%m%d%H%M', time.localtime())
args.save_dir = os.path.join(
    file_dir, 'log/{}_{}_{}_{}'.format(
        args.data_name, args.save_appendix, val_test_appendix, local_time
    )
)
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir) 
print(args)

# backup current .py files
for f in glob.glob(r"*.py"):
    copy(f, args.save_dir)

# save command line input
# sys.argv is whatever launched this process; the file is opened in append mode.
cmd_input = 'python3 ' + ' '.join(sys.argv)
with open(os.path.join(args.save_dir, 'cmd_input.txt'), 'a') as f:
    f.write(cmd_input)
    f.write("\n")
print('Command line input: ' + cmd_input + ' is saved.')

{'data_name': 'rotten', 'testing': True, 'device': 0, 'seed': 1234, 'data_test_ratio': 0.1, 'num_workers': 8, 'data_valid_ratio': 0.2, 'train_log_interval': 200, 'valid_log_interval': 10, 'save_appendix': 'debug', 'hop': 1, 'sample_ratio': 1.0, 'max_nodes_per_hop': 100, 'edge_dropout': 0.2, 'force_undirected': False, 'train_lr': 0.001, 'train_min_lr': 1e-06, 'train_lr_decay_factor': 0.1, 'train_lr_decay_step': 50, 'train_epochs': 5, 'batch_size': 32, 'arr_lambda': 0.001, 'num_rgcn_bases': 4, 'save_dir': 'C:\\Users\\user\\Jupyter_project\\keejun\\IGMC_CX\\log/rotten_debug_testmode_2111131905'}
Command line input: python3 C:\Users\user\anaconda3\envs\graph\lib\site-packages\ipykernel_launcher.py -f C:\Users\user\AppData\Roaming\jupyter\runtime\kernel-7883924d-4dcf-438b-ba31-11be03f242f6.json is saved.


In [89]:
args

{'data_name': 'rotten',
 'testing': True,
 'device': 0,
 'seed': 1234,
 'data_test_ratio': 0.1,
 'num_workers': 8,
 'data_valid_ratio': 0.2,
 'train_log_interval': 200,
 'valid_log_interval': 10,
 'save_appendix': 'debug',
 'hop': 1,
 'sample_ratio': 1.0,
 'max_nodes_per_hop': 100,
 'edge_dropout': 0.2,
 'force_undirected': False,
 'train_lr': 0.001,
 'train_min_lr': 1e-06,
 'train_lr_decay_factor': 0.1,
 'train_lr_decay_step': 50,
 'train_epochs': 5,
 'batch_size': 32,
 'arr_lambda': 0.001,
 'num_rgcn_bases': 4,
 'save_dir': 'C:\\Users\\user\\Jupyter_project\\keejun\\IGMC_CX\\log/rotten_debug_testmode_2111131905'}

In [90]:
# Seed Python's random, NumPy and PyTorch (plus every CUDA device when
# available) from args.seed.
random.seed(args.seed)
np.random.seed(args.seed)
th.manual_seed(args.seed)
if th.cuda.is_available():
    th.cuda.manual_seed_all(args.seed)    

In [91]:
# start_time = time.time()

# train(args)

# print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))

## 2. Train

In [92]:
### prepare data and set model
# Build the RottenTomato data object from the local raw-data folder, using the
# testing flag and split ratios from args.
path = './raw_data/rotten_tomato/'
rotten_tomato = RottenTomato(path, testing=args.testing,test_ratio=args.data_test_ratio, valid_ratio=args.data_valid_ratio)

Create RottenTomato Class...
	Train rating pairs : 216328
	Valid rating pairs : 43266
	Test rating pairs  : 28766


In [93]:
# Evaluation set: the test split when args.testing is set, otherwise the
# validation split. Both variants are built with the training graph.
if args.testing:
    test_dataset = RottenTomatoDataset(
        rotten_tomato.test_rating_pairs, rotten_tomato.test_rating_values, rotten_tomato.train_graph, 
        args.hop, args.sample_ratio, args.max_nodes_per_hop) 
else:
    test_dataset = RottenTomatoDataset(
        rotten_tomato.valid_rating_pairs, rotten_tomato.valid_rating_values, rotten_tomato.train_graph, 
        args.hop, args.sample_ratio, args.max_nodes_per_hop)

In [94]:
# Training set: the training rating pairs/values over the training graph, with
# the same hop / sampling settings as the evaluation set.
train_dataset = RottenTomatoDataset(
    rotten_tomato.train_rating_pairs, rotten_tomato.train_rating_values, rotten_tomato.train_graph, 
    args.hop, args.sample_ratio, args.max_nodes_per_hop)

In [95]:
# Batch both datasets with the project's collate function; only the training
# loader shuffles.
train_loader = th.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, 
                        num_workers=args.num_workers, collate_fn=collate_rotten_tomato)
test_loader = th.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, 
                        num_workers=args.num_workers, collate_fn=collate_rotten_tomato)

In [96]:
# Input feature width is 2 * (hop + 1), i.e. 4 for hop=1.
in_feats = (args.hop+1)*2 #+ rotten_tomato.train_graph.ndata['refex'].shape[1]
# IGMC in regression mode with 10 relation types (hard-coded here; the trailing
# comment suggests it should equal rotten_tomato.num_rating -- TODO confirm).
model = IGMC(in_feats=in_feats, 
             latent_dim=[32, 32, 32, 32],
             num_relations=10, # rotten_tomato.num_rating, 
             num_bases=4, 
             regression=True, 
             edge_dropout=args.edge_dropout,
            #  side_features=args.use_features,
            #  n_side_features=n_features,
            #  multiply_by=args.multiply_by
        ).to(args.device)
# MSE objective optimized with Adam, no weight decay.
loss_fn = nn.MSELoss().to(args.device)
optimizer = optim.Adam(model.parameters(), lr=args.train_lr, weight_decay=0)
print("Loading network finished ...\n")

Loading network finished ...



In [97]:
### prepare the logger
logger = MetricLogger(args.save_dir, args.valid_log_interval)

# Best-so-far trackers, updated by the training loop in the next cell.
best_epoch = 0
best_rmse = np.inf
### declare the loss information
print("Start training ...")

Start training ...


In [98]:
# Full training run: one train_epoch + evaluate per epoch, timed as a whole.
start_time = time.time()

for epoch_idx in range(1, args.train_epochs+1):
    print ('Epoch', epoch_idx)

    train_loss = train_epoch(model, loss_fn, optimizer, args.arr_lambda, 
                            train_loader, args.device, args.train_log_interval)
    test_rmse = evaluate(model, test_loader, args.device)
    # Insertion order matters: the format call below unpacks the values positionally.
    eval_info = {
        'epoch': epoch_idx,
        'train_loss': train_loss,
        'test_rmse': test_rmse,
    }
    print('=== Epoch {}, train loss {:.6f}, test rmse {:.6f} ==='.format(*eval_info.values()))

    # Step decay: scale every param group's lr each train_lr_decay_step epochs.
    if epoch_idx % args.train_lr_decay_step == 0:
        for param in optimizer.param_groups:
            param['lr'] = args.train_lr_decay_factor * param['lr']

    logger.log(eval_info, model, optimizer)
    # Remember the epoch with the lowest evaluation RMSE.
    if best_rmse > test_rmse:
        best_rmse = test_rmse
        best_epoch = epoch_idx

# NOTE: start_time is set before the loop, so this is the duration of all
# epochs together, not of a single epoch.
print("  Training epoch took: {:}".format(format_time(time.time() - start_time)))

Epoch 1
Iter=200, loss=4.6878, mse=4.6826, time=0.0423
Iter=400, loss=3.1012, mse=3.0963, time=0.0414
Iter=600, loss=2.9967, mse=2.9920, time=0.0421
Iter=800, loss=3.1726, mse=3.1681, time=0.0428
Iter=1000, loss=3.1550, mse=3.1507, time=0.0432
Iter=1200, loss=2.9857, mse=2.9815, time=0.0434
Iter=1400, loss=2.8858, mse=2.8818, time=0.0437
Iter=1600, loss=2.9334, mse=2.9296, time=0.0436
Iter=1800, loss=2.9084, mse=2.9049, time=0.0436
Iter=2000, loss=3.0356, mse=3.0322, time=0.0437
Iter=2200, loss=2.9112, mse=2.9078, time=0.0440
Iter=2400, loss=2.9881, mse=2.9850, time=0.0444
Iter=2600, loss=2.9258, mse=2.9228, time=0.0442
Iter=2800, loss=2.9469, mse=2.9440, time=0.0442
Iter=3000, loss=2.9679, mse=2.9652, time=0.0443
Iter=3200, loss=2.7732, mse=2.7705, time=0.0442
Iter=3400, loss=2.8574, mse=2.8547, time=0.0444
Iter=3600, loss=2.9217, mse=2.9191, time=0.0454
Iter=3800, loss=2.8487, mse=2.8461, time=0.0458
Iter=4000, loss=2.9135, mse=2.9110, time=0.0457
Iter=4200, loss=2.8737, mse=2.8712, 

In [99]:
# Report the best RMSE seen during training and append the same line to
# log.txt in the run directory (no trailing newline is written).
eval_info = "Training ends. The best testing rmse is {:.6f} at epoch {}".format(best_rmse, best_epoch)
print(eval_info)
with open(os.path.join(args.save_dir, 'log.txt'), 'a') as f:
    f.write(eval_info)

Training ends. The best testing rmse is 0.804960 at epoch 2


- IGMC의 users, items 확인하기

In [25]:
model.block

Graph(num_nodes=2654, num_edges=25262,
      ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'nlabel': Scheme(shape=(4,), dtype=torch.float32), 'x': Scheme(shape=(4,), dtype=torch.float32)}
      edata_schemes={'etype': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), 'edge_mask': Scheme(shape=(), dtype=torch.float32)})

In [26]:
model.block.ndata

{'_ID': tensor([ 247,  169,  337,  ..., 9177, 9182, 9497], device='cuda:0'), 'nlabel': tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0'), 'x': tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0')}

In [27]:
model.block.ndata['x']

tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0')

In [28]:
model.block.ndata['nlabel']

tensor([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], device='cuda:0')

In [29]:
model.block.ndata['nlabel'].shape

torch.Size([2654, 4])

In [30]:
model.block.ndata['nlabel'][:, 0]

tensor([1., 0., 0.,  ..., 0., 0., 0.], device='cuda:0')

In [31]:
model.block_x

tensor([[-1.0000, -1.0000,  1.0000,  ..., -1.0000, -1.0000,  1.0000],
        [-0.9996, -1.0000,  1.0000,  ..., -0.9994, -0.9998,  0.9969],
        [-0.9911, -0.9853,  0.7839,  ..., -0.9915, -0.9761,  0.2078],
        ...,
        [-0.9998, -0.7304,  0.9367,  ..., -0.9985, -0.0034,  0.9997],
        [-0.9998, -0.8659,  0.9912,  ..., -0.9969, -0.2373,  0.9999],
        [-0.9995, -0.9325, -0.9580,  ..., -0.9998, -0.8721, -0.6978]],
       device='cuda:0')

In [32]:
model.users

tensor([ True, False, False,  ..., False, False, False], device='cuda:0')

In [33]:
model.users.shape

torch.Size([2654])

In [34]:
model.items.shape

torch.Size([2654])

In [35]:
model.concat_states.shape

torch.Size([2654, 128])

In [36]:
model.concat_states

tensor([[ 0.1139, -0.2489,  0.8683,  ..., -1.0000, -1.0000,  1.0000],
        [-0.0636,  0.2562,  0.3722,  ..., -0.9994, -0.9998,  0.9969],
        [-0.1214,  0.3484,  0.3754,  ..., -0.9915, -0.9761,  0.2078],
        ...,
        [-0.0840,  0.0495,  0.2329,  ..., -0.9985, -0.0034,  0.9997],
        [-0.1129, -0.1401,  0.2771,  ..., -0.9969, -0.2373,  0.9999],
        [-0.0653, -0.0418,  0.3392,  ..., -0.9998, -0.8721, -0.6978]],
       device='cuda:0')

In [37]:
model.concat_states[model.users]

tensor([[ 0.1139, -0.2489,  0.8683,  ..., -1.0000, -1.0000,  1.0000],
        [-0.1391,  0.3786,  0.4333,  ..., -0.9854, -0.9918,  0.8105],
        [ 0.1877, -0.6180,  0.8853,  ..., -1.0000, -1.0000,  1.0000],
        ...,
        [-0.3070,  0.2917,  0.6954,  ..., -1.0000, -1.0000,  1.0000],
        [-0.3070,  0.2917,  0.6954,  ..., -1.0000, -1.0000,  1.0000],
        [-0.0816,  0.4657,  0.5205,  ..., -1.0000, -1.0000,  1.0000]],
       device='cuda:0')

In [38]:
model.concat_states[model.users].shape

torch.Size([30, 128])

In [39]:
model.concat_states[model.items].shape

torch.Size([30, 128])

In [40]:
# Put the rows selected by the user mask next to the rows selected by the item
# mask, concatenating along dim 1.
concat = th.cat([model.concat_states[model.users], model.concat_states[model.items]], 1)

In [41]:
# The user and item vectors joined together
concat.shape

torch.Size([30, 256])

### Train_epoch 함수 테스트

In [42]:
# Bind the names train_epoch takes as parameters so its body can be run inline
# in a later cell. NOTE: the first three lines are self-assignments and have
# no effect; they only mirror the function signature.
model = model
loss_fn = loss_fn
optimizer = optimizer
arr_lambda = args.arr_lambda
loader = train_loader
device = args.device
log_interval = args.train_log_interval

In [24]:
log_interval

200

In [25]:
# Inline copy of train_epoch's body, run at cell level so the intermediate
# values (inputs, labels, preds, loss) stay available for inspection afterwards.
start_time = time.time()

model.train()

epoch_loss = 0.
iter_loss = 0.
iter_mse = 0.
iter_cnt = 0
iter_dur = []

# Train in units of subgraphs
for iter_idx, batch in enumerate(loader, start=1):
    t_start = time.time()

    inputs = batch[0].to(device)
    labels = batch[1].to(device)
    preds = model(inputs)
    # Prediction loss plus the adjacent-rating regularizer scaled by arr_lambda.
    loss = loss_fn(preds, labels).mean() + arr_lambda * adj_rating_reg(model)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Weight the batch loss by batch size so the epoch average is per sample.
    epoch_loss += loss.item() * preds.shape[0]
    iter_loss += loss.item() * preds.shape[0]
    iter_mse += ((preds - labels) ** 2).sum().item()
    iter_cnt += preds.shape[0]
    iter_dur.append(time.time() - t_start)

    if iter_idx % log_interval == 0:
        print("Iter={}, loss={:.4f}, mse={:.4f}, time={:.4f}".format(
            iter_idx, iter_loss/iter_cnt, iter_mse/iter_cnt, np.average(iter_dur)))
        # Reset the reporting window; iter_dur keeps accumulating.
        iter_loss = 0.
        iter_mse = 0.
        iter_cnt = 0

train_epoch_loss = epoch_loss / len(loader.dataset)

print("  Time took: {:}".format(format_time(time.time() - start_time)))

Iter=200, loss=2.8242, mse=2.8212, time=0.0387
Iter=400, loss=2.8218, mse=2.8187, time=0.0394
Iter=600, loss=2.8402, mse=2.8370, time=0.0385
Iter=800, loss=2.7299, mse=2.7267, time=0.0385
Iter=1000, loss=2.7731, mse=2.7698, time=0.0384
Iter=1200, loss=2.6972, mse=2.6941, time=0.0384
Iter=1400, loss=2.6883, mse=2.6853, time=0.0382
Iter=1600, loss=2.8233, mse=2.8202, time=0.0381
Iter=1800, loss=2.6687, mse=2.6657, time=0.0384
Iter=2000, loss=2.6620, mse=2.6590, time=0.0386
Iter=2200, loss=2.7521, mse=2.7491, time=0.0387
Iter=2400, loss=2.6550, mse=2.6518, time=0.0385
Iter=2600, loss=2.5922, mse=2.5890, time=0.0386
Iter=2800, loss=2.7456, mse=2.7424, time=0.0388
Iter=3000, loss=2.7262, mse=2.7228, time=0.0396
Iter=3200, loss=2.7200, mse=2.7165, time=0.0403
Iter=3400, loss=2.7538, mse=2.7502, time=0.0402
Iter=3600, loss=2.6883, mse=2.6845, time=0.0400
  Time took: 0:02:40


In [26]:
inputs

Graph(num_nodes=4775, num_edges=125174,
      ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'nlabel': Scheme(shape=(4,), dtype=torch.float32), 'x': Scheme(shape=(4,), dtype=torch.float32)}
      edata_schemes={'etype': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), 'edge_mask': Scheme(shape=(), dtype=torch.float32)})

In [27]:
labels

tensor([7., 3., 5., 6., 8., 6., 7., 8., 5., 6., 5., 7., 7., 9., 9., 9., 5., 7.,
        5., 7., 3., 8., 3., 3., 2., 6., 9., 5., 5., 7., 7., 5.],
       device='cuda:0')

In [28]:
loss

tensor(2.5001, device='cuda:0', grad_fn=<AddBackward0>)

In [29]:
preds.shape[0]

32

In [30]:
train_epoch_loss

2.727407252259081

In [31]:
preds

tensor([7.0923, 5.4808, 6.1765, 8.1413, 7.6566, 4.6887, 6.3527, 6.4477, 6.0566,
        5.4327, 5.4816, 7.4522, 5.6513, 8.6706, 5.8030, 7.2858, 4.5068, 8.8621,
        3.4885, 7.3030, 3.4780, 6.1335, 2.9561, 4.1378, 5.3948, 6.8066, 5.6749,
        5.6852, 4.4658, 7.3438, 9.4554, 7.2134], device='cuda:0',
       grad_fn=<MulBackward0>)

### Evaluate 함수 테스트

In [43]:
# Bind the names evaluate takes as parameters so its body can be run inline in
# a later cell. NOTE: `model = model` is a self-assignment with no effect.
model = model
loader = test_loader
device = args.device

In [63]:
# Inline copy of evaluate's body that additionally keeps every batch's rescaled
# labels and predictions for inspection.
start_time = time.time()
predict_ratings = list()
real_ratings = list()

# Evaluate RMSE
model.eval()
mse = 0.
for batch in loader:
    # Forward pass without gradient tracking; predictions and labels are both
    # rescaled with (x + 1) / 2 before the squared error is accumulated.
    with th.no_grad():
        preds = (model(batch[0].to(device)) + 1)/ 2
    labels = (batch[1].to(device) + 1)/ 2
    mse += ((preds - labels) ** 2).sum().item()
    
    real_ratings.append(labels)
    predict_ratings.append(preds)
    
# Average over the number of samples in the dataset, then take the root.
mse /= len(loader.dataset)
rmse = np.sqrt(mse)

print("  Time took: {:}".format(format_time(time.time() - start_time)))

  Time took: 0:00:27


In [64]:
preds

tensor([2.9968, 3.1041, 2.8940, 3.9403, 4.1405, 3.1857, 3.4408, 4.0894, 2.6228,
        3.2692, 2.8797, 2.1028, 2.8179, 3.5234, 3.3022, 4.0670, 4.0670, 2.9974,
        2.1890, 2.4611, 1.2208, 2.3978, 3.7354, 2.5466, 3.1223, 3.4322, 2.1431,
        2.5878, 2.5878, 3.6192], device='cuda:0')

In [65]:
labels

tensor([3.5000, 3.5000, 3.5000, 5.0000, 4.0000, 4.5000, 4.5000, 4.0000, 4.0000,
        3.5000, 4.5000, 1.0000, 3.0000, 2.5000, 4.0000, 2.0000, 2.0000, 4.0000,
        2.5000, 2.5000, 2.5000, 2.5000, 5.0000, 3.0000, 3.5000, 5.0000, 3.5000,
        3.0000, 3.0000, 4.5000], device='cuda:0')

In [66]:
rmse

0.838736481576567

In [57]:
predict_ratings[0][:10]

tensor([2.7650, 2.8793, 2.5193, 2.8136, 3.7364, 2.8414, 2.8343, 3.5618, 3.6456,
        3.5358], device='cuda:0')

In [58]:
real_ratings[0][:10]

tensor([3.0000, 3.0000, 3.0000, 2.5000, 3.0000, 3.0000, 3.0000, 4.5000, 3.0000,
        2.5000], device='cuda:0')