In [1]:
import numpy as np
import os, gc, json
import torch.nn
from torch.utils.data import DataLoader
from util.input_data import Dataset
from util.AdaBound import AdaBound
from torch.utils.tensorboard import SummaryWriter

def exec_model(
    scale,
    model_type,
    comment='',
    lr = 1e-5,
    wd = 1e-7,
    tries = 1,
    root_model = 'd:/MODELS/202204/nmm',
    root_data  = 'c:/WORKSPACE_KRICT/DATA/data_snu',
    num_epochs = 300,
    batch_size = 128,
    train_ratio = 0.7,
    valid_ratio = 0.2,
):
    """Train a ``DistNN`` model ``tries`` times and log/checkpoint each run.

    The dataset is read once from ``<root_data>/inputdata_<scale>.pickle``.
    Each try ``n`` uses random seed ``35 + n`` for the train/valid/test split,
    builds a fresh model, an ``AdaBound`` optimizer and an L1 criterion, and
    writes everything into a newly created run directory
    ``<root_model>/<model_type>/<scale>_<NN>[_<comment>]``.

    Per run the directory receives:
      * ``params.json`` with the seed and hyper-parameters,
      * TensorBoard scalars (train/valid loss and MAE) for every epoch,
      * every 20 epochs, ``model.<epoch>.pt`` (state dict) and
        ``pred.<epoch>.txt`` (comma-separated indices, targets, predictions
        from the test split).

    NOTE: ``DistNN`` and ``tr`` are not imported by this cell; they are looked
    up in the notebook's global namespace at call time, so they must be
    imported before this function is called. The model is moved to CUDA
    unconditionally.

    Args:
        scale: Dataset variant name; selects the pickle file and prefixes the
            run directory name.
        model_type: Sub-directory of ``root_model`` for the runs. It is only
            used for the output path; the model class is always ``DistNN``.
        comment: Optional suffix appended to the run directory name.
        lr: Learning rate passed to ``AdaBound``.
        wd: Weight decay passed to ``AdaBound``.
        tries: Number of independent runs (one seed and one directory each).
        root_model: Root directory for model outputs.
        root_data: Directory containing the input pickle files.
        num_epochs: Maximum number of training epochs per run.
        batch_size: Batch size for all three data loaders.
        train_ratio: Passed to ``dataset.train_test_split``.
        valid_ratio: Passed to ``dataset.train_test_split``.

    Returns:
        None.

    Raises:
        RuntimeError: If no free run index (00-98) is left for ``scale``
            under ``<root_model>/<model_type>``.
    """
    gc.collect()

    dataset = Dataset()
    dataset.load_dataset(os.path.join(root_data, f'inputdata_{scale}.pickle'), silent=True)

    for n in range(0, tries):
        rseed  = 35 + n
        train_data, valid_data, test_data = dataset.train_test_split(train_ratio=train_ratio,
                                                                     valid_ratio=valid_ratio,
                                                                     rseed=rseed)
        # Only the training loader shuffles; validation/test keep dataset order.
        train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                                    collate_fn=tr.collate_fn)
        val_data_loader = DataLoader(valid_data, batch_size=batch_size, collate_fn=tr.collate_fn)
        test_data_loader = DataLoader(test_data, batch_size=batch_size, collate_fn=tr.collate_fn)

        model = DistNN(dataset.n_atom_feats, dataset.n_rdf_feature, dataset.n_bdf_feature).cuda()
        optimizer = AdaBound(model.parameters(), lr=lr, weight_decay=wd)
        criterion = torch.nn.L1Loss()

        # Pick the first run index whose '<scale>_<NN>' tag does not occur in
        # any existing entry name, then create that directory.
        root = os.path.join(root_model, model_type)
        os.makedirs(root, exist_ok=True)
        output_root = None  # reset per try so a stale directory is never reused
        for i in range(99):
            run_tag = '{}_{:02d}'.format(scale, i)
            if run_tag not in ' '.join(os.listdir(root)):
                output_root = os.path.join(root, run_tag)
                if len(comment) > 0: output_root += f'_{comment}'
                os.makedirs(output_root)
                break
        if output_root is None:
            raise RuntimeError(
                'no free run index (00-98) left for scale {!r} in {}'.format(scale, root))
        print(output_root)
        with open(os.path.join(output_root, 'params.json'),'w') as f:
            json.dump(dict(random_seed=rseed, learning_rate=lr, weight_decay=wd,
                train_ratio=train_ratio, valid_ratio=valid_ratio, batch_size=batch_size),
                f, indent=4)

        writer = SummaryWriter(output_root)
        try:
            for epoch in range(1, num_epochs+1):
                train_loss, train_mae = tr.train(model, optimizer, train_data_loader, criterion)
                valid_loss, valid_mae, _, _, _ = tr.test(model, val_data_loader, criterion)
                # Abandon the run if the training loss is still above 1 after
                # epoch 20 (presumably a non-convergence guard); the aborting
                # epoch is neither printed nor logged.
                if epoch > 20 and train_loss > 1:
                    break
                print('Epoch [{}/{}]\tTrain/Valid Loss: {:.4f} / {:.4f}\tMAE: {:.4f} / {:.4f}'
                        .format(epoch, num_epochs, train_loss, valid_loss, train_mae, valid_mae))

                writer.add_scalar('train/loss', train_loss, epoch)
                writer.add_scalar('train/MAE', train_mae, epoch)
                writer.add_scalar('valid/loss', valid_loss, epoch)
                writer.add_scalar('valid/MAE', valid_mae, epoch)

                # Every 20 epochs: checkpoint the weights and dump test predictions.
                if epoch%20 == 0:
                    torch.save(model.state_dict(),
                               os.path.join(output_root, 'model.{:05d}.pt'.format(epoch)))
                    _, _, idxs, targets, preds = tr.test(model, test_data_loader, criterion)
                    np.savetxt(os.path.join(output_root, 'pred.{:05d}.txt'.format(epoch)),
                               np.hstack([idxs, targets, preds]), delimiter=',')
        finally:
            # Flush pending events and release the file handle even when
            # training stops early or raises.
            writer.close()

In [2]:
from model.model_02r import DistNN
from util import trainer_mix as tr

# Run one M02R training per metal dataset variant, with identical settings.
for scale in ('metal_FFF', 'metal_TFF', 'metal_TTT'):
    exec_model(
        scale=scale,
        model_type='M02R',
        comment='L1_logL1',
        batch_size=256,
    )

d:/MODELS/202204/nmm\M02R\metal_FFF_01_L1_logL1


	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1050.)
  grad = grad.add(group['weight_decay'], p.data)


Epoch [1/300]	Train/Valid Loss: 4.3221 / 3.2318	MAE: 1.1447 / 0.8762
Epoch [2/300]	Train/Valid Loss: 2.9084 / 2.6019	MAE: 0.8394 / 0.7657
Epoch [3/300]	Train/Valid Loss: 2.5202 / 3.2411	MAE: 0.7573 / 0.9393
Epoch [4/300]	Train/Valid Loss: 2.4580 / 2.3678	MAE: 0.7176 / 0.8017
Epoch [5/300]	Train/Valid Loss: 2.1267 / 1.9340	MAE: 0.5674 / 0.3816
Epoch [6/300]	Train/Valid Loss: 1.6722 / 1.5347	MAE: 0.3072 / 0.2499
Epoch [7/300]	Train/Valid Loss: 1.5612 / 2.5685	MAE: 0.2470 / 0.3586
Epoch [8/300]	Train/Valid Loss: 1.4781 / 1.4239	MAE: 0.2285 / 0.2078
Epoch [9/300]	Train/Valid Loss: 1.4108 / 1.3461	MAE: 0.2106 / 0.1950
Epoch [10/300]	Train/Valid Loss: 1.3079 / 1.4887	MAE: 0.1949 / 0.1836
Epoch [11/300]	Train/Valid Loss: 1.2645 / 1.1705	MAE: 0.1846 / 0.1782
Epoch [12/300]	Train/Valid Loss: 1.1752 / 1.2678	MAE: 0.1795 / 0.2358
Epoch [13/300]	Train/Valid Loss: 1.1282 / 0.9978	MAE: 0.1742 / 0.1641
Epoch [14/300]	Train/Valid Loss: 1.0796 / 1.1331	MAE: 0.1678 / 0.1901
Epoch [15/300]	Train/Valid Lo