In [2]:
import numpy as np
import os, gc, json
import torch.nn
from torch.utils.data import DataLoader
from util.input_data import Dataset
from util.AdaBound import AdaBound
from torch.utils.tensorboard import SummaryWriter

def exec_model(
    scale,
    model_type,
    dataset,
    comment='',
    lr = 1e-5,
    wd = 1e-7,
    tries = 1,
    root_model = 'd:/MODELS/202204/nmm',
    num_epochs = 300,
    batch_size = 128,
    train_ratio = 0.7,
    valid_ratio = 0.2,
    metal_ratio = 1,
):
    """Train a ``DistNN`` model ``tries`` times and log/checkpoint each run.

    Relies on two names that are NOT imported in this cell and must already
    exist in the notebook namespace when the function is called:
    ``DistNN`` (the model class) and ``tr`` (a trainer module providing
    ``collate_fn``, ``train`` and ``test``).

    For every try a fresh split, model, optimizer and output directory are
    created. Per epoch, train/validation loss and MAE are printed and written
    to TensorBoard; every 20th epoch the model weights and the test-set
    predictions are saved into the run directory.

    Args:
        scale: Label used as the prefix of the run directory name.
        model_type: Sub-directory of ``root_model`` that holds the runs. It
            only affects the output path; the model built is always ``DistNN``.
        dataset: Object exposing ``train_test_split(...)`` and the attributes
            ``n_atom_feats``, ``n_rdf_feature`` and ``n_bdf_feature``.
        comment: Optional suffix appended to the run directory name.
        lr: Learning rate passed to ``AdaBound``.
        wd: Weight decay passed to ``AdaBound``.
        tries: Number of independent runs; run ``n`` uses random seed ``35 + n``.
        root_model: Base directory under which all runs are stored.
        num_epochs: Number of training epochs per run.
        batch_size: Batch size for the train/validation/test loaders.
        train_ratio: Forwarded to ``dataset.train_test_split``.
        valid_ratio: Forwarded to ``dataset.train_test_split``.
        metal_ratio: Forwarded to ``dataset.train_test_split``.

    Returns:
        None. All results are written to disk.

    Raises:
        RuntimeError: If none of the 99 numbered run-directory names is free.
    """
    gc.collect()
    torch.cuda.empty_cache()

    for n in range(0, tries):
        rseed = 35 + n
        train_data, valid_data, test_data = dataset.train_test_split(
            train_ratio=train_ratio,
            valid_ratio=valid_ratio,
            rseed=rseed,
            metal_ratio=metal_ratio,
        )
        # Only the training loader shuffles.
        train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                                       collate_fn=tr.collate_fn)
        val_data_loader = DataLoader(valid_data, batch_size=batch_size, collate_fn=tr.collate_fn)
        test_data_loader = DataLoader(test_data, batch_size=batch_size, collate_fn=tr.collate_fn)

        model = DistNN(dataset.n_atom_feats, dataset.n_rdf_feature, dataset.n_bdf_feature).cuda()
        optimizer = AdaBound(model.parameters(), lr=lr, weight_decay=wd)
        criterion = torch.nn.L1Loss()

        # Pick the first index whose '<scale>_<NN>' name does not occur anywhere
        # in the existing entries (substring match, so a directory with any
        # comment suffix also marks its index as taken).
        root = os.path.join(root_model, model_type)
        os.makedirs(root, exist_ok=True)
        existing_names = ' '.join(os.listdir(root))
        for i in range(99):
            run_name = '{}_{:02d}'.format(scale, i)
            if run_name not in existing_names:
                output_root = os.path.join(root, run_name)
                if len(comment) > 0:
                    output_root += f'_{comment}'
                os.makedirs(output_root)
                break
        else:
            # Without this guard output_root would be unbound on the first try,
            # or silently point at the previous try's directory on later ones.
            raise RuntimeError(
                'no free run directory for scale {!r} under {!r} (indices 00-98 all taken)'
                .format(scale, root)
            )
        print(output_root)

        # Record the hyper-parameters of this run next to its outputs.
        with open(os.path.join(output_root, 'params.json'), 'w') as f:
            json.dump(dict(random_seed=rseed, learning_rate=lr, weight_decay=wd,
                train_ratio=train_ratio, valid_ratio=valid_ratio, batch_size=batch_size,
                metal_ratio=metal_ratio),
                f, indent=4)

        writer = SummaryWriter(output_root)
        try:
            for epoch in range(1, num_epochs+1):
                train_loss, train_mae = tr.train(model, optimizer, train_data_loader, criterion)
                # Only the first two returned values are needed here.
                valid_loss, valid_mae, *_ = tr.test(model, val_data_loader, criterion)

                print('Epoch [{}/{}]\tTrain / Valid Loss: {:.4f} / {:.4f}\tMAE: {:.4f} / {:.4f}'
                        .format(epoch, num_epochs, train_loss, valid_loss, train_mae, valid_mae))

                writer.add_scalar('train/loss', train_loss, epoch)
                writer.add_scalar('train/MAE', train_mae, epoch)
                writer.add_scalar('valid/loss', valid_loss, epoch)
                writer.add_scalar('valid/MAE', valid_mae, epoch)

                if epoch%20 == 0:
                    torch.save(model.state_dict(),
                               os.path.join(output_root, 'model.{:05d}.pt'.format(epoch)))
                    # Take the LAST three values so this agrees with the
                    # validation call above regardless of how many leading
                    # metrics tr.test returns (the former fixed 5-name unpack
                    # disagreed with the 6-name unpack used for validation).
                    # NOTE(review): assumes the trailing values are
                    # (idxs, targets, preds) - confirm against tr.test.
                    *_, idxs, targets, preds = tr.test(model, test_data_loader, criterion)
                    np.savetxt(os.path.join(output_root, 'pred.{:05d}.txt'.format(epoch)),
                               np.hstack([idxs, targets, preds]), delimiter=',')
        finally:
            # Flush and release the event file even if training is interrupted.
            writer.close()

In [10]:
from model.model_02r import DistNN
from util import trainer_mix2 as tr
# One Dataset instance is reused; load_dataset is called again for each scale.
dataset = Dataset()

for scale in ('metal_TTT',):
    input_pickle = f'c:/WORKSPACE_KRICT/DATA/data_snu/inputdata_{scale}.pickle'
    dataset.load_dataset(input_pickle, True)

    # Run one training per metal_ratio value on the currently loaded data.
    for mr in (0.2, 0.4, 0.6, 1):
        exec_model(
            scale=scale,
            model_type='M02R',
            dataset=dataset,
            comment='M2_L1_logL1',
            metal_ratio=mr,
            batch_size=256,
        )

20% of metal data used - 1649 metal with 10387 insulator (14%)
d:/MODELS/202204/nmm\M02R\metal_TTT_06_M2_L1_logL1
Epoch [1/300]	Train/Valid Loss: 4.2565 / 3.4001	MAE: 2.1070 / 1.8546
Epoch [2/300]	Train/Valid Loss: 2.6597 / 2.1957	MAE: 1.3223 / 1.0979
Epoch [3/300]	Train/Valid Loss: 2.0696 / 1.8098	MAE: 0.9336 / 0.8652
Epoch [4/300]	Train/Valid Loss: 1.7087 / 1.3129	MAE: 0.7272 / 0.6201
Epoch [5/300]	Train/Valid Loss: 1.1370 / 0.8584	MAE: 0.4950 / 0.4508
Epoch [6/300]	Train/Valid Loss: 0.8620 / 0.6081	MAE: 0.3279 / 0.2629
Epoch [7/300]	Train/Valid Loss: 0.7275 / 0.5668	MAE: 0.2685 / 0.2646
Epoch [8/300]	Train/Valid Loss: 0.7508 / 0.5897	MAE: 0.2801 / 0.2563
Epoch [9/300]	Train/Valid Loss: 0.6589 / 0.5572	MAE: 0.2419 / 0.2456
Epoch [10/300]	Train/Valid Loss: 0.6443 / 0.5483	MAE: 0.2325 / 0.2495
Epoch [11/300]	Train/Valid Loss: 0.6292 / 0.4905	MAE: 0.2254 / 0.2288
Epoch [12/300]	Train/Valid Loss: 0.6141 / 0.5017	MAE: 0.2215 / 0.2240
Epoch [13/300]	Train/Valid Loss: 0.6047 / 0.5226	MAE: 0