In [1]:
# -*- coding: utf-8 -*-
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import time
import sys
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from copy import deepcopy
from dgllife.utils import Meter, EarlyStopping
from hyperopt import fmin, tpe
from shutil import copyfile
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from hyper import init_hyper_space
from utils import get_configure, mkdir_p, init_trial_path, \
    split_dataset, collate_molgraphs, load_model, predict, init_featurizer, load_dataset
import torch
import torch.nn.functional as F
from scipy.stats import pearsonr
from sklearn.metrics import roc_curve,roc_auc_score, confusion_matrix, precision_recall_curve, auc, mean_squared_error, \
    r2_score, mean_absolute_error,cohen_kappa_score,accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score

Using backend: pytorch


In [3]:
# Wall-clock reference for the whole notebook run (a later cell records end_time).
start_time = time.time()
# Enable cuDNN and its per-shape kernel benchmarking.
# NOTE(review): benchmark mode can pick different kernels between runs, so results
# may not be bit-for-bit reproducible — confirm this is acceptable.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
# patienceNum = 50
# batch_size = 128

In [4]:
class AllMeter(object):
    """Accumulate predictions/labels and report a panel of binary-classification metrics.

    ``update`` stores sigmoid-activated model outputs together with their labels;
    ``compute_metric('getAllMetrics')`` turns everything collected so far into
    ROC-AUC, PR-AUC, accuracy, recall, precision, F1, Cohen's kappa and MCC.

    The meter handles a single binary task: only column 0 of the stored
    predictions is thresholded (at 0.5) to obtain hard class labels.
    """

    def __init__(self, mean=None, std=None):
        """Create an empty meter.

        ``mean`` and ``std`` are accepted for signature compatibility but are not used.
        """
        self.y_predict = []
        self.y_test = []

    @staticmethod
    def _safe_div(numerator, denominator):
        """Return ``numerator / denominator`` as a float, or 0.0 when the denominator is 0."""
        return float(numerator) / float(denominator) if denominator else 0.0

    def update(self, output, label, mask=None):
        """Store one mini-batch of raw logits and their labels.

        Parameters
        ----------
        output : torch.Tensor
            Raw logits; a sigmoid is applied before storing.
        label : torch.Tensor
            Ground-truth labels for the same samples.
        mask : optional
            Accepted for signature compatibility; it is ignored, so entries for
            missing labels are NOT filtered out.
        """
        probabilities = torch.sigmoid(output).cpu().detach().numpy()
        targets = label.cpu().detach().numpy()
        # Iterating a numpy array yields one row per sample.
        self.y_predict.extend(probabilities)
        self.y_test.extend(targets)

    def statistical(self):
        """Compute, print and return all metrics for the data collected so far.

        Returns
        -------
        dict
            Keys: 'auc_prc', 'acc', 'auc_roc', 'recall', 'precision', 'f1',
            'kappa', 'mcc' (all computed with scikit-learn).
        """
        y_test = pd.DataFrame(self.y_test)
        y_predict = pd.DataFrame(self.y_predict)

        fpr, tpr, _ = roc_curve(y_test, y_predict)
        # Compute the precision-recall curve once and reuse both arrays.
        pr_precision, pr_recall, _ = precision_recall_curve(y_test, y_predict, pos_label=1)
        auc_prc = auc(pr_recall, pr_precision)
        auc_roc = auc(fpr, tpr)

        # Hard class labels from the probabilities of the (single) task.
        output_tran = [1 if x > 0.5 else 0 for x in y_predict[0]]

        acc = accuracy_score(y_test, output_tran)
        recall = recall_score(y_test, output_tran)
        precision = precision_score(y_test, output_tran)
        f1 = f1_score(y_test, output_tran)
        kappa = cohen_kappa_score(y_test, output_tran)
        mcc = matthews_corrcoef(y_test, output_tran)

        # Fixing the label set guarantees a 2x2 matrix even when a split (or the
        # predictions) contain a single class, so the 4-way unpacking cannot fail.
        c_mat = confusion_matrix(y_test, output_tran, labels=[0, 1])
        # Plain floats avoid integer overflow in the MCC denominator product.
        tn, fp, fn, tp = (float(count) for count in c_mat.ravel())

        # Hand-computed counterparts, printed as a cross-check of the sklearn values.
        recall_ = self._safe_div(tp, tp + fn)
        precision_ = self._safe_div(tp, tp + fp)
        acc_ = self._safe_div(tp + tn, tn + fp + fn + tp)
        f1_ = self._safe_div(2 * precision_ * recall_, precision_ + recall_)
        mcc_ = (tp * tn - fp * fn) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) + 1e-8)

        scores_dict = {
            'auc_prc': auc_prc,
            'acc': acc,
            'auc_roc': auc_roc,
            'recall': recall,
            'precision': precision,
            'f1': f1,
            'kappa': kappa,
            'mcc': mcc,
        }

        print(scores_dict)
        print({'acc':acc_,'recall_':recall_,'precision_':precision_,'f1_':f1_,'mcc_':mcc_})
        return scores_dict

    def compute_metric(self, metric_name, reduction='mean'):
        """Return the metric panel for ``metric_name == 'getAllMetrics'``.

        ``reduction`` is accepted for signature compatibility and is not used.

        Raises
        ------
        ValueError
            If ``metric_name`` is anything other than 'getAllMetrics'.
        """
        if metric_name == 'getAllMetrics':
            return self.statistical()
        raise ValueError(
            "Expect metric_name to be 'getAllMetrics', got {}".format(metric_name))

In [6]:
def run_a_train_epoch(args, epoch, model, data_loader, loss_criterion, optimizer):
    """Train ``model`` for one pass over ``data_loader`` and report training metrics.

    Parameters
    ----------
    args : dict
        Run settings; reads 'device', 'print_every', 'num_epochs' and 'metric'.
    epoch : int
        Zero-based epoch index (only used for progress messages).
    model : torch.nn.Module
        Model being optimised.
    data_loader : iterable
        Yields ``(smiles, bg, labels, masks)`` batches.
    loss_criterion : callable
        Element-wise loss (called as ``loss_criterion(logits, labels)``).
    optimizer : torch.optim.Optimizer
        Optimizer updating ``model``.

    Returns
    -------
    dict
        ``{'roc_auc_score', 'pr_auc_score', 'all_score'}`` with the same layout
        as ``run_an_eval_epoch``. As there, the 'roc_auc_score' entry holds the
        value of ``args['metric']``.
    """
    model.train()
    train_meter = Meter()
    all_train_meter = AllMeter()
    for batch_id, batch_data in enumerate(data_loader):
        smiles, bg, labels, masks = batch_data
        if len(smiles) == 1:
            # Avoid potential issues with batch normalization
            continue

        labels, masks = labels.to(args['device']), masks.to(args['device'])
        logits = predict(args, model, bg)
        # Mask non-existing labels
        loss = (loss_criterion(logits, labels) * (masks != 0).float()).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_meter.update(logits, labels, masks)
        all_train_meter.update(logits, labels)
        if batch_id % args['print_every'] == 0:
            print('epoch {:d}/{:d}, batch {:d}/{:d}, loss {:.4f}'.format(
                epoch + 1, args['num_epochs'], batch_id + 1, len(data_loader), loss.item()))

    # The selected metric is computed once and reused for both the log line and
    # the returned dict.
    train_score = np.mean(train_meter.compute_metric(args['metric']))
    print('epoch {:d}/{:d}, training {} {:.4f}'.format(
        epoch + 1, args['num_epochs'], args['metric'], train_score))
    prc_score = np.mean(train_meter.compute_metric('pr_auc_score'))
    # Also prints the full metric panel for the training data.
    all_score = all_train_meter.compute_metric('getAllMetrics')
    return {'roc_auc_score': train_score, 'pr_auc_score': prc_score, 'all_score': all_score}

def run_an_eval_epoch(args, model, data_loader):
    """Score ``model`` on every batch of ``data_loader`` without tracking gradients.

    Returns a dict with three entries:
    'roc_auc_score' (task-averaged value of ``args['metric']``),
    'pr_auc_score' (task-averaged PR-AUC) and
    'all_score' (the metric panel produced by ``AllMeter``).
    """
    model.eval()
    meter = Meter()
    panel_meter = AllMeter()
    with torch.no_grad():
        for smiles, bg, labels, masks in data_loader:
            labels = labels.to(args['device'])
            logits = predict(args, model, bg)
            for collector in (meter, panel_meter):
                collector.update(logits, labels, masks)
    # np.mean collapses the per-task scores in case of multi-task models.
    return {
        'roc_auc_score': np.mean(meter.compute_metric(args['metric'])),
        'pr_auc_score': np.mean(meter.compute_metric('pr_auc_score')),
        'all_score': panel_meter.compute_metric('getAllMetrics'),
    }

def main(args, exp_config, train_set, val_set, test_set):
    """Train one configuration with early stopping and persist its results.

    ``exp_config`` is updated in place with the run settings, the model is trained
    until early stopping triggers (or ``args['num_epochs']`` is reached), the best
    checkpoint is reloaded and scored on the test set, and ``eval.txt`` plus
    ``configure.json`` are written into the trial directory.

    Returns
    -------
    tuple
        ``(trial_path, best_validation_score)``.
    """
    # Record the run-level settings next to the hyperparameters.
    exp_config.update(
        model=args['model'],
        n_tasks=args['n_tasks'],
        atom_featurizer_type=args['atom_featurizer_type'],
        bond_featurizer_type=args['bond_featurizer_type'],
    )
    if not args['atom_featurizer_type'] == 'pre_train':
        exp_config['in_node_feats'] = args['node_featurizer'].feat_size()
    has_edge_featurizer = args['edge_featurizer'] is not None
    if has_edge_featurizer and args['bond_featurizer_type'] != 'pre_train':
        exp_config['in_edge_feats'] = args['edge_featurizer'].feat_size()

    # Set up directory for saving results
    args = init_trial_path(args)

    def make_loader(split_set, shuffle=False):
        """Wrap one data split in a DataLoader using the shared batch settings."""
        return DataLoader(dataset=split_set, batch_size=exp_config['batch_size'],
                          shuffle=shuffle, collate_fn=collate_molgraphs,
                          num_workers=args['num_workers'])

    train_loader = make_loader(train_set, shuffle=True)
    val_loader = make_loader(val_set)
    test_loader = make_loader(test_set)
    model = load_model(exp_config).to(args['device'])

    loss_criterion = nn.BCEWithLogitsLoss(reduction='none')
    optimizer = Adam(model.parameters(), lr=exp_config['lr'],
                     weight_decay=exp_config['weight_decay'])
    stopper = EarlyStopping(patience=exp_config['patience'],
                            filename=args['trial_path'] + '/model.pth',
                            metric=args['metric'])

    total_epochs = args['num_epochs']
    for epoch in range(total_epochs):
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validate, then let the stopper decide whether training should end.
        val_score = run_an_eval_epoch(args, model, val_loader)
        should_stop = stopper.step(val_score['roc_auc_score'], model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, total_epochs, args['metric'],
            val_score['roc_auc_score'], args['metric'], stopper.best_score))
        if should_stop:
            break

    # Evaluate the best checkpoint, not the last epoch.
    stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric'], test_score['roc_auc_score']))

    with open(args['trial_path'] + '/eval.txt', 'w') as f:
        f.write('Best val {}: {}\n'.format(args['metric'], stopper.best_score))
        f.write('Test {}: {}\n'.format(args['metric'], test_score['roc_auc_score']))

    with open(args['trial_path'] + '/configure.json', 'w') as f:
        json.dump(exp_config, f, indent=2)

    return args['trial_path'], stopper.best_score

def bayesian_optimization(args, train_set, val_set, test_set):
    """Search hyperparameters with hyperopt's TPE and return the best trial's directory.

    Every trial calls ``main`` on a deep copy of ``args``; the trial with the
    lowest objective value (negated validation score for the AUC metrics) wins.
    """
    search_space = init_hyper_space(args['model'])
    # One (trial_path, value_to_minimise) pair per evaluated configuration.
    trials = []

    def objective(hyperparams):
        """Train one configuration and return the value hyperopt should minimise."""
        trial_args = deepcopy(args)
        trial_path, val_metric = main(trial_args, hyperparams, train_set, val_set, test_set)

        # hyperopt minimises, so a higher-is-better AUC is negated.
        higher_is_better = args['metric'] in ['roc_auc_score', 'pr_auc_score']
        loss = -1 * val_metric if higher_is_better else val_metric

        trials.append((trial_path, loss))
        return loss

    fmin(objective, search_space, algo=tpe.suggest, max_evals=args['num_evals'])
    # Stable sort: among ties the earliest trial is kept.
    ranked = sorted(trials, key=lambda trial: trial[1])
    return ranked[0][0]

In [7]:
import argparse
# Command-line style configuration for the run. In this notebook the parser is fed
# an explicit argument list (see the cell that calls parser.parse_args) rather than sys.argv.
parser = argparse.ArgumentParser()

# --- Dataset location and columns ---
parser.add_argument('-c', '--csv-path', type=str, required=True,
                    help='Path to a csv file for loading a dataset')
parser.add_argument('-sc', '--smiles-column', type=str, required=True,
                    help='Header for the SMILES column in the CSV file')
parser.add_argument('-lv', '--log-values', action='store_true', default=False,
                    help='Whether to take logarithm of the labels for modeling')
parser.add_argument('-t', '--task-names', default=None, type=str,
                    help='Header for the tasks to model. If None, we will model '
                         'all the columns except for the smiles_column in the CSV file. '
                         '(default: None)')
# --- Splitting ---
parser.add_argument('-s', '--split',
                    choices=['scaffold_decompose', 'scaffold_smiles', 'random'],
                    default='scaffold_smiles',
                    help='Dataset splitting method (default: scaffold_smiles). For scaffold '
                         'split based on rdkit.Chem.AllChem.MurckoDecompose, '
                         'use scaffold_decompose. For scaffold split based on '
                         'rdkit.Chem.Scaffolds.MurckoScaffold.MurckoScaffoldSmiles, '
                         'use scaffold_smiles.')
parser.add_argument('-sr', '--split-ratio', default='0.8,0.1,0.1', type=str,
                    help='Proportion of the dataset to use for training, validation and test '
                         '(default: 0.8,0.1,0.1)')
# --- Metric, model and featurization ---
parser.add_argument('-me', '--metric', choices=['roc_auc_score', 'pr_auc_score'],
                        default='roc_auc_score',
                        help='Metric for evaluation (default: roc_auc_score)')
parser.add_argument('-mo', '--model', choices=['GCN', 'GAT', 'Weave', 'MPNN', 'AttentiveFP',
                                               'gin_supervised_contextpred',
                                               'gin_supervised_infomax',
                                               'gin_supervised_edgepred',
                                               'gin_supervised_masking',
                                               'NF'],
                    default='GCN', help='Model to use (default: GCN)')
parser.add_argument('-a', '--atom-featurizer-type', choices=['canonical', 'attentivefp'],
                    default='canonical',
                    help='Featurization for atoms (default: canonical)')
parser.add_argument('-b', '--bond-featurizer-type', choices=['canonical', 'attentivefp'],
                    default='canonical',
                    help='Featurization for bonds (default: canonical)')
# --- Training loop and logging ---
parser.add_argument('-n', '--num-epochs', type=int, default=1000,
                    help='Maximum number of epochs allowed for training. '
                         'We set a large number by default as early stopping '
                         'will be performed. (default: 1000)')
parser.add_argument('-nw', '--num-workers', type=int, default=0,
                    help='Number of processes for data loading (default: 0)')
parser.add_argument('-pe', '--print-every', type=int, default=20,
                    help='Print the training progress every X mini-batches')
# NOTE(review): the default directory name says "regression" although this notebook
# trains with BCEWithLogitsLoss (classification) — confirm whether that is intended.
parser.add_argument('-p', '--result-path', type=str, default='regression_results',
                    help='Path to save training results (default: regression_results)')
# --- Hyperparameter search and augmentation ---
parser.add_argument('-ne', '--num-evals', type=int, default=None,
                    help='Number of trials for hyperparameter search (default: None)')
parser.add_argument('-au', '--augmentation', action='store_true', default=False,
                    help='Whether to augmentation')

_StoreTrueAction(option_strings=['-au', '--augmentation'], dest='augmentation', nargs=0, const=True, default=False, type=None, choices=None, help='Whether to augmentation', metavar=None)

In [8]:
# Run-level constants.
GPUNum = '0'        # index appended to 'cuda:' when a GPU is available
repetitions = 50    # number of repeated train/evaluate splits after the search
seed = 0            # split seed used for the hyperparameter-search data split

# Options left at their parser defaults for this run:
#   --split-ratio, --atom-featurizer-type, --bond-featurizer-type,
#   --augmentation, --num-workers, --print-every
args = vars(parser.parse_args(args=[
    '--csv-path', 'data/0-CF-2274.csv',
    '--task-names', 'active_label',
    '--smiles-column', 'SMILES',
    '--result-path', 'result/CF-2274_NoAug_GAT_20220906',
    '--num-evals', '50',
    '--num-epochs', '300',
    '--split', 'random',
    '--metric', 'roc_auc_score',
    '--model', 'GAT',
]))
args

{'atom_featurizer_type': 'canonical',
 'augmentation': False,
 'bond_featurizer_type': 'canonical',
 'csv_path': 'data/0-CF-2274.csv',
 'log_values': False,
 'metric': 'roc_auc_score',
 'model': 'GAT',
 'num_epochs': 300,
 'num_evals': 50,
 'num_workers': 0,
 'print_every': 20,
 'result_path': 'result/CF-2274_NoAug_GAT_20220906',
 'smiles_column': 'SMILES',
 'split': 'random',
 'split_ratio': '0.8,0.1,0.1',
 'task_names': 'active_label'}

In [9]:
import os
import shutil

def del_file(filepath):
    """
    Remove every entry inside a directory while keeping the directory itself.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively. Symbolic links are checked first so that a link to a
    directory is unlinked (``shutil.rmtree`` refuses to operate on a symlink)
    and a dangling link is not silently left behind. The link target is never
    touched.

    :param filepath: path of the directory to empty
    :return: None
    """
    for entry in os.listdir(filepath):
        entry_path = os.path.join(filepath, entry)
        if os.path.islink(entry_path) or os.path.isfile(entry_path):
            os.remove(entry_path)
        elif os.path.isdir(entry_path):
            shutil.rmtree(entry_path)
            
# Directory that receives every artefact of this run.
path_data = args['result_path']

if not os.path.exists(path_data):
    os.makedirs(path_data)

# WARNING: this empties the result directory, so anything left there by a
# previous run with the same --result-path is deleted.
del_file(path_data)

# Sub-directory for the per-split best-model checkpoints written by trainWithHyper.
dirs = args['result_path']+'/saved_model'

if not os.path.exists(dirs):
    os.makedirs(dirs)

In [10]:
# data augmentation
import pandas as pd
import numpy as np
from maxsmi.augmentation_strategies import no_augmentation
from maxsmi.augmentation_strategies import augmentation_with_duplication
from maxsmi.augmentation_strategies import augmentation_without_duplication
from maxsmi.augmentation_strategies import (
    augmentation_with_reduced_duplication
)
from maxsmi.augmentation_strategies import augmentation_maximum_estimation
from sklearn.utils import shuffle

def data_augmentation(dataSet,args, augmentation_number=50):
    """Expand every molecule into several augmented SMILES that share its label.

    Parameters
    ----------
    dataSet : pandas.DataFrame
        Must contain the SMILES column and the first task column named in ``args``.
    args : dict
        Reads ``args['smiles_column']`` and ``args['task_names'][0]``; only the
        first task is carried over to the augmented frame.
    augmentation_number : int, optional
        Second argument passed to ``augmentation_with_reduced_duplication``
        (default 50, the value that used to be hard-coded).

    Returns
    -------
    pandas.DataFrame
        Two columns (SMILES, task) with a fresh 0..n-1 index; empty, but with
        both columns present, when ``dataSet`` has no rows.
    """
    smi_col = args['smiles_column']
    task_name = args['task_names'][0]

    # Collect plain records and build the frame once: DataFrame.append is quadratic
    # inside a loop and no longer exists in pandas >= 2.0.
    records = []
    for _, row in dataSet.iterrows():
        label = float(row[task_name])
        for augmented_smiles in augmentation_with_reduced_duplication(row[smi_col],
                                                                     augmentation_number):
            records.append({smi_col: augmented_smiles, task_name: label})
    return pd.DataFrame(records, columns=[smi_col, task_name])

def split_dataset_augmentation(my_df,seed,args):
    """Split the raw frame first, then augment, shuffle and load each part separately.

    10% of the rows go to the test split and 10% of the remainder to the
    validation split, both with ``random_state=seed``. Splitting before
    augmenting keeps all augmented SMILES of one source row in the same split.

    Returns ``(train_set, val_set, test_set)``.
    """
    train_val_df, test_df = train_test_split(my_df, test_size=0.1, random_state=seed)
    train_df, val_df = train_test_split(train_val_df, test_size=0.1, random_state=seed)

    # Processing order is test, train, validation.
    augmented_parts = [
        shuffle(data_augmentation(part, args), random_state=seed)
        for part in (test_df, train_df, val_df)
    ]
    test_set, train_set, val_set = [load_dataset(args, part) for part in augmented_parts]

    return train_set, val_set, test_set

In [11]:
# Pick the compute device: the GPU indexed by GPUNum when CUDA is available, else the CPU.
if torch.cuda.is_available():
    args['device'] = torch.device('cuda:'+ GPUNum)
else:
    args['device'] = torch.device('cpu')

# Turn the comma-separated --task-names string into a list of column names.
# NOTE(review): not idempotent — re-running this cell calls .split on a list and fails.
if args['task_names'] is not None:
    args['task_names'] = args['task_names'].split(',')

# presumably adds the 'node_featurizer' / 'edge_featurizer' entries read by main() — verify in utils
args = init_featurizer(args)
# Raw dataset. NOTE: the name `df` is reassigned to the summary table in a later cell.
df = pd.read_csv(args['csv_path'])
mkdir_p(args['result_path'])

Directory result/CF-2274_NoAug_GAT_20220906 already exists.


In [12]:
# Build the train/validation/test splits used for the hyperparameter search,
# either from augmented SMILES or directly from the loaded dataset.
if args['augmentation']:
    train_set, val_set, test_set = split_dataset_augmentation(df,seed,args)
    args['n_tasks'] = train_set.n_tasks
else:
    # `dataset` is only defined on this branch; the repetition loop reuses it later.
    dataset = load_dataset(args, df)
    train_set, val_set, test_set = split_dataset(args, dataset,seed)
    args['n_tasks'] = dataset.n_tasks
    
# Whether to take the logarithm of labels for narrowing the range of values
# NOTE(review): training uses BCEWithLogitsLoss; log-transforming labels looks
# like a leftover from a regression workflow — confirm before enabling --log-values.
if args['log_values']:
    train_set.labels = train_set.labels.log()
    val_set.labels = val_set.labels.log()
    test_set.labels = test_set.labels.log()

Processing dgl graphs from scratch...
Processing molecule 1000/2274
Processing molecule 2000/2274


In [None]:
# Either search hyperparameters (when --num-evals is given) or train once with the
# default configuration, then promote the selected trial's artefacts to the result root.
if args['num_evals'] is not None:
    assert args['num_evals'] > 0, 'Expect the number of hyperparameter search trials to ' \
                                  'be greater than 0, got {:d}'.format(args['num_evals'])
    print('Start hyperparameter search with Bayesian '
          'optimization for {:d} trials'.format(args['num_evals']))
    trial_path = bayesian_optimization(args, train_set, val_set, test_set)
else:
    print('Use the manually specified hyperparameters')
    exp_config = get_configure(args['model'])
    # main() returns (trial_path, best_score); use the directory it actually wrote
    # to instead of assuming the trial was numbered 1.
    trial_path = main(args, exp_config, train_set, val_set, test_set)[0]

# Copy final
copyfile(trial_path + '/model.pth', args['result_path'] + '/model.pth')
copyfile(trial_path + '/configure.json', args['result_path'] + '/configure.json')
copyfile(trial_path + '/eval.txt', args['result_path'] + '/eval.txt')

# Reload the selected hyperparameters; `config` drives the repeated runs below.
with open(args['result_path']+'/configure.json', 'r') as f:
    config = json.load(f)

In [14]:
# Report which trial directory supplied the selected hyperparameters.
print('best hyper file: '+ trial_path)  

best hyper file: result/CF-2274_NoAug_GAT_20220906/23


In [15]:
# Display the hyperparameters loaded from configure.json.
config

{'alpha': 0.6741100115599353,
 'atom_featurizer_type': 'canonical',
 'batch_size': 32,
 'bond_featurizer_type': 'canonical',
 'dropout': 0.1750572819092699,
 'gnn_hidden_feats': 128,
 'in_node_feats': 74,
 'lr': 0.002378781549117981,
 'model': 'GAT',
 'n_tasks': 1,
 'num_gnn_layers': 1,
 'num_heads': 8,
 'patience': 30,
 'predictor_hidden_feats': 128,
 'residual': True,
 'weight_decay': 0.0029333774037381562}

In [16]:
def trainWithHyper (args, exp_config, train_set, val_set, test_set, split_id=None):
    """Train a model with fixed hyperparameters and score it on all three splits.

    Parameters
    ----------
    args : dict
        Run settings; reads 'model', 'n_tasks', the featurizer entries,
        'num_workers', 'device', 'result_path', 'metric' and 'num_epochs'.
    exp_config : dict
        Hyperparameters; updated in place with the run settings.
    train_set, val_set, test_set
        Data splits, each wrapped in a DataLoader here.
    split_id : optional
        Identifier used in the checkpoint file name. When omitted, the
        notebook-level loop variable ``split`` is used if it exists (the original
        behaviour), otherwise 0.

    Returns
    -------
    tuple
        ``(tr_scores, val_scores, te_scores)``: the dicts returned by
        ``run_an_eval_epoch`` for the best checkpoint on each split.
    """
    if split_id is None:
        # Backward compatibility: this function used to read the global `split`
        # directly and failed with NameError outside the repetition loop.
        split_id = globals().get('split', 0)

    # Record settings
    exp_config.update({
        'model': args['model'],
        'n_tasks': args['n_tasks'],
        'atom_featurizer_type': args['atom_featurizer_type'],
        'bond_featurizer_type': args['bond_featurizer_type'],
    })
    if args['atom_featurizer_type'] != 'pre_train':
        exp_config['in_node_feats'] = args['node_featurizer'].feat_size()
    if args['edge_featurizer'] is not None and args['bond_featurizer_type'] != 'pre_train':
        exp_config['in_edge_feats'] = args['edge_featurizer'].feat_size()

    # No trial directory is created here; checkpoints go to <result_path>/saved_model.
    train_loader = DataLoader(dataset=train_set, batch_size=exp_config['batch_size'], shuffle=True,
                              collate_fn=collate_molgraphs, num_workers=args['num_workers'])
    val_loader = DataLoader(dataset=val_set, batch_size=exp_config['batch_size'],
                            collate_fn=collate_molgraphs, num_workers=args['num_workers'])
    test_loader = DataLoader(dataset=test_set, batch_size=exp_config['batch_size'],
                             collate_fn=collate_molgraphs, num_workers=args['num_workers'])
    model = load_model(exp_config).to(args['device'])

    best_model_file = args['result_path']+'/saved_model/%s_bst_%s.pth' % (args['model'], split_id)

    loss_criterion = nn.BCEWithLogitsLoss(reduction='none')
    optimizer = Adam(model.parameters(), lr=exp_config['lr'],
                     weight_decay=exp_config['weight_decay'])
    stopper = EarlyStopping(patience=exp_config['patience'],
                            filename=best_model_file,
                            metric=args['metric'])
    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score['roc_auc_score'], model)

        if early_stop:
            break

    # Score the best checkpoint rather than the last epoch.
    stopper.load_checkpoint(model)

    tr_scores = run_an_eval_epoch(args, model, train_loader)
    val_scores = run_an_eval_epoch(args, model, val_loader)
    te_scores = run_an_eval_epoch(args, model, test_loader)

    return tr_scores,val_scores,te_scores

In [None]:
# Per-repetition score dicts for the training, validation and test splits.
tr_res, val_res, te_res = [], [], []

# Each repetition uses its own index as the split seed. The loop variable must
# stay named `split`: trainWithHyper reads it to name the checkpoint file.
for split in range(1, repetitions + 1):

    use_augmentation = args['augmentation']
    if use_augmentation:
        train_set, val_set, test_set = split_dataset_augmentation(df, split, args)
    else:
        train_set, val_set, test_set = split_dataset(args, dataset, split)
    args['n_tasks'] = train_set.n_tasks if use_augmentation else dataset.n_tasks

    # Whether to take the logarithm of labels for narrowing the range of values
    if args['log_values']:
        train_set.labels = train_set.labels.log()
        val_set.labels = val_set.labels.log()
        test_set.labels = test_set.labels.log()
    print('n_tasks : '+ str(args['n_tasks']))

    tr_scores, val_scores, te_scores = trainWithHyper(args, config, train_set, val_set, test_set)

    tr_res.append(tr_scores)
    val_res.append(val_scores)
    te_res.append(te_scores)

In [18]:
def getList(res):
    """Regroup per-run score dicts into one list per metric.

    ``res`` is a sequence of dicts that each hold an 'all_score' mapping. The
    result is an 8-tuple of lists in the order
    (auc_prc, acc, auc_roc, recall, precision, f1, kappa, mcc), each list
    following the order of ``res``.
    """
    metric_order = ('auc_prc', 'acc', 'auc_roc', 'recall',
                    'precision', 'f1', 'kappa', 'mcc')
    panels = [run['all_score'] for run in res]
    return tuple([panel[metric] for panel in panels] for metric in metric_order)

# One list per metric and data split, each with one entry per repetition.
# The unpacking order must match the tuple order returned by getList.
tr_auc_prc_list,tr_acc_list,tr_auc_roc_list,tr_recall_list,tr_precision_list,tr_f1_list,tr_kappa_list,tr_mcc_list = getList(tr_res)
val_auc_prc_list,val_acc_list,val_auc_roc_list,val_recall_list,val_precision_list,val_f1_list,val_kappa_list,val_mcc_list = getList(val_res)
te_auc_prc_list,te_acc_list,te_auc_roc_list,te_recall_list,te_precision_list,te_f1_list,te_kappa_list,te_mcc_list = getList(te_res)

In [19]:
    # One "mean±std" summary sentence per metric:
    # acc auc_roc recall precision f1 kappa mcc auc_prc
    def _mean_std_triplet(train_values, val_values, test_values):
        """Return (mean, std) for train, validation and test as one flat 6-tuple."""
        return (
            np.mean(train_values), np.std(train_values),
            np.mean(val_values), np.std(val_values),
            np.mean(test_values), np.std(test_values),
        )

    acc_str = 'acc of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_acc_list, val_acc_list, te_acc_list))
    auc_str = 'auc_roc of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_auc_roc_list, val_auc_roc_list, te_auc_roc_list))
    recall_str = 'recall of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_recall_list, val_recall_list, te_recall_list))
    precision_str = 'precision of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_precision_list, val_precision_list, te_precision_list))
    f1_str = 'f1 of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_f1_list, val_f1_list, te_f1_list))
    kappa_str = 'kappa of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_kappa_list, val_kappa_list, te_kappa_list))
    mcc_str = 'mcc of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_mcc_list, val_mcc_list, te_mcc_list))
    auc_prc_str = 'auc_prc of training set is {:.3f}±{:.3f}, validation set is {:.3f}±{:.3f}, test set is {:.3f}±{:.3f}'.format(
        *_mean_std_triplet(tr_auc_prc_list, val_auc_prc_list, te_auc_prc_list))

In [20]:
# Print the eight summary sentences followed by the model name, one per line.
print(acc_str, auc_str, recall_str, precision_str,
      f1_str, kappa_str, mcc_str, auc_prc_str,
      args['model'], sep='\n')
# Timestamp marking the end of the run (counterpart of start_time).
end_time = time.time()

acc of training set is 0.841±0.014, validation set is 0.841±0.025, test set is 0.834±0.028
auc_roc of training set is 0.812±0.009, validation set is 0.821±0.035, test set is 0.803±0.036
recall of training set is 0.215±0.145, validation set is 0.212±0.163, test set is 0.227±0.162
precision of training set is 0.832±0.137, validation set is 0.806±0.210, test set is 0.767±0.220
f1 of training set is 0.303±0.150, validation set is 0.293±0.161, test set is 0.305±0.165
kappa of training set is 0.253±0.124, validation set is 0.244±0.138, test set is 0.252±0.141
mcc of training set is 0.335±0.096, validation set is 0.326±0.117, test set is 0.323±0.125
auc_prc of training set is 0.584±0.021, validation set is 0.588±0.070, test set is 0.571±0.077
GAT


In [22]:
# Persist the same summary sentences that were printed above, including auc_prc,
# which was previously printed but missing from the file.
# The sentences contain '±', so the encoding is pinned instead of relying on the
# platform default.
with open(args['result_path'] + '/output.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join([
        acc_str,
        auc_str,
        recall_str,
        precision_str,
        f1_str,
        kappa_str,
        mcc_str,
        auc_prc_str,
    ]) + '\n')

In [23]:
import pandas as pd
import collections

# (row label, train values, validation values, test values) for every metric,
# in the row order of the summary table.
_metric_lists = [
    ('acc', tr_acc_list, val_acc_list, te_acc_list),
    ('auc_roc', tr_auc_roc_list, val_auc_roc_list, te_auc_roc_list),
    ('recall', tr_recall_list, val_recall_list, te_recall_list),
    ('precision', tr_precision_list, val_precision_list, te_precision_list),
    ('f1', tr_f1_list, val_f1_list, te_f1_list),
    ('kappa', tr_kappa_list, val_kappa_list, te_kappa_list),
    ('mcc', tr_mcc_list, val_mcc_list, te_mcc_list),
    ('auc_prc', tr_auc_prc_list, val_auc_prc_list, te_auc_prc_list),
]

# Mean and standard deviation over the repetitions, one column pair per data split.
dict1 = collections.OrderedDict([
    ("model: "+args['model'], [label for label, tr, va, te in _metric_lists]),
    ("Train", [np.mean(tr) for label, tr, va, te in _metric_lists]),
    ("Tr_STD", [np.std(tr) for label, tr, va, te in _metric_lists]),
    ("Validation", [np.mean(va) for label, tr, va, te in _metric_lists]),
    ("Va_STD", [np.std(va) for label, tr, va, te in _metric_lists]),
    ("Test", [np.mean(te) for label, tr, va, te in _metric_lists]),
    ("Te_STD", [np.std(te) for label, tr, va, te in _metric_lists]),
])
# NOTE: this rebinds `df`, which held the raw dataset until now. The next cell
# writes this table to CSV via `df`, so the name is kept; re-running earlier cells
# that expect the raw dataset requires reloading it first.
df = pd.DataFrame(dict1,index = None)
df

Unnamed: 0,model: GAT,Train,Tr_STD,Validation,Va_STD,Test,Te_STD
0,acc,0.84099,0.013607,0.840617,0.024712,0.833596,0.028443
1,auc_roc,0.812105,0.009415,0.82052,0.034533,0.802948,0.036275
2,recall,0.215069,0.145378,0.212326,0.16252,0.226736,0.161806
3,precision,0.832409,0.137392,0.805892,0.210109,0.767302,0.219669
4,f1,0.302609,0.149819,0.292673,0.161482,0.305009,0.165336
5,kappa,0.252837,0.123829,0.244438,0.138384,0.251659,0.140835
6,mcc,0.335137,0.096272,0.325791,0.11719,0.322971,0.125435
7,auc_prc,0.583633,0.02115,0.587654,0.070043,0.571404,0.076546


In [24]:
# Save the summary table (here `df` is the metrics table built in the previous cell).
df.to_csv(args['result_path'] + '/output.csv',index = False)

In [25]:
# Display the directory that holds all artefacts of this run.
args['result_path']

'result/CF-2274_NoAug_GAT_20220906'

In [26]:
# Release cached GPU memory held by PyTorch's allocator now that training is finished.
torch.cuda.empty_cache()

In [27]:
# Repeat the selected trial directory at the end of the notebook for easy reference.
print('best hyper file: '+ trial_path)

best hyper file: result/CF-2274_NoAug_GAT_20220906/23
