In [1]:
import argparse
import os, datetime
import torch
import random 
import numpy as np 
import torch.optim
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt 
from itertools import cycle
import pickle
from sklearn.metrics import roc_auc_score, confusion_matrix
from pytorch_lightning.metrics.functional.classification import f1_score, auroc
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.impute import SimpleImputer
from torch.utils.data import TensorDataset, DataLoader

from resnetv2 import PreActResNet18 as ResNet18  
from utils import Labeled_dataset
from MLP_base import Net as mlp

from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

from resblock_searched import Appended_Model

import warnings
warnings.filterwarnings('ignore')

parser = argparse.ArgumentParser(description='PyTorch Cifar10_100 Training')
parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate')
parser.add_argument('--data_dir', help='The directory for data', default='trans_data', type=str)
parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
parser.add_argument('--weight_decay', default=5e-4, type=float, help='weight decay')
parser.add_argument('--epochs', default=100, type=int, help='number of total epochs to run')
parser.add_argument('--print_freq', default=50, type=int, help='print frequency')
parser.add_argument('--decreasing_lr', default='60,80', help='decreasing strategy')
parser.add_argument('--save_dir', help='The directory used to save the trained models', default='cifar10_cil', type=str)
parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
parser.add_argument('--seed', type=int, default=None, help='random seed')
parser.add_argument('--batch_size', default=128, type=int, help='batch size')
parser.add_argument('--load_model', default=False, type=eval, choices=[True, False], help='load last checkpoint to continue training')
parser.add_argument('--drop_r', default=0.3, type=float, help='drop out rate')
parser.add_argument('--out_size', default=10, type=int, help='total possible labels (binary is 1)')
parser.add_argument('--lr_Plateau_factor', default=0.1, type=float, help='torch.optim.lr_scheduler.ReduceLROnPlateau: Factor by which the learning rate will be reduced')
parser.add_argument('--lr_Plateau_patience', default=10, type=int, help='torch.optim.lr_scheduler.ReduceLROnPlateau: Number of epochs with no improvement after which learning rate will be reduced')


best_prec1 = 0



In [2]:
outcome_cols = [
    'death', 'cvd_death', 'time_death', 'anyhosp', 'time_anyhosp',
    'hfhosp', 'time_hfhosp', 'abortedca', 'time_abortedca', 'mi',
    'time_mi', 'stroke', 'time_stroke', 'primary_ep', 'time_primary_ep'
]

In [3]:
con_cat_cols = [
    'GLUCOSE_FAST', 'GLUCOSE_RAND', 'CO2_mmolL', 'GLUCOSE_mgdL','WBC_kuL',
    'HCT_p', 'HB_gdL', 'PLT_kuL', 'ALP_UL', 'TBILI_mgdL', 'ALB_gdL'
]

In [4]:
contin_cols = [
    'BNP_VAL', 'age_entry', 'EF', 'visit_dt1_hf', 'chfdc_dt3', 'mi_dt3',
    'stroke_dt3', 'cabg_dt3', 'pci_dt3', 'DM_AGE_YR', 'DM_DUR_YR', 'cigs',
    'SMOKE_YRS', 'QUIT_YRS', 'HEAVY_MIN', 'HEAVY_WK', 'MED_WK', 'MED_MIN',
    'LIGHT_WK', 'LIGHT_MIN', 'metsperweek', 'cooking_salt_score', 'height',
    'weight', 'waistc', 'HR', 'SBP', 'DBP', 'CR_mgdl', 'gfr', 'labs_dt1',
    'NA_mmolL', 'K_mmolL', 'CL_mmolL', 'BUN_mgdL', 'ALT_UL', 'AST_UL',
    'urine_val_mgg', 'QRS_DUR', 'CR_mgdL', 'BMI'
]

In [5]:
priep = pd.read_csv('/data/datasets/topcat/nch/nn_baseline/primary_ep_set.csv', index_col=0)
death = pd.read_csv('/data/datasets/topcat/nch/nn_baseline/death_set.csv'     , index_col=0)
hfhos = pd.read_csv('/data/datasets/topcat/nch/nn_baseline/hfhosp_set.csv'    , index_col=0)

In [6]:
mode = 2

if mode == 1:
    outcome = 'primary_ep'
    outcome_time = 'time_primary_ep'
    df = priep.copy()
elif mode == 2:
    outcome = 'death'
    outcome_time = 'time_death'
    df = death.copy()
elif mode == 3:
    outcome = 'hfhosp'
    outcome_time = 'time_hfhosp'
    df = hfhos.copy()

labels = df[outcome].copy()
complete_labels = labels.copy()

In [7]:
def validate(val_loader, model, criterion, if_main=False):
    
    losses = AverageMeter()
    aucrocs = AverageMeter()
    aucprs = AverageMeter()
    f1_scores = AverageMeter()
    
    # for confusion matrix
    #pred_ls = torch.empty(len(val_loader.dataset))
    #true_ls = torch.empty(len(val_loader.dataset))
    
    model = model.eval()
    
    for i, (input, target) in enumerate(val_loader):
        #true_ls[val_loader.batch_size * i, val_loader.batch_size * (i+1)] = target
        
        
        input = input.cuda()
        target = target.long().cuda()
        
        with torch.no_grad():
            output = model(input)
            
            if args.out_size == 1:
                output = torch.sigmoid(output)
            
            #loss = criterion(output, target)
            loss = criterion_ExpandTarget(output, target, criterion)
            
            
        output = output.float()
        #pred_ls[val_loader.batch_size * i, val_loader.batch_size * (i+1)] = np.argmax(output, axis=1)
        
        loss = loss.float()    
        
          
        aurocsore = auroc(output[:, 1].data, target)  
        #aurocsore = roc_auc_score(target, output[:, 1])
        #aucprscore = aucpr(output.data, target)  
        #f1score = f1_score(output.data, target, pos_label=1)
        
        
        aucrocs.update(aurocsore.item(), input.size(0))
        #aucprs.update(aucprscore.item(), input.size(0))
        #f1_scores.update(f1score.item(), input.size(0))
        losses.update(loss.item(), input.size(0))

        '''   
        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]  '
                  'Loss {loss.avg:.4f}  '
                  'aucroc {aucrocs.avg:.3f}  '
                  #'aucpr {aucprs.avg:.3f}  '
                  #'f1 {f1_scores.avg:.3f}'
                  .format(i, len(val_loader), loss=losses, 
                          aucrocs=aucrocs, 
                          #aucprs=aucprs, 
                          #f1_scores=f1_scores
                         ))
          
    
    print('aucroc {aucrocs.avg:.3f}  '
          #'aucpr {aucprs.avg:.3f}  '
          #'f1 {f1_scores.avg:.3f}'
          .format(aucrocs=aucrocs, 
                  #aucprs=aucprs, f1_scores=f1_scores
                 ))'''
    
    
    #print_confusion_matrix(pred_ls, true_ls, labels=[0, 1])
    
    #return statistics.mean(f1_score_ls), statistics.mean(accuracy_ls), statistics.mean(auroc_score_ls), statistics.mean(auc_pr_ls)
    return losses.avg, aucrocs.avg
    

In [8]:
def training(rand_loader, new_balance_loader, old_balance_loader, model, criterion, optimizer_BB, optimizer_CB, optimizer_R, epoch):
    
    losses_rand = AverageMeter()
    top1_rand = AverageMeter()
    
    losses_bal = AverageMeter()
    top1_bal = AverageMeter()
    

    coef_old = 0.5
    coef_new = 0.5

    # switch to train mode
    model.train()

    new_balance = iter(new_balance_loader)
    old_balance = iter(old_balance_loader)

    for i, (input, target) in enumerate(rand_loader):

        
        try:
            bal_new_img, bal_new_target = next(new_balance)
        except StopIteration:
            new_balance = iter(new_balance_loader)
            bal_new_img, bal_new_target = next(new_balance)

        try:
            bal_old_img, bal_old_target = next(old_balance)
        except StopIteration:
            old_balance = iter(old_balance_loader)
            bal_old_img, bal_old_target = next(old_balance)
        

        bal_new_img = bal_new_img.cuda()
        bal_old_img = bal_old_img.cuda()
        
        input = input.cuda()

        
        bal_new_target = bal_new_target.long().cuda()
        bal_old_target = bal_old_target.long().cuda()
        
        target = target.long().cuda()

        # random input
        output_gt = model(input, main_fc=False)
        #loss_rand = criterion(output_gt, target)
        loss_rand = criterion_ExpandTarget(output_gt, target, criterion)

        
        # balance inputs
        output_bal_new = model(bal_new_img, main_fc=True)
        output_bal_old = model(bal_old_img, main_fc=True)
        
        
        #loss_new = criterion(output_bal_new, bal_new_target.unsqueeze(1).type_as(output_bal_new))*coef_new
        loss_new = criterion_ExpandTarget(output_bal_new, bal_new_target, criterion)*coef_new
        
        
        #loss_old = criterion(output_bal_old, bal_old_target.unsqueeze(1).type_as(output_bal_old))*coef_old
        loss_old = criterion_ExpandTarget(output_bal_old, bal_old_target, criterion)*coef_old
        
        loss_balance = loss_new + loss_old
        
        # check if any output is NaN
        if tensor_allNaN(output_gt):
            print('output_gt')
            sys.exit()
        if tensor_allNaN(loss_rand):
            print('loss_rand')
            sys.exit()
        
        if tensor_allNaN(output_bal_new):
            print('output_bal_new')
            sys.exit()
        if tensor_allNaN(output_bal_old):
            print('output_bal_old')
            sys.exit()
        if tensor_allNaN(loss_balance):
            print('loss_balance')  
            sys.exit()
        
        
        
        loss = (loss_balance + loss_rand)*0.5
        
        #optimizer.zero_grad()
        optimizer_BB.zero_grad()
        optimizer_CB.zero_grad()
        optimizer_R.zero_grad()
        
        loss.backward()
        
        #optimizer.step()
        optimizer_BB.step()
        optimizer_CB.step()
        optimizer_R.step()

        output = output_gt.float()
        loss = loss.float()
        # measure accuracy and record loss, for balanced classifier
        output_cpu = output.cpu().detach().numpy()
        output_bal_new_cpu = output_bal_new.cpu().detach().numpy()
        output_bal_old_cpu = output_bal_old.cpu().detach().numpy()
        output_bal_cpu = np.concatenate((output_bal_new_cpu, output_bal_old_cpu), axis=0)
        
        target_cpu = target.cpu().detach().numpy()
        bal_new_target_cpu = bal_new_target.cpu().detach().numpy()
        bal_old_target_cpu = bal_old_target.cpu().detach().numpy()
        bal_target_cpu = np.concatenate((bal_new_target_cpu, bal_old_target_cpu), axis=0)
        
        
        #prec1 = auroc(output.data, target)   
        auroc_rand = roc_auc_score(target_cpu, output_cpu[:, 1])
        auroc_bal = roc_auc_score(bal_target_cpu, output_bal_cpu[:, 1])
        
        
        losses_rand.update(loss_rand.item(), input.size(0))        
        losses_bal.update(loss_balance.item(), output_bal_cpu.shape[0])
        top1_rand.update(auroc_rand.item(), input.size(0))
        top1_bal.update(auroc_bal.item(), output_bal_cpu.shape[0])

        '''
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'auroc {top1_rand.val:.3f} ({top1_rand.avg:.3f})'.format(
                      epoch, i, len(rand_loader), loss=losses_rand, top1_rand=top1_rand))
            

    print('train_accuracy {top1_rand.avg:.3f}'.format(top1_rand=top1_rand))'''

    return losses_rand.avg, losses_bal.avg, top1_bal.avg, top1_rand.avg





In [9]:
# a = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.7, 0.3]])
# b = torch.tensor([1, 1, 0])
# aucpr(a, b)
# tensor(0.8333, dtype=torch.float64)

def aucpr(pred, true, average='macro'):
    pred = pred.numpy()[:, 1]
    true = true.numpy()
    aucprscore = average_precision_score(true, pred, average=average)
    aucprscore = torch.tensor(aucprscore)
    
    return aucprscore

In [10]:
def print_confusion_matrix(pred, true, labels=[0, 1]):
    pred = pred.numpy()
    y_pred = np.argmax(pred, axis=1)
    y_true = true.numpy()
    
    unique_label = np.unique([y_true, y_pred])
    cmtx = pd.DataFrame(
        confusion_matrix(y_true, y_pred, labels=unique_label), 
        index=['true:{:}'.format(x) for x in unique_label], 
        columns=['pred:{:}'.format(x) for x in unique_label]
    )
    print(cmtx)

In [11]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [12]:
def tensor_allNaN(tensor):
    all_Nan = torch.isnan(tensor).all()
    if all_Nan:
        return True
    else:
        return False

In [13]:
def save_checkpoint(state, filename='weight.pt'):
    """
    Save the training model
    """
    torch.save(state, filename)




In [14]:
class define_NN_model(object):
    def __init__(self, h_sizes, drop_r, out_size):
        self.h_sizes = h_sizes
        self.drop_r = drop_r
        self.out_size = out_size
    
    def def_model(self):
        model = mlp(self.h_sizes, self.drop_r, self.out_size)
        optimizer = torch.optim.Adam(model.parameters(), args.lr)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decreasing_lr, gamma=0.1)
        
        return model, optimizer, scheduler

        

In [15]:
# expand target labels to one hot encoding

# Example:
#target = torch.ones([10, 1], dtype=torch.float32)  # 64 classes, batch size = 10
#output = torch.full([10, 2], 1.5)  # A prediction (logit)
#pos_weight = torch.ones([2])  # All weights are equal to 1
#criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

def criterion_ExpandTarget(output, target, criterion):

    nb_classes = 2
    batch_size = output.size()[0]
    
    #target_one_hot = torch.nn.functional.one_hot(target.to(torch.int64))
    
    target_one_hot = torch.FloatTensor(batch_size, nb_classes)
    target_one_hot.zero_()
    target_one_hot = target_one_hot.cuda()
    
    target = target.view(-1,1)
    
    target_one_hot.scatter_(1, target, 1)
    
    loss = criterion(output, target_one_hot.type_as(output)) 
    #loss = criterion(output, target)
    
    return loss



# ==== below ================

### prepare data

In [16]:
global args, best_auroc

In [17]:
# jupyter notebook input workaround
# args = parser.parse_args()
args = parser.parse_args(args=['--save_dir', 'output', 
                               '--data_dir', 'trans_data', 
                               '--gpu', '3', 
                               '--epochs', '20', 
                               '--load_model', 'False',
                               '--seed', '1',
                               '--lr', '0.058711765521739734',
                               '--print_freq', '200',
                               '--out_size', '2',
                               '--batch_size', '440',
                               '--weight_decay', '0.014324063825534989',
                               '--momentum', '0.11292347293920574',
                               '--lr_Plateau_factor', '0.4937429181024774',
                               '--lr_Plateau_patience', '8',                               
                              ])

print(args)

Namespace(batch_size=440, data_dir='trans_data', decreasing_lr='60,80', drop_r=0.3, epochs=20, gpu=3, load_model=False, lr=0.058711765521739734, lr_Plateau_factor=0.4937429181024774, lr_Plateau_patience=8, momentum=0.11292347293920574, out_size=2, print_freq=200, save_dir='output', seed=1, weight_decay=0.014324063825534989)


In [18]:
if args.out_size == 2:
    criterion = nn.BCEWithLogitsLoss()
else:
    criterion = nn.CrossEntropyLoss()

criterion = criterion.cuda()
decreasing_lr = list(map(int, args.decreasing_lr.split(',')))



# model = mlp(h_sizes, args.drop_r, args.out_size)
# model.cuda()

starting_epoch = 0

#optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

#optimizer = torch.optim.Adam(model.parameters(), args.lr)

#scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decreasing_lr, gamma=0.1)

In [19]:
skf = StratifiedKFold(n_splits=5, shuffle=False)

auc_df = pd.DataFrame(index=range(5))

for i, (train, test) in enumerate(skf.split(df, labels)):
    best_prec1_fold = 0

    ########## prepare data ##################
    print(f'Fold {i}: {datetime.datetime.now().strftime("%I:%M:%S %p")}  ', end='')
    train_data = df.iloc[train].copy()
    test_data = df.iloc[test].copy()

    train_labels=labels.iloc[train].copy()
    test_labels=labels.iloc[test].copy()

    weights = len(train_labels)/test_labels.sum()
    glm_weights = pd.Series(data=1, index=train_labels.index)
    glm_weights.loc[train_labels==1] = weights

    ## preprocessing: remove ID/label/std=0 columns, mean imputation, normalization
    train_id = train_data['ID'].copy()
    test_id = test_data['ID'].copy()

    train_data.drop(columns=outcome_cols+['ID'], inplace=True)
    test_data.drop(columns= outcome_cols+['ID'], inplace=True)

    #print(f'Fold {i} Imputation')
    #imp = SimpleImputer(missing_values=np.nan, strategy='mean')
    #train_data.values = imp.fit_transform(train_data)
    #test_data.values  = imp.transform(test_data)
    test_data = test_data.fillna(train_data.mean())
    train_data = train_data.fillna(train_data.mean())

    sd_0_cols = train_data.columns[(train_data.std() == 0)]
    train_data.drop(columns=sd_0_cols, inplace=True)
    test_data.drop(columns=sd_0_cols, inplace=True)

    cols_to_scale = [foo for foo in con_cat_cols + contin_cols if foo in train_data.columns]
    scaler = StandardScaler()
    train_data.loc[:,cols_to_scale] = scaler.fit_transform(train_data.loc[:,cols_to_scale])
    test_data.loc[:,cols_to_scale]  = scaler.transform(test_data.loc[:,cols_to_scale])

    ############# prepare data for balanced batch ###############
    pos_idx = np.where(train_labels == 1)[0]
    neg_idx = np.where(train_labels == 0)[0]

    train_data_pos = torch.Tensor(train_data.iloc[pos_idx].values)
    train_data_neg = torch.Tensor(train_data.iloc[neg_idx].values)
    train_data_t = torch.Tensor(train_data.values)

    train_label_pos = torch.Tensor(train_labels.iloc[pos_idx].values)
    train_label_neg = torch.Tensor(train_labels.iloc[neg_idx].values)
    train_labels_t = torch.Tensor(train_labels.values)

    train_Dataset_pos = TensorDataset(train_data_pos, train_label_pos)
    train_Dataset_neg = TensorDataset(train_data_neg, train_label_neg)
    train_Dataset = TensorDataset(train_data_t, train_labels_t)

    train_loader_pos = DataLoader(train_Dataset_pos, batch_size=int(args.batch_size/2), shuffle=True, num_workers=2, pin_memory=True)
    train_loader_neg = DataLoader(train_Dataset_neg, batch_size=int(args.batch_size/2), shuffle=True, num_workers=2, pin_memory=True)
    train_loader = DataLoader(train_Dataset, batch_size=args.batch_size, shuffle=True, num_workers=2, pin_memory=True)
    
    ############# prepare data for balanced batch (validation) ###############
    pos_idx = np.where(test_labels == 1)[0]
    neg_idx = np.where(test_labels == 0)[0]
    neg_idx = np.random.choice(neg_idx, len(pos_idx))
    
    test_data_pos = torch.Tensor(test_data.iloc[pos_idx].values)
    test_data_neg = torch.Tensor(test_data.iloc[neg_idx].values)
    test_data_bal = torch.cat((test_data_pos, test_data_neg), 0)
    test_labels_pos = torch.Tensor(test_labels.iloc[pos_idx].values)
    test_labels_neg = torch.Tensor(test_labels.iloc[neg_idx].values)
    test_labels_bal = torch.cat((test_labels_pos, test_labels_neg), 0)
    valid_Dataset_bal = TensorDataset(test_data_bal, test_labels_bal)
    val_loader_bal = DataLoader(valid_Dataset_bal, batch_size=args.batch_size, shuffle=True, num_workers=2, pin_memory=True)
    

    valid_data_t = torch.Tensor(test_data.values)
    test_labels_t = torch.Tensor(test_labels.values)
    valid_Dataset = TensorDataset(valid_data_t, test_labels_t)
    val_loader = DataLoader(valid_Dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)


    ############ XGB ################
    xgb = XGBClassifier()
    xgb.fit(train_data, train_labels)
    auc = roc_auc_score(test_labels, xgb.predict_proba(test_data)[:,1])
    print(f'XGB AUC={auc:.3f}\tNNs:')
    if i == 0:
        auc_df['xgb'] = np.nan
    auc_df['xgb'].iloc[i] = auc

    ############ RF ################
    rf = RandomForestClassifier()
    rf.fit(train_data, train_labels)
    auc = roc_auc_score(test_labels, rf.predict_proba(test_data)[:,1])
    print(f'RF AUC={auc:.3f}\nNNs:')
    if i == 0:
        auc_df['rf'] = np.nan
    auc_df['rf'].iloc[i] = auc


    ############ ResBlock ################
    '''
    model = torch.load('Auto-PyTorch/examples/basics/TOPCAT_search.pt')
    model.cuda()
    auroc_meter = AverageMeter()
    for data, target in val_loader:
        data = data.cuda()
        target = target.long().cuda()
        output = model(data)
        output = torch.sigmoid(output)
        aurocsore = auroc(output.data[:,1], target) 
        auroc_meter.update(aurocsore, target.size(0))
    print(auroc_meter.avg.cpu().detach().numpy())
    if i == 0:
        auc_df['res'] = np.nan
    auc_df['res'].iloc[i] = auroc_meter.avg.cpu().detach().numpy()
    '''

    model = Appended_Model(last_in=79, last_out=2)
    model.cuda()
    optimizer_BB = torch.optim.SGD(model.backbone.parameters(), args.lr * 10e-8, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler_BB = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_BB, factor=args.lr_Plateau_factor, patience=args.lr_Plateau_patience)
    
    optimizer_CB = torch.optim.SGD(model.linear_main.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler_CB = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_CB, factor=args.lr_Plateau_factor, patience=args.lr_Plateau_patience)
    
    optimizer_R = torch.optim.SGD(model.linear.parameters(), args.lr* 10e-5, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler_R = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_R, factor=args.lr_Plateau_factor, patience=args.lr_Plateau_patience)
    
    

    # [index, acc]
    train_acc_bal = [[],[]]
    train_acc_rand = [[],[]]
    ta_bal = [[],[]]
    ta_imba = [[],[]]

    for epoch in range(starting_epoch, starting_epoch + args.epochs):
        loss_rand_train, loss_bal_train, top1_bal_train, top1_rand_train = training(train_loader, train_loader_pos, train_loader_neg, model, criterion, optimizer_BB, optimizer_CB, optimizer_R, epoch)


        loss_rand_test, prec1_bal_test = validate(val_loader_bal, model, criterion, if_main=True)
        loss_bal_test, prec1_imba_test = validate(val_loader_bal, model, criterion, if_main=False)

        train_acc_bal[0].append(epoch)
        train_acc_rand[0].append(epoch)
        ta_bal[0].append(epoch)
        ta_imba[0].append(epoch)

        train_acc_bal[1].append(top1_bal_train)
        train_acc_rand[1].append(top1_rand_train)
        ta_bal[1].append(prec1_bal_test)
        ta_imba[1].append(prec1_imba_test)

        #scheduler.step(metrics=prec1_bal_test)
        scheduler_BB.step(metrics=prec1_bal_test)
        scheduler_CB.step(metrics=prec1_bal_test)
        scheduler_R.step(metrics=prec1_bal_test)

        # remember best prec@1 and save checkpoint
        # balanced, testing result
        if prec1_bal_test > best_prec1:
            is_best = prec1_bal_test > best_prec1
            best_prec1 = max(prec1_bal_test, best_prec1)
            best_epoch = epoch

        if is_best:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer_BB': optimizer_BB.state_dict(),
                'optimizer_CB': optimizer_CB.state_dict(),
                'optimizer_R': optimizer_R.state_dict(),
                'scheduler_BB': scheduler_BB.state_dict(),
                'scheduler_CB': scheduler_CB.state_dict(),
                'scheduler_R': scheduler_R.state_dict(),
            }, filename=os.path.join(args.save_dir, 'best_model.pt'))

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer_BB': optimizer_BB.state_dict(),
            'optimizer_CB': optimizer_CB.state_dict(),
            'optimizer_R': optimizer_R.state_dict(),
            'scheduler_BB': scheduler_BB.state_dict(),
            'scheduler_CB': scheduler_CB.state_dict(),
            'scheduler_R': scheduler_R.state_dict(),
        }, filename=os.path.join(args.save_dir, 'checkpoint.pt'))

        plt.plot(train_acc_bal[0], train_acc_bal[1], label='train_acc_bal')
        plt.plot(train_acc_rand[0], train_acc_rand[1], label='train_acc_rand')
        plt.plot(ta_imba[0], ta_imba[1], label='TA_imba')
        plt.plot(ta_bal[0], ta_bal[1], label='TA_bal')
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(args.save_dir, 'net_train.png'))
        if is_best:
            plt.savefig(os.path.join(args.save_dir, 'net_train_best_epoch'+ str(epoch) + '.png'))
        plt.close()

        # for dataframe
        auc = best_prec1
        if best_prec1 < best_prec1_fold:
            auc = best_prec1_fold
        else:
            best_prec1_fold = best_prec1


    '''
    # quick validation (when without validation function)
    auroc_meter = AverageMeter()
    model.eval()
    for data, target in val_loader:
        data = data.cuda()
        target = target.long().cuda()
        output = model(data)
        output = torch.sigmoid(output)
        aurocsore = auroc(output.data[:,1], target) 
        auroc_meter.update(aurocsore, target.size(0))
    print(auroc_meter.avg.cpu().detach().numpy())
    '''

    if i == 0:
        auc_df['res'] = np.nan

    '''
    # only quick test
    auc_df['res'].iloc[i] = auroc_meter.avg.cpu().detach().numpy()
    '''

    # real training
    auc_df['res'].iloc[i] = best_prec1_fold
    print('AUC=', best_prec1_fold)


auc_scores = auc_df.T
auc_scores['lower_ci'] = auc_df.T.mean(axis=1) - 1.96*auc_df.T.std(axis=1)/auc_df.T.count(axis=1)
auc_scores['mean'] = auc_df.T.mean(axis=1)
auc_scores['upper_ci'] = auc_df.T.mean(axis=1) + 1.96*auc_df.T.std(axis=1)/auc_df.T.count(axis=1)
#auc_scores.sort_values(by='mean', ascending=False)

print(auc_scores)    


Fold 0: 02:40:15 PM  XGB AUC=0.705	NNs:
RF AUC=0.735
NNs:
AUC= 0.7470395565032959
Fold 1: 02:40:44 PM  XGB AUC=0.737	NNs:
RF AUC=0.746
NNs:
AUC= 0.791509211063385
Fold 2: 02:41:15 PM  XGB AUC=0.731	NNs:
RF AUC=0.756
NNs:
AUC= 0.791509211063385
Fold 3: 02:41:46 PM  XGB AUC=0.688	NNs:
RF AUC=0.719
NNs:
AUC= 0.791509211063385
Fold 4: 02:42:18 PM  XGB AUC=0.676	NNs:
RF AUC=0.666
NNs:
AUC= 0.791509211063385
            0         1         2         3         4  lower_ci      mean  \
xgb  0.704557  0.736918  0.731493  0.688345  0.676275  0.697162  0.707518   
rf   0.735279  0.746237  0.755738  0.719303  0.665609  0.710491  0.724433   
res  0.747040  0.791509  0.791509  0.791509  0.791509  0.774819  0.782615   

     upper_ci  
xgb  0.717873  
rf   0.738375  
res  0.790411  


In [20]:
model.linear.parameters()

<generator object Module.parameters at 0x7f5cc6975a40>

In [21]:
auc_df

Unnamed: 0,xgb,rf,res
0,0.704557,0.735279,0.74704
1,0.736918,0.746237,0.791509
2,0.731493,0.755738,0.791509
3,0.688345,0.719303,0.791509
4,0.676275,0.665609,0.791509


In [22]:
auc_scores

Unnamed: 0,0,1,2,3,4,lower_ci,mean,upper_ci
xgb,0.704557,0.736918,0.731493,0.688345,0.676275,0.697162,0.707518,0.717873
rf,0.735279,0.746237,0.755738,0.719303,0.665609,0.710491,0.724433,0.738375
res,0.74704,0.791509,0.791509,0.791509,0.791509,0.774819,0.782615,0.790411
