In [None]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter

import argparse
import os
import math
import random
import time
import mymodel.model as model
import numpy as np
import glob
import shutil
import pandas as pd
from PIL import Image
import mydataset.mydataset as mydataset
import utils as utils
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise everything stays on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def seed_torch(seed=7):
    """Seed the Python, NumPy and PyTorch random number generators.

    The CUDA generators are seeded as well when ``device`` is a CUDA device.
    cuDNN is additionally switched to deterministic mode with its benchmark
    mode turned off.

    Args:
        seed: integer seed handed to every generator (default 7).
    """
    # The same seed goes to every CPU-side generator.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    on_gpu = device.type == 'cuda'
    if on_gpu:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
def run(args):
    """Train one configuration with early stopping and report its best metrics.

    Builds the model named by ``args['arch']`` and the train/val/test subsets of
    the dataset named by ``args['data']``, then trains for at most
    ``args['epochs']`` epochs. After each epoch the validation metric selected
    by ``args['monitor']`` is handed to ``utils.EarlyStopping``. Whenever its
    ``counter`` is 0 after that call, the epoch's validation and test metrics
    are remembered as the best ones (presumably counter == 0 means "improved";
    confirm against ``utils.EarlyStopping``).

    Args:
        args: dict of settings. Keys read here: ``arch``, ``inputd``, ``lr``,
            ``reg``, ``data``, ``r``, ``k``, ``keys``, ``split``,
            ``batch_size``, ``posRate``, ``save``, ``monitor``, ``epochs``,
            ``patience``. The optional ``num_workers`` key (default 32) sets
            the DataLoader worker count. The dict is also forwarded to
            ``train``, ``validate`` and ``utils.EarlyStopping``.

    Returns:
        ``(best_metrics, bestValidationMetrics)``: the test-set and
        validation-set ``(acc, auc, sen, spe, loss)`` tuples of the best epoch.

    Raises:
        RuntimeError: if no epoch was ever recorded as best (for example when
            ``args['epochs']`` is 0), instead of an UnboundLocalError.
    """
    net = getattr(model, args['arch'])(inputd=args['inputd'])

    # Only parameters that are not frozen are given to the optimizer.
    parameters = [p for p in net.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(parameters, lr=args['lr'], weight_decay=args['reg'])

    dataset_cls = getattr(mydataset, args['data'])
    num_workers = args.get('num_workers', 32)
    # Pinned host memory only helps (and only avoids a warning) on CUDA.
    pin_memory = device.type == 'cuda'

    def make_loader(subset, shuffle):
        """Build the DataLoader for one dataset subset ('train'/'val'/'test')."""
        dataset = dataset_cls(train=subset, r=args['r'], k=args['k'],
                              keys=args['keys'], split=args['split'])
        return torch.utils.data.DataLoader(
            dataset, batch_size=args['batch_size'], shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory)

    train_loader = make_loader('train', shuffle=True)

    # BCE loss weighted by args['posRate']. That value is hard-coded to save
    # time; it was obtained once as (#negative labels / #positive labels)
    # counted over the training loader.
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(args['posRate'])).to(device)

    val_loader = make_loader('val', shuffle=False)
    test_loader = make_loader('test', shuffle=False)

    net.to(device)

    # Indices into the (acc, auc, sen, spe, loss) tuple returned by validate().
    monitor_values = {'acc': 0, 'auc': 1, 'loss': 4}
    monitor_idx = monitor_values[args['monitor']]

    best_metrics = None
    best_epoch = None
    bestValidationMetrics = None

    writer = SummaryWriter(os.path.join(args['save'], time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime())))
    try:
        # set-up early stopping
        early_stopping = utils.EarlyStopping(save_dir=args['save'], args=args)

        for epoch in range(args['epochs']):
            # train for one epoch
            train(train_loader, net, criterion, optimizer, epoch, args, writer)

            # evaluate on validation set (acc, auc, sen, spe, loss)
            metrics = validate(val_loader, net, epoch, criterion, args, writer, 'val')

            # early stopping based on validation performance
            early_stopping(epoch, metrics[monitor_idx], net, optimizer)

            # The test set is evaluated (and logged) every epoch, whatever the
            # early-stopping outcome.
            test_metrics = validate(test_loader, net, epoch, criterion, args, writer, 'test')

            if early_stopping.early_stop:
                print('****Early stop at epoch:{}'.format(epoch-args['patience']))
                break

            if early_stopping.counter == 0:
                bestValidationMetrics = metrics
                best_metrics = test_metrics
                best_epoch = epoch
    finally:
        # Flush and release the event file even if training fails part-way;
        # a sweep calls run() many times and would otherwise leak writers.
        writer.close()

    if best_metrics is None:
        raise RuntimeError(
            "run() finished without recording a best epoch "
            "(epochs={!r})".format(args['epochs']))

    print('****testing result: epoch: {}, acc: {}, auc: {}, sen: {}, spe: {}, loss: {}'.format(
        best_epoch, best_metrics[0], best_metrics[1],
        best_metrics[2], best_metrics[3], best_metrics[4]))
    return best_metrics, bestValidationMetrics

def train(train_loader, model, criterion,  optimizer, epoch, args, writer):
    """Run one training epoch and log epoch-level metrics.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network in use; its forward pass returns a pair whose first
            element holds the logits (the second element is ignored here).
        criterion: loss called as ``criterion(logits, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used for the progress prefix and as logging step.
        args: settings dict; reads ``sd`` (optional coefficient of a squared
            L2 penalty on the batch logits, disabled when falsy), ``freq``
            (progress is displayed every ``freq`` batches) and ``threshold``
            (passed to ``utils.accuracy``).
        writer: TensorBoard writer, or a falsy value to skip logging.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    losses = utils.AverageMeter('Loss', ':.4e')

    progress = utils.ProgressMeter(
        len(train_loader),
        [losses],
        prefix="Epoch: [{}]".format(epoch))

    model.train()

    # Send inputs to wherever the model lives instead of the default CUDA
    # device; a parameter-free model keeps the previous CUDA behaviour.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cuda')

    # Per-batch results are collected in lists and concatenated once after the
    # loop. Logits are detached so the epoch-level copy does not keep every
    # batch's autograd graph referenced.
    epoch_outputs = []
    epoch_targets = []

    for i, (images, target) in enumerate(train_loader):
        images = images.to(model_device)
        target = target.to(model_device).float()

        output, _ = model(images)
        output = output.view(-1).float()
        epoch_outputs.append(output.detach())
        epoch_targets.append(target)

        loss = criterion(output, target)
        if args['sd']:
            loss = loss + (args['sd']/2) * torch.norm(output).pow(2)

        losses.update(loss.item(), images.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % args['freq'] == 0:
            progress.display(i)

    if not epoch_outputs:
        raise ValueError("train_loader yielded no batches")

    outputs = torch.cat(epoch_outputs, 0)
    targets = torch.cat(epoch_targets, 0)
    acc, sen, spe = utils.accuracy(outputs, targets, args['threshold'], False)

    if writer:
        writer.add_scalar("Loss/train", losses.avg, epoch)
        writer.add_scalar("Accuracy/train", acc, epoch)
        writer.add_scalar("sen/train", sen, epoch)
        writer.add_scalar("spe/train", spe, epoch)


def validate(val_loader, model, epoch, criterion, args, writer, val='val'):
    """Evaluate the model on one loader without gradient tracking.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network in use; its forward pass returns a pair whose first
            element holds the logits (the second element is ignored here).
        epoch: epoch index used as the logging step.
        criterion: loss called as ``criterion(logits, target)``.
        args: settings dict; reads ``threshold`` (passed to ``utils.accuracy``).
        writer: TensorBoard writer, or a falsy value to skip logging.
        val: tag of the evaluated subset. ``'val'`` prints the validation
            banner, any other value prints the testing banner; the tag is also
            the suffix of the logged scalar names.

    Returns:
        Tuple ``(acc, auc, sen, spe, average_loss)``.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    losses = utils.AverageMeter('Loss', ':.4e')

    model.eval()

    # Send inputs to wherever the model lives instead of the default CUDA
    # device; a parameter-free model keeps the previous CUDA behaviour.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cuda')

    # Collected per batch and concatenated once, rather than re-copying the
    # running tensor on every iteration.
    all_outputs = []
    all_targets = []

    with torch.no_grad():
        for images, target in val_loader:
            images = images.to(model_device)
            target = target.to(model_device).float()

            output, _ = model(images)
            output = output.view(-1).float()
            all_outputs.append(output)
            all_targets.append(target)

            loss = criterion(output, target)
            losses.update(loss.item(), images.size(0))

    if not all_outputs:
        raise ValueError("val_loader yielded no batches")

    outputs = torch.cat(all_outputs, 0)
    targets = torch.cat(all_targets, 0)
    acc, sen, spe, auc = utils.accuracy(outputs, targets, args['threshold'])

    banner = ' **Validation' if val == 'val' else ' ***Testing'
    print((banner + ' Acc {acc:.3f} sen {sen:.3f} spe {spe:.3f} AUC {auc:.3f} LOSS {loss:.3f}')
          .format(acc=acc, sen=sen, spe=spe, auc=auc, loss=losses.avg))

    if writer:
        writer.add_scalar("Loss/"+val, losses.avg, epoch)
        writer.add_scalar("Accuracy/"+val, acc, epoch)
        writer.add_scalar("sen/"+val, sen, epoch)
        writer.add_scalar("spe/"+val, spe, epoch)
        writer.add_scalar("auc/"+val, auc, epoch)

    return acc, auc, sen, spe, losses.avg

In [None]:
# Baseline configuration. The sweep below overwrites 'split', 'k', 'r' and
# 'keys' for every run and adds 'save' (the per-run output directory).
args = {
    'model' : '',
    'arch' : 'attmil_ctranspath',   # attribute looked up on the `model` module
    'data' : 'cam16_curcos',        # dataset class looked up on `mydataset`
    'split': 42,
    'batch_size': 1,
    'epochs': 100,
    'inputd': 1024,                 # passed to the model constructor as `inputd`
    'code': 'cam_17',               # name of the results folder for this sweep
    'threshold': 0.5,               # passed to utils.accuracy
    'lr': 2e-4,                     # Adam learning rate
    'reg': 1e-5,                    # Adam weight decay
    'freq': 100,                    # training progress is shown every `freq` batches
    'pretrained': "",
    'patience': 10,
    'stop_epoch': 30,
    'monitor': 'loss',              # validation metric handed to early stopping: 'acc', 'auc' or 'loss'
    'sd': None,                     # coefficient of the squared-logit-norm penalty; falsy disables it
    'r': 0.10,
    'k': -1,
    'posRate': 1.45,                # pos_weight of the BCE-with-logits loss
    'keys': 'cam16_indexes_dict_CompareEachFE_non_redundant_threshold_0.95_mean_5.npy'#'indexes_dict_all.npy'
}

if args['sd']:
    print('spectrum decoupling')

# makedirs creates missing parent folders and tolerates an existing directory,
# so the sweep also starts on a fresh checkout where ./CM16/results/runs is absent.
save_code = './CM16/results/runs/'+args["code"]
os.makedirs(save_code, exist_ok=True)

for split in range(42, 47):

    args["split"] = split
    save_dir = save_code + '/' + str(args["split"])
    os.makedirs(save_dir, exist_ok=True)

    testRes = {'k':[], 'r':[], 'auc':[], 'acc':[], 'sen':[], 'spe': []}
    validationRes = {'k':[], 'r':[], 'auc':[], 'acc':[], 'sen':[], 'spe': []}

    # Each k setting pairs the value passed to the dataset with the tag that
    # selects the matching index file.
    # NOTE(review): the baseline 'keys' above starts with 'cam16_' but the file
    # name built here does not - confirm which naming the dataset expects.
    for k, keys_tag in [[-1, "max"], [-5, "mean_5"], [-10, "mean_10"], [-20, "mean_20"], [-50, "mean_50"], [-100, "mean_100"], [-150, "mean_150"]]:
        for r in [0.1, 0.2, 0.3, 0.5, 0.7]:

            # Re-seed before every run so each configuration starts from the
            # same random state.
            seed_torch(7)

            print(k, "_", r)
            args["k"] = k
            args["r"] = r
            args["keys"] = "indexes_dict_CompareEachFE_non_redundant_threshold_0.95_{}.npy".format(keys_tag)
            args["save"] = os.path.join(save_dir, str(-1*k)+'_'+str(r))

            testMetrics, validationMetrics = run(args)

            # Metric tuples are ordered (acc, auc, sen, spe, loss).
            for results, run_metrics in ((testRes, testMetrics), (validationRes, validationMetrics)):
                results['k'].append(keys_tag)
                results['r'].append(r)
                results['auc'].append(run_metrics[1])
                results['acc'].append(run_metrics[0])
                results['sen'].append(run_metrics[2])
                results['spe'].append(run_metrics[3])

    # One CSV per split for the test metrics and one for the validation metrics.
    df = pd.DataFrame(testRes)
    df.to_csv(os.path.join(save_dir, 'testResults.csv'))

    df = pd.DataFrame(validationRes)
    df.to_csv(os.path.join(save_dir, 'validationResults.csv'))