In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from statistics import mean


from torch.utils.data import DataLoader
import torch

from helpers.data_generation.file_management import read_hdf5
from helpers.data_generation.error_generation_chi2 import Residual, CombineDataset
from helpers.model.helpers_model import NeuralNet

import warnings
# Silence every warning for the rest of the session (no category or module
# filter is given, so nothing is exempt).
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the training cells - consider narrowing the filter.
warnings.filterwarnings('ignore')

In [2]:
# --- Settings of the generated data set --------------------------------------
size = 6000                                  # first positional argument of Residual.build
ratio = 0.75                                 # forwarded to Residual.build as `ratio`
percent = np.array([0.005, 0.015, 0.005])    # forwarded to Residual.build as `per_error`

# Mini-batch size used by the DataLoaders of the training cells.
batch_size = 50

# Generate the data. Presumably `build` persists its result to disk, since the
# next cell reloads a data set by an ID derived from `size` and `ratio`
# - TODO confirm in helpers.data_generation.
res = Residual()
res.build(size, per_error=percent, ratio=ratio)
print('Data building finished')

KeyboardInterrupt: 

In [None]:
# Identifier of the data set to load, derived from the generation settings
# (size=6000, ratio=0.75 gives "S6000R75").
str_ID = f"S{size}R{int(ratio * 100)}"
final_array, metadata = read_hdf5(str_ID)

# Give every sample a running integer ID (0 .. n_samples - 1); the column name
# is handed to CombineDataset together with the 'class' column.
metadata['ID'] = np.arange(final_array.shape[0])

data_set = CombineDataset(metadata, 'ID', 'class', final_array)

# --- Cross-validation / training settings ------------------------------------
k_folds = 5
# A fixed seed makes every `kfold.split(...)` call return the same partition.
# Without it each experiment cell would be scored on different folds, which
# makes the reported means neither comparable nor reproducible.
cv_seed = 0
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=cv_seed)
batch_size = 50
max_epoch = 100

print('Reading Data Finished')

## 2. Convolutional neural network (CNN)
### 2.0 Baseline

Results:
* Baseline - SGD : 
    * Mean AUCROC - Not normalized : 0.870
    * Mean AUCROC - Normalized : 
* Baseline - Adam : 
    * Mean AUCROC - Not normalized: 0.903
    * Mean AUCROC - Normalized : 0.
* Baseline - SGD/momentum : 
    * Mean AUCROC - Not normalized : 0.906
    * Mean AUCROC - Normalized : 

In [None]:
# K-fold cross-validation of the 'BasicCNN' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('BasicCNN', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')
    

In [None]:
# K-fold cross-validation of 'BasicCNN' with 'SGD/momentum' on a data set built
# with the fifth CombineDataset argument set to False (presumably the
# "not normalized" variant of the results table - confirm in CombineDataset).

# Build the data set once, before the folds: its arguments do not depend on the
# fold, so rebuilding it on every iteration was redundant work and rebound
# `data_set` while the loop was iterating over a split of the previous object.
# NOTE: this still rebinds the notebook-wide `data_set`, so cells executed
# afterwards operate on this variant.
data_set = CombineDataset(metadata, 'ID', 'class', final_array, False)

max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # The samplers restrict the shared data set to the indices of this fold.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    loader_train = DataLoader(data_set, batch_size=batch_size, sampler=train_subsampler)
    loader_test = DataLoader(data_set, batch_size=batch_size, sampler=test_subsampler)

    # A new network is created for every fold.
    netbasic = NeuralNet('BasicCNN', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

In [None]:
# K-fold cross-validation of the 'BasicCNN' architecture with the 'Adam' optimiser.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('BasicCNN', 'Adam')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

In [None]:
# K-fold cross-validation of 'BasicCNN' with 'Adam' on a data set built with the
# fifth CombineDataset argument set to True (presumably the "normalized"
# variant of the results table - confirm in CombineDataset).

# Build the data set once, before the folds: its arguments do not depend on the
# fold, so rebuilding it on every iteration was redundant work and rebound
# `data_set` while the loop was iterating over a split of the previous object.
# NOTE: this still rebinds the notebook-wide `data_set`, so cells executed
# afterwards operate on this variant.
data_set = CombineDataset(metadata, 'ID', 'class', final_array, True)

max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # The samplers restrict the shared data set to the indices of this fold.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    loader_train = DataLoader(data_set, batch_size=batch_size, sampler=train_subsampler)
    loader_test = DataLoader(data_set, batch_size=batch_size, sampler=test_subsampler)

    # A new network is created for every fold.
    netbasic = NeuralNet('BasicCNN', 'Adam')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

In [None]:
# K-fold cross-validation of the 'BasicCNN' architecture with plain 'SGD'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('BasicCNN', 'SGD')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

### 2.1. Spatial exploitation CNN

Results :
* AlexNet : Epoch : 17 - AUCROC : 0.974 - AUCROC Mass : 0.998 - AUCROC Source : 0.949
* VGG11 : Epoch : - AUCROC : 
* GoogleNet : Epoch : - AUCROC : 

In [None]:
# K-fold cross-validation of the 'AlexNet' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('AlexNet', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

print('Finished Training : AlexNet')

In [None]:
# K-fold cross-validation of the 'VGG11' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('VGG11', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

print('Finished Training : VGG11')

In [None]:
# K-fold cross-validation of the 'GoogleNet' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('GoogleNet', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

print('Finished Training : GoogleNet')

### 2.2. Multi-path exploitation CNN

Results :
* ResNet18 : Epoch : - AUCROC : 
* DenseNet161 : Epoch : - AUCROC : 

In [None]:
# K-fold cross-validation of the 'ResNet18' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('ResNet18', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

print('Finished Training : ResNet18')

In [None]:
# K-fold cross-validation of the 'DenseNet161' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('DenseNet161', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

print('Finished Training : DenseNet161')

### 2.3. Resource-limited CNNs

In [None]:
# K-fold cross-validation of the 'SqueezeNet' architecture with 'SGD/momentum'.
max_AUC = []  # one `max_met` value per fold (presumably the best AUC - confirm in NeuralNet)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_set)):
    print(f'FOLD {fold}')
    print('----------------------')

    # Both loaders share `data_set`; the samplers restrict each of them to the
    # indices of the current fold.
    train_subsampler, test_subsampler = (
        torch.utils.data.SubsetRandomSampler(ids) for ids in (train_ids, test_ids)
    )
    loader_train = DataLoader(data_set, sampler=train_subsampler, batch_size=batch_size)
    loader_test = DataLoader(data_set, sampler=test_subsampler, batch_size=batch_size)

    # A new network is created for every fold.
    netbasic = NeuralNet('SqueezeNet', 'SGD/momentum')
    # The loop ends only if train()/test() advance `current_epoch`
    # (presumably train() does - confirm in NeuralNet).
    while netbasic.current_epoch < max_epoch:
        netbasic.train(loader_train)
        res = netbasic.test(loader_test, verbose=False)
    max_AUC.append(netbasic.max_met)

print(f'AUC : {mean(max_AUC)}')

print('Finished Training : SqueezeNet')