In [2]:
import glob
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics

from src.SVM_baseline import SVM_experiment

# SVM result

In [None]:
# Column-oriented accumulator for the SVM baseline: one row per
# (dataset, metric, score) for the selected value of C.
dict_SVM_result = {'dataset': [], 'perf_name': [], 'perf_val': []}
list_dataset = ['DB_S']
list_C = [0.0001, 0.001, 0.01, 0.1, 1., 10., 100., 1000.]
list_ifold = range(5)


for dataset in list_dataset:
    data_prefix = 'data/' + dataset + '/' + dataset
    Y = np.load(data_prefix + '_y.npy')
    # NOTE(security): pickle can execute arbitrary code on load; only read
    # files produced by this project.
    with open(data_prefix + '_folds.data', 'rb') as folds_file:
        list_folds = pickle.load(folds_file)

    # temp[C] gathers the scores of every fold and nested prediction for that C.
    temp = {}
    best_param, best_perf = None, 0.
    for C in list_C:
        temp[C] = {'auc': [], 'aupr': []}
        for ifold in list_ifold:
            # presumably list_folds[ifold] holds the test indices of fold ifold -- TODO confirm
            y_te = Y[list_folds[ifold]]
            name = 'result/SVM_exp_' + dataset + '_C:' + str(C) + '_i:' + str(ifold) + '.data'
            # pickle.load expects an open binary file object, not a path.
            with open(name, 'rb') as model_file:
                model = pickle.load(model_file)
            for pred in model.cv_nested_pred:
                temp[C]['auc'].append(metrics.roc_auc_score(y_true=y_te, y_score=pred))
                temp[C]['aupr'].append(metrics.average_precision_score(y_true=y_te, y_score=pred))
        # Model selection: keep the C with the highest mean AUPR.
        mean_aupr = np.mean(temp[C]['aupr'])
        if mean_aupr > best_perf:
            best_param, best_perf = C, mean_aupr

    if best_param is None:
        # No C beat the initial threshold of 0 (e.g. every score is 0 or NaN).
        print('No C value gave a positive mean AUPR for ' + dataset + '; skipping it.')
        continue

    # Report every individual score obtained with the selected C
    # (one scalar per row, so the frame can be fed to a box plot).
    for auc, aupr in zip(temp[best_param]['auc'], temp[best_param]['aupr']):
        dict_SVM_result['dataset'].append(dataset)
        dict_SVM_result['perf_name'].append('auc')
        dict_SVM_result['perf_val'].append(auc)
        dict_SVM_result['dataset'].append(dataset)
        dict_SVM_result['perf_name'].append('aupr')
        dict_SVM_result['perf_val'].append(aupr)

In [None]:
# Tabulate the SVM baseline scores (one row per dataset / metric / score)
# and preview the first rows.
df_SVM = pd.DataFrame(data=dict_SVM_result)
df_SVM.head(n=5)

In [None]:
%matplotlib inline
f, list_ax = plt.subplots(1, 1, figsize=(7, 7))
sns.boxplot(x="dataset", y="perf_val", hue="perf_name", data=df_SVM, palette="Set3")

# ST NN

In [None]:
def get_param_dict(filename):
    """Parse the hyper-parameters encoded in a result file name.

    The base name of ``filename`` is split on ``'_'`` and the resulting
    fields are mapped, in order, onto the parameter names below. Extra
    fields are appended to the expected list when an architecture is
    ``'ConvModel'``.

    Parameters
    ----------
    filename : str
        Path of a result file, e.g. an entry returned by ``glob.glob("result/*")``.

    Returns
    -------
    dict or None
        Mapping parameter name -> raw string field, or ``None`` when the
        file name has fewer fields than the parameters it should encode
        (i.e. it was not produced with this naming scheme).

    Notes
    -----
    * All values are returned as strings, exactly as found in the name.
    * ``'P_conv_strides'`` is only read when a trailing field is present
      after the protein ConvModel fields; otherwise it defaults to ``'1'``.
      NOTE(review): this assumes the writer omits the stride when it is 1 --
      confirm against the code that builds the file names.
    * NOTE(review): a field that itself contains ``'_'`` (a dataset called
      ``'DB_S'`` for instance) shifts every following field; verify the
      naming scheme of the result files.
    """
    import os

    file_list = os.path.basename(filename).split('_')
    list_cle = ['dataset', 'P_architecture', 'M_architecture',
                'batch_size', 'init_lr', 'decay_steps',
                'lr_decay_factor', 'early_stopping_counter', 'nb_epochs',
                'nb_fully_con_units', 'dropout_keep_prob', 'balance_class', 'stream_type']
    if len(file_list) < len(list_cle):
        return None

    # First pass: only the architectures are needed to know which extra
    # fields follow the common ones.
    param_dict = dict(zip(list_cle, file_list))
    if param_dict['M_architecture'] == 'ConvModel':
        list_cle += ['M_nb_emb_layers', 'M_hidden_units_emb', 'M_hidden_units_up', 'M_l2_reg_coef']

    is_P_conv = param_dict['P_architecture'] == 'ConvModel'
    if is_P_conv:
        list_cle += ['P_nb_filters', 'P_filter_size', 'P_l2_reg_coef']
        # The stride is an optional trailing field.
        if len(file_list) > len(list_cle):
            list_cle.append('P_conv_strides')

    if len(file_list) < len(list_cle):
        return None

    # Second pass with the complete key list; surplus fields are ignored.
    param_dict = dict(zip(list_cle, file_list))
    if is_P_conv:
        param_dict.setdefault('P_conv_strides', '1')

    return param_dict


In [None]:
# Hyper-parameters that describe the optimisation itself.
param_per_learning_process = ['batch_size', 'init_lr', 'lr_decay_factor']

# Regularisation hyper-parameters.
param_reg = ['M_l2_reg_coef', 'P_l2_reg_coef', 'dropout_keep_prob']

# Hyper-parameters that define an architecture, keyed by a 'P_' / 'M_'
# prefix followed by the architecture name.
dict_param_per_archi = {}
dict_param_per_archi['P_ConvModel'] = [
    'P_nb_filters', 'P_filter_size', 'P_conv_strides', 'nb_fully_con_units',
]
dict_param_per_archi['M_ConvModel'] = [
    'M_nb_emb_layers', 'M_hidden_units_emb', 'M_hidden_units_up', 'nb_fully_con_units',
]

In [None]:
from src.param_selection.gridsearch import str_model

In [None]:
#list_dataset = ['CellLoc', 'SCOPe']
#list_architecture = ['ConvModel', 'ConvConcatModel', 'biConvModel']
#list_init_lr = [0.001, 0.0001, 0.00001]
#list_lr_decay_factor = [0.999, 0.99, 0.9, 0.8]

def _fold_means(list_fold_scores):
    """Return the mean of each entry of ``list_fold_scores`` as a list."""
    # Index-based access on purpose: the container type stored in the
    # result files is not visible from this notebook.
    return [np.mean(list_fold_scores[i]) for i in range(len(list_fold_scores))]


def _append_perf_rows(dict_result, row_info, auc, aupr):
    """Append one 'auc' row per entry of ``auc``, then one 'aupr' row per
    entry of ``aupr``, to the column-oriented ``dict_result``.

    ``row_info`` maps every descriptive column of ``dict_result`` to the
    value shared by all the appended rows.
    """
    for perf_name, list_fold_scores in (('auc', auc), ('aupr', aupr)):
        for perf_val in _fold_means(list_fold_scores):
            for cle, val in row_info.items():
                dict_result[cle].append(val)
            dict_result['perf_name'].append(perf_name)
            dict_result['perf_val'].append(perf_val)


# Every run, labelled by its learning (and regularisation) parameters.
dict_result_lp = {'dataset': [], 'model': [], 'archi_param': [], 'learning_param': [], 'perf_name': [], 'perf_val': []}
# Best run so far, stored as [mean aupr, auc, aupr]:
#   dict_result_ap_best[dataset][archi][archi_param]
#   dict_result_mp_best[dataset][archi]
dict_result_ap_best, dict_result_mp_best = {}, {}

for result_path in glob.glob("result/*"):
    param_dict = get_param_dict(result_path)
    if param_dict is None or param_dict['stream_type'] != 'TFrecords':
        continue

    # NOTE(security): pickle can execute arbitrary code on load; only read
    # files produced by this project.
    with open(result_path, 'rb') as result_file:
        auc, aupr = pickle.load(result_file)
    auc, aupr = auc[1], aupr[1]  # we take outer test result

    dataset = param_dict['dataset']
    archi = '_&&_'.join([param_dict['P_architecture'], param_dict['M_architecture']])

    # dict_param_per_archi is keyed by 'P_' / 'M_' + architecture name;
    # an architecture without an entry contributes no parameter.
    list_P_cle = dict_param_per_archi.get('P_' + param_dict['P_architecture'], [])
    list_M_cle = dict_param_per_archi.get('M_' + param_dict['M_architecture'], [])
    # Same '_&&_' separator as the model label above and as the archi_param
    # value filtered on in the "specific model" section of this notebook.
    archi_param = '_&&_'.join([
        '_'.join([str(param_dict[cle]) for cle in list_P_cle]),
        '_'.join([str(param_dict[cle]) for cle in list_M_cle]),
    ])

    learning_param = '_'.join([str(param_dict[cle]) for cle in param_per_learning_process])
    reg_param = '_'.join([str(param_dict[cle]) for cle in param_reg])
    learning_param = '_;_'.join([learning_param, reg_param])

    _append_perf_rows(
        dict_result_lp,
        {'dataset': dataset, 'model': archi, 'archi_param': archi_param, 'learning_param': learning_param},
        auc, aupr)

    # Runs are ranked on their mean AUPR.
    mean_aupr = np.mean(_fold_means(aupr))

    best_ap = dict_result_ap_best.setdefault(dataset, {}).setdefault(archi, {}) \
        .setdefault(archi_param, [0., [], []])
    if mean_aupr > best_ap[0]:
        best_ap[:] = [mean_aupr, auc, aupr]

    best_mp = dict_result_mp_best.setdefault(dataset, {}).setdefault(archi, [0., [], []])
    if mean_aupr > best_mp[0]:
        best_mp[:] = [mean_aupr, auc, aupr]

# Scores of the best run for every (dataset, architecture, architecture parameters).
dict_result_ap = {'dataset': [], 'model': [], 'archi_param': [], 'perf_name': [], 'perf_val': []}
for dataset in dict_result_ap_best:
    for archi in dict_result_ap_best[dataset]:
        for archi_param in dict_result_ap_best[dataset][archi]:
            _, auc, aupr = dict_result_ap_best[dataset][archi][archi_param]
            _append_perf_rows(
                dict_result_ap,
                {'dataset': dataset, 'model': archi, 'archi_param': archi_param},
                auc, aupr)

# Scores of the best run for every (dataset, architecture).
dict_result_mp = {'dataset': [], 'model': [], 'perf_name': [], 'perf_val': []}
for dataset in dict_result_mp_best:
    for archi in dict_result_mp_best[dataset]:
        _, auc, aupr = dict_result_mp_best[dataset][archi]
        _append_perf_rows(dict_result_mp, {'dataset': dataset, 'model': archi}, auc, aupr)

### Performance of each architecture (best run per architecture)

In [None]:
# Best-run scores per (dataset, architecture) as a table; preview the first rows.
archi_result = pd.DataFrame(data=dict_result_mp)
archi_result.head(n=5)

In [None]:
%matplotlib inline
nb_plot = len(dict_result_mp_best.keys())
print(nb_plot)
f, list_ax = plt.subplots(nb_plot, 1, figsize=(7, 7 * nb_plot))
if nb_plot == 1:
    list_ax = [list_ax]
else:
    print('ok')

i = 0
for dataset in dict_result_mp_best.keys():
    print(dataset)
    local_result = archi_result[archi_result['dataset']==dataset]
    title = dataset
    sns.boxplot(x="model", y="perf_val", hue="perf_name", data=local_result, palette="Set3", ax=list_ax[i]).set_title(title) 
    i+=1

### Performance of each architecture-parameter setting (best run per setting)

In [3]:
# Best-run scores per (dataset, architecture, architecture parameters) as a
# table; preview the first rows.
archiparam_result = pd.DataFrame(data=dict_result_ap)
archiparam_result.head(n=5)

In [None]:
%matplotlib inline
nb_plot = sum([len(dict_result_ap_best[dataset].keys()) for dataset in dict_result_ap_best.keys()])
f, list_ax = plt.subplots(nb_plot, 1, figsize=(20, 7 * nb_plot))
i=0
for dataset in dict_result_ap_best.keys():
    print(dataset)
    local_result_1 = archiparam_result[archiparam_result['dataset']==dataset]
    for archi in dict_result_ap_best[dataset].keys():
        print(archi)
        local_result = local_result_1[local_result_1['model']==archi]
        title = dataset + '_' + archi
        if nb_plot > 1:
            sns.boxplot(x="archi_param", y="perf_val", hue="perf_name", data=local_result, palette="Set3", ax=list_ax[i]).set_title(title)
            list_ax[i].set_xticklabels(list_ax[i].get_xticklabels(), rotation=80)
        else:
            sns.boxplot(x="archi_param", y="perf_val", hue="perf_name", data=local_result, palette="Set3", ax=list_ax).set_title(title)
            list_ax.set_xticklabels(list_ax.get_xticklabels(),rotation=80)
        i+=1
f.tight_layout()

### perf of a specific model

In [None]:
# Scores of every run, labelled by learning parameters, as a table; preview
# the first rows.
learningparam_result = pd.DataFrame(data=dict_result_lp)
learningparam_result.head(n=5)

In [None]:
# Model to inspect: dataset, architecture pair and architecture parameters.
dataset = 'DB_S'
archi = 'ConvModel_&&_ConvModel'
archi_param = '(40,80)_6_(100)_1_&&_3_(50)_(50)'

# Keep only the runs of that exact model.
is_selected = (
    (learningparam_result['dataset'] == dataset)
    & (learningparam_result['model'] == archi)
    & (learningparam_result['archi_param'] == archi_param)
)
local_result = learningparam_result[is_selected]
title = '_'.join([dataset, archi, archi_param])

In [None]:
%matplotlib inline
f, list_ax = plt.subplots(1, 1, figsize=(18, 7))

sns.boxplot(x="learning_param", y="perf_val", hue="perf_name", data=local_result, palette="Set3", ax=list_ax).set_title(title)
list_ax.set_xticklabels(list_ax.get_xticklabels(),rotation=30)
i+=1