# 0. Imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from tqdm.notebook import tqdm
from tqdm import tqdm_notebook
import classifiers
import importlib
import pickle
import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this also hides any warnings raised while fitting the models
# in the classification cells below.
warnings.filterwarnings("ignore")

In [None]:
%%javascript
// Adds a clickable link to this cell's output that toggles a CSS rule hiding
// stderr output areas. `hideNext` is the state the next click will apply.
// NOTE(review): `$` (jQuery) and `element` are not defined in this cell; they
// are presumably supplied by the notebook front end / %%javascript magic.
(function(hideNext) {
    const styleId = "js_jupyter_suppress_warnings";
    const toggleLink = $( "<a>Setup failed</a>" );

    // Reuse the <style> tag from a previous run of this cell, else create it.
    let styleTag = $("#" + styleId);
    if (styleTag.length === 0) {
        styleTag = $("<style id='" + styleId + "' type='text/css'>div.output_stderr { } </style>").appendTo("head");
    }

    const toggle = function() {
        // Start from an empty rule set, then re-add the hiding rule if needed.
        styleTag.empty();
        const stateLabel = hideNext ? 'Hiding' : 'Showing';
        if (hideNext) {
            styleTag.append("div.output_stderr, div[data-mime-type*='.stderr'] { display:none; }");
        }
        toggleLink.text(stateLabel + ' warnings (click to toggle)');
        hideNext = !hideNext;
    };

    // Register the handler and fire it once so the initial state is applied
    // and the placeholder link text is replaced.
    toggleLink.click(toggle).click();
    $(element).append(toggleLink);
})(true);

# 1. Data Load

In [None]:
def _read_metric_table(csv_path):
    """Read one metrics CSV: first row is the header, first column is the index."""
    return pd.read_csv(csv_path, sep=',', header=0, index_col=0)


# Whole-network metric tables: transposed and stored as plain NumPy arrays.
hypergeometric = np.array(_read_metric_table("../../data/processed/metrics/disease_hypergeometric_apid_huri.csv").transpose())
closeness = np.array(_read_metric_table("../../data/processed/metrics/disease_closeness_apid_huri.csv").transpose())
betweenness = np.array(_read_metric_table("../../data/processed/metrics/disease_betweenness_apid_huri.csv").transpose())
rwr = np.array(_read_metric_table("../../data/processed/metrics/disease_rwr_apid_huri.csv").transpose())
fraction_betweenness = np.array(_read_metric_table("../../data/processed/metrics/disease_fraction_betweenness_apid_huri.csv").transpose())

# `ppi80` tables stay as DataFrames; a later cell splits them with groupby(level=0).
hypergeometric_80 = _read_metric_table("../../data/processed/metrics/disease_ppi80_hyper_apid_huri.csv")
closeness_80 = _read_metric_table("../../data/processed/metrics/disease_ppi80_closeness_apid_huri.csv")
betweenness_80 = _read_metric_table("../../data/processed/metrics/disease_ppi80_betweenness_apid_huri.csv")
fraction_betweenness_80 = _read_metric_table("../../data/processed/metrics/disease_ppi80_fraction_betweenness_apid_huri.csv")
rwr_80 = _read_metric_table("../../data/processed/metrics/disease_ppi80_rwr_apid_huri.csv")

# `protein80` tables are likewise kept as DataFrames for the same later split.
hypergeometric_protein80 = _read_metric_table("../../data/processed/metrics/disease_protein80_hyper_apid_huri.csv")
closeness_protein80 = _read_metric_table("../../data/processed/metrics/disease_protein80_closeness_apid_huri.csv")
betweenness_protein80 = _read_metric_table("../../data/processed/metrics/disease_protein80_betweenness_apid_huri.csv")
fraction_betweenness_protein80 = _read_metric_table("../../data/processed/metrics/disease_protein80_fraction_betweenness_apid_huri.csv")
rwr_protein80 = _read_metric_table("../../data/processed/metrics/disease_protein80_rwr_apid_huri.csv")

In [None]:
def _read_conservative_metric_table(csv_path):
    """Read one conservative-module metrics CSV (header in row 0, index in column 0)."""
    return pd.read_csv(csv_path, sep=',', header=0, index_col=0)


# Whole-network conservative metric tables: transposed and stored as NumPy arrays.
hypergeometric_conservative = np.array(_read_conservative_metric_table("../../data/processed/metrics/disease_hypergeometric_conservative_apid_huri.csv").transpose())
closeness_conservative = np.array(_read_conservative_metric_table("../../data/processed/metrics/disease_closeness_conservative_apid_huri.csv").transpose())
betweenness_conservative = np.array(_read_conservative_metric_table("../../data/processed/metrics/disease_betweenness_conservative_apid_huri.csv").transpose())
rwr_conservative = np.array(_read_conservative_metric_table("../../data/processed/metrics/disease_rwr_conservative_apid_huri.csv").transpose())
fraction_betweenness_conservative = np.array(_read_conservative_metric_table("../../data/processed/metrics/disease_fraction_betweenness_conservative_apid_huri.csv").transpose())

# `ppi80` conservative tables stay as DataFrames; a later cell splits them with groupby(level=0).
hypergeometric_conservative_80 = _read_conservative_metric_table("../../data/processed/metrics/disease_ppi80_hyper_conservative_apid_huri.csv")
closeness_conservative_80 = _read_conservative_metric_table("../../data/processed/metrics/disease_ppi80_closeness_conservative_apid_huri.csv")
betweenness_conservative_80 = _read_conservative_metric_table("../../data/processed/metrics/disease_ppi80_betweenness_conservative_apid_huri.csv")
fraction_betweenness_conservative_80 = _read_conservative_metric_table("../../data/processed/metrics/disease_ppi80_fraction_betweenness_conservative_apid_huri.csv")
rwr_conservative_80 = _read_conservative_metric_table("../../data/processed/metrics/disease_ppi80_rwr_conservative_apid_huri.csv")

# `protein80` conservative tables are likewise kept as DataFrames.
hypergeometric_conservative_protein80 = _read_conservative_metric_table("../../data/processed/metrics/disease_protein80_hyper_conservative_apid_huri.csv")
closeness_conservative_protein80 = _read_conservative_metric_table("../../data/processed/metrics/disease_protein80_closeness_conservative_apid_huri.csv")
betweenness_conservative_protein80 = _read_conservative_metric_table("../../data/processed/metrics/disease_protein80_betweenness_conservative_apid_huri.csv")
fraction_betweenness_conservative_protein80 = _read_conservative_metric_table("../../data/processed/metrics/disease_protein80_fraction_betweenness_conservative_apid_huri.csv")
rwr_conservative_protein80 = _read_conservative_metric_table("../../data/processed/metrics/disease_protein80_rwr_conservative_apid_huri.csv")

In [None]:
# The hypergeometric table supplies both the column names and the row index
# that are attached to the (header-less) label file.
hypergeometric_df = pd.read_csv("../../data/processed/metrics/disease_hypergeometric_apid_huri.csv", sep=',', header=0, index_col=0)
disgenet_labels_df = (
    pd.read_csv("../../data/processed/disgenet_filtered_labels_apid_huri.csv", sep=',', names=hypergeometric_df.columns)
    # Rows are matched positionally: the label file must have as many rows
    # as the hypergeometric table.
    .assign(protein_id=hypergeometric_df.index)
    .set_index('protein_id')
)

In [None]:
# The conservative hypergeometric table supplies the column names and the row
# index that are attached to the (header-less) conservative label file.
hypergeometric_conservative_df = pd.read_csv("../../data/processed/metrics/disease_hypergeometric_conservative_apid_huri.csv", sep=',', header=0, index_col=0)
disgenet_labels_conservative_df = (
    pd.read_csv('../../data/processed/disgenet_conservative_labels_apid_huri.csv', names=hypergeometric_conservative_df.columns)
    # Rows are matched positionally with the metric table's index.
    .assign(protein_id=hypergeometric_conservative_df.index)
    .set_index('protein_id')
)

In [None]:
# Split every `ppi80` / `protein80` table into one DataFrame per value of its
# index (groupby(level=0)), each re-indexed by its 'level_1' column ('index'
# for the fraction-betweenness tables). Groups are re-indexed with the
# non-mutating form of set_index instead of inplace=True, so the sub-frames
# handed out by groupby are never modified in place.

# --- ppi80 ---
ppi80_hyper_list = [group.set_index('level_1') for _, group in hypergeometric_80.groupby(level=0)]
ppi80_closeness_list = [group.set_index('level_1') for _, group in closeness_80.groupby(level=0)]
ppi80_betweenness_list = [group.set_index('level_1') for _, group in betweenness_80.groupby(level=0)]
ppi80_fraction_betweenness_list = [group.set_index('index') for _, group in fraction_betweenness_80.groupby(level=0)]
ppi80_rwr_list = [group.set_index('level_1') for _, group in rwr_80.groupby(level=0)]

# One label frame per hypergeometric group: the rows of disgenet_labels_df
# whose index occurs in that group's index (rows keep disgenet_labels_df order).
label_list = [
    disgenet_labels_df[disgenet_labels_df.index.isin(group.index)]
    for group in ppi80_hyper_list
]

# --- protein80 ---
protein80_hyper_list = [group.set_index('level_1') for _, group in hypergeometric_protein80.groupby(level=0)]
protein80_closeness_list = [group.set_index('level_1') for _, group in closeness_protein80.groupby(level=0)]
protein80_betweenness_list = [group.set_index('level_1') for _, group in betweenness_protein80.groupby(level=0)]
protein80_fraction_betweenness_list = [group.set_index('index') for _, group in fraction_betweenness_protein80.groupby(level=0)]
protein80_rwr_list = [group.set_index('level_1') for _, group in rwr_protein80.groupby(level=0)]

label_list_protein80 = [
    disgenet_labels_df[disgenet_labels_df.index.isin(group.index)]
    for group in protein80_hyper_list
]

In [None]:
# Split every conservative `ppi80` / `protein80` table into one DataFrame per
# value of its index (groupby(level=0)), each re-indexed by its 'level_1'
# column ('index' for the fraction-betweenness tables). The non-mutating form
# of set_index is used, so groupby sub-frames are never modified in place.

# --- ppi80 (conservative) ---
ppi80_hyper_list_conservative = [group.set_index('level_1') for _, group in hypergeometric_conservative_80.groupby(level=0)]
ppi80_closeness_list_conservative = [group.set_index('level_1') for _, group in closeness_conservative_80.groupby(level=0)]
ppi80_betweenness_list_conservative = [group.set_index('level_1') for _, group in betweenness_conservative_80.groupby(level=0)]
ppi80_fraction_betweenness_list_conservative = [group.set_index('index') for _, group in fraction_betweenness_conservative_80.groupby(level=0)]
ppi80_rwr_list_conservative = [group.set_index('level_1') for _, group in rwr_conservative_80.groupby(level=0)]

# One label frame per hypergeometric group: the rows of
# disgenet_labels_conservative_df whose index occurs in that group's index.
label_list_conservative = [
    disgenet_labels_conservative_df[disgenet_labels_conservative_df.index.isin(group.index)]
    for group in ppi80_hyper_list_conservative
]

# --- protein80 (conservative) ---
protein80_hyper_list_conservative = [group.set_index('level_1') for _, group in hypergeometric_conservative_protein80.groupby(level=0)]
protein80_closeness_list_conservative = [group.set_index('level_1') for _, group in closeness_conservative_protein80.groupby(level=0)]
protein80_betweenness_list_conservative = [group.set_index('level_1') for _, group in betweenness_conservative_protein80.groupby(level=0)]
protein80_fraction_betweenness_list_conservative = [group.set_index('index') for _, group in fraction_betweenness_conservative_protein80.groupby(level=0)]
protein80_rwr_list_conservative = [group.set_index('level_1') for _, group in rwr_conservative_protein80.groupby(level=0)]

label_list_protein80_conservative = [
    disgenet_labels_conservative_df[disgenet_labels_conservative_df.index.isin(group.index)]
    for group in protein80_hyper_list_conservative
]

In [None]:
def _load_split_fs_table(csv_path, n_chunks=10):
    """Read a feature-selection CSV, transpose it, cut it into `n_chunks`
    column-wise chunks and stack the chunks into one NumPy array."""
    transposed = pd.read_csv(csv_path, sep=',', header=0).transpose()
    return np.array(np.array_split(transposed, n_chunks, axis=1))


# Whole-network feature-selection tables are kept as DataFrames.
hypergeometric_fs_ar = pd.read_csv("../../data/processed/fs/disease/disease_hyper_fs_apid_huri.csv", sep=',', header=0)
closeness_fs_ar = pd.read_csv("../../data/processed/fs/disease/disease_closeness_fs_apid_huri.csv", sep=',', header=0)
betweenness_fs_ar = pd.read_csv("../../data/processed/fs/disease/disease_betweenness_fs_apid_huri.csv", sep=',', header=0)
rwr_fs_ar = pd.read_csv("../../data/processed/fs/disease/disease_rwr_fs_apid_huri.csv", sep=',', header=0)
fraction_betweenness_fs_ar = pd.read_csv("../../data/processed/fs/disease/disease_fraction_betweenness_fs_apid_huri.csv", sep=',', header=0)

# `ppi80` tables: transposed and split into 10 stacked chunks.
# NOTE(review): 10 is presumably the number of ppi80 repetitions — confirm.
hypergeometric_80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_hyper_ppi80_fs_apid_huri.csv")
closeness_80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_closeness_ppi80_fs_apid_huri.csv")
betweenness_80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_betweenness_ppi80_fs_apid_huri.csv")
rwr_80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_rwr_ppi80_fs_apid_huri.csv")
fraction_betweenness_80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_fraction_betweenness_ppi80_fs_apid_huri.csv")

# `protein80` tables: same transpose-and-split treatment.
hypergeometric_protein80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_hyper_protein80_fs_apid_huri.csv")
closeness_protein80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_closeness_protein80_fs_apid_huri.csv")
betweenness_protein80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_betweenness_protein80_fs_apid_huri.csv")
rwr_protein80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_rwr_protein80_fs_apid_huri.csv")
fraction_betweenness_protein80_fs_ar = _load_split_fs_table("../../data/processed/fs/disease/disgenet_fraction_betweenness_protein80_fs_apid_huri.csv")

In [None]:
def _load_split_conservative_fs_table(csv_path, n_chunks=10):
    """Read a conservative feature-selection CSV, transpose it, cut it into
    `n_chunks` column-wise chunks and stack the chunks into one NumPy array."""
    transposed = pd.read_csv(csv_path, sep=',', header=0).transpose()
    return np.array(np.array_split(transposed, n_chunks, axis=1))


# Whole-network conservative feature-selection tables are kept as DataFrames.
hypergeometric_fs_ar_conservative = pd.read_csv("../../data/processed/fs/disease/disease_hyper_fs_conservative_apid_huri.csv", sep=',', header=0)
closeness_fs_ar_conservative = pd.read_csv("../../data/processed/fs/disease/disease_closeness_fs_conservative_apid_huri.csv", sep=',', header=0)
betweenness_fs_ar_conservative = pd.read_csv("../../data/processed/fs/disease/disease_betweenness_fs_conservative_apid_huri.csv", sep=',', header=0)
rwr_fs_ar_conservative = pd.read_csv("../../data/processed/fs/disease/disease_rwr_fs_conservative_apid_huri.csv", sep=',', header=0)
fraction_betweenness_fs_ar_conservative = pd.read_csv("../../data/processed/fs/disease/disease_fraction_betweenness_fs_conservative_apid_huri.csv", sep=',', header=0)

# `ppi80` conservative tables: transposed and split into 10 stacked chunks.
# NOTE(review): 10 is presumably the number of ppi80 repetitions — confirm.
hypergeometric_80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_hyper_ppi80_fs_conservative_apid_huri.csv")
closeness_80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_closeness_ppi80_fs_conservative_apid_huri.csv")
betweenness_80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_betweenness_ppi80_fs_conservative_apid_huri.csv")
rwr_80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_rwr_ppi80_fs_conservative_apid_huri.csv")
fraction_betweenness_80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_fraction_betweenness_ppi80_fs_conservative_apid_huri.csv")

# `protein80` conservative tables: same transpose-and-split treatment.
hypergeometric_protein80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_hyper_protein80_fs_conservative_apid_huri.csv")
closeness_protein80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_closeness_protein80_fs_conservative_apid_huri.csv")
betweenness_protein80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_betweenness_protein80_fs_conservative_apid_huri.csv")
rwr_protein80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_rwr_protein80_fs_conservative_apid_huri.csv")
fraction_betweenness_protein80_fs_ar_conservative = _load_split_conservative_fs_table("../../data/processed/fs/disease/disgenet_fraction_betweenness_protein80_fs_conservative_apid_huri.csv")

In [None]:
def _load_test_indices(csv_path):
    """Read a test-index CSV, transpose it, cast to int and subtract 1.

    NOTE(review): the -1 presumably converts 1-based indices to 0-based — confirm.
    """
    return pd.read_csv(csv_path, sep=',', header=0).transpose().to_numpy(dtype='int') - 1


def _load_split_test_index(csv_path, n_chunks=203):
    """Read a test-index CSV, transpose it, split it column-wise into
    `n_chunks` chunks, stack them, cast to int, subtract 1 and swap the first
    two axes.

    NOTE(review): 203 is the chunk count hard-coded for these files; what it
    counts is not visible here — confirm before reusing with other data.
    """
    transposed = pd.read_csv(csv_path, sep=',', header=0).transpose()
    stacked = np.array(np.array_split(transposed, n_chunks, axis = 1)).astype(int) - 1
    return np.moveaxis(stacked, 0, 1)


# Whole-network test indices.
hypergeometric_test_indices = _load_test_indices("../../data/processed/fs/disease/disease_hyper_test_apid_huri.csv")
closeness_test_indices = _load_test_indices("../../data/processed/fs/disease/disease_closeness_test_apid_huri.csv")
betweenness_test_indices = _load_test_indices("../../data/processed/fs/disease/disease_betweenness_test_apid_huri.csv")
fraction_betweenness_test_indices = _load_test_indices("../../data/processed/fs/disease/disease_fraction_betweenness_test_apid_huri.csv")
rwr_test_indices = _load_test_indices("../../data/processed/fs/disease/disease_rwr_test_apid_huri.csv")

# `ppi80` test indices.
hypergeometric_80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_hyper_ppi80_test_apid_huri.csv")
closeness_80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_closeness_ppi80_test_apid_huri.csv")
betweenness_80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_betweenness_ppi80_test_apid_huri.csv")
fraction_betweenness_80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_fraction_betweenness_ppi80_test_apid_huri.csv")
rwr_80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_rwr_ppi80_test_apid_huri.csv")

# `protein80` test indices.
hypergeometric_protein80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_hyper_protein80_test_apid_huri.csv")
closeness_protein80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_closeness_protein80_test_apid_huri.csv")
betweenness_protein80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_betweenness_protein80_test_apid_huri.csv")
fraction_betweenness_protein80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_fraction_betweenness_protein80_test_apid_huri.csv")
rwr_protein80_test_index = _load_split_test_index("../../data/processed/fs/disease/disgenet_rwr_protein80_test_apid_huri.csv")

In [None]:
def _load_conservative_test_indices(csv_path):
    """Read a conservative test-index CSV, transpose it, cast to int and subtract 1.

    NOTE(review): the -1 presumably converts 1-based indices to 0-based — confirm.
    """
    return pd.read_csv(csv_path, sep=',', header=0).transpose().to_numpy(dtype='int') - 1


def _load_split_conservative_test_index(csv_path, n_chunks=301):
    """Read a conservative test-index CSV, transpose it, split it column-wise
    into `n_chunks` chunks, stack them, cast to int, subtract 1 and swap the
    first two axes.

    NOTE(review): 301 is the chunk count hard-coded for these files; what it
    counts is not visible here — confirm before reusing with other data.
    """
    transposed = pd.read_csv(csv_path, sep=',', header=0).transpose()
    stacked = np.array(np.array_split(transposed, n_chunks, axis = 1)).astype(int) - 1
    return np.moveaxis(stacked, 0, 1)


# Whole-network conservative test indices.
hypergeometric_test_indices_conservative = _load_conservative_test_indices("../../data/processed/fs/disease/disease_hyper_test_conservative_apid_huri.csv")
closeness_test_indices_conservative = _load_conservative_test_indices("../../data/processed/fs/disease/disease_closeness_test_conservative_apid_huri.csv")
betweenness_test_indices_conservative = _load_conservative_test_indices("../../data/processed/fs/disease/disease_betweenness_test_conservative_apid_huri.csv")
fraction_betweenness_test_indices_conservative = _load_conservative_test_indices("../../data/processed/fs/disease/disease_fraction_betweenness_test_conservative_apid_huri.csv")
rwr_test_indices_conservative = _load_conservative_test_indices("../../data/processed/fs/disease/disease_rwr_test_conservative_apid_huri.csv")

# `ppi80` conservative test indices.
hypergeometric_80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_hyper_ppi80_test_conservative_apid_huri.csv")
closeness_80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_closeness_ppi80_test_conservative_apid_huri.csv")
betweenness_80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_betweenness_ppi80_test_conservative_apid_huri.csv")
fraction_betweenness_80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_fraction_betweenness_ppi80_test_conservative_apid_huri.csv")
rwr_80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_rwr_ppi80_test_conservative_apid_huri.csv")

# `protein80` conservative test indices.
hypergeometric_protein80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_hyper_protein80_test_conservative_apid_huri.csv")
closeness_protein80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_closeness_protein80_test_conservative_apid_huri.csv")
betweenness_protein80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_betweenness_protein80_test_conservative_apid_huri.csv")
fraction_betweenness_protein80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_fraction_betweenness_protein80_test_conservative_apid_huri.csv")
rwr_protein80_test_index_conservative = _load_split_conservative_test_index("../../data/processed/fs/disease/disgenet_rwr_protein80_test_conservative_apid_huri.csv")

# 2. Classification Tasks

## 2.1. Complete Network

### 2.1.1. Steiner Tree

In [None]:
# Pick up any edits made to the local `classifiers` module since it was imported.
importlib.reload(classifiers)

# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(hyper_lgr_clf, hyper_lgr_proba_clf, hyper_cv,
 hyper_n_fs, hyper_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, hypergeometric, hypergeometric_test_indices,
    hypergeometric_fs_ar, disgenet_labels_df, jobs=20)

hyper_lgr_clf.to_csv('../../models/GAP-MINE/disease/binary/disease_hypergeometric_lgr.csv', index=False)
hyper_lgr_proba_clf.to_csv('../../models/GAP-MINE/disease/probability/disease_hypergeometric_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_hypergeometric.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in hyper_cv]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_hypergeometric.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in hyper_n_fs)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/hypergeometric.pckl", "wb") as model_file:
    for fitted in hyper_models:
        pickle.dump(fitted, model_file)

In [None]:
# Pick up any edits made to the local `classifiers` module since it was imported.
importlib.reload(classifiers)

# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(rwr_lgr_clf, rwr_lgr_proba_clf, rwr_cv,
 rwr_n_fs, rwr_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, rwr, rwr_test_indices,
    rwr_fs_ar, disgenet_labels_df, jobs=20)

rwr_lgr_clf.to_csv('../../models/GAP-MINE/disease/binary/disease_rwr_lgr.csv', index=False)
rwr_lgr_proba_clf.to_csv('../../models/GAP-MINE/disease/probability/disease_rwr_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_rwr.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in rwr_cv]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_rwr.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in rwr_n_fs)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/rwr.pckl", "wb") as model_file:
    for fitted in rwr_models:
        pickle.dump(fitted, model_file)

In [None]:
# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(closeness_lgr_clf, closeness_lgr_proba_clf, closeness_cv,
 closeness_n_fs, closeness_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, closeness, closeness_test_indices,
    closeness_fs_ar, disgenet_labels_df, jobs=20)

closeness_lgr_clf.to_csv('../../models/GAP-MINE/disease/binary/disease_closeness_lgr.csv', index=False)
closeness_lgr_proba_clf.to_csv('../../models/GAP-MINE/disease/probability/disease_closeness_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_closeness.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in closeness_cv]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_closeness.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in closeness_n_fs)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/closeness.pckl", "wb") as model_file:
    for fitted in closeness_models:
        pickle.dump(fitted, model_file)

In [None]:
# Pick up any edits made to the local `classifiers` module since it was imported.
importlib.reload(classifiers)

# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(betweenness_lgr_clf, betweenness_lgr_proba_clf, betweenness_cv,
 betweenness_n_fs, betweenness_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, betweenness, betweenness_test_indices,
    betweenness_fs_ar, disgenet_labels_df, jobs=20)

betweenness_lgr_clf.to_csv('../../models/GAP-MINE/disease/binary/disease_betweenness_lgr.csv', index=False)
betweenness_lgr_proba_clf.to_csv('../../models/GAP-MINE/disease/probability/disease_betweenness_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_betweenness.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in betweenness_cv]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_betweenness.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in betweenness_n_fs)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/betweenness.pckl", "wb") as model_file:
    for fitted in betweenness_models:
        pickle.dump(fitted, model_file)

In [None]:
# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(fraction_betweenness_lgr_clf, fraction_betweenness_lgr_proba_clf, fraction_betweenness_cv,
 fraction_betweenness_n_fs, fraction_betweenness_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, fraction_betweenness, fraction_betweenness_test_indices,
    fraction_betweenness_fs_ar, disgenet_labels_df, jobs=20)

fraction_betweenness_lgr_clf.to_csv('../../models/GAP-MINE/disease/binary/disease_fraction_betweenness_lgr.csv', index=False)
# NOTE(review): every other metric in this notebook writes its probability
# table as '..._lgr_proba.csv'; this file name lacks the '_proba' suffix.
# Left unchanged because downstream readers are not visible here — confirm
# which name they expect before renaming.
fraction_betweenness_lgr_proba_clf.to_csv('../../models/GAP-MINE/disease/probability/disease_fraction_betweenness_lgr.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_fraction_betweenness.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in fraction_betweenness_cv]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_fraction_betweenness.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in fraction_betweenness_n_fs)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/fraction_betweenness.pckl", "wb") as model_file:
    for fitted in fraction_betweenness_models:
        pickle.dump(fitted, model_file)

### 2.1.2. Conservative Module

In [None]:
# Pick up any edits made to the local `classifiers` module since it was imported.
importlib.reload(classifiers)

# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(hyper_lgr_clf_conservative, hyper_lgr_proba_clf_conservative, hyper_cv_conservative,
 hyper_n_fs_conservative, hyper_conservative_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, hypergeometric_conservative, hypergeometric_test_indices_conservative,
    hypergeometric_fs_ar_conservative, disgenet_labels_conservative_df, jobs=20)

hyper_lgr_clf_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_hypergeometric_lgr_conservative.csv', index=False)
hyper_lgr_proba_clf_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_hypergeometric_lgr_proba_conservative.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_hypergeometric_conservative.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in hyper_cv_conservative]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_hypergeometric_conservative.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in hyper_n_fs_conservative)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/hyper_conservative.pckl", "wb") as model_file:
    for fitted in hyper_conservative_models:
        pickle.dump(fitted, model_file)

In [None]:
# Pick up any edits made to the local `classifiers` module since it was imported.
importlib.reload(classifiers)

# Two grids, one per solver family.
# NOTE(review): recent scikit-learn releases expect penalty=None rather than
# the string 'none' — confirm against the installed version.
parameters = [
    {'penalty': ['l1', 'l2'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['liblinear'], 'max_iter': [10, 50, 100]},
    {'penalty': ['l2', 'none'], 'C': [100, 10, 1.0, 0.1, 0.01],
     'solver': ['sag', 'saga', 'newton-cg'], 'max_iter': [10, 50, 100]},
]
clf = LogisticRegression(random_state=22)

# The helper returns five objects, each persisted below.
(rwr_lgr_clf_conservative, rwr_lgr_proba_clf_conservative, rwr_cv_conservative,
 rwr_n_fs_conservative, rwr_conservative_models) = classifiers.multiple_fs_classifier2(
    clf, parameters, rwr_conservative, rwr_test_indices_conservative,
    rwr_fs_ar_conservative, disgenet_labels_conservative_df, jobs=20)

rwr_lgr_clf_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_rwr_lgr_conservative.csv', index=False)
rwr_lgr_proba_clf_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_rwr_lgr_proba_conservative.csv', index=False)

# One "<a> <b>" line per entry of the CV results.
with open('../../models/GAP-MINE/disease/cv_results/disease_rwr_conservative.txt', 'w') as cv_file:
    cv_file.write('\n'.join(['%s %s' % entry for entry in rwr_cv_conservative]))

# One value per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_rwr_conservative.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(count) + "\n" for count in rwr_n_fs_conservative)

# Models are pickled back-to-back into one file; reading them back takes one
# pickle.load call per model.
with open("../../models/GAP-MINE/disease/models/rwr_conservative.pckl", "wb") as model_file:
    for fitted in rwr_conservative_models:
        pickle.dump(fitted, model_file)

In [None]:
# Conservative module, closeness metric: run classifiers.multiple_fs_classifier2
# and persist each of its five outputs.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    closeness_lgr_clf_conservative,
    closeness_lgr_proba_clf_conservative,
    closeness_cv_conservative,
    closeness_n_fs_conservative,
    closeness_conservative_models,
) = classifiers.multiple_fs_classifier2(
    clf,
    parameters,
    closeness_conservative,
    closeness_test_indices_conservative,
    closeness_fs_ar_conservative,
    disgenet_labels_conservative_df,
    jobs=20,
)

# Tabular outputs: one CSV under binary/ and one under probability/.
closeness_lgr_clf_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_closeness_lgr_conservative.csv', index=False)
closeness_lgr_proba_clf_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_closeness_lgr_proba_conservative.csv', index=False)

# CV results: one "<first> <second>" line per entry, no trailing newline.
with open('../../models/GAP-MINE/disease/cv_results/disease_closeness_conservative.txt', 'w') as cv_file:
    cv_lines = ['%s %s' % entry for entry in closeness_cv_conservative]
    cv_file.write('\n'.join(cv_lines))

# n_fs values: one per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_closeness_conservative.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(value) + "\n" for value in closeness_n_fs_conservative)

# Models are pickled one after another into the same file.
with open("../../models/GAP-MINE/disease/models/closeness_conservative.pckl", "wb") as model_file:
    for fitted in closeness_conservative_models:
        pickle.dump(fitted, model_file)

In [None]:
# Conservative module, betweenness metric: run classifiers.multiple_fs_classifier2
# and persist each of its five outputs.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    betweenness_lgr_clf_conservative,
    betweenness_lgr_proba_clf_conservative,
    betweenness_cv_conservative,
    betweenness_n_fs_conservative,
    betweenness_conservative_models,
) = classifiers.multiple_fs_classifier2(
    clf,
    parameters,
    betweenness_conservative,
    betweenness_test_indices_conservative,
    betweenness_fs_ar_conservative,
    disgenet_labels_conservative_df,
    jobs=20,
)

# Tabular outputs: one CSV under binary/ and one under probability/.
betweenness_lgr_clf_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_betweenness_lgr_conservative.csv', index=False)
betweenness_lgr_proba_clf_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_betweenness_lgr_proba_conservative.csv', index=False)

# CV results: one "<first> <second>" line per entry, no trailing newline.
with open('../../models/GAP-MINE/disease/cv_results/disease_betweenness_conservative.txt', 'w') as cv_file:
    cv_lines = ['%s %s' % entry for entry in betweenness_cv_conservative]
    cv_file.write('\n'.join(cv_lines))

# n_fs values: one per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_betweenness_conservative.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(value) + "\n" for value in betweenness_n_fs_conservative)

# Models are pickled one after another into the same file.
with open("../../models/GAP-MINE/disease/models/betweenness_conservative.pckl", "wb") as model_file:
    for fitted in betweenness_conservative_models:
        pickle.dump(fitted, model_file)

In [None]:
# Conservative module, fraction-betweenness metric: run
# classifiers.multiple_fs_classifier2 and persist each of its five outputs.
clf = LogisticRegression(random_state=22)
# NOTE(review): the string 'none' for `penalty` is version-dependent in
# scikit-learn (newer releases expect None) -- confirm against the pinned version.
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    fraction_betweenness_lgr_clf_conservative,
    fraction_betweenness_lgr_proba_clf_conservative,
    fraction_betweenness_cv_conservative,
    fraction_betweenness_n_fs_conservative,
    fraction_betweenness_conservative_models,
) = classifiers.multiple_fs_classifier2(
    clf,
    parameters,
    fraction_betweenness_conservative,
    fraction_betweenness_test_indices_conservative,
    fraction_betweenness_fs_ar_conservative,
    disgenet_labels_conservative_df,
    jobs=20,
)

# Tabular outputs: one CSV under binary/ and one under probability/.
fraction_betweenness_lgr_clf_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_fraction_betweenness_lgr_conservative.csv', index=False)
# The probability file carries the `_lgr_proba_conservative` suffix used by the
# hypergeometric, rwr, closeness and betweenness conservative outputs.
# NOTE(review): any reader of the old `..._lgr_conservative.csv` name in
# probability/ must be updated to match.
fraction_betweenness_lgr_proba_clf_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_fraction_betweenness_lgr_proba_conservative.csv', index=False)

# CV results: one "<first> <second>" line per entry, no trailing newline.
with open('../../models/GAP-MINE/disease/cv_results/disease_fraction_betweenness_conservative.txt', 'w') as cv_file:
    cv_lines = ['%s %s' % entry for entry in fraction_betweenness_cv_conservative]
    cv_file.write('\n'.join(cv_lines))

# n_fs values: one per line.
with open("../../models/GAP-MINE/disease/n_fs/disease_fraction_betweenness_conservative.txt", "w") as n_fs_file:
    n_fs_file.writelines(str(value) + "\n" for value in fraction_betweenness_n_fs_conservative)

# Models are pickled one after another into the same file.
with open("../../models/GAP-MINE/disease/models/fraction_betweenness_conservative.pckl", "wb") as model_file:
    for fitted in fraction_betweenness_conservative_models:
        pickle.dump(fitted, model_file)

## 2.2. Reduced Networks

### 2.2.1. PPI 80% Reduction

#### 2.2.1.1. Steiner Tree

In [None]:
# PPI 80% network, hypergeometric metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    hyper_80_lgr,
    hyper_80_lgr_proba,
    hyper_80_cv,
    hyper_80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_hyper_list,
    hypergeometric_80_test_index,
    hypergeometric_80_fs_ar,
    label_list,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
hyper_80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_hypergeometric_80_lgr.csv', index=False)
hyper_80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_hypergeometric_80_lgr.csv', index=False)

In [None]:
# PPI 80% network, closeness metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    closeness_80_lgr,
    closeness_80_lgr_proba,
    closeness_80_cv,
    closeness_80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_closeness_list,
    closeness_80_test_index,
    closeness_80_fs_ar,
    label_list,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
closeness_80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_closeness_80_lgr.csv', index=False)
closeness_80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_closeness_80_lgr.csv', index=False)

In [None]:
# PPI 80% network, RWR metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    rwr_80_lgr,
    rwr_80_lgr_proba,
    rwr_80_cv,
    rwr_80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_rwr_list,
    rwr_80_test_index,
    rwr_80_fs_ar,
    label_list,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
rwr_80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_rwr_80_lgr.csv', index=False)
rwr_80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_rwr_80_lgr.csv', index=False)

In [None]:
# PPI 80% network, fraction-betweenness metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    fraction_betweenness_80_lgr,
    fraction_betweenness_80_lgr_proba,
    fraction_betweenness_80_cv,
    fraction_betweenness_80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_fraction_betweenness_list,
    fraction_betweenness_80_test_index,
    fraction_betweenness_80_fs_ar,
    label_list,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
fraction_betweenness_80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_fraction_betweenness_80_lgr.csv', index=False)
fraction_betweenness_80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_fraction_betweenness_80_lgr.csv', index=False)

In [None]:
# PPI 80% network, betweenness metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
# NOTE(review): the string 'none' for `penalty` is version-dependent in
# scikit-learn (newer releases expect None) -- confirm against the pinned version.
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    betweenness_80_lgr,
    betweenness_80_lgr_proba,
    betweenness_80_cv,
    betweenness_80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_betweenness_list,
    betweenness_80_test_index,
    betweenness_80_fs_ar,
    label_list,
)

# The binary table belongs under binary/, next to the other metrics' binary
# outputs; the path used to omit that folder and dropped the file in disease/.
betweenness_80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_betweenness_80_lgr.csv', index=False)
betweenness_80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_betweenness_80_lgr.csv', index=False)

#### 2.2.1.2. Conservative Module

In [None]:
# PPI 80% network, hypergeometric metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    hyper_80_lgr_conservative,
    hyper_80_lgr_proba_conservative,
    hyper_80_cv_conservative,
    hyper_80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_hyper_list_conservative,
    hypergeometric_80_test_index_conservative,
    hypergeometric_80_fs_ar_conservative,
    label_list_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
hyper_80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_hypergeometric_80_lgr_conservative.csv', index=False)
hyper_80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_hypergeometric_80_lgr_conservative.csv', index=False)

In [None]:
# PPI 80% network, closeness metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    closeness_80_lgr_conservative,
    closeness_80_lgr_proba_conservative,
    closeness_80_cv_conservative,
    closeness_80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_closeness_list_conservative,
    closeness_80_test_index_conservative,
    closeness_80_fs_ar_conservative,
    label_list_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
closeness_80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_closeness_80_lgr_conservative.csv', index=False)
closeness_80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_closeness_80_lgr_conservative.csv', index=False)

In [None]:
# PPI 80% network, RWR metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    rwr_80_lgr_conservative,
    rwr_80_lgr_proba_conservative,
    rwr_80_cv_conservative,
    rwr_80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_rwr_list_conservative,
    rwr_80_test_index_conservative,
    rwr_80_fs_ar_conservative,
    label_list_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
rwr_80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_rwr_80_lgr_conservative.csv', index=False)
rwr_80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_rwr_80_lgr_conservative.csv', index=False)

In [None]:
# PPI 80% network, fraction-betweenness metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    fraction_betweenness_80_lgr_conservative,
    fraction_betweenness_80_lgr_proba_conservative,
    fraction_betweenness_80_cv_conservative,
    fraction_betweenness_80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_fraction_betweenness_list_conservative,
    fraction_betweenness_80_test_index_conservative,
    fraction_betweenness_80_fs_ar_conservative,
    label_list_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
fraction_betweenness_80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_fraction_betweenness_80_lgr_conservative.csv', index=False)
fraction_betweenness_80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_fraction_betweenness_80_lgr_conservative.csv', index=False)

In [None]:
# PPI 80% network, betweenness metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
# NOTE(review): the string 'none' for `penalty` is version-dependent in
# scikit-learn (newer releases expect None) -- confirm against the pinned version.
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    betweenness_80_lgr_conservative,
    betweenness_80_lgr_proba_conservative,
    betweenness_80_cv_conservative,
    betweenness_80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    ppi80_betweenness_list_conservative,
    betweenness_80_test_index_conservative,
    betweenness_80_fs_ar_conservative,
    label_list_conservative,
)

# The binary table belongs under binary/, next to the other metrics' binary
# outputs; the path used to omit that folder and dropped the file in disease/.
betweenness_80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_betweenness_80_lgr_conservative.csv', index=False)
betweenness_80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_betweenness_80_lgr_conservative.csv', index=False)

### 2.2.2. Protein 80% Reduction

#### 2.2.2.1. Steiner Tree

In [None]:
# Protein 80% reduction, hypergeometric metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    hyper_protein80_lgr,
    hyper_protein80_lgr_proba,
    hyper_protein80_cv,
    hyper_protein80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_hyper_list,
    hypergeometric_protein80_test_index,
    hypergeometric_protein80_fs_ar,
    label_list_protein80,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
hyper_protein80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_hypergeometric_protein80_lgr.csv', index=False)
hyper_protein80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_hypergeometric_protein80_lgr.csv', index=False)

In [None]:
# Protein 80% reduction, closeness metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    closeness_protein80_lgr,
    closeness_protein80_lgr_proba,
    closeness_protein80_cv,
    closeness_protein80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_closeness_list,
    closeness_protein80_test_index,
    closeness_protein80_fs_ar,
    label_list_protein80,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
closeness_protein80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_closeness_protein80_lgr.csv', index=False)
closeness_protein80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_closeness_protein80_lgr.csv', index=False)

In [None]:
# Protein 80% reduction, RWR metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    rwr_protein80_lgr,
    rwr_protein80_lgr_proba,
    rwr_protein80_cv,
    rwr_protein80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_rwr_list,
    rwr_protein80_test_index,
    rwr_protein80_fs_ar,
    label_list_protein80,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
rwr_protein80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_rwr_protein80_lgr.csv', index=False)
rwr_protein80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_rwr_protein80_lgr.csv', index=False)

In [None]:
# Protein 80% reduction, fraction-betweenness metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    fraction_betweenness_protein80_lgr,
    fraction_betweenness_protein80_lgr_proba,
    fraction_betweenness_protein80_cv,
    fraction_betweenness_protein80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_fraction_betweenness_list,
    fraction_betweenness_protein80_test_index,
    fraction_betweenness_protein80_fs_ar,
    label_list_protein80,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
fraction_betweenness_protein80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_fraction_betweenness_protein80_lgr.csv', index=False)
fraction_betweenness_protein80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_fraction_betweenness_protein80_lgr.csv', index=False)

In [None]:
# Protein 80% reduction, betweenness metric (Steiner Tree section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    betweenness_protein80_lgr,
    betweenness_protein80_lgr_proba,
    betweenness_protein80_cv,
    betweenness_protein80_n_fs,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_betweenness_list,
    betweenness_protein80_test_index,
    betweenness_protein80_fs_ar,
    label_list_protein80,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
betweenness_protein80_lgr.to_csv('../../models/GAP-MINE/disease/binary/disease_betweenness_protein80_lgr.csv', index=False)
betweenness_protein80_lgr_proba.to_csv('../../models/GAP-MINE/disease/probability/disease_betweenness_protein80_lgr.csv', index=False)

#### 2.2.2.2. Conservative Module

In [None]:
# Protein 80% reduction, hypergeometric metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    hyper_protein80_lgr_conservative,
    hyper_protein80_lgr_proba_conservative,
    hyper_protein80_cv_conservative,
    hyper_protein80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_hyper_list_conservative,
    hypergeometric_protein80_test_index_conservative,
    hypergeometric_protein80_fs_ar_conservative,
    label_list_protein80_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
hyper_protein80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_hypergeometric_protein80_lgr_conservative.csv', index=False)
hyper_protein80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_hypergeometric_protein80_lgr_conservative.csv', index=False)

In [None]:
# Protein 80% reduction, closeness metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    closeness_protein80_lgr_conservative,
    closeness_protein80_lgr_proba_conservative,
    closeness_protein80_cv_conservative,
    closeness_protein80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_closeness_list_conservative,
    closeness_protein80_test_index_conservative,
    closeness_protein80_fs_ar_conservative,
    label_list_protein80_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
closeness_protein80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_closeness_protein80_lgr_conservative.csv', index=False)
closeness_protein80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_closeness_protein80_lgr_conservative.csv', index=False)

In [None]:
# Protein 80% reduction, RWR metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    rwr_protein80_lgr_conservative,
    rwr_protein80_lgr_proba_conservative,
    rwr_protein80_cv_conservative,
    rwr_protein80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_rwr_list_conservative,
    rwr_protein80_test_index_conservative,
    rwr_protein80_fs_ar_conservative,
    label_list_protein80_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
rwr_protein80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_rwr_protein80_lgr_conservative.csv', index=False)
rwr_protein80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_rwr_protein80_lgr_conservative.csv', index=False)

In [None]:
# Protein 80% reduction, fraction-betweenness metric (Conservative Module
# section): run classifiers.reduced_classifier_multiple_fs and save both
# prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    fraction_betweenness_protein80_lgr_conservative,
    fraction_betweenness_protein80_lgr_proba_conservative,
    fraction_betweenness_protein80_cv_conservative,
    fraction_betweenness_protein80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_fraction_betweenness_list_conservative,
    fraction_betweenness_protein80_test_index_conservative,
    fraction_betweenness_protein80_fs_ar_conservative,
    label_list_protein80_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
fraction_betweenness_protein80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_fraction_betweenness_protein80_lgr_conservative.csv', index=False)
fraction_betweenness_protein80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_fraction_betweenness_protein80_lgr_conservative.csv', index=False)

In [None]:
# Protein 80% reduction, betweenness metric (Conservative Module section): run
# classifiers.reduced_classifier_multiple_fs and save both prediction tables.
importlib.reload(classifiers)  # re-import the local module before calling into it
clf = LogisticRegression(random_state=22)
parameters = [
    # Grid 1: liblinear solver with an L1 or L2 penalty.
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100],
    },
    # Grid 2: sag / saga / newton-cg solvers with an L2 penalty or none.
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100],
    },
]
(
    betweenness_protein80_lgr_conservative,
    betweenness_protein80_lgr_proba_conservative,
    betweenness_protein80_cv_conservative,
    betweenness_protein80_n_fs_conservative,
) = classifiers.reduced_classifier_multiple_fs(
    clf,
    parameters,
    protein80_betweenness_list_conservative,
    betweenness_protein80_test_index_conservative,
    betweenness_protein80_fs_ar_conservative,
    label_list_protein80_conservative,
)

# Only the two tables are written; the cv and n_fs results stay in memory.
betweenness_protein80_lgr_conservative.to_csv('../../models/GAP-MINE/disease/binary/disease_betweenness_protein80_lgr_conservative.csv', index=False)
betweenness_protein80_lgr_proba_conservative.to_csv('../../models/GAP-MINE/disease/probability/disease_betweenness_protein80_lgr_conservative.csv', index=False)