# 0. Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from tqdm.notebook import tqdm
from tqdm import tqdm_notebook
import classifiers
import importlib
import json
import pickle
import warnings
warnings.filterwarnings("ignore")
import joblib

In [2]:
%%javascript
// Adds a link to this cell's output that toggles a CSS rule hiding stderr
// output areas. The argument is the initial state: true = start with the
// stderr output hidden.
(function (hideNext) {
    const styleId = "js_jupyter_suppress_warnings";
    // Placeholder text; it is overwritten by the first (synthetic) click below.
    const toggleLink = $("<a>Setup failed</a>");

    // Reuse the <style> element if an earlier run already created it.
    let styleNode = $("#" + styleId);
    if (!styleNode.length) {
        styleNode = $("<style id='" + styleId + "' type='text/css'>div.output_stderr { } </style>").appendTo("head");
    }

    function onToggle() {
        let verb = 'Showing';
        // Drop whatever rule is currently installed, then re-add it if hiding.
        styleNode.empty();
        if (hideNext) {
            verb = 'Hiding';
            styleNode.append("div.output_stderr, div[data-mime-type*='.stderr'] { display:none; }");
        }
        toggleLink.text(verb + ' warnings (click to toggle)');
        hideNext = !hideNext;
    }

    // Register the handler and fire it once right away to apply the initial state.
    toggleLink.click(onToggle).click();
    $(element).append(toggleLink);
})(true);

<IPython.core.display.Javascript object>

# 1. Data Load

In [2]:
# RWR score table read from process_rwr_string.csv: the first CSV row is the
# header and the first column becomes the index.
reactome_rwr_df = pd.read_csv(
    "../../data/processed/metrics/process_rwr_string.csv",
    sep=',',
    header=0,
    index_col=0,
)
# Transposed ndarray copy of the table; this is what gets handed to
# classifiers.multiple_fs_classifier further down.
reactome_rwr = np.array(reactome_rwr_df.T)

In [3]:
# RWR score tables read from the disease_* files (regular and "conservative"
# variants). Both use the first CSV row as header and the first column as index.
disgenet_rwr_df = pd.read_csv(
    "../../data/processed/metrics/disease_rwr_string.csv",
    sep=',',
    header=0,
    index_col=0,
)
disgenet_conservative_rwr_df = pd.read_csv(
    "../../data/processed/metrics/disease_rwr_conservative_string.csv",
    sep=',',
    header=0,
    index_col=0,
)

# Transposed ndarray copies, as passed to classifiers.multiple_fs_classifier below.
disgenet_rwr = np.array(disgenet_rwr_df.T)
disgenet_conservative_rwr = np.array(disgenet_conservative_rwr_df.T)

In [4]:
# The label CSV is read with header=None, so every row (including the first)
# is data and the frame starts out with default integer row/column labels.
reactome_labels_df = pd.read_csv(
    '../../data/processed/reactome_labels_string.csv',
    header=None,
)
# Transposed integer ndarray of the labels.
reactome_labels = reactome_labels_df.T.to_numpy(dtype='int')

# Re-label the frame with the RWR table's index and columns; pandas raises a
# length-mismatch error here if the two tables do not have the same shape.
reactome_labels_df.index = reactome_rwr_df.index
reactome_labels_df.columns = reactome_rwr_df.columns

In [5]:
# --- Label table read from disgenet_sca_labels_string.csv --------------------
# header=None: the file is read as pure data with default integer labels.
disgenet_labels_df = pd.read_csv(
    '../../data/processed/disgenet_sca_labels_string.csv',
    header=None,
)
# Transposed integer ndarray of the labels.
disgenet_labels = disgenet_labels_df.T.to_numpy(dtype='int')
# Re-label with the matching RWR table's axes (raises on a length mismatch).
disgenet_labels_df.index = disgenet_rwr_df.index
disgenet_labels_df.columns = disgenet_rwr_df.columns

# --- Same steps for the "conservative" label table ---------------------------
disgenet_labels_conservative_df = pd.read_csv(
    '../../data/processed/disgenet_conservative_labels_string.csv',
    header=None,
)
disgenet_labels_conservative = disgenet_labels_conservative_df.T.to_numpy(dtype='int')
disgenet_labels_conservative_df.index = disgenet_conservative_rwr_df.index
disgenet_labels_conservative_df.columns = disgenet_conservative_rwr_df.columns

In [6]:
# Feature-selection table; passed as-is to classifiers.multiple_fs_classifier.
reactome_rwr_fs_ar = pd.read_csv(
    "../../data/processed/fs/reactome_rwr_fs_string.csv",
    sep=',',
    header=0,
)
# Test indices: transposed, converted to an int ndarray and shifted down by one.
# NOTE(review): the -1 presumably converts 1-based indices on disk to 0-based
# positions — confirm against the script that wrote the file.
reactome_rwr_test_index = (
    pd.read_csv(
        "../../data/processed/fs/reactome_rwr_test_string.csv",
        sep=',',
        header=0,
    )
    .transpose()
    .to_numpy(dtype='int')
    - 1
)

In [7]:
# Feature-selection tables (regular and "conservative"); both are passed
# unchanged to classifiers.multiple_fs_classifier.
disgenet_rwr_fs_ar = pd.read_csv(
    "../../data/processed/fs/disease/disease_rwr_fs_string.csv",
    sep=',',
    header=0,
)
disgenet_conservative_rwr_fs_ar = pd.read_csv(
    "../../data/processed/fs/disease/disease_rwr_fs_conservative_string.csv",
    sep=',',
    header=0,
)

# Test indices: transposed, converted to int ndarrays and shifted down by one.
# NOTE(review): the -1 presumably converts 1-based indices on disk to 0-based
# positions — confirm against the script that wrote the files.
disgenet_rwr_test_index = (
    pd.read_csv(
        "../../data/processed/fs/disease/disease_rwr_test_string.csv",
        sep=',',
        header=0,
    )
    .transpose()
    .to_numpy(dtype='int')
    - 1
)
disgenet_conservative_rwr_test_index = (
    pd.read_csv(
        "../../data/processed/fs/disease/disease_rwr_test_conservative_string.csv",
        sep=',',
        header=0,
    )
    .transpose()
    .to_numpy(dtype='int')
    - 1
)

# 2. Classification Tasks

## 2.1. Complete Network

In [None]:
# Logistic-regression run on the reactome (process) RWR features.
# Reload the local helper module so edits to classifiers.py are picked up
# without restarting the kernel.
importlib.reload(classifiers)

clf = LogisticRegression(random_state=22)
# Two parameter grids, split by solver family. They are handed to
# classifiers.multiple_fs_classifier (presumably for a grid search — confirm
# in classifiers.py).
parameters = [
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [50, 100, 200, 500],
    },
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [100, 250, 500],
    },
]

(
    rwr_lgr_clf,
    rwr_lgr_proba_clf,
    rwr_cv,
    rwr_n_fs,
    rwr_models,
) = classifiers.multiple_fs_classifier(
    clf,
    parameters,
    reactome_rwr,
    reactome_rwr_test_index,
    reactome_rwr_fs_ar,
    reactome_labels_df,
    jobs=6,
)

# Persist the two result tables without the index column.
rwr_lgr_clf.to_csv('../../models/GAP-MINE/STRING/binary/process_rwr_lgr.csv', index=False)
rwr_lgr_proba_clf.to_csv('../../models/GAP-MINE/STRING/probability/process_rwr_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry; each entry of rwr_cv must be a 2-tuple for the
# '%s %s' formatting to succeed.
with open('../../models/GAP-MINE/STRING/cv_results/process_rwr.txt', 'w') as fp:
    fp.write('\n'.join('%s %s' % x for x in rwr_cv))

# One value per line.
with open("../../models/GAP-MINE/STRING/n_fs/process_rwr.txt", "w") as f:
    for s in rwr_n_fs:
        f.write(str(s) + "\n")

# Models are appended back-to-back as separate pickle records in one file.
# pickle (not joblib) is used so that this .pckl file has the same format as
# the disease_rwr / disease_conservative_rwr model files written by the
# sibling cells, and can be read back with repeated pickle.load calls.
with open("../../models/GAP-MINE/STRING/models/process_rwr.pckl", "wb") as f:
    for model in rwr_models:
        pickle.dump(model, f)

In [None]:
# Logistic-regression run on the disease RWR features.
clf = LogisticRegression(random_state=22)
# Two parameter grids, split by solver family. They are handed to
# classifiers.multiple_fs_classifier (presumably for a grid search — confirm
# in classifiers.py).
parameters = [
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100, 500],
    },
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100, 500],
    },
]

(
    disease_rwr_lgr_clf,
    disease_rwr_lgr_proba_clf,
    disease_rwr_cv,
    disease_rwr_n_fs,
    disease_rwr_models,
) = classifiers.multiple_fs_classifier(
    clf,
    parameters,
    disgenet_rwr,
    disgenet_rwr_test_index,
    disgenet_rwr_fs_ar,
    disgenet_labels_df,
    jobs=6,
)

# Persist the two result tables without the index column.
disease_rwr_lgr_clf.to_csv('../../models/GAP-MINE/STRING/binary/disease_rwr_lgr.csv', index=False)
disease_rwr_lgr_proba_clf.to_csv('../../models/GAP-MINE/STRING/probability/disease_rwr_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry of disease_rwr_cv (each entry must be a 2-tuple).
with open('../../models/GAP-MINE/STRING/cv_results/disease_rwr.txt', 'w') as fp:
    fp.write('\n'.join(['%s %s' % x for x in disease_rwr_cv]))

# One value per line.
with open("../../models/GAP-MINE/STRING/n_fs/disease_rwr.txt", "w") as f:
    for s in disease_rwr_n_fs:
        print(s, file=f)

# Models are appended back-to-back as separate pickle records in one file.
with open("../../models/GAP-MINE/STRING/models/disease_rwr.pckl", "wb") as f:
    for model in disease_rwr_models:
        pickle.dump(model, f)

In [None]:
# Logistic-regression run on the "conservative" disease RWR features.
clf = LogisticRegression(random_state=22)
# Two parameter grids, split by solver family. They are handed to
# classifiers.multiple_fs_classifier (presumably for a grid search — confirm
# in classifiers.py).
parameters = [
    {
        'penalty': ['l1', 'l2'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['liblinear'],
        'max_iter': [10, 50, 100, 500],
    },
    {
        'penalty': ['l2', 'none'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'solver': ['sag', 'saga', 'newton-cg'],
        'max_iter': [10, 50, 100, 500],
    },
]

(
    disease_conservative_rwr_lgr_clf,
    disease_conservative_rwr_lgr_proba_clf,
    disease_conservative_rwr_cv,
    disease_conservative_rwr_n_fs,
    disease_conservative_rwr_models,
) = classifiers.multiple_fs_classifier(
    clf,
    parameters,
    disgenet_conservative_rwr,
    disgenet_conservative_rwr_test_index,
    disgenet_conservative_rwr_fs_ar,
    disgenet_labels_conservative_df,
    jobs=6,
)

# Persist the two result tables without the index column.
disease_conservative_rwr_lgr_clf.to_csv('../../models/GAP-MINE/STRING/binary/disease_conservative_rwr_lgr.csv', index=False)
disease_conservative_rwr_lgr_proba_clf.to_csv('../../models/GAP-MINE/STRING/probability/disease_conservative_rwr_lgr_proba.csv', index=False)

# One "<a> <b>" line per entry of the cv results (each entry must be a 2-tuple).
with open('../../models/GAP-MINE/STRING/cv_results/disease_conservative_rwr.txt', 'w') as fp:
    fp.write('\n'.join(['%s %s' % x for x in disease_conservative_rwr_cv]))

# One value per line.
with open("../../models/GAP-MINE/STRING/n_fs/disease_conservative_rwr.txt", "w") as f:
    for s in disease_conservative_rwr_n_fs:
        print(s, file=f)

# Models are appended back-to-back as separate pickle records in one file.
with open("../../models/GAP-MINE/STRING/models/disease_conservative_rwr.pckl", "wb") as f:
    for model in disease_conservative_rwr_models:
        pickle.dump(model, f)