In [5]:
# import python packages
import pandas as pd
import numpy as np

# import src code
import ase_evaluation as ae
#import benchmark_posteriors as bp
import bootstrap as btstrp
import cross_validation as cv
import logistic_regression as lr
import naive_bayes as nb
import network as ntwk
import process as prcs
#import RIVER as rvr
import simulate_data as sim
from scipy import interp

import matplotlib.pyplot as plt
import seaborn as sns
import benchmark_posteriors as bnchmk
import sklearn

In [None]:
# Run N_SIMS independent simulations. For each one, fit SPEER with and without
# transfer plus the benchmark models, then summarise every model by a
# tissue-averaged ROC curve evaluated on a common false-positive-rate grid.
N_SIMS = 2           # number of simulated datasets to generate and evaluate
N_ROC_POINTS = 100   # resolution of the common FPR grid used for averaging

test_data = []
models = ['SPEER', 'SPEER without transfer', 'tissue specific genome only', 'shared tissue genome only']
fpr, tpr, auc = {}, {}, {}

# Result containers are keyed as container[model][simulation_index].
# They must be created once, before the simulation loop: re-creating them
# inside the loop discards the curves of every earlier simulation.
mean_tpr = {model: {} for model in models}
mean_fpr = {model: {} for model in models}
mean_auc = {model: {} for model in models}

for i in range(N_SIMS):
    # generate simulated data
    # NOTE(review): the meaning of the positional arguments (0.4, 0.6, 0.01)
    # is defined in simulate_data.SimulateData -- confirm there before tuning.
    s = sim.SimulateData("./test_output/", 'with_transfer', 0.4, 0.6, 0.01)
    s._run()

    # create a process object
    p = prcs.Process('./test_output', 0.1)
    p._process_simulated_data()

    # run SPEER
    n = ntwk.Network(p.train_list, p.test_list, p.tissues, p.genomic_features,
                     with_transfer=True, output_dir="SPEER_output",
                     lambda_hp_parent=None,
                     lambda_hp_children_dict=None,
                     e_distribution='cat')
    train_list, test_list, beta_parent, beta_children, phi = n.run()

    # run SPEER without transfer, feeding it the lists returned by the run above
    lambda_hp_children_dict = {'brain': 0.01, 'group1': 0.01, 'muscle': 0.01, 'epithelial': 0.01, 'digestive': 0.01}
    n = ntwk.Network(train_list, test_list, p.tissues, p.genomic_features,
                     with_transfer=False, output_dir="SPEER_output",
                     lambda_hp_parent=None,
                     lambda_hp_children_dict=lambda_hp_children_dict,
                     e_distribution='cat')
    train_list, test_list, beta_parent, beta_children, phi = n.run()

    # add benchmarks
    bn = bnchmk.BenchmarkPosteriors(train_list, test_list, p.genomic_features)
    train_list, test_list = bn.fit_models()

    n_tissues = len(test_list)
    if n_tissues == 0:
        raise ValueError("simulation {0} produced no test tissues to evaluate".format(i))

    for model in models:
        fpr_grid = np.linspace(0, 1, N_ROC_POINTS)
        tpr_sum = np.zeros_like(fpr_grid)
        # Interpolate each tissue's ROC curve onto the common grid and accumulate.
        for j in range(n_tissues):
            tissue_fpr, tissue_tpr, _ = sklearn.metrics.roc_curve(
                test_list[j]["z_label"], test_list[j][model])
            tpr_sum += np.interp(fpr_grid, tissue_fpr, tissue_tpr)

        tpr_avg = tpr_sum / n_tissues
        # Pin the end points so the averaged curve runs from (0, 0) to (1, 1).
        tpr_avg[0] = 0.0
        tpr_avg[-1] = 1.0

        mean_fpr[model][i] = fpr_grid
        mean_tpr[model][i] = tpr_avg
        mean_auc[model][i] = sklearn.metrics.auc(fpr_grid, tpr_avg)
    print(i)

In [None]:
%matplotlib inline
fig = plt.figure(figsize=(10,10))
num_sims = 1
colors = ['green', 'blue', 'red', 'yellow', 'grey']
for ind,model in enumerate(models):
    mean_tpr_global = 0.0
    mean_fpr_global = np.linspace(0,1,100)
    for i in range(num_sims):
        fpr = mean_fpr[model][i]
        tpr = mean_tpr[model][i]
        ax = plt.plot(fpr, tpr, linewidth=1, c=colors[ind])

        
        mean_tpr_global += interp(mean_fpr_global, fpr, tpr)
        mean_tpr_global[0] = 0.0
        
    mean_tpr_global /= num_sims
    mean_tpr_global[-1] = 1.0
    auc_global = sklearn.metrics.auc(mean_fpr_global, mean_tpr_global)
    ax = plt.plot(mean_fpr_global, mean_tpr_global, label=' {0:0.3f}'
              ''.format(auc_global), linewidth=3, c=colors[ind])

ax = plt.plot([0,1], [0,1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate', fontsize=20)
sns.set_context("paper")
sns.set_palette("deep")
sns.set(font='serif')
sns.set_style("white", {"font.family": "serif", "font.serif": ["Times", "Palatino", "serif"]})
#ax.spines['top'].set_visible(False)
#ax.spines['right'].set_visible(False)
plt.ylabel('True Positive Rate', fontsize=20)
#plt.title('Receiver operating characteristic')
plt.legend(loc="lower right", frameon=False, prop={'size':16})
plt.tight_layout()