In [7]:
import pandas as pd
from ast import literal_eval
import numpy as np
from igraph import Graph
import plotly.graph_objects as go
from tqdm.notebook import tqdm

In [8]:
# Module index tables: plain comma-separated files whose first row is the header
# (both are the pandas defaults, so no extra arguments are needed).
disgenet_proteins_indexes_df = pd.read_csv("../../data/processed/disgenet_prot_index_main_comp.csv")
reactome_proteins_indexes_df = pd.read_csv("../../data/processed/reactome_proteins_indexes_apid_huri.csv")

# Tab-separated disease tables.
disease_mappings = pd.read_csv("../../data/processed/disease_mappings.tsv", sep="\t")
disgenet = pd.read_csv("../../data/raw/curated_gene_disease_associations.tsv", sep="\t")

# Network stored in GML format; its vertex `name` attribute is used further down.
graph_apid_huri = Graph.Read_GML("../../data/processed/graph_apid_huri")


In [9]:
# List-valued columns are stored as text in the CSVs; turn them back into Python lists.
disgenet_proteins_indexes_df['conservative_module_ids'] = (
    disgenet_proteins_indexes_df['conservative_module_ids'].map(literal_eval)
)

# Subset of modules whose `increase` is below 0.4, re-indexed from zero
# (the previous index is kept as an `index` column).
disgenet_sca_proteins_indexes_df = (
    disgenet_proteins_indexes_df
    .loc[lambda modules: modules['increase'] < 0.4]
    .reset_index()
)
disgenet_sca_proteins_indexes_df['main_component_ids'] = (
    disgenet_sca_proteins_indexes_df['main_component_ids'].map(literal_eval)
)

reactome_proteins_indexes_df['proteins_ids'] = (
    reactome_proteins_indexes_df['proteins_ids'].map(literal_eval)
)

In [10]:
# Load the three result tables first ...
process_rwr_whole = pd.read_csv("../../models/GAP-MINE/process/probability/rwr_whole.csv")
disease_rwr_whole = pd.read_csv("../../models/GAP-MINE/disease/probability/rwr_whole.csv")
disease_conservative_rwr_whole = pd.read_csv("../../models/GAP-MINE/disease/probability/rwr_conservative_whole.csv")

# ... then parse the textual `new_proteins` column of each back into a list.
process_rwr_whole['new_proteins'] = process_rwr_whole['new_proteins'].map(literal_eval)
disease_rwr_whole['new_proteins'] = disease_rwr_whole['new_proteins'].map(literal_eval)
disease_conservative_rwr_whole['new_proteins'] = disease_conservative_rwr_whole['new_proteins'].map(literal_eval)

In [11]:
# Copy the module identifier (`process`) from each index table onto the matching
# result table. pandas aligns the assigned Series on index labels.
# NOTE(review): this assumes every result CSV has the same row order as its index
# table (the SCA results matching the filtered, re-indexed subset) - confirm.
disease_conservative_rwr_whole['process'] = disgenet_proteins_indexes_df['process']
disease_rwr_whole['process'] = disgenet_sca_proteins_indexes_df['process']
process_rwr_whole['process'] = reactome_proteins_indexes_df['process']

In [12]:
# HPO annotation file: the column names are taken from row 4 (0-based) of the file.
phenotype = pd.read_csv(
    '../../data/raw/phenotype.hpoa',
    sep='\t',
    header=4,
    low_memory=False,
)

# Gene-to-phenotype table: the file's own header row is replaced by the names below.
genes_to_phenotype = pd.read_csv(
    '../../data/raw/genes_to_phenotype.txt',
    sep='\t',
    header=0,
    names=[
        'entrez-gene-id',
        'entrez-gene-symbol',
        'HPO-Term-ID',
        'HPO-Term-Name',
        'Frequency-Raw',
        'Frequency-HPO',
        'Additional Info from G-D source',
        'G-D source',
        'disease-ID for link',
    ],
)

In [13]:
# Source vocabulary of each annotation = the part of `#DatabaseID` before the first ':'.
phenotype['type'] = phenotype['#DatabaseID'].map(lambda database_id: database_id.split(':')[0])

In [14]:
# Keep only OMIM cross-references, one row per disease name.
# The de-duplication is chained instead of done in place: calling
# `drop_duplicates(inplace=True)` on the filtered slice triggers pandas'
# SettingWithCopyWarning.
disease_mappings = (
    disease_mappings[disease_mappings['vocabulary'].isin(['OMIM'])]
    .drop_duplicates(subset=['name'])
)

In [15]:
# Left-join the OMIM mapping onto both disease result tables (module `process` ==
# mapping `diseaseId`); modules without a mapping keep NaN in the mapping columns.
disease_conservative_rwr_whole = pd.merge(
    disease_conservative_rwr_whole,
    disease_mappings,
    how='left',
    left_on='process',
    right_on='diseaseId',
)
disease_rwr_whole = pd.merge(
    disease_rwr_whole,
    disease_mappings,
    how='left',
    left_on='process',
    right_on='diseaseId',
)

In [16]:
# A module has an OMIM mapping when the merged `code` column is populated.
disease_conservative_rwr_whole['has_omim'] = disease_conservative_rwr_whole['code'].notna()
disease_rwr_whole['has_omim'] = disease_rwr_whole['code'].notna()

In [17]:
# Build the "<vocabulary>:<code>" identifier for mapped modules; unmapped ones get NaN.
disease_conservative_rwr_whole['id'] = disease_conservative_rwr_whole.apply(
    lambda disease: ':'.join([disease['vocabulary'], disease['code']]) if disease['has_omim'] else np.nan,
    axis=1,
)
disease_rwr_whole['id'] = disease_rwr_whole.apply(
    lambda disease: ':'.join([disease['vocabulary'], disease['code']]) if disease['has_omim'] else np.nan,
    axis=1,
)

In [18]:
# HPO annotations of the diseases that appear in the conservative result table.
filtered_phenotypes = phenotype.loc[
    phenotype['#DatabaseID'].isin(disease_conservative_rwr_whole['id']),
    ['#DatabaseID', 'DiseaseName', 'HPO_ID'],
]

# Attach the HPO term name. `genes_to_phenotype` has gene-level rows, so joining on
# it directly repeats every phenotype row once per matching gene row; only the
# distinct (term id, term name) pairs are needed for the lookup.
filtered_phenotypes = filtered_phenotypes.merge(
    genes_to_phenotype[['HPO-Term-ID', 'HPO-Term-Name']].drop_duplicates(),
    how='left',
    left_on='HPO_ID',
    right_on='HPO-Term-ID',
)[['#DatabaseID', 'DiseaseName', 'HPO_ID', 'HPO-Term-Name']]

In [19]:
# Keep only gene annotations whose HPO term occurs in the selected disease phenotypes.
genes_to_phenotype = genes_to_phenotype.loc[
    genes_to_phenotype['HPO-Term-ID'].isin(filtered_phenotypes['HPO_ID'])
]

In [20]:
# Remove repeated rows. Results are reassigned rather than modified in place:
# `genes_to_phenotype` is a filtered slice at this point, and an in-place drop on a
# slice raises pandas' SettingWithCopyWarning.
filtered_phenotypes = filtered_phenotypes.drop_duplicates()
genes_to_phenotype = genes_to_phenotype.drop_duplicates(subset=['entrez-gene-symbol', 'HPO-Term-ID'])

In [21]:
# Number of predicted ("new") proteins per module.
process_rwr_whole['len_new_proteins'] = process_rwr_whole['new_proteins'].apply(len)
disease_rwr_whole['len_new_proteins'] = disease_rwr_whole['new_proteins'].apply(len)
disease_conservative_rwr_whole['len_new_proteins'] = disease_conservative_rwr_whole['new_proteins'].apply(len)

# Full module = original module proteins followed by the predicted ones
# (element-wise list concatenation, aligned on the index).
process_rwr_whole['module_proteins'] = reactome_proteins_indexes_df['proteins_ids'] + process_rwr_whole['new_proteins']
disease_rwr_whole['module_proteins'] = disgenet_sca_proteins_indexes_df['main_component_ids'] + disease_rwr_whole['new_proteins']
disease_conservative_rwr_whole['module_proteins'] = disgenet_proteins_indexes_df['conservative_module_ids'] + disease_conservative_rwr_whole['new_proteins']

# Random baseline: for every module draw as many network proteins from OUTSIDE the
# module as the module has predictions.
# - The vertex names are collected once instead of once per row.
# - Candidates are "network minus module". The previous symmetric difference (`^`)
#   would also have offered module proteins that are absent from the network.
# - The names are sorted and a seeded generator is used so the baseline is the same
#   on every run (string set order varies between interpreter sessions).
_network_proteins = sorted(set(graph_apid_huri.vs['name']))
_random_baseline_rng = np.random.default_rng(0)


def _sample_random_proteins(row):
    """Return `row['len_new_proteins']` random network proteins outside the module."""
    module = set(row['module_proteins'])
    candidates = [protein for protein in _network_proteins if protein not in module]
    return _random_baseline_rng.choice(candidates, row['len_new_proteins'])


process_rwr_whole['random_proteins'] = process_rwr_whole.apply(_sample_random_proteins, axis=1)
disease_rwr_whole['random_proteins'] = disease_rwr_whole.apply(_sample_random_proteins, axis=1)
disease_conservative_rwr_whole['random_proteins'] = disease_conservative_rwr_whole.apply(_sample_random_proteins, axis=1)

In [22]:
def hpo_intersections(phenotypes, clf_results, protein_to_phenotype, target='new_proteins'):
    """Compare the HPO terms of each candidate protein with those of its disease.

    Only rows of `clf_results` with `fp > 0` and a truthy `has_omim` are used.

    Parameters
    ----------
    phenotypes : DataFrame with `#DatabaseID` and `HPO-Term-Name` columns.
    clf_results : DataFrame with `fp`, `has_omim`, `id`, `process` and the
        `target` column (an iterable of protein symbols per row).
    protein_to_phenotype : DataFrame with `entrez-gene-symbol` and
        `HPO-Term-Name` columns.
    target : name of the `clf_results` column holding the proteins to inspect.

    Returns
    -------
    DataFrame with one row per (row, protein) and the columns `Process`,
    `Protein`, `Protein HPO` (set of the protein's term names, NaN if it has none)
    and `Common HPO` (list of term names shared with the disease, NaN if none).
    """
    processes, proteins, protein_terms, shared_terms = [], [], [], []

    for _, result in clf_results.iterrows():
        if not (result['fp'] > 0 and result['has_omim']):
            continue

        # Term names annotated to this row's disease identifier.
        disease_rows = phenotypes['#DatabaseID'] == result['id']
        disease_terms = set(phenotypes[disease_rows]['HPO-Term-Name'].values)

        for protein in result[target]:
            gene_rows = protein_to_phenotype['entrez-gene-symbol'] == protein
            annotated = set(protein_to_phenotype[gene_rows]['HPO-Term-Name'].values)
            overlap = list(disease_terms & annotated)

            processes.append(result['process'])
            proteins.append(protein)
            protein_terms.append(annotated if len(annotated) > 0 else np.nan)
            shared_terms.append(overlap if len(overlap) > 0 else np.nan)

    return pd.DataFrame.from_dict({
        'Process': processes,
        'Protein': proteins,
        'Protein HPO': protein_terms,
        'Common HPO': shared_terms,
    })

In [23]:
# Predicted proteins versus the disease phenotypes.
disease_hpo = hpo_intersections(
    phenotypes=filtered_phenotypes, clf_results=disease_rwr_whole,
    protein_to_phenotype=genes_to_phenotype, target='new_proteins')
disease_conservative_hpo = hpo_intersections(
    phenotypes=filtered_phenotypes, clf_results=disease_conservative_rwr_whole,
    protein_to_phenotype=genes_to_phenotype, target='new_proteins')

# Same comparison for the randomly drawn proteins (baseline).
disease_hpo_random = hpo_intersections(
    phenotypes=filtered_phenotypes, clf_results=disease_rwr_whole,
    protein_to_phenotype=genes_to_phenotype, target='random_proteins')
disease_conservative_hpo_random = hpo_intersections(
    phenotypes=filtered_phenotypes, clf_results=disease_conservative_rwr_whole,
    protein_to_phenotype=genes_to_phenotype, target='random_proteins')

In [24]:
# Per (Process, Protein): number of non-null entries in `Protein HPO` and `Common HPO`
# (`count` ignores NaN, so a protein without annotations / overlap contributes 0).
disease_fp_hpo_count = disease_hpo.groupby(['Process', 'Protein']).count().reset_index()
disease_conservative_fp_hpo_count = disease_conservative_hpo.groupby(['Process', 'Protein']).count().reset_index()

# Same tallies for the random baseline.
disease_fp_hpo_count_random = disease_hpo_random.groupby(['Process', 'Protein']).count().reset_index()
disease_conservative_fp_hpo_count_random = disease_conservative_hpo_random.groupby(['Process', 'Protein']).count().reset_index()

In [25]:
# Per process: how many proteins have HPO annotations (`Protein HPO`) and how many of
# them share at least one term with the disease (`Common HPO`). Zeros are turned into
# NaN first so the second `count` skips them. Coverage is the ratio of the two;
# processes where it is undefined are dropped.
# NOTE(review): each table overwrites its own input, so this cell is not idempotent -
# re-create the per-protein counts before running it again.
disease_fp_hpo_count = (
    disease_fp_hpo_count
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common HPO'] / counts['Protein HPO'])
    .sort_values(by='Coverage', ascending=False)
    .dropna()
)

disease_conservative_fp_hpo_count = (
    disease_conservative_fp_hpo_count
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common HPO'] / counts['Protein HPO'])
    .sort_values(by='Coverage', ascending=False)
    .dropna()
)

In [26]:
# Coverage of the random baseline, computed exactly like the coverage of the predictions:
# zero counts -> NaN, count non-null values per process, Common HPO / Protein HPO,
# sort descending and drop processes with an undefined ratio.
# NOTE(review): each table overwrites its own input, so this cell is not idempotent.
disease_fp_hpo_count_random = (
    disease_fp_hpo_count_random
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common HPO'] / counts['Protein HPO'])
    .sort_values(by='Coverage', ascending=False)
    .dropna()
)

disease_conservative_fp_hpo_count_random = (
    disease_conservative_fp_hpo_count_random
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common HPO'] / counts['Protein HPO'])
    .sort_values(by='Coverage', ascending=False)
    .dropna()
)

In [27]:
import rpy2.robjects as robjects
# Handle to R's `density` function (kernel density estimation), called through rpy2.
rdensity = robjects.r['density']

In [28]:
# Hand the coverage values to R as numeric vectors.
r_disease_fp_hpo_count = robjects.FloatVector(disease_fp_hpo_count['Coverage'].tolist())
r_disease_conservative_fp_hpo_count = robjects.FloatVector(disease_conservative_fp_hpo_count['Coverage'].tolist())
r_disease_fp_hpo_count_random = robjects.FloatVector(disease_fp_hpo_count_random['Coverage'].tolist())
r_disease_conservative_fp_hpo_count_random = robjects.FloatVector(disease_conservative_fp_hpo_count_random['Coverage'].tolist())

In [29]:
# Kernel density of the coverage values on a fixed grid over [0, 1].
# `from` must be given explicitly: R's `density` otherwise starts the grid at
# min(x) - cut * bw (cut = 3), so the grid would neither start at 0 nor be guaranteed
# to be the same for predictions and baseline, while the plot below pairs the values
# point by point and draws them over np.linspace(0, 1, 512).
# `from` is a Python keyword, hence the dict unpacking.
r_disease_hpo_density = rdensity(r_disease_fp_hpo_count, to=1, bw=0.2, **{'from': 0})
r_disease_conservative_hpo_density = rdensity(r_disease_conservative_fp_hpo_count, to=1, bw=0.2, **{'from': 0})
r_disease_hpo_density_random = rdensity(r_disease_fp_hpo_count_random, to=1, bw=0.2, **{'from': 0})
r_disease_conservative_hpo_density_random = rdensity(r_disease_conservative_fp_hpo_count_random, to=1, bw=0.2, **{'from': 0})

# Keep only the estimated density values (`y` component of the R result).
disease_hpo_density = np.array(r_disease_hpo_density.rx2('y'))
disease_conservative_hpo_density = np.array(r_disease_conservative_hpo_density.rx2('y'))
disease_hpo_density_random = np.array(r_disease_hpo_density_random.rx2('y'))
disease_conservative_hpo_density_random = np.array(r_disease_conservative_hpo_density_random.rx2('y'))

In [30]:
# Exact zeros would make the density ratio / its logarithm undefined, so they are
# replaced by the smallest non-zero density of the same curve.
disease_hpo_density[disease_hpo_density==0] = np.nan
disease_conservative_hpo_density[disease_conservative_hpo_density==0] = np.nan
disease_hpo_density_random[disease_hpo_density_random==0] = np.nan
disease_conservative_hpo_density_random[disease_conservative_hpo_density_random==0] = np.nan

# `np.nanmin` ignores the NaN markers. The built-in `min` returns NaN whenever the
# first element is NaN (every comparison with NaN is False), which left the zeros
# unreplaced in exactly the case this step is meant to handle.
disease_hpo_density = np.nan_to_num(disease_hpo_density, nan=np.nanmin(disease_hpo_density))
disease_conservative_hpo_density = np.nan_to_num(disease_conservative_hpo_density, nan=np.nanmin(disease_conservative_hpo_density))
disease_hpo_density_random = np.nan_to_num(disease_hpo_density_random, nan=np.nanmin(disease_hpo_density_random))
disease_conservative_hpo_density_random = np.nan_to_num(disease_conservative_hpo_density_random, nan=np.nanmin(disease_conservative_hpo_density_random))

In [40]:
# Log-ratio of the coverage density of the predictions to that of the random baseline
# (values above 0 mean the coverage is more frequent among predictions).
# The densities are already NumPy arrays, so no extra conversion is needed.
traces = []
traces.append(go.Scatter(x=np.linspace(0,1,512),
                y=np.log(np.divide(disease_hpo_density, disease_hpo_density_random)),
                name='Disease SCA', marker_color='#FDA96D', line_width=10))
traces.append(go.Scatter(x=np.linspace(0,1,512),
                y=np.log(np.divide(disease_conservative_hpo_density, disease_conservative_hpo_density_random)),
                name='Disease Conservative', marker_color='#59C3C3', line_width=10))

fig = go.Figure()
fig.add_traces(traces)
# Axis titles use the nested `title=dict(text=..., font=..., standoff=...)` form;
# the flat `titlefont` attribute is deprecated.
fig.update_layout(
    height=1000,
    width=2000,
    xaxis=dict(
        tickfont=dict(size=50),
        title=dict(text='Coverage', font=dict(size=50)),
        showline=True, linewidth=3, linecolor='black', mirror=True, color='black'),
    yaxis=dict(
        tickfont=dict(size=50),
        title=dict(text='log(Predictions/Random Frequencies)', font=dict(size=50), standoff=20),
        showline=True, linewidth=3, linecolor='black', mirror=True,
        zeroline=True, zerolinewidth=5, zerolinecolor='rgb(204,204,204)', color='black'),
    paper_bgcolor='rgba(255,255,255,1)',
    plot_bgcolor='rgba(255,255,255,0.9)',
    margin=dict(l=20, r=20, t=25, b=20),
    showlegend=False)
fig.show()

# GO enrichment analysis (GOEA)

In [43]:
from goatools.base import download_go_basic_obo
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS
from goatools.obo_parser import GODag
from goatools.anno.gaf_reader import GafReader
# Uncomment to fetch the ontology file if it is not available locally:
#download_go_basic_obo('../../data/raw/go-basic.obo')
# GO ontology graph, parsed from the OBO file.
obodag = GODag("../../data/raw/go-basic.obo")
# Human GO annotations (GAF file) ...
ogaf = GafReader("../../data/raw/goa_human.gaf")
# ... as returned by `get_ns2assc()`; used below as a mapping keyed by namespace
# whose values map a protein identifier to its set of GO ids.
ns2assc = ogaf.get_ns2assc()

../../data/raw/go-basic.obo: fmt(1.2) rel(2022-05-16) 47,071 Terms
HMS:0:00:16.272206 632,633 annotations READ: ../../data/raw/goa_human.gaf 


In [44]:
from igraph import Graph
import pandas as pd
from tqdm.notebook import tqdm

graph_apid_huri = Graph.Read_GML("../../data/processed/graph_apid_huri")

# HGNC symbol table restricted to proteins present in the network, without
# incomplete or repeated rows. The clean-up is chained instead of done in place:
# in-place `dropna` / `drop_duplicates` on the filtered slice triggers pandas'
# SettingWithCopyWarning.
hgnc_symbols = pd.read_csv('../../data/interim/HGNC symbols.txt', sep=',')
hgnc_symbols = (
    hgnc_symbols[hgnc_symbols['Approved symbol'].isin(graph_apid_huri.vs['name'])]
    .dropna()
    .drop_duplicates()
)

In [45]:
# Translate every network protein to a UniProt accession; proteins without an entry
# in the HGNC table keep their original name.
# A symbol -> accession dictionary (first table row per symbol) replaces the
# per-protein scan of the whole DataFrame, which was quadratic in the network size.
symbol_to_uniprot = (
    hgnc_symbols
    .drop_duplicates(subset='Approved symbol')
    .set_index('Approved symbol')['UniProt ID(supplied by UniProt)']
    .to_dict()
)
uniprot_ids = []
for protein in graph_apid_huri.vs['name']:
    uniprot_ids.append(symbol_to_uniprot.get(protein, protein))

In [46]:
def enrichment_analysis(processes, idset_list, uniprot_conversion, universe, associations, ont):
    """Run a GO enrichment study for every protein set and collect the significant terms.

    Parameters
    ----------
    processes : sequence of labels, one per protein set (stored in the `process`
        column of the output).
    idset_list : sequence of protein-symbol collections, paired with `processes`
        by position.
    uniprot_conversion : DataFrame with `Approved symbol` and
        `UniProt ID(supplied by UniProt)` columns. Symbols without an entry are
        passed to the study unchanged.
    universe, associations, ont : forwarded to `GOEnrichmentStudyNS` as its first
        three positional arguments.

    Returns
    -------
    DataFrame with one row per enriched term with `p_fdr_bh < 0.05` and the
    columns GO, NS, enrichment, name, ratio_in_study, ratio_in_pop, p_uncorrected,
    depth, study_count, study_items and process. Empty if there was no input.
    """
    goeaobj = GOEnrichmentStudyNS(
        universe,
        associations,
        ont,
        propagate_counts=False,
        alpha=0.05,
        methods=['fdr_bh'])

    # Symbol -> accession lookup (first table row per symbol), built once instead of
    # scanning the conversion table for every protein of every set.
    symbol_to_uniprot = (
        uniprot_conversion
        .drop_duplicates(subset='Approved symbol')
        .set_index('Approved symbol')['UniProt ID(supplied by UniProt)']
        .to_dict()
    )

    fields = ["GO", "NS", "enrichment", "name", "ratio_in_study", "ratio_in_pop",
              "p_uncorrected", "depth", "study_count", "study_items"]

    per_process_results = []
    for process, idset in tqdm(zip(processes, idset_list), total=len(idset_list)):
        # Fall back to the symbol itself when it has no accession. The previous code
        # appended an unrelated variable (`protein`) that is not defined in this
        # function.
        study_ids = [symbol_to_uniprot.get(symbol, symbol) for symbol in idset]

        goea_results_all = goeaobj.run_study(study_ids, prt=None)
        significant = [r.get_field_values(fields) for r in goea_results_all if r.p_fdr_bh < 0.05]

        go_results = pd.DataFrame(significant, columns=fields)
        go_results['process'] = process
        per_process_results.append(go_results)

    # `DataFrame.append` no longer exists in pandas 2; concatenate once at the end.
    if not per_process_results:
        return pd.DataFrame()
    return pd.concat(per_process_results)

In [None]:
# GO enrichment of the conservative disease modules; the result is cached as CSV.
disease_conservative_enrichment_df = enrichment_analysis(
    processes=disgenet_proteins_indexes_df['process'],
    idset_list=disgenet_proteins_indexes_df['conservative_module_ids'],
    uniprot_conversion=hgnc_symbols,
    universe=uniprot_ids,
    associations=ns2assc,
    ont=obodag,
)
disease_conservative_enrichment_df.to_csv('../../data/processed/goea/disease_conservative.csv', index=False)

In [None]:
# Make sure the SCA table has a clean 0..n-1 index before it is walked by position.
# `drop=True` discards the old index instead of inserting it as yet another column:
# the table was already re-indexed when it was created, and an in-place reset without
# `drop` adds a junk column on every run and fails once that column name is taken.
disgenet_sca_proteins_indexes_df = disgenet_sca_proteins_indexes_df.reset_index(drop=True)
disease_enrichment_df = enrichment_analysis(disgenet_sca_proteins_indexes_df['process'], disgenet_sca_proteins_indexes_df['main_component_ids'], hgnc_symbols, uniprot_ids, ns2assc, obodag)
disease_enrichment_df.to_csv('../../data/processed/goea/disease_sca.csv', index=False)

In [None]:
# GO enrichment of the Reactome process modules; the result is cached as CSV.
process_enrichment_df = enrichment_analysis(
    processes=reactome_proteins_indexes_df['process'],
    idset_list=reactome_proteins_indexes_df['proteins_ids'],
    uniprot_conversion=hgnc_symbols,
    universe=uniprot_ids,
    associations=ns2assc,
    ont=obodag,
)
process_enrichment_df.to_csv('../../data/processed/goea/process.csv', index=False)

In [47]:
# Reload the enrichment tables from the CSV files written by the enrichment cells,
# so the analysis below can run without repeating the enrichment itself.
disease_conservative_enrichment_df = pd.read_csv('../../data/processed/goea/disease_conservative.csv')
disease_enrichment_df = pd.read_csv('../../data/processed/goea/disease_sca.csv')
process_enrichment_df = pd.read_csv('../../data/processed/goea/process.csv')

In [48]:
def goterms_intersections(enrichment_df, clf_results, uniprot_conversion, associations, target='new_proteins'):
    """Compare the GO terms of each candidate protein with its module's enriched terms.

    Only rows of `clf_results` with `fp > 0` are used. Proteins are translated to
    UniProt accessions where `uniprot_conversion` has an entry; proteins missing
    from an aspect's association mapping produce no row for that aspect.

    Parameters
    ----------
    enrichment_df : DataFrame with `GO`, `NS`, `name` and `process` columns.
    clf_results : DataFrame with `fp`, `process` and the `target` column (an
        iterable of protein symbols per row).
    uniprot_conversion : DataFrame with `Approved symbol` and
        `UniProt ID(supplied by UniProt)` columns.
    associations : mapping aspect -> {protein id -> set of GO ids}.
    target : name of the `clf_results` column holding the proteins to inspect.

    Returns
    -------
    DataFrame with the columns `Process`, `NS`, `Protein`, `Protein GO` (the
    protein's GO set), `Common GO` and `Common GO Name` (lists of the shared term
    ids / names, NaN when there is no overlap).
    """
    # Loop-invariant lookups, built once instead of scanning a DataFrame for every
    # protein and aspect. Both keep the first row per key, like the scans they replace.
    symbol_to_uniprot = (
        uniprot_conversion
        .drop_duplicates(subset='Approved symbol')
        .set_index('Approved symbol')['UniProt ID(supplied by UniProt)']
        .to_dict()
    )
    go_names = enrichment_df.drop_duplicates(subset='GO').set_index('GO')['name'].to_dict()

    intersection_dict = {'Process':[], 'NS':[], 'Protein':[], 'Protein GO':[], 'Common GO':[], 'Common GO Name':[]}
    for _, row in clf_results.iterrows():
        if not (row['fp'] > 0):
            continue

        process = row['process']
        process_enrichment = enrichment_df[enrichment_df['process'] == process]
        proteins = [symbol_to_uniprot.get(symbol, symbol) for symbol in row[target]]

        for aspect, association in associations.items():
            go_terms = set(process_enrichment[process_enrichment['NS'] == aspect]['GO'].values)
            for protein in proteins:
                # Only the association lookup is allowed to fail; a KeyError raised
                # anywhere else is a real error and must not be swallowed.
                try:
                    protein_go = association[protein]
                except KeyError:
                    continue

                intersect = list(go_terms & protein_go)
                if len(intersect) > 0:
                    common_go = intersect
                    common_names = [go_names[term] for term in intersect]
                else:
                    common_go = np.nan
                    common_names = np.nan

                intersection_dict['Process'].append(process)
                intersection_dict['NS'].append(aspect)
                intersection_dict['Protein'].append(protein)
                intersection_dict['Protein GO'].append(protein_go)
                intersection_dict['Common GO'].append(common_go)
                intersection_dict['Common GO Name'].append(common_names)
    return pd.DataFrame.from_dict(intersection_dict)

In [49]:
# GO overlap of the predicted proteins with the enriched terms of their module.
process_fp_go = goterms_intersections(
    enrichment_df=process_enrichment_df, clf_results=process_rwr_whole,
    uniprot_conversion=hgnc_symbols, associations=ns2assc)
disease_fp_go = goterms_intersections(
    enrichment_df=disease_enrichment_df, clf_results=disease_rwr_whole,
    uniprot_conversion=hgnc_symbols, associations=ns2assc)
disease_conservative_fp_go = goterms_intersections(
    enrichment_df=disease_conservative_enrichment_df, clf_results=disease_conservative_rwr_whole,
    uniprot_conversion=hgnc_symbols, associations=ns2assc)

In [50]:
# Per (Process, Protein): number of rows with a non-null `Common GO` / `Common GO Name`
# (`count` ignores NaN; the rows of all namespaces of a protein are pooled).
process_fp_go_count = process_fp_go.groupby(['Process', 'Protein']).count()[['Common GO', 'Common GO Name']].reset_index()
disease_fp_go_count = disease_fp_go.groupby(['Process', 'Protein']).count()[['Common GO', 'Common GO Name']].reset_index()
disease_conservative_fp_go_count = disease_conservative_fp_go.groupby(['Process', 'Protein']).count()[['Common GO', 'Common GO Name']].reset_index()

In [51]:
# Per process: share of proteins with at least one common GO term. Zero counts are
# turned into NaN so the per-process `count` skips them.
process_fp_count = (
    process_fp_go_count
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common GO'] / counts['Protein'])
    .sort_values(by='Coverage', ascending=False)
)

In [52]:
# Per disease (SCA modules): share of proteins with at least one common GO term.
disease_fp_count = (
    disease_fp_go_count
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common GO'] / counts['Protein'])
    .sort_values(by='Coverage', ascending=False)
)

In [53]:
# Per disease (conservative modules): share of proteins with at least one common GO term.
disease_conservative_fp_count = (
    disease_conservative_fp_go_count
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common GO'] / counts['Protein'])
    .sort_values(by='Coverage', ascending=False)
)

In [54]:
# Same GO comparison for the randomly drawn proteins (baseline).
process_fp_go_random = goterms_intersections(
    enrichment_df=process_enrichment_df, clf_results=process_rwr_whole,
    uniprot_conversion=hgnc_symbols, associations=ns2assc, target='random_proteins')
disease_fp_go_random = goterms_intersections(
    enrichment_df=disease_enrichment_df, clf_results=disease_rwr_whole,
    uniprot_conversion=hgnc_symbols, associations=ns2assc, target='random_proteins')
disease_conservative_fp_go_random = goterms_intersections(
    enrichment_df=disease_conservative_enrichment_df, clf_results=disease_conservative_rwr_whole,
    uniprot_conversion=hgnc_symbols, associations=ns2assc, target='random_proteins')

In [55]:
# Baseline tallies per (Process, Protein): rows with a non-null `Common GO` /
# `Common GO Name` (`count` ignores NaN).
process_fp_go_count_random = process_fp_go_random.groupby(['Process', 'Protein']).count()[['Common GO', 'Common GO Name']].reset_index()
disease_fp_go_count_random = disease_fp_go_random.groupby(['Process', 'Protein']).count()[['Common GO', 'Common GO Name']].reset_index()
disease_conservative_fp_go_count_random = disease_conservative_fp_go_random.groupby(['Process', 'Protein']).count()[['Common GO', 'Common GO Name']].reset_index()

In [56]:
# Baseline coverage per process / disease: zero counts -> NaN, count the non-null
# values per process, divide proteins with a common GO term by all proteins, sort.
process_fp_count_random = (
    process_fp_go_count_random
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common GO'] / counts['Protein'])
    .sort_values(by='Coverage', ascending=False)
)

disease_fp_count_random = (
    disease_fp_go_count_random
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common GO'] / counts['Protein'])
    .sort_values(by='Coverage', ascending=False)
)

disease_conservative_fp_count_random = (
    disease_conservative_fp_go_count_random
    .replace(0, np.nan)
    .groupby('Process')
    .count()
    .assign(Coverage=lambda counts: counts['Common GO'] / counts['Protein'])
    .sort_values(by='Coverage', ascending=False)
)

In [57]:
# Hand the GO coverage values (predictions, then baseline) to R as numeric vectors.
r_process_fp_go_count = robjects.FloatVector(process_fp_count['Coverage'].tolist())
r_disease_fp_go_count = robjects.FloatVector(disease_fp_count['Coverage'].tolist())
r_disease_conservative_fp_go_count = robjects.FloatVector(disease_conservative_fp_count['Coverage'].tolist())

r_process_fp_go_count_random = robjects.FloatVector(process_fp_count_random['Coverage'].tolist())
r_disease_fp_go_count_random = robjects.FloatVector(disease_fp_count_random['Coverage'].tolist())
r_disease_conservative_fp_go_count_random = robjects.FloatVector(disease_conservative_fp_count_random['Coverage'].tolist())

In [58]:
# Kernel density of the GO coverage values on a fixed grid over [0, 1].
# `from` must be given explicitly: R's `density` otherwise starts the grid at
# min(x) - cut * bw (cut = 3), so the grid would neither start at 0 nor be guaranteed
# to be the same for predictions and baseline, while the plot below pairs the values
# point by point and draws them over np.linspace(0, 1, 512).
# `from` is a Python keyword, hence the dict unpacking.
r_process_go_density = rdensity(r_process_fp_go_count, to=1, bw=0.2, **{'from': 0})
r_disease_go_density = rdensity(r_disease_fp_go_count, to=1, bw=0.2, **{'from': 0})
r_disease_conservative_go_density = rdensity(r_disease_conservative_fp_go_count, to=1, bw=0.2, **{'from': 0})
r_process_go_density_random = rdensity(r_process_fp_go_count_random, to=1, bw=0.2, **{'from': 0})
r_disease_go_density_random = rdensity(r_disease_fp_go_count_random, to=1, bw=0.2, **{'from': 0})
r_disease_conservative_go_density_random = rdensity(r_disease_conservative_fp_go_count_random, to=1, bw=0.2, **{'from': 0})

# Keep only the estimated density values (`y` component of the R result).
process_go_density = np.array(r_process_go_density.rx2('y'))
disease_go_density = np.array(r_disease_go_density.rx2('y'))
disease_conservative_go_density = np.array(r_disease_conservative_go_density.rx2('y'))
process_go_density_random = np.array(r_process_go_density_random.rx2('y'))
disease_go_density_random = np.array(r_disease_go_density_random.rx2('y'))
disease_conservative_go_density_random = np.array(r_disease_conservative_go_density_random.rx2('y'))

In [59]:
# Exact zeros would make the density ratio / its logarithm undefined, so they are
# replaced by the smallest non-zero density of the same curve.
process_go_density[process_go_density==0] = np.nan
disease_go_density[disease_go_density==0] = np.nan
disease_conservative_go_density[disease_conservative_go_density==0] = np.nan
process_go_density_random[process_go_density_random==0] = np.nan
disease_go_density_random[disease_go_density_random==0] = np.nan
disease_conservative_go_density_random[disease_conservative_go_density_random==0] = np.nan

# `np.nanmin` ignores the NaN markers. The built-in `min` returns NaN whenever the
# first element is NaN (every comparison with NaN is False), which left the zeros
# unreplaced in exactly the case this step is meant to handle.
process_go_density = np.nan_to_num(process_go_density, nan=np.nanmin(process_go_density))
disease_go_density = np.nan_to_num(disease_go_density, nan=np.nanmin(disease_go_density))
disease_conservative_go_density = np.nan_to_num(disease_conservative_go_density, nan=np.nanmin(disease_conservative_go_density))
process_go_density_random = np.nan_to_num(process_go_density_random, nan=np.nanmin(process_go_density_random))
disease_go_density_random = np.nan_to_num(disease_go_density_random, nan=np.nanmin(disease_go_density_random))
disease_conservative_go_density_random = np.nan_to_num(disease_conservative_go_density_random, nan=np.nanmin(disease_conservative_go_density_random))

In [69]:
# Log-ratio of the GO coverage density of the predictions to that of the random
# baseline (values above 0 mean the coverage is more frequent among predictions).
traces = []
traces.append(go.Scatter(x=np.linspace(0,1,512),
                y=np.log(np.divide(process_go_density,process_go_density_random)),
                name='Process', marker_color='#F97068', line_width=8))
traces.append(go.Scatter(x=np.linspace(0,1,512),
                y=np.log(np.divide(disease_go_density,disease_go_density_random)),
                name='Disease SCA', marker_color='#FDA96D', line_width=8))
traces.append(go.Scatter(x=np.linspace(0,1,512),
                y=np.log(np.divide(disease_conservative_go_density,disease_conservative_go_density_random)),
                name='Disease Conservative', marker_color='#59C3C3', line_width=8))

fig = go.Figure()
fig.add_traces(traces)
# Axis titles use the nested `title=dict(text=..., font=..., standoff=...)` form;
# the flat `titlefont` attribute is deprecated.
fig.update_layout(
    height=1000,
    width=2000,
    legend=dict(orientation='h', font=dict(size=50, color='black'), yanchor="bottom", y=1.01, xanchor="right", x=0.99),
    xaxis=dict(
        tickfont=dict(size=50),
        title=dict(text='Coverage', font=dict(size=50)),
        showline=True, linewidth=3, linecolor='black', mirror=True, color='black'),
    yaxis=dict(
        tickfont=dict(size=50),
        title=dict(text='log(Predictions/Random Frequencies)', font=dict(size=50), standoff=20),
        showline=True, linewidth=3, linecolor='black', mirror=True,
        zeroline=True, zerolinewidth=5, zerolinecolor='rgb(204,204,204)', color='black'),
    paper_bgcolor='rgba(255,255,255,1)',
    plot_bgcolor='rgba(255,255,255,0.9)',
    margin=dict(l=20, r=20, t=25, b=20))
fig.show()