In [None]:
import os
import math
import pandas as pd
import numpy as np
from ddot import Ontology

import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

In [None]:
# Global plotting style for the notebook. These calls mutate seaborn /
# matplotlib global state, so their order matters: the later set_style("ticks")
# call is applied after set_style("white").
sns.set()
sns.set_style("white")
sns.set_style("ticks", {"xtick.major.size":8, "ytick.major.size":8})
# NOTE(review): the return value is discarded here; presumably this call has no
# effect outside a `with` block -- confirm against the seaborn docs.
sns.axes_style("whitegrid")
sns.set_palette("muted")
# NOTE(review): return value discarded and this is not the last expression of
# the cell, so nothing is displayed -- presumably a leftover; confirm.
sns.color_palette("muted")

# NOTE(review): presumably keeps text as text (not outlines) in exported SVGs
# -- confirm against the matplotlib rcParams documentation.
plt.rcParams['svg.fonttype'] = 'none'

# Font sizes referenced by the plt.rc calls below.
SMALL_SIZE = 12
MEDIUM_SIZE = 16
BIGGER_SIZE = 22

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

In [None]:
def load_hierarchy_network(network_name='../data/NeST/NeST'):
    """Load the node and edge tables of a hierarchy network.

    Parameters
    ----------
    network_name : str, optional
        Path prefix shared by both input files; '_node.csv' and '_edge.sif'
        are appended to it. Defaults to the NeST files under '../data/NeST/'.

    Returns
    -------
    nodes_df : pandas.DataFrame
        The 'name' and 'Genes' columns of '<network_name>_node.csv'.
    edges_df : pandas.DataFrame
        The tab-separated, header-less '<network_name>_edge.sif' file with its
        three columns named 'S', 'M' and 'T'.
    """
    # Only the term name and its gene list are used downstream, so the other
    # node columns are dropped right away.
    nodes_df = pd.read_csv(network_name + '_node.csv')[['name', 'Genes']]

    edges_df = pd.read_csv(
        network_name + '_edge.sif',
        sep='\t',
        header=None,
        names=['S', 'M', 'T'],
    )

    return nodes_df, edges_df

In [None]:
def convert_to_clixo_format(hierarchy_edges_df, hierarchy_nodes_df, min_size):
    """Build an Ontology from hierarchy edge and node tables.

    The tables are flattened into a three-column (Source, Target, Mapping)
    table: one 'default' row per term-term edge and one 'gene' row per
    term-gene membership. That table is written to a temporary tab-separated
    file, loaded with ``Ontology.from_table(..., clixo_format=True)``, and the
    result is passed through ``collapse_ontology`` and ``propagate``.

    Parameters
    ----------
    hierarchy_edges_df : pandas.DataFrame
        Edge table; columns 'S' (source term) and 'T' (target term) are read.
    hierarchy_nodes_df : pandas.DataFrame
        Node table; columns 'name' (term) and 'Genes' (whitespace-separated
        gene names) are read. Rows whose 'Genes' value is missing contribute
        no gene rows.
    min_size : int
        Forwarded as ``min_term_size`` to ``Ontology.collapse_ontology``.

    Returns
    -------
    Ontology
        The ontology after ``collapse_ontology(method='python', ...)`` and
        ``propagate(direction='reverse')``.
    """
    temp_file = '../data/temp_ont.txt'

    # Collect every row first and build the frame once. DataFrame.append was
    # removed in pandas 2.0, and calling it per row copied the frame each time.
    records = [
        {'Source': source, 'Target': target, 'Mapping': 'default'}
        for source, target in zip(hierarchy_edges_df['S'], hierarchy_edges_df['T'])
    ]

    for term, gene_field in zip(hierarchy_nodes_df['name'], hierarchy_nodes_df['Genes']):
        if pd.isna(gene_field):
            # An empty CSV cell is read as NaN, which has no .split().
            continue
        records.extend(
            {'Source': term, 'Target': gene, 'Mapping': 'gene'}
            for gene in gene_field.split()
        )

    ont_df = pd.DataFrame(records, columns=['Source', 'Target', 'Mapping'])

    try:
        ont_df.to_csv(temp_file, sep='\t', header=False, index=False)
        ont = Ontology.from_table(temp_file, clixo_format=True)
    finally:
        # Remove the scratch file even when writing or parsing fails; the
        # existence check keeps a failed write from masking the real error.
        if os.path.exists(temp_file):
            os.remove(temp_file)

    ont = ont.collapse_ontology(method='python', min_term_size=min_size)
    ont = ont.propagate(direction='reverse')

    return ont

In [None]:
def get_jaccard_index(g1, g2):
    """Return the Jaccard index |g1 & g2| / |g1 | g2| of two sets.

    Parameters
    ----------
    g1, g2 : set
        The two collections to compare (anything offering ``union`` and
        ``intersection``).

    Returns
    -------
    float
        A value in [0, 1]. When both sets are empty the ratio is undefined;
        0.0 is returned (no measurable overlap) instead of raising
        ZeroDivisionError.
    """
    union_size = len(g1.union(g2))
    if union_size == 0:
        return 0.0
    return len(g1.intersection(g2)) / union_size

In [None]:
def get_system_overlap(ont):
    """Compute the pairwise Jaccard overlap between the terms of an ontology.

    For every ordered pair of distinct terms that are not in a direct
    parent/child relation, the gene set of each term (its own
    ``term_2_gene`` entries plus those of its direct children in
    ``parent_2_child``) is compared with ``get_jaccard_index``. Pairs whose
    index is at least 0.5 are printed as '<t1> <t2> <index>'.

    Parameters
    ----------
    ont : Ontology
        Object exposing ``terms``, ``parent_2_child`` and ``term_2_gene``.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row/column per entry of ``ont.terms`` (in that
        order). Entries for identical terms and for direct parent/child pairs
        are left at 0.
    """
    terms = list(ont.terms)
    # Size the matrix from the sequence that is actually enumerated below.
    # NOTE(review): assumes ont.parent_2_child has exactly one key per term,
    # which the per-term lookups below already require -- confirm.
    n = len(terms)
    overlap_fraction = np.zeros((n, n))

    # The gene set of a term does not depend on which term it is compared
    # with, so build it once per term instead of once per pair.
    genes_by_term = {}
    for term in terms:
        genes = set(ont.term_2_gene[term])
        for child in ont.parent_2_child[term]:
            genes.update(ont.term_2_gene[child])
        genes_by_term[term] = genes

    for i, t1 in enumerate(terms):
        for j, t2 in enumerate(terms):
            # Skip the diagonal and direct parent/child pairs: their overlap
            # follows from the hierarchy itself.
            if t1 == t2 or t2 in ont.parent_2_child[t1] or t1 in ont.parent_2_child[t2]:
                continue

            ji = get_jaccard_index(genes_by_term[t1], genes_by_term[t2])
            overlap_fraction[i, j] = ji
            if ji >= 0.5:
                print('{} {} {:.2f}'.format(t1, t2, ji))

    return overlap_fraction

In [None]:
# Ontology variant to analyse; it selects the input file below.
n_type = 'ctg_av'

# Load that ontology and compute the pairwise term-overlap matrix.
ont_file = '../data/training_files_av/ontology_{}.txt'.format(n_type)
ont = Ontology.from_table(ont_file, clixo_format=True)
jaccard_table = get_system_overlap(ont)

In [None]:
# Display the overlap matrix returned by get_system_overlap(ont).
jaccard_table

In [None]:
# Heat map of the pairwise overlap matrix.
ax = sns.heatmap(jaccard_table)

In [None]:
# Histogram of every entry of the overlap matrix.
fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(jaccard_table.flatten())
plt.show()

In [None]:
# Load the hierarchy tables and turn them into an Ontology, passing 4 as the
# minimum term size used when collapsing.
h_nodes_df, h_edges_df = load_hierarchy_network()
ont_nest = convert_to_clixo_format(
    hierarchy_edges_df=h_edges_df,
    hierarchy_nodes_df=h_nodes_df,
    min_size=4,
)

In [None]:
# Display the ontology built from the hierarchy tables.
ont_nest

In [None]:
# Pairwise term overlap for the hierarchy-derived ontology; pairs with an
# index of at least 0.5 are printed by get_system_overlap.
jaccard_table_nest = get_system_overlap(ont_nest)

In [None]:
*NEST:102 NEST:110 0.78   -   Common child
NEST:105 NEST:77 0.70     -   Common child in NeST and unimportant here
NEST:107 NEST:60 0.50     -   Common child and not important
NEST:125 NEST:231 0.50    -   231 has 5/7 genes in 125 but both are unimportant
NEST:137 NEST:145 0.62    -   Common child in NeST and unimportant here
NEST:48  NEST:63 0.67     -   Multiple common children in NeST and unimportant here
*NEST:65  NEST:91 0.50    -   Common child