In [None]:
import os
import math
import pandas as pd
import numpy as np
from ddot import Ontology

In [None]:
def load_hierarchy_network():
    """Read the NeST hierarchy node and edge tables from ``../data/NeST``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(nodes, edges)``. ``nodes`` holds only the 'name' and 'Genes'
        columns of ``NeST_node.csv``. ``edges`` is ``NeST_edge.sif`` read as a
        headerless tab-separated table whose columns are named 'S', 'M', 'T'.
    """
    prefix = '../data/NeST/NeST'

    # Node table: drop every column except the two used downstream.
    node_table = pd.read_csv(prefix + '_node.csv')
    node_table = node_table[['name', 'Genes']]

    # Edge table: the file has no header row, so column names are supplied.
    edge_table = pd.read_csv(
        prefix + '_edge.sif',
        sep='\t',
        header=None,
        names=['S', 'M', 'T'],
    )

    return node_table, edge_table

In [None]:
def convert_to_clixo_format(hierarchy_edges_df, hierarchy_nodes_df, gene_list, min_size):
    """Build a ddot ``Ontology`` from hierarchy edge and node tables.

    The two tables are flattened into a three-column
    (Source, Target, Mapping) table, written to a temporary tab-separated
    file, loaded through ``Ontology.from_table(..., clixo_format=True)``, and
    the result is collapsed and then propagated.

    Parameters
    ----------
    hierarchy_edges_df : pandas.DataFrame
        Term-to-term edges. Column 'S' is written as Source and column 'T' as
        Target, with Mapping set to 'default'.
    hierarchy_nodes_df : pandas.DataFrame
        One row per term with columns 'name' and 'Genes'. 'Genes' is a
        whitespace-separated string; rows whose 'Genes' value is not a string
        (e.g. NaN from an empty CSV cell) contribute no gene rows.
    gene_list : iterable
        Genes to keep. A gene listed under a term is written (Mapping 'gene')
        only if it is a member of this collection.
    min_size : int
        Forwarded as ``min_term_size`` to ``Ontology.collapse_ontology``.

    Returns
    -------
    Ontology
        The ontology after ``collapse_ontology(method='python', ...)`` and
        ``propagate(direction='reverse')``.
    """
    temp_file = '../data/temp_ont.txt'
    columns = ['Source', 'Target', 'Mapping']

    # Set membership is O(1) per lookup; the list scan was O(len(gene_list)).
    allowed_genes = set(gene_list)

    # Collect rows in a plain list and build the frame once. Growing a
    # DataFrame row by row is quadratic, and DataFrame.append no longer exists
    # in pandas >= 2.0.
    records = [
        (source, target, 'default')
        for source, target in zip(hierarchy_edges_df['S'], hierarchy_edges_df['T'])
    ]
    for term, gene_field in zip(hierarchy_nodes_df['name'], hierarchy_nodes_df['Genes']):
        if not isinstance(gene_field, str):
            # Missing gene annotation: nothing to map for this term.
            continue
        records.extend(
            (term, gene, 'gene')
            for gene in gene_field.split()
            if gene in allowed_genes
        )

    # Passing `columns` keeps the frame well-formed even when there are no rows.
    ont_df = pd.DataFrame(records, columns=columns)

    try:
        ont_df.to_csv(temp_file, sep='\t', header=False, index=False)
        ont = Ontology.from_table(temp_file, clixo_format=True)
    finally:
        # Always clean up the scratch file, including when loading fails.
        if os.path.exists(temp_file):
            os.remove(temp_file)

    ont = ont.collapse_ontology(method='python', min_term_size=min_size)
    ont = ont.propagate(direction='reverse')

    return ont

In [None]:
def ont_neuron_count(ont, k):
    """Sum ``k * (k * n_children + n_genes)`` over every term of ``ont``.

    Parameters
    ----------
    ont : object
        Must expose ``term_2_gene`` and ``parent_2_child`` mappings keyed by
        term; only the lengths of their values are used.
    k : number
        Multiplier applied per term as shown in the formula above.

    Returns
    -------
    number
        The accumulated total; 0 when ``ont.term_2_gene`` is empty.
    """
    return sum(
        k * (k * len(ont.parent_2_child[term]) + len(term_genes))
        for term, term_genes in ont.term_2_gene.items()
    )

In [None]:
# Network variant to process, and the minimum term size later passed to
# convert_to_clixo_format / collapse_ontology.
n_type = 'random_718_e'
min_term_size = 5

# The gene2ind file is read as a headerless tab-separated table with columns
# named 'I' and 'G'; the 'G' column becomes the gene list.
gene2ind_df = pd.read_csv(
    '../data/gene2ind_' + n_type + '.txt',
    sep='\t',
    header=None,
    names=['I', 'G'],
)
gene_list = list(gene2ind_df['G'])

h_nodes_df, h_edges_df = load_hierarchy_network()

In [None]:
# Build the ontology for the selected gene set and save it in CLIXO format.
ont_file = '../data/ontology_' + n_type + '.txt'

ont = convert_to_clixo_format(
    hierarchy_edges_df=h_edges_df,
    hierarchy_nodes_df=h_nodes_df,
    gene_list=gene_list,
    min_size=min_term_size,
)
ont.to_table(ont_file, clixo_format=True)

# Summary of the ontology and its root term(s).
print(ont)
print(ont.get_roots())

# Neuron total from ont_neuron_count with k=6, plus one per gene in gene_list.
total_neuron_count = ont_neuron_count(ont=ont, k=6) + len(gene_list)
print(total_neuron_count)

In [None]:
# Randomize the genes: reload the saved ontology, shuffle its gene
# assignments, collapse again with the same minimum term size, and save.
# NOTE(review): no random seed is set in this cell; if shuffle_genes is
# stochastic the written file will differ between runs — confirm.

ont2_file = '../data/ontology_' + n_type + '_bb_d.txt'

ont = Ontology.from_table(ont_file, clixo_format=True)

ont2 = ont.shuffle_genes().collapse_ontology(
    method='python',
    min_term_size=min_term_size,
)
ont2.to_table(ont2_file, clixo_format=True)

print(ont2)
print(ont2.get_roots())

In [None]:
# Load the saved "cg_go" ontology and report its summary, roots, and neuron
# count. No filtering is performed in this cell.

network_name = "cg_go"
go_ont_file = "../data/ontology_" + network_name + ".txt"

go_ont = Ontology.from_table(go_ont_file, clixo_format=True)
print(go_ont)
print(go_ont.get_roots())

# NOTE(review): gene_list was loaded for n_type, not for this ontology —
# confirm that adding its length here is intended.
total_neuron_count = ont_neuron_count(ont=go_ont, k=6) + len(gene_list)
print(total_neuron_count)

In [None]:
# Print every term that has at most two genes, together with its genes and
# its parent_2_child entry.
for term, term_genes in go_ont.term_2_gene.items():
    if len(term_genes) > 2:
        continue
    print(term, term_genes, go_ont.parent_2_child[term])