In [1]:
import json
import pickle
import networkx as nx
import chexmix.graph as graph
from chexmix.utils import remove_symbols


In [11]:
# --- KPEB mapping --------------------------------------------------------
# JSON object keys always arrive as strings, so they are converted to int
# once the file has been read.
with open('../data/KPEB/KPEB_name_taxID.json', 'r') as kpeb_file:
    kpeb_data = json.load(kpeb_file)
kpeb_data = dict((int(key), value) for key, value in kpeb_data.items())

# --- Viridiplantae data --------------------------------------------------
# The file carries a .txt extension but is read as a binary pickle.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this from a trusted source.
with open('../data/viridiplanae.txt', 'rb') as viridiplantae_file:
    viridiplantae = pickle.load(viridiplantae_file)

1


In [3]:
# Directory prefix for the GraphML files written by the cells below; the
# sanitized input name is appended directly to it, so keep the trailing slash.
path = '../data/network/'

In [4]:
# Standalone keyword string; the same value is also the first entry of
# `input_names` defined in a later cell.
keyword = 'podophyllum peltatum'

In [10]:
# Parent node type handed to TaxonomyGraph.from_pubtator_bioentities.
parent_node_type = graph.TaxParentType.Genus  # Genus or Family

# Names processed one by one by the graph-building loops below; each one is
# also used (after remove_symbols) as the stem of the output file names.
input_names = [
    'podophyllum peltatum',
    'taxus cuspidata',
    'dermal papilla',
]

In [None]:
# Build one keyword-centred BioGraph per entry of `input_names`, write each to
# GraphML and collect them in `biographs_of_keyword` for the union /
# intersection step that follows.
#
# Fix: the PubMed query and the hub node now use the loop variable
# `input_name`. Previously the fixed global `keyword` was used, so every
# iteration rebuilt the same graph and only the output file name changed.
biographs_of_keyword = []
for input_name in input_names:
    file_name = path + remove_symbols(input_name)

    # Literature lookup for the current name, then entity annotations for the
    # returned articles.
    pubmed_graph = graph.PubMedGraph.from_keyword(input_name)
    article_ids = pubmed_graph.get_article_ids()
    pubtator_graph = graph.PubTatorGraph.from_article_ids(article_ids)

    # Taxonomy and MeSH graphs derived from the annotated bio-entities.
    tax_graph = graph.TaxonomyGraph.from_pubtator_bioentities(parent_node_type, pubtator_graph.get_bioentities('TAXO'),
                                                              viridiplantae, 'KPEB', kpeb_data)
    mesh_graph = graph.MeSHGraph.from_pubtator_bioentities(pubtator_graph.get_bioentities('MESH'), ['D','C'])

    # Hub graph: the input name is linked to every taxonomy root and to every
    # node of the MeSH/PubTator intersection.
    biograph_of_keyword = graph.BioGraph()
    biograph_of_keyword.add_edges_from([(input_name, root_node) for root_node in tax_graph.find_roots()])
    mesh_entities_graph = mesh_graph.intersection(pubtator_graph)
    biograph_of_keyword.add_edges_from([(input_name, appeared_node) for appeared_node in mesh_entities_graph.nodes()])
    # NOTE(review): this lookup raises KeyError if neither add_edges_from call
    # above added an edge (no roots and no shared MeSH nodes).
    biograph_of_keyword.nodes[input_name]['type'] = "Keyword"
    # Copy node attributes over from the combined taxonomy + MeSH graph.
    biograph_of_keyword.inherit_attr(tax_graph.union(mesh_graph))

    nx.write_graphml(biograph_of_keyword, f'{file_name}.graphml')
    biographs_of_keyword.append(biograph_of_keyword)

In [None]:
# Fold all per-keyword graphs into one union graph and one intersection graph.
#
# Fix: `union` / `intersection` results are now assigned back to the
# accumulators. Elsewhere in this notebook both methods are used for their
# return value, and the original loop discarded it, leaving both names bound
# to `biographs_of_keyword[0]`.
# NOTE(review): raises IndexError when `biographs_of_keyword` is empty.
union_graph = biographs_of_keyword[0]
intersect_graph = biographs_of_keyword[0]
for g in biographs_of_keyword[1:]:
    union_graph = union_graph.union(g)
    intersect_graph = intersect_graph.intersection(g)

In [None]:
# Pass the nodes of the intersection graph to set_attribute on the union graph
# with 'highlight' = True (presumably flags the shared nodes -- confirm against
# BioGraph.set_attribute).
shared_nodes = intersect_graph.nodes()
union_graph.set_attribute('highlight', True, shared_nodes)

In [None]:
# Export the four source graphs (PubMed, PubTator, taxonomy, MeSH) of every
# entry in `input_names` as separate GraphML files.
#
# Fix: the PubMed query now uses the loop variable `input_name`. Previously the
# fixed global `keyword` was used, so every set of files held the same data
# under different names.
for input_name in input_names:
    file_name = path + remove_symbols(input_name)

    # Literature lookup for the current name and annotations for its articles.
    pubmed_graph = graph.PubMedGraph.from_keyword(input_name)
    article_ids = pubmed_graph.get_article_ids()
    pubtator_graph = graph.PubTatorGraph.from_article_ids(article_ids)

    # Taxonomy graph from the 'TAXO' entities.
    bioentities = pubtator_graph.get_bioentities('TAXO')
    tax_graph = graph.TaxonomyGraph.from_pubtator_bioentities(parent_node_type, bioentities, viridiplantae, 'KPEB', kpeb_data)

    # MeSH graph from the 'MESH' entities.
    bioentities = pubtator_graph.get_bioentities('MESH')
    mesh_graph = graph.MeSHGraph.from_pubtator_bioentities(bioentities, ['D','C'])

    # The PubMed graph is converted with to_graphml() before writing; the other
    # three are written directly.
    nx.write_graphml(pubmed_graph.to_graphml(), f'{file_name}_pubmed.graphml')
    nx.write_graphml(pubtator_graph, f'{file_name}_pubtator.graphml')
    nx.write_graphml(tax_graph, f'{file_name}_taxonomy.graphml')
    nx.write_graphml(mesh_graph, f'{file_name}_mesh.graphml')

In [None]:
# Threshold for trimming the taxonomy graph in the next cell: a root is kept
# only when the total count over its descendants is strictly greater than this.

threshold = 3

In [None]:
# Trim the taxonomy graph down to the root subtrees whose total count exceeds
# `threshold`, then save the result.
# NOTE(review): `tax_graph` and `input_name` are whatever the last iteration of
# the preceding loop left behind, so only the final input name is processed.

# Keep only the INCLUDES edges and locate the roots of what remains.
sub_graph = tax_graph.remain_by_edge_types([graph.EdgeType.INCLUDES])
roots = sub_graph.find_roots()

# First pass: pick the roots whose descendants' total count is above threshold.
selected_roots = []
for root in roots:
    descendant_nodes = nx.descendants(sub_graph, root)
    if sub_graph.total_count(descendant_nodes) > threshold:
        selected_roots.append(root)

# Second pass: one subgraph per selected root.
sub_graphs = []
for r in selected_roots:
    sub_graphs.append(sub_graph.subgraph_from_root(r))

# NOTE(review): nx.compose_all raises ValueError when given an empty list,
# i.e. when no root passes the threshold.
trimmed_graph = nx.compose_all(sub_graphs)
output_file = f'./{input_name}_tax_thres_{threshold}.graphml'
nx.write_graphml(trimmed_graph, output_file)