# Graph analysis with networkx functions

In [1]:
import json, networkx, pandas

In [2]:
# Load the entities dataset.
# The file is decoded explicitly as UTF-8 so that non-ASCII entity names are read
# the same way regardless of the platform's default text encoding.
with open("../graph/entities_graph.json", "r", encoding="utf-8") as file:
    entities_json = json.load(file)

# Load the papers dataset (same explicit decoding).
with open("../graph/even_more_updated_output_file.json", "r", encoding="utf-8") as file:
    papers_json = json.load(file)


# Entities-graph conversion to a networkx object.
# The JSON file stores a directed graph, so it is converted to an undirected one:
# we go from 281734 edges to 140867. Nodes are the same.
entities_nxgraph = networkx.Graph()

# Each node keeps its name and category as node attributes.
for node in entities_json["nodes"]:
    entities_nxgraph.add_node(node["id"], name=node["name"], category=node["category"])

# Each edge keeps its weight and the papers shared by its two endpoints.
for edge in entities_json["links"]:
    entities_nxgraph.add_edge(
        edge["source"],
        edge["target"],
        weight=edge["weight"],
        common_papers=edge["common_papers"],
    )


# Papers-graph conversion to a networkx object.
# The same rules apply to this graph: we go from 455672 edges to 194991, since edges
# with the same source/target but different entities are collapsed into one.
# Nodes are the same.
papers_nxgraph = networkx.Graph()

# Each paper node keeps its terms as a node attribute.
for node in papers_json["nodes"]:
    papers_nxgraph.add_node(node["id"], terms=node["terms"])

# Paper edges carry no attributes.
for edge in papers_json["links"]:
    papers_nxgraph.add_edge(edge["source"], edge["target"])

In [None]:
# Report how many nodes and edges each graph contains
print(
    "Entities-graph:\n"
    f"- nodes: {entities_nxgraph.number_of_nodes()}\n"
    f"- edges: {entities_nxgraph.number_of_edges()}\n"
)
print(
    "Papers-graph:\n"
    f"- nodes: {papers_nxgraph.number_of_nodes()}\n"
    f"- edges: {papers_nxgraph.number_of_edges()}"
)

## Connected Components analysis

In [28]:
# Connectivity analysis: for each graph, check whether it is connected, count its
# connected components and list the component sizes in ascending order.
entities_is_connected = networkx.is_connected(entities_nxgraph)
entities_n_connected_components = networkx.number_connected_components(entities_nxgraph)
entities_size_connected_components = sorted(
    map(len, networkx.connected_components(entities_nxgraph))
)

papers_is_connected = networkx.is_connected(papers_nxgraph)
papers_n_connected_components = networkx.number_connected_components(papers_nxgraph)
papers_size_connected_components = sorted(
    map(len, networkx.connected_components(papers_nxgraph))
)


print(
    "Entities-graph:\n"
    f"- is_connected: {entities_is_connected}\n"
    f"- connected components: {entities_n_connected_components}\n"
    f"- size of connected components: {entities_size_connected_components}\n"
)
print(
    "Papers-graph:\n"
    f"- is_connected: {papers_is_connected}\n"
    f"- connected components: {papers_n_connected_components}\n"
    f"- size of connected components: {papers_size_connected_components}"
)

Entities-graph:
- is_connected: False
- connected components: 43
- size of connected components: [1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, 11, 13, 19, 14082]

Papers-graph:
- is_connected: False
- connected components: 41
- size of connected components: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1960]


## Bridges analysis

In [26]:
# Bridge analysis of the entities graph: does it have any bridge, and which edges are they
entities_has_bridges = networkx.has_bridges(entities_nxgraph)
entities_bridges = [*networkx.bridges(entities_nxgraph)]

print(
    "Entities-graph:\n"
    f"- has_bridges: {entities_has_bridges}\n"
    f"- number of bridges: {len(entities_bridges)}\n"
    f"- bridges: {entities_bridges}"
)

Entities-graph:
- has_bridges: True
- number of bridges: 8
- bridges: [('PE2, peptide', 'anti-bacterial activity, activity'), ('L-melittin, peptide', 'melittin, peptide'), ('Rotahaler, device', '60 L/min, volume'), ('LM7-2, peptide', 'α-helix, protein'), ('single mutations, mutation', 'unique activity, activity'), ('wzi29, mutation', 'wzi154, mutation'), ('Sd5, protein', 'Sugar cane, plant'), ('nanofibers, device', 'Nanofibers, device')]


In [27]:
# Bridge analysis of the papers graph: does it have any bridge, and which edges are they
papers_has_bridges = networkx.has_bridges(papers_nxgraph)
papers_bridges = [*networkx.bridges(papers_nxgraph)]

print(
    "Papers-graph:\n"
    f"- has_bridges: {papers_has_bridges}\n"
    f"- number of bridges: {len(papers_bridges)}\n"
    f"- bridges: {papers_bridges}"
)

Papers-graph:
- has_bridges: True
- number of bridges: 25
- bridges: [('26547698', '27129587'), ('26940096', '27358498'), ('26956134', '36251170'), ('27036372', '29196621'), ('27187357', '35112864'), ('27318963', '27624303'), ('27349900', '34388167'), ('28429578', '29905453'), ('29440679', '30061724'), ('29532754', '31181304'), ('29559676', '31336833'), ('29969762', '32894831'), ('30027848', '31054799'), ('30424807', '34033877'), ('30865424', '30917919'), ('31257848', '34171196'), ('31317613', '32499582'), ('33095969', '35987005'), ('33225796', '33899825'), ('33387360', '34064456'), ('33964686', '34058872'), ('34338839', '34918728'), ('34530072', '34952285'), ('35231605', '35806446'), ('35307176', '35444957')]


## Degree centrality analysis

In [None]:
# Degree centrality of every node of the entities graph, tabulated and saved as CSV
entities_degree_centrality = networkx.degree_centrality(entities_nxgraph)
entities_df = pandas.DataFrame(
    data=[*entities_degree_centrality.items()],  # one (node, value) row per node
    columns=["node", "degree_centrality"],
)
entities_df.to_csv("entities/degree_centrality.csv")

print(entities_df)

In [None]:
# Degree centrality of every node of the papers graph, tabulated and saved as CSV
papers_degree_centrality = networkx.degree_centrality(papers_nxgraph)
papers_df = pandas.DataFrame(
    data=[*papers_degree_centrality.items()],  # one (node, value) row per node
    columns=["node", "degree_centrality"],
)
papers_df.to_csv("papers/degree_centrality.csv")

print(papers_df)

In [3]:
# entities-graph cliques analysis
# Only the total is reported, so the cliques are counted as they are generated instead
# of being collected in a list first: the number of cliques can grow exponentially with
# the number of nodes, and holding all of them at once can exhaust memory.
# Counting lazily does not shorten the enumeration itself, so this cell may still run
# for a very long time on a dense graph.
entitites_number_of_cliques = sum(1 for _ in networkx.enumerate_all_cliques(entities_nxgraph))

print(f"Entities-graph:\n- number of cliques: {entitites_number_of_cliques}\n")


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x105b03650>>
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/envs/med-graph/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


In [None]:
# papers-graph cliques analysis
# Only the total is reported, so the cliques are counted as they are generated instead
# of being collected in a list first: the number of cliques can grow exponentially with
# the number of nodes, and holding all of them at once can exhaust memory.
# Counting lazily does not shorten the enumeration itself, so this cell may still run
# for a very long time on a dense graph.
papers_number_of_cliques = sum(1 for _ in networkx.enumerate_all_cliques(papers_nxgraph))

print(f"Papers-graph:\n- number of cliques: {papers_number_of_cliques}\n")

## Chains analysis

In [35]:
# Chain decomposition of both graphs, with the chains ordered from shortest to longest
# so that the first and last entries give the extreme lengths.
entities_chains = sorted(networkx.chain_decomposition(entities_nxgraph), key=len)

papers_chains = sorted(networkx.chain_decomposition(papers_nxgraph), key=len)

print(
    "Entities-graph:\n"
    f"- number of chains: {len(entities_chains)}\n"
    f"- longest chain: {len(entities_chains[-1])} edges\n"
    f"- shortest chain: {len(entities_chains[0])} edges\n"
)
print(
    "Papers-graph:\n"
    f"- number of chains: {len(papers_chains)}\n"
    f"- longest chain: {len(papers_chains[-1])} edges\n"
    f"- shortest chain: {len(papers_chains[0])} edges"
)

Entities-graph:
- number of chains: 126585
- longest chain: 2471 edges
- shortest chain: 1 edges

Papers-graph:
- number of chains: 193032
- longest chain: 25 edges
- shortest chain: 1 edges


## Cycles analysis

In [37]:
# Cycle analysis of both graphs: girth plus a cycle basis ordered by cycle length.
# The cycles come from cycle_basis, so the "number of cycles" and "longest cycle"
# figures printed below describe that basis rather than every cycle in the graph.
entities_girth = networkx.girth(entities_nxgraph)
entities_cycles = sorted(networkx.cycle_basis(entities_nxgraph), key=len)

papers_girth = networkx.girth(papers_nxgraph)
papers_cycles = sorted(networkx.cycle_basis(papers_nxgraph), key=len)

print(
    "Entities-graph:\n"
    f"- number of cycles: {len(entities_cycles)}\n"
    f"- girth: {entities_girth}\n"
    f"- longest cycle: {len(entities_cycles[-1])} nodes\n"
)
print(
    "Papers-graph:\n"
    f"- number of cycles: {len(papers_cycles)}\n"
    f"- girth: {papers_girth}\n"
    f"- longest cycle: {len(papers_cycles[-1])} nodes"
)

Entities-graph:
- number of cycles: 126585
- girth: 3
- longest cycle: 254 nodes

Papers-graph:
- number of cycles: 193032
- girth: 3
- longest cycle: 83 nodes


## PageRank analysis

In [None]:
# PageRank of every node of the entities graph, tabulated and saved as CSV
# NOTE(review): networkx.pagerank reads the 'weight' edge attribute by default, so the
# ranking is presumably weighted here — confirm that this is intended.
entities_pagerank = networkx.pagerank(entities_nxgraph)
entities_df = pandas.DataFrame(
    data=[*entities_pagerank.items()],  # one (node, value) row per node
    columns=["node", "pagerank value"],
)
entities_df.to_csv("entities/pagerank.csv")

print(entities_df)

In [None]:
# PageRank of every node of the papers graph, tabulated and saved as CSV
papers_pagerank = networkx.pagerank(papers_nxgraph)
papers_df = pandas.DataFrame(
    data=[*papers_pagerank.items()],  # one (node, value) row per node
    columns=["node", "pagerank value"],
)
papers_df.to_csv("papers/pagerank.csv")

print(papers_df)