In [3]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
# Load only the three columns the network analysis needs from the
# miRNA -> gene -> disease association table.
filtered_data = pd.read_csv(
    "microRNA_to_disease_no duplicates.csv",
    usecols=["miRNA", "Gene", "diseaseName"],
)
# Display the number of association rows that were read.
filtered_data.shape[0]

51761

In [6]:
# Build a directed graph in which every table row contributes two edges:
# miRNA -> Gene and Gene -> diseaseName.
G = nx.DiGraph()

# Iterate the three columns in lock-step rather than with DataFrame.iterrows(),
# which constructs a full Series object for every row and is needlessly slow
# for tens of thousands of rows. The two edges of each row are still added
# back-to-back, so nodes and edges enter the graph in the same order as before.
for mirna, gene, disease in zip(
    filtered_data["miRNA"],
    filtered_data["Gene"],
    filtered_data["diseaseName"],
):
    G.add_edge(mirna, gene)
    G.add_edge(gene, disease)

# 1) Graph size: number of distinct nodes and of distinct directed edges.
#    Duplicate (source, target) pairs in the table collapse into one edge.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f"Node count: {node_count}")
print(f"Edge count: {edge_count}")

# 2) Bridge edges: edges whose removal would disconnect part of the network.
#    nx.bridges is applied to an undirected copy of the graph, so edge
#    direction is ignored for this step.
miRNA_disease_network_undirected = G.to_undirected()
bridge_edges = [edge for edge in nx.bridges(miRNA_disease_network_undirected)]
bridge_count = len(bridge_edges)
print(f"Number of bridge edges: {bridge_count}")

# 3) Hub: the node with the largest degree. On a DiGraph, G.degree counts
#    incoming plus outgoing edges. Ties go to the node that was added first.
degree_by_node = dict(G.degree)
hub_node = max(degree_by_node, key=degree_by_node.get)
hub_degree = degree_by_node[hub_node]
print(f"The most connected node (hub) is {hub_node} with {hub_degree} connections.")

# 4) Bottleneck: the node with the highest betweenness centrality on the
#    directed graph. This is the most expensive step of the cell.
betweenness_centrality = nx.betweenness_centrality(G)
bottleneck_node = max(
    betweenness_centrality.items(),
    key=lambda node_score: node_score[1],
)[0]
print(f"The most important bottleneck node is {bottleneck_node} with betweenness centrality of {betweenness_centrality[bottleneck_node]}.")

# 5) Most "influential" node: the one with the highest closeness centrality.
#    NOTE(review): per the NetworkX docs, closeness on a directed graph is
#    based on incoming distances; use G.reverse() if outward reach is what
#    "influential" is meant to capture -- confirm the intended direction.
closeness_centrality = nx.closeness_centrality(G)
influential_node = max(
    closeness_centrality,
    key=lambda node: closeness_centrality[node],
)
print(f"The most influential node is {influential_node} with closeness centrality of {closeness_centrality[influential_node]}.")

Node count: 3925
Edge count: 20828


Number of bridge edges: 1116
The most connected node (hub) is ZEB2 with 209 connections.


The most important bottleneck node is PTEN with betweenness centrality of 2.504246500486038e-05.
The most influential node is ZEB2 with closeness centrality of 0.053007135575942915.
