# Network Analysis

## Setup

In [1]:
import networkx as nx
import pandas   as pd
import igraph   as ig
import os

os.chdir(os.path.abspath(os.path.join(os.getcwd(), "..")))

from config import PATH

## Metrics Calculations

In [2]:
G = nx.DiGraph()
G = nx.read_edgelist(PATH + "real_network.csv", delimiter=",", nodetype=int, data=(('weight', float),), create_using=nx.DiGraph())

In [3]:
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")

Number of nodes: 677640
Number of edges: 834432


In [4]:
nx.density(G)

1.8171614879550778e-06

In [5]:
in_degree              = dict(G.in_degree())
out_degree             = dict(G.out_degree())
katz_centrality        = nx.katz_centrality(G)
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
pagerank               = nx.pagerank(G)

data = {
    'node':        list(G.nodes),
    'in_degree':   [in_degree[node] for node in G.nodes],
    'out_degree':  [out_degree[node] for node in G.nodes],
    'katz':        [katz_centrality[node] for node in G.nodes],
    'eigenvector': [eigenvector_centrality[node] for node in G.nodes],
    'pagerank':    [pagerank[node] for node in G.nodes]
}

metrics_df = pd.DataFrame(data)
metrics_df.head()

Unnamed: 0,node,in_degree,out_degree,katz,eigenvector,pagerank
0,39364684,1,206,0.001173,1.925743e-14,1e-06
1,21061006,3,0,0.0014,6.756681e-11,3e-06
2,18513522,1,0,0.001162,1.925727e-14,1e-06
3,38251731,1,0,0.001162,1.925727e-14,1e-06
4,22369434,1,0,0.001162,1.925727e-14,1e-06


# Betweeness with iGraph

Way faster than networkx.

In [6]:
df = pd.read_csv(PATH + "real_network.csv", header = None)
df.columns = ['source','target', 'weight']
df["weight"] = df["weight"].apply(lambda x: 0.0001 if x <= 0 else x)

df.head()

Unnamed: 0,source,target,weight
0,39364684,21061006,0.45
1,39364684,18513522,0.85
2,39364684,38251731,1.15
3,39364684,22369434,1.2
4,39364684,98928660,1.4


In [7]:
edge_list = list(zip(df["source"], df["target"], df["weight"]))
G = ig.Graph.TupleList(edge_list, directed=True, edge_attrs=["weight"])

In [8]:
bt = G.betweenness(directed=True, weights=G.es["weight"])
bt_df = pd.DataFrame({"node": G.vs["name"], "betweenness": bt})
bt_df.head()

Unnamed: 0,node,betweenness
0,39364684,742.0
1,21061006,0.0
2,18513522,0.0
3,38251731,0.0
4,22369434,0.0


## Metrics Dataframe

In [9]:
metrics_df = metrics_df.merge(bt_df, on="node")
metrics_df.head()

Unnamed: 0,node,in_degree,out_degree,katz,eigenvector,pagerank,betweenness
0,39364684,1,206,0.001173,1.925743e-14,1e-06,742.0
1,21061006,3,0,0.0014,6.756681e-11,3e-06,0.0
2,18513522,1,0,0.001162,1.925727e-14,1e-06,0.0
3,38251731,1,0,0.001162,1.925727e-14,1e-06,0.0
4,22369434,1,0,0.001162,1.925727e-14,1e-06,0.0


In [10]:
metrics_df.describe()

Unnamed: 0,node,in_degree,out_degree,katz,eigenvector,pagerank,betweenness
count,677640.0,677640.0,677640.0,677640.0,677640.0,677640.0,677640.0
mean,1.01098e+16,1.231379,1.231379,0.001207,7.284969e-05,1.47571e-06,19053.37
std,8.561247e+16,0.856394,92.302353,0.00014,0.001212603,8.591826e-07,1802217.0
min,57.0,0.0,0.0,0.001045,5.739166e-22,-0.0003712248,0.0
25%,146237800.0,1.0,0.0,0.00115,1.4347919999999998e-20,1.317094e-06,0.0
50%,454382600.0,1.0,0.0,0.001163,9.628723e-15,1.317257e-06,0.0
75%,1714993000.0,1.0,0.0,0.001195,1.327581e-08,1.322488e-06,0.0
max,7.884893e+17,94.0,35379.0,0.013551,0.2493019,0.0003751144,375552100.0


In [11]:
metrics_df.to_csv(PATH + 'graph_metrics.csv')