# Network Analysis

## Setup

In [1]:
!pip install igraph

Collecting igraph
  Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting texttable>=1.6.2 (from igraph)
  Downloading texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)
Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 32.7 MB/s eta 0:00:00
Downloading texttable-1.7.0-py2.py3-none-any.whl (10 kB)
Installing collected packages: texttable, igraph
Successfully installed igraph-0.11.8 texttable-1.7.0


In [3]:
import networkx as nx
import numpy    as np
import pandas   as pd
import igraph   as ig

## Metrics Calculations

In [18]:
# Load the directed, weighted graph from the comma-separated edge list.
# Each row is read as "source,target,weight": node ids are parsed as int and
# the third field is stored as the float edge attribute "weight".
# read_edgelist builds and returns the graph itself, so no pre-created
# DiGraph needs to be assigned to G beforehand.
G = nx.read_edgelist(
    "network.csv",
    delimiter=",",
    nodetype=int,
    data=(("weight", float),),
    create_using=nx.DiGraph(),
)

In [19]:
# Basic size of the loaded graph, kept in variables for later reuse.
num_nodes, num_edges = G.number_of_nodes(), G.number_of_edges()

# One print call, one line per statistic.
print(
    f"Number of nodes: {num_nodes}",
    f"Number of edges: {num_edges}",
    sep="\n",
)

Number of nodes: 677640
Number of edges: 834421


In [22]:
# Per-node scores, each a mapping {node id -> value}.
# Katz and PageRank use NetworkX defaults; eigenvector centrality is given a
# larger iteration budget (max_iter=1000).
in_degree = dict(G.in_degree())
out_degree = dict(G.out_degree())
katz_centrality = nx.katz_centrality(G)
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
pagerank = nx.pagerank(G)

# Fix the node order once so that every column is aligned row by row.
nodes = list(G.nodes)

# Output column name -> score mapping, in the column order of the final table.
score_tables = {
    'in_degree': in_degree,
    'out_degree': out_degree,
    'katz': katz_centrality,
    'eigenvector': eigenvector_centrality,
    'pagerank': pagerank,
}

data = {'node': nodes}
data.update(
    {column: [scores[node] for node in nodes] for column, scores in score_tables.items()}
)

metrics_df = pd.DataFrame(data)
metrics_df.head()

Unnamed: 0,node,in_degree,out_degree,katz,eigenvector,pagerank
0,39364684,1,206,0.001174,1.926011e-14,1e-06
1,21061006,3,0,0.001398,3.852239e-14,3e-06
2,18513522,1,0,0.001163,1.925995e-14,1e-06
3,38251731,1,0,0.001163,1.925995e-14,1e-06
4,22369434,1,0,0.001163,1.925995e-14,1e-06


## Betweenness with igraph

Betweenness centrality is computed with igraph instead of NetworkX because igraph is much faster on a graph of this size.

In [11]:
# Replacement value for non-positive edge weights.
# NOTE(review): presumably needed because the weighted betweenness computed
# later requires strictly positive weights -- confirm.
MIN_EDGE_WEIGHT = 0.0001

# The CSV has no header row; name the three columns while reading, then floor
# every weight <= 0 to MIN_EDGE_WEIGHT with a vectorised mask instead of a
# per-row Python lambda. Missing weights (NaN) are left untouched, exactly as
# the comparison-based lambda would leave them.
df = (
    pd.read_csv("network.csv", header=None, names=["source", "target", "weight"])
    .assign(weight=lambda edges: edges["weight"].mask(edges["weight"] <= 0, MIN_EDGE_WEIGHT))
)

df.head()

Unnamed: 0,source,target,weight
0,39364684,21061006,0.45
1,39364684,18513522,0.85
2,39364684,38251731,1.15
3,39364684,22369434,1.2
4,39364684,98928660,1.4


In [12]:
# Materialise the edges as plain (source, target, weight) tuples.
edge_columns = ["source", "target", "weight"]
edge_list = list(df[edge_columns].itertuples(index=False, name=None))

# Build the directed igraph graph; the third tuple element is stored as the
# edge attribute "weight".
# NOTE: this rebinds G, so from here on G is the igraph graph and no longer
# the NetworkX DiGraph loaded earlier in the notebook.
G = ig.Graph.TupleList(
    edge_list,
    directed=True,
    edge_attrs=["weight"],
)

In [16]:
# Weighted, directed betweenness for every vertex; the edge weights are
# referenced by the name of the edge attribute that holds them.
bt = G.betweenness(directed=True, weights="weight")

# Pair each score with its vertex name (the original node id) so the result
# can be joined to other per-node tables on "node".
vertex_names = G.vs["name"]
bt_df = pd.DataFrame({"node": vertex_names, "betweenness": bt})
bt_df.head()

Unnamed: 0,node,betweenness
0,39364684,742.0
1,21061006,0.0
2,18513522,0.0
3,38251731,0.0
4,22369434,0.0


## Metrics Dataframe

In [23]:
# Attach the igraph betweenness scores to the NetworkX metrics table.
# Dropping any existing "betweenness" column first keeps this cell idempotent:
# re-running it would otherwise produce betweenness_x / betweenness_y columns.
# validate="one_to_one" raises if a node id is duplicated on either side
# instead of silently multiplying rows.
metrics_df = (
    metrics_df
    .drop(columns="betweenness", errors="ignore")
    .merge(bt_df, on="node", validate="one_to_one")
)
metrics_df.head()

Unnamed: 0,node,in_degree,out_degree,katz,eigenvector,pagerank,betweenness
0,39364684,1,206,0.001174,1.926011e-14,1e-06,742.0
1,21061006,3,0,0.001398,3.852239e-14,3e-06,0.0
2,18513522,1,0,0.001163,1.925995e-14,1e-06,0.0
3,38251731,1,0,0.001163,1.925995e-14,1e-06,0.0
4,22369434,1,0,0.001163,1.925995e-14,1e-06,0.0
