In [2]:
import numpy as np
import pandas as pd
import snap
import matplotlib.pyplot as plt
import powerlaw as pl
import csv

In [3]:
edges = pd.read_csv("Edges_hard250.csv")
nodes = pd.read_csv("Nodes_hard250.csv")
print("Data loaded successfuly!")

Data loaded successfuly!


In [4]:
# Create an empty Vector - SNAP data structure
src = snap.TIntV() 
trg = snap.TIntV()
n = snap.TIntV()

# Add elements
[src.Add(element) for element in edges['Source'].tolist()]
[trg.Add(element) for element in edges['Target'].tolist()]
[n.Add(element) for element in nodes['Id'].tolist()]

# Print vector size
print ('Source Size -->',src.Len())
print ('Target Size -->',trg.Len())
print ('Sequence Length -->',n.Len())

Source Size --> 767397
Target Size --> 767397
Sequence Length --> 5784


# Graph Creation

In [5]:
# Create a Directed Graph
G1 = snap.TNGraph.New()

# Add Nodes before 
for i in range(0, n.Len()):
    G1.AddNode(n[i])

# adding edges
for i in range(0, src.Len()):
    G1.AddEdge(src[i],trg[i])

# Set node labels
NIdName = snap.TIntStrH() 
for node in n:
    NIdName[node] = str(node)

# Directed network
N1 = snap.TNEANet.New()
print("Directed Graph creation successfuly!")

Directed Graph creation successfuly!


# Centrality Metrics

## Page Rank

In [16]:
PRankH = snap.TIntFltH()
snap.GetPageRank(G1, PRankH)

slist = sorted(PRankH, key = lambda key: PRankH[key], reverse = True)
print ("\nTop 20 Users by Page Rank:\n=============\n")
for item in slist[:20]:
    print ("Node ID: %7s   |   Page Rank: %.6f" % (item, PRankH[item]))


Top 20 Users by Page Rank:

Node ID:  768100   |   Page Rank: 0.010409
Node ID:  653154   |   Page Rank: 0.009760
Node ID:  564363   |   Page Rank: 0.006833
Node ID:   25056   |   Page Rank: 0.003375
Node ID:  120858   |   Page Rank: 0.003354
Node ID:   35705   |   Page Rank: 0.003329
Node ID:  880652   |   Page Rank: 0.002209
Node ID:  316195   |   Page Rank: 0.002181
Node ID:   43147   |   Page Rank: 0.001631
Node ID:  485026   |   Page Rank: 0.001613
Node ID:  741235   |   Page Rank: 0.001583
Node ID:   60303   |   Page Rank: 0.001566
Node ID:  499567   |   Page Rank: 0.001554
Node ID:  384702   |   Page Rank: 0.001529
Node ID:    4063   |   Page Rank: 0.001502
Node ID:  239168   |   Page Rank: 0.001483
Node ID:  833199   |   Page Rank: 0.001454
Node ID:  901899   |   Page Rank: 0.001453
Node ID:  604376   |   Page Rank: 0.001375
Node ID:  461427   |   Page Rank: 0.001328


# Closeness Centrality

In [17]:
n1=[]
c1=[]
for NI in G1.Nodes():
    CloseCentr = snap.GetClosenessCentr(G1, NI.GetId())
    n1.append(NI.GetId())
    c1.append(CloseCentr)
    
df = pd.DataFrame({"node": n1, "score": c1}) 
final_df = df.sort_values(by=['score'], ascending=False)
print ("\nTop 20 Users by Closeness Centrality:\n=======================\n")
final_df[:20]



Top 20 Users by Closeness Centrality:



Unnamed: 0,node,score
2029,322502,0.689602
1333,206017,0.674953
689,99870,0.666628
197,25840,0.662656
5435,874574,0.642913
5443,875879,0.626748
5669,914808,0.613386
4768,768100,0.613061
4409,703426,0.612931
937,138647,0.606821


## Betweenness Centrality

In [6]:
Nodes2 = snap.TIntFltH()
Edges2 = snap.TIntPrFltH()
snap.GetBetweennessCentr(G1, Nodes2, Edges2, 1.0)



slist = sorted(Nodes2, key = lambda key: Nodes2[key], reverse = True)
print ("\nTop 20 Users by Betweenness Centrality:\n=======================\n")
for item in slist[:20]:
    print ("Node ID: %7s   |   Betweenness Centrality: %.6f" % (item, Nodes2[item]))
    
print("DONE!")



Top 20 Users by Betweenness Centrality:

Node ID:  322502   |   Betweenness Centrality: 693440.172936
Node ID:  206017   |   Betweenness Centrality: 669161.456366
Node ID:   99870   |   Betweenness Centrality: 552886.514665
Node ID:   25840   |   Betweenness Centrality: 537870.750079
Node ID:  875879   |   Betweenness Centrality: 395637.081383
Node ID:  874574   |   Betweenness Centrality: 387664.556688
Node ID:  768100   |   Betweenness Centrality: 270418.491254
Node ID:  703426   |   Betweenness Centrality: 264990.005167
Node ID:  138647   |   Betweenness Centrality: 221579.077079
Node ID:  608395   |   Betweenness Centrality: 213282.682985
Node ID:   46853   |   Betweenness Centrality: 193470.563478
Node ID:  914808   |   Betweenness Centrality: 183623.795740
Node ID:  918034   |   Betweenness Centrality: 180335.473513
Node ID:  257155   |   Betweenness Centrality: 172372.164252
Node ID:  714613   |   Betweenness Centrality: 165728.972274
Node ID:  359317   |   Betweenness Centrali

In [9]:
# Create a Directed Graph
G11 = snap.TUNGraph.New()

# Add Nodes before 
for i in range(0, n.Len()):
    G11.AddNode(n[i])

# adding edges
for i in range(0, src.Len()):
    G11.AddEdge(src[i],trg[i])

# Set node labels
NIdName = snap.TIntStrH() 
for node in n:
    NIdName[node] = str(node)

# Directed network
N1 = snap.TNEANet.New()
print("Unirected Graph creation successfuly!")

Unirected Graph creation successfuly!


In [10]:
NIdEigenH = snap.TIntFltH()
snap.GetEigenVectorCentr(G11, NIdEigenH)

slist = sorted(NIdEigenH, key = lambda key: NIdEigenH[key], reverse = True)

print ("\nTop 20 Users by Eigenvector Centrality:\n=============\n")
for item in slist[:5]:
    print ("Node ID: %7s   |   Page Rank: %.6f" % (item, NIdEigenH[item]))



Top 20 Users by Eigenvector Centrality:

Node ID:  206017   |   Page Rank: 0.102766
Node ID:   25840   |   Page Rank: 0.101722
Node ID:  322502   |   Page Rank: 0.101318
Node ID:   99870   |   Page Rank: 0.098089
Node ID:  874574   |   Page Rank: 0.091298
