In [None]:
# import packages
import itertools
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import networkx as nx
from networkx.algorithms.community import girvan_newman, modularity
from pprint import pprint as pp
from operator import itemgetter
from networkx.algorithms import community
import seaborn as sns


In [None]:
# Read the cleaned transaction export, discard its first column,
# then preview the first five rows.
df = pd.read_csv("cleaned_data.csv").pipe(
    lambda frame: frame.drop(columns=frame.columns[0])
)
df.head(n=5)

In [None]:
# understanding the dataset: print pandas' summary of the frame
# (column names, dtypes, non-null counts)
df.info()

In [None]:
# Partition the transactions by the address type on each side
# (filtering logic adapted from Matt's code).
sender_kind = df["from_address_type"]
receiver_kind = df["to_address_type"]

# rows where both sides are 'EOA'
userSpace = df[sender_kind.eq('EOA') & receiver_kind.eq('EOA')]

# rows where both sides are 'Contract'
contractSpace = df[sender_kind.eq('Contract') & receiver_kind.eq('Contract')]

# mixed rows: one side 'EOA' and the other 'Contract', in either direction
eoa_to_contract = sender_kind.eq('EOA') & receiver_kind.eq('Contract')
contract_to_eoa = sender_kind.eq('Contract') & receiver_kind.eq('EOA')
bipartiteSpace = df[eoa_to_contract | contract_to_eoa]

In [None]:
# Keep only the columns used to build the edge list, then preview the result.
edge_columns = ['from_address', 'to_address', 'value']
userSpace = userSpace.loc[:, edge_columns]
userSpace.head()

In [None]:
# creating a graph from the userSpace edge list: from_address -> to_address,
# carrying each row's `value` as an edge attribute.
# NOTE(review): no `create_using` is passed, so networkx chooses its default
# graph class — confirm that edge direction and repeated transfers between the
# same pair of addresses do not need to be preserved for this analysis.
G = nx.from_pandas_edgelist(userSpace,                # the df containing the data
                          source='from_address',        # first element of the dyad
                          target='to_address',        # second element of the dyad
                          edge_attr='value')

In [None]:
# inspect the network
# nx.info() no longer exists in NetworkX 3.0+, and pprint-ing a string shows
# its repr (quotes and escaped newlines). Build the summary from Graph methods
# that are available in every release and print it plainly.
n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()
print(f"{type(G).__name__} with {n_nodes} nodes and {n_edges} edges")

In [None]:
# betweenness centrality - code takes 5-8 minutes to run
# The result is keyed by node and is reused by the attribute, ranking, and
# correlation cells below, so this only needs to run once per kernel.
bet_centrality = nx.betweenness_centrality(G)

In [None]:
# store each node's score on G under the node attribute 'betweenness'
nx.set_node_attributes(G, bet_centrality, 'betweenness')

In [None]:
# Rank (node, score) pairs from highest to lowest betweenness.
sorted_betweenness = sorted(
    bet_centrality.items(), key=lambda pair: pair[1], reverse=True)

print("Top 20 nodes by betweenness centrality:")
for ranked_pair in itertools.islice(sorted_betweenness, 20):
    print(ranked_pair)

In [None]:
# betweenness centrality for edges
# (scored per edge of G rather than per node)
bet_edge_centrality = nx.edge_betweenness_centrality(G)

In [None]:
# bet_edge_centrality is keyed by edge, not by node, so it must be attached
# with set_edge_attributes; set_node_attributes finds no matching node keys
# and silently stores nothing.
nx.set_edge_attributes(G, bet_edge_centrality, 'edge betweenness')

In [None]:
# Rank (edge, score) pairs from highest to lowest edge betweenness.
sorted_edge_betweenness = sorted(
    bet_edge_centrality.items(), key=itemgetter(1), reverse=True)

# the ranked items are edges (node pairs), so the heading says "edges"
print("Top 20 edges by edge betweenness centrality:")
for b in sorted_edge_betweenness[:20]:
    print(b)

In [None]:
# eigenvector centrality - issue with the code
# NOTE(review): the author flagged an unspecified problem with this cell.
# max_iter is set explicitly to 200 — confirm whether the call completes on
# the full graph. The result is reused by the attribute, ranking, and
# correlation cells below.
ev_centrality = nx.eigenvector_centrality(G, max_iter=200)


In [None]:
# store each node's score on G under the node attribute 'eigenvector centrality'
nx.set_node_attributes(G, ev_centrality, 'eigenvector centrality')

In [None]:
# Rank (node, score) pairs from highest to lowest eigenvector centrality.
sorted_ev_centrality = sorted(
    ev_centrality.items(), key=lambda pair: pair[1], reverse=True)

print("Top 20 nodes by eigenvector centrality:")
for ranked_pair in itertools.islice(sorted_ev_centrality, 20):
    print(ranked_pair)

In [None]:
# degree centrality
# The result is reused by the attribute, ranking, and correlation cells below.
deg_centrality = nx.degree_centrality(G)

In [None]:
# store each node's score on G under the node attribute 'degree centrality'
nx.set_node_attributes(G, deg_centrality, 'degree centrality')

In [None]:
# Rank (node, score) pairs from highest to lowest degree centrality.
sorted_deg_centrality = sorted(
    deg_centrality.items(), key=lambda pair: pair[1], reverse=True)

print("Top 20 nodes by degree centrality:")
for ranked_pair in itertools.islice(sorted_deg_centrality, 20):
    print(ranked_pair)

In [None]:
# closeness centrality
# The result is reused by the attribute and ranking cells below.
close_centrality = nx.closeness_centrality(G)

In [None]:
# store each node's score on G under the node attribute 'closeness centrality'
nx.set_node_attributes(G, close_centrality, 'closeness centrality')

In [None]:
# Rank (node, score) pairs from highest to lowest closeness centrality.
sorted_close_centrality = sorted(
    close_centrality.items(), key=lambda pair: pair[1], reverse=True)

print("Top 20 nodes by closeness centrality:")
for ranked_pair in itertools.islice(sorted_close_centrality, 20):
    print(ranked_pair)

In [None]:
# visualize results
## collect the per-node centrality scores in their own dataframe
# (named centrality_df so the transaction frame `df` loaded at the top of the
# notebook is not overwritten, which would break re-running earlier cells)
centrality_df = pd.DataFrame({'degree': deg_centrality, 'ev_centrality': ev_centrality,
                              'betweenness_centrality': bet_centrality})
# scatter plot matrix
sns.pairplot(centrality_df)
plt.show()
# correlation matrix - kept as the cell's last expression so the notebook
# actually renders it (a bare expression in the middle of a cell is evaluated
# and then discarded)
centrality_df.corr()


In [None]:
# layout step preceding the modularity analysis - problems with running due to
# size of the dataset
# Computes a spring layout for G and binds it to `pos`.
pos = nx.spring_layout(G)

In [None]:
# alternative partitioning solutions to consider
k = 20

# Girvan-Newman yields successively finer partitions of G, one per iteration.
# It is created here because no earlier cell defines `solutions`; the generator
# is lazy, so the cost is paid inside the loop below.
# NOTE(review): every step recomputes edge betweenness, so expect this to be
# very slow on a graph of this size.
solutions = girvan_newman(G)

# register modularity scores, keyed by the number of communities in a partition
modularity_scores = dict()

# iterate over the first k solutions
# (the loop variable is deliberately not called `community`, which would shadow
# the networkx `community` module imported at the top of the notebook)
for partition in itertools.islice(solutions, k):
    solution = [sorted(members) for members in partition]
    score = modularity(G, solution)
    modularity_scores[len(solution)] = score


In [None]:
# plot modularity data
fig, ax = plt.subplots()
# x-axis values: number of communities in each partition. Kept under its own
# name so the layout dict `pos` used by the graph-drawing cells is not
# overwritten with a plain list.
community_counts = list(modularity_scores.keys())
values = list(modularity_scores.values())
ax.stem(community_counts, values)
ax.set_xticks(community_counts)
ax.set_xlabel(r'Number of communities detected')
ax.set_ylabel(r'Modularity score')
plt.show()


In [None]:
# graphs of centrality
## create a function called draw
def draw(G, pos, measures, measure_name):
    """Plot G with every node coloured by its score in ``measures``.

    Parameters
    ----------
    G : graph handed to the networkx drawing helpers.
    pos : node positions handed to the drawing helpers.
    measures : dict mapping node -> score; its keys select the nodes that are
        drawn and its values set their colours.
    measure_name : str used as the figure title.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    scored_nodes = measures.keys()
    scores = list(measures.values())

    node_artist = nx.draw_networkx_nodes(
        G, pos,
        nodelist=scored_nodes,
        node_color=scores,
        node_size=30,
        cmap=plt.cm.plasma,
    )
    # symmetric-log colour scale with a linear threshold of 0.01
    colour_scale = mcolors.SymLogNorm(linthresh=0.01, linscale=1, base=10)
    node_artist.set_norm(colour_scale)

    nx.draw_networkx_edges(G, pos)

    plt.title(measure_name)
    plt.colorbar(node_artist)
    plt.axis('off')
    plt.show()

In [None]:
# re-run position
# A fixed seed makes the layout, and every figure drawn from it, reproducible
# across kernel restarts.
pos = nx.spring_layout(G, seed=42)

In [None]:
# degree centrality
# reuse the scores already held in deg_centrality instead of recomputing them
draw(G, pos, deg_centrality, 'Degree Centrality')

In [None]:
# betweenness centrality
# reuse bet_centrality: recomputing it here would cost another 5-8 minutes
# (per the note on the cell that computes it) for an identical result
draw(G, pos, bet_centrality, 'Betweenness Centrality')

In [None]:
# closeness centrality
# reuse the scores already held in close_centrality instead of recomputing them
draw(G, pos, close_centrality, 'Closeness Centrality')