In [1]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms.community import girvan_newman, modularity
from pprint import pprint as pp


In [3]:
# Read the cleaned transaction table, then discard its first column
# (presumably an index that was written out with the CSV -- TODO confirm).
df = pd.read_csv("cleaned_data.csv")
df = df.drop(columns=[df.columns[0]])
df.head(5)


Unnamed: 0,from_address,to_address,value,from_address_type,to_address_type
0,0x318072374ffa96e8867138cd05c3be282f85f405,0x11111112542d85b3ef69ae05771c2dccff4faa26,8.0,EOA,Contract
1,0x1e7f8de390b95f960d71fbfc42f3d91021fff7ad,0xb9665650e0de599c2fafbfe98cf160399f8a23ba,14.0,EOA,EOA
2,0x53f2736fae551c998d4e72e519f1acf474264de4,0x4dbd4fc535ac27206064b68ffcf827b0a60bab3f,7.0,EOA,Contract
3,0x318072374ffa96e8867138cd05c3be282f85f405,0x11111112542d85b3ef69ae05771c2dccff4faa26,8.0,EOA,Contract
4,0x9a429a5091fb9e00ec7185fa96494765d98f6d3f,0xe592427a0aece92de3edee1f18e0157c05861564,10.0,EOA,Contract


In [4]:
# Boolean masks for the account type on each end of a transaction row.
from_eoa = df['from_address_type'] == 'EOA'
from_contract = df['from_address_type'] == 'Contract'
to_eoa = df['to_address_type'] == 'EOA'
to_contract = df['to_address_type'] == 'Contract'

# Rows where both ends are EOA.
userSpace = df[from_eoa & to_eoa]
# Rows where both ends are Contract.
contractSpace = df[from_contract & to_contract]
# Rows that cross between the two account types, in either direction.
bipartiteSpace = df[(from_eoa & to_contract) | (from_contract & to_eoa)]


In [5]:
# Keep only the columns needed to build an edge list from the EOA-to-EOA rows.
edge_columns = ['from_address', 'to_address', 'value']
userSpace = userSpace.loc[:, edge_columns]
userSpace.head()


Unnamed: 0,from_address,to_address,value
1,0x1e7f8de390b95f960d71fbfc42f3d91021fff7ad,0xb9665650e0de599c2fafbfe98cf160399f8a23ba,14.0
5,0x61e29379b06491ea5a0be90fae954c56c91fd9c4,0xfcadf00f3c5560fa0f1015ef5a8f20c17e6a5392,10.01
7,0xa12431d0b9db640034b0cdfceef9cce161e62be4,0x59a5208b32e627891c389ebafc644145224006e8,120.0
11,0x59a5208b32e627891c389ebafc644145224006e8,0xbdbf7f7135218fc78bea7e0ebbd6769a31af2976,100.0
13,0x5d478b5ea3ca7bfc287ada34b9a1eb6ac18cf9c1,0x92666ffd1df94f17707ddb94afc3bbadd3aa6def,37.5


In [6]:
# Build the EOA-to-EOA graph: one node per address, one edge per
# (from_address, to_address) pair, with the row's `value` stored on the edge.
# NOTE(review): nx.Graph is undirected and holds a single edge per node pair,
# so transfer direction and repeated transfers are not represented -- confirm
# that this is intended for the analysis.
G = nx.from_pandas_edgelist(
    userSpace,
    'from_address',          # first element of the dyad
    'to_address',            # second element of the dyad
    edge_attr='value',
    create_using=nx.Graph,
)


In [7]:
# Show the graph size. nx.info() was deprecated and then removed in
# NetworkX 3.0; str(G) produces the same "Graph with N nodes and M edges"
# summary and works across versions.
pp(str(G))


'Graph with 7445 nodes and 8335 edges'


In [8]:
# Force-directed node positions for drawing G. spring_layout starts from a
# random configuration, so the seed is fixed to make the layout reproducible
# across kernel restarts.
pos = nx.spring_layout(G, seed=42)


In [9]:
# Iterator over Girvan-Newman partitions of G; it is consumed lazily by the
# scoring cell further down (via itertools.islice), so the expensive work
# happens there rather than on this line.
# most_valuable_edge=None keeps the library's default edge-selection rule.
solutions = girvan_newman(G, most_valuable_edge=None)


In [10]:
# number of alternative partitioning solutions to consider
k = 10

# Re-create the partition iterator inside this cell. The iterator is stateful:
# if the cell is interrupted or re-run while sharing an iterator created
# elsewhere, it would silently resume mid-stream and score a different set of
# partitions. Starting fresh makes the cell idempotent.
solutions = girvan_newman(G)

# register modularity scores, keyed by the number of communities in the partition
modularity_scores = dict()

# NOTE(review): a recorded run of this cell ended in KeyboardInterrupt, so it is
# presumably very slow on the full graph -- consider lowering k or running on a
# smaller subgraph (e.g. the largest connected component) if it does not finish.
for communities in itertools.islice(solutions, k):
    # modularity() takes the partition as a list of node collections; the
    # per-community sort is unnecessary for scoring, so it is omitted.
    partition = list(communities)
    modularity_scores[len(partition)] = modularity(G, partition)


KeyboardInterrupt: 

In [None]:
# plot modularity score against the number of communities in each partition
# The x positions get their own name instead of `pos`, so the node-layout
# dict stored in `pos` by the spring_layout cell is not overwritten.
community_counts = list(modularity_scores.keys())
values = list(modularity_scores.values())

fig, ax = plt.subplots()
ax.stem(community_counts, values)
ax.set_xticks(community_counts)
ax.set_xlabel(r'Number of communities detected')
ax.set_ylabel(r'Modularity score')
ax.set_title('Modularity of Girvan-Newman partitions')
plt.show()
