In [120]:
import pandas as pd
from web3 import Web3
from web3.middleware import geth_poa_middleware

In [121]:
# Selects which top-level entry of `data` is used ('eth' or 'bsc').
chain = 'eth'
# Selects which endpoint of data[chain] is used to build the client ('rpc' or 'http').
query_mode = 'http'
# NOTE(review): not referenced in the visible cells — presumably a worker/process count; confirm.
core_number = 25
# Divisor applied to the highest block number when the chunk log row count is checked.
chunk_size = 1000
# Placeholder; rebound to the result of get_w3() in a later cell.
w3 = None
# Per-chain settings: node endpoints ('rpc', 'http') and the CSV files read by the notebook.
data = {
    'eth': {
        'rpc': '/eth/eth_node/node/geth.ipc',
        # NOTE(review): this URL embeds what looks like an Infura project ID; consider
        # loading it from the environment instead of committing it with the notebook.
        'http': 'https://mainnet.infura.io/v3/e0a4e987f3ff4f4fa9aa21bb08f09ef5',
        'datafile': 'data/one_day_exit_scam_eth.csv',
        'edgefile': 'data/transactions_eth.csv',
        'nodefile': 'data/addresses_eth.csv',
        'logfile': 'data/chunk_log_eth.csv'
    },
    'bsc': {
        # Empty: no IPC endpoint is configured for this chain.
        'rpc': '',
        'http': 'https://bsc-dataseed.binance.org/',
        'datafile': 'data/one_day_exit_scam_bsc.csv',
        'edgefile': 'data/transactions_bsc.csv',
        'nodefile': 'data/addresses_bsc.csv',
        'logfile': 'data/chunk_log_bsc.csv'
    }
}

In [122]:
def get_w3():
    """Build the Web3 client for the globally configured chain and query mode.

    Reads the module-level ``chain``, ``query_mode`` and ``data`` settings and
    constructs only the client that was asked for.

    Returns:
        A ``Web3`` instance, or ``None`` for the 'bsc'/'rpc' combination
        (no IPC client is defined for that chain).

    Raises:
        KeyError: if ``chain`` or ``query_mode`` is not a supported key.
    """
    endpoint = data[chain][query_mode]

    if chain == 'eth':
        if query_mode == 'rpc':
            return Web3(Web3.IPCProvider(endpoint))
        if query_mode == 'http':
            return Web3(Web3.HTTPProvider(endpoint))
    elif chain == 'bsc':
        if query_mode == 'rpc':
            return None
        if query_mode == 'http':
            client = Web3(Web3.HTTPProvider(endpoint))
            # inject() mutates the middleware stack and returns None, so the
            # client itself must be returned, not the result of inject().
            client.middleware_onion.inject(geth_poa_middleware, layer=0)
            return client

    # The key exists in `data` but is not an endpoint kind ('rpc'/'http').
    raise KeyError(query_mode)

In [123]:
# Build the client for the configured chain/query_mode and rebind the module-level `w3`.
w3 = get_w3()

In [124]:
def filter(node_df, edge_df, black_df):
    """Remove blacklisted addresses and self-loops from the node and edge tables.

    Args:
        node_df: frame with an 'address' column.
        edge_df: frame with 'from' and 'to' columns.
        black_df: frame whose 'Address' column lists the addresses to exclude.

    Returns:
        A ``(nodes, edges)`` tuple: the nodes whose address is not blacklisted,
        and the edges that neither touch a blacklisted address nor connect an
        address to itself. Original row order and index labels are kept.
    """
    banned = black_df['Address']

    kept_nodes = node_df.loc[~node_df['address'].isin(banned)]

    sender = edge_df['from']
    receiver = edge_df['to']
    touches_banned = sender.isin(banned) | receiver.isin(banned)
    is_self_loop = sender == receiver
    kept_edges = edge_df.loc[~(touches_banned | is_self_loop)]

    return kept_nodes, kept_edges

In [125]:
# Load the four CSV inputs configured for the selected chain, in this order:
# main dataset, chunk log, transactions (edges), addresses (nodes).
data_df, chunk_df, edge_df, node_df = (
    pd.read_csv(data[chain][file_key])
    for file_key in ('datafile', 'logfile', 'edgefile', 'nodefile')
)

In [126]:
# Upper-case every address column so later comparisons between the node,
# edge and blacklist tables do not depend on letter case.
node_df = node_df.assign(address=node_df['address'].str.upper())
# 'from' is a Python keyword, so the column names are passed through a dict.
edge_df = edge_df.assign(**{
    'from': edge_df['from'].str.upper(),
    'to': edge_df['to'].str.upper(),
})

In [127]:
# CSV files whose 'Address' column lists the addresses to exclude from the graph.
blackList = [
    'data/collectibles_info_account.csv',
    'data/dex_info_account.csv',
    'data/exchanges_info.csv',
    'data/gaming_account_info.csv',
    'data/scatterplot_account_info.csv'
]
# Keep only the 'Address' column of each file and concatenate once, instead of
# re-copying a growing frame on every loop iteration.
black_frames = [pd.read_csv(file)[['Address']] for file in blackList]
black_df = (
    pd.concat(black_frames, ignore_index=True)
    if black_frames
    # pd.concat raises on an empty list; fall back to an empty blacklist.
    else pd.DataFrame([], columns=['Address'])
)

In [128]:
# Upper-case the blacklist addresses, matching the upper-casing applied to the
# node and edge address columns, so the isin() lookups in filter() compare like with like.
black_df['Address'] = black_df['Address'].str.upper()

In [129]:
# Drop blacklisted addresses, edges touching them, and self-loops. This rebinds
# node_df and edge_df, so the unfiltered frames are no longer reachable by these names.
node_df, edge_df = filter(node_df, edge_df, black_df)

In [130]:
'''
level-0 addresses
level-1 addresses
transactions from level 0 to level 0
transactions from level 1 to level 0
self transactions
self transactions with zero value
for level-0 addresses, average number of transactions received and sent
for level-1 addresses, average number of transactions sent
'''
# The notes above are a checklist only; this cell performs no computation.
pass

In [131]:
'check whether the logfile contains all the chunks'
import math
# Compares the number of rows in the chunk log with the highest value of
# 'block_number_remove' divided by chunk_size, rounded up.
# NOTE(review): assumes the log holds exactly one row per chunk — confirm.
chunk_df.shape[0] == math.ceil(data_df["block_number_remove"].max() / chunk_size)

True

In [132]:
'graph generation'
import networkx as nx
# A MultiDiGraph keeps edge direction and parallel edges, so every row of
# edge_df becomes its own edge.
graph = nx.MultiDiGraph()
# Bulk insertion avoids building a pandas Series per row as iterrows() does.
graph.add_nodes_from(node_df['address'])
# Only the endpoints are stored; the other edge_df columns are not attached as
# edge attributes. NOTE(review): confirm this is intended.
graph.add_edges_from(zip(edge_df['from'], edge_df['to']))

In [133]:
'check the completeness of addresses and transactions'
# Each line prints True when the graph holds exactly one node per node_df row
# and one edge per edge_df row (parallel edges are counted individually).
print(graph.number_of_nodes() == len(node_df))
print(graph.number_of_edges() == len(edge_df))

True
True


In [134]:
'connected components and strongly connected components'
# to_undirected() copies the whole graph, so build the undirected version once.
undirected = graph.to_undirected()
# networkx returns one-shot generators; materialise them so the components can
# be both counted and measured without recomputing them.
cc = list(nx.connected_components(undirected))
scc = list(nx.strongly_connected_components(graph))
ncc = len(cc)
nscc = len(scc)
print('number of connected components: {} components'.format(ncc))
print('number of strongly connected components: {} components'.format(nscc))
# default=0 keeps the cell from raising when the graph has no nodes.
maxcc = max(map(len, cc), default=0)
maxscc = max(map(len, scc), default=0)
print('largest connected component: {} nodes'.format(maxcc))
print('largest strongly connected component: {} nodes'.format(maxscc))

number of connected components: 7929 components
number of strongly connected components: 31893 components
largest connected component: 6219 nodes
largest strongly connected component: 42 nodes


In [135]:
# Export the graph as GraphML to 'graph.graphml' (relative to the working directory).
# prettyprint is a boolean flag, so pass True rather than a truthy integer.
nx.write_graphml(graph, 'graph.graphml', prettyprint=True)

In [136]:
# (node, out-degree) pairs for every node, in the graph's node order.
l = list(graph.out_degree())
# Display the pairs ranked from highest to lowest out-degree.
sorted(l, key=lambda pair: pair[1], reverse=True)

[('0X27E9F4748A2EB776BE193A1F7DEC2BB6DAAFE9CF', 110),
 ('0XC9610BE2843F1618EDFEDD0860DC43551C727061', 101),
 ('0X077D360F11D220E4D5D831430C81C26C9BE7C4A4', 100),
 ('0XF60C2EA62EDBFE808163751DD0D8693DCB30019C', 79),
 ('0X47262B32A23B902A5083B3BE5E6A270A71BE83E0', 59),
 ('0X71CD1E9E6F834123CB482ECC456D121B0B2A1E3D', 55),
 ('0X912FD21D7A69678227FE6D08C64222DB41477BA0', 55),
 ('0X3A19F332C684A5B9DD6BA6F2378AF02D5A3D9A06', 53),
 ('0XA94E56EFC384088717BB6EDCCCEC289A72EC2381', 52),
 ('0X13B432914A996B0A48695DF9B2D701EDA45FF264', 51),
 ('0XEC610CEE0AA9B58D35D6BA876957D6C7A8E4E52F', 50),
 ('0XE721F4357508D19354B6E0700153CE21AC589ACC', 50),
 ('0X9E1A9C8202777B251A17BF288A2A0903EF5D9885', 44),
 ('0X05EE546C1A62F90D7ACBFFD6D846C9C54C7CF94C', 41),
 ('0XDD94821E85B3DC1C03E77283D1727FDB856E11AA', 38),
 ('0XE64C85059F9560FD475CC4D3DDE881334917D4EB', 35),
 ('0XBEE254D6AB5DBA9048E7C4750567298C08459CCC', 35),
 ('0X4C38B7C6992647525A7F5A3B64F65422D91650D3', 34),
 ('0XC7BE8A6BDE47F3FA80EBEBA7D889B62B1940A0