In [1]:
import pandas as pd
from web3 import Web3
from web3.middleware import geth_poa_middleware

In [2]:
chain = 'eth'
query_mode = 'http'
core_number = 25
chunk_size = 1000
w3 = None
data = {
    'eth': {
        'rpc': '/eth/eth_node/node/geth.ipc',
        'http': 'https://mainnet.infura.io/v3/e0a4e987f3ff4f4fa9aa21bb08f09ef5',
        'datafile': 'data/one_day_exit_scam_eth.csv',
        'edgefile': 'data/transactions_eth.csv',
        'nodefile': 'data/addresses_eth.csv',
        'logfile': 'data/chunk_log_eth.csv'
    },
    'bsc': {
        'rpc': '',
        'http': 'https://bsc-dataseed.binance.org/',
        'datafile': 'data/one_day_exit_scam_bsc.csv',
        'edgefile': 'data/transactions_bsc.csv',
        'nodefile': 'data/addresses_bsc.csv',
        'logfile': 'data/chunk_log_bsc.csv'
    }
}

In [3]:
def get_w3():
    tmp = data[chain][query_mode]
    w3_data = {
        'eth': {
            'rpc': Web3(Web3.IPCProvider(tmp)),
            'http': Web3(Web3.HTTPProvider(tmp))
        },
        'bsc': {
            'rpc': None,
            'http': Web3(Web3.HTTPProvider(tmp)).middleware_onion.inject(geth_poa_middleware, layer=0)
        } 
    }
    return w3_data[chain][query_mode]

In [4]:
w3 = get_w3()

In [5]:
def filter(node_df, edge_df, black_df):
    node_df = node_df[~node_df['address'].isin(black_df['Address'])]
    edge_df = edge_df[~edge_df['from'].isin(black_df['Address'])]
    edge_df = edge_df[~edge_df['to'].isin(black_df['Address'])]

    edge_df = edge_df[edge_df['from'] != edge_df['to']]
    return node_df, edge_df

In [6]:
data_df = pd.read_csv(data[chain]['datafile'])
chunk_df = pd.read_csv(data[chain]['logfile'])
edge_df = pd.read_csv(data[chain]['edgefile'])
node_df = pd.read_csv(data[chain]['nodefile'])

In [7]:
node_df['address'] = node_df['address'].str.upper()
edge_df['from'] = edge_df['from'].str.upper()
edge_df['to'] = edge_df['to'].str.upper()

In [8]:
blackList = [
    'data/collectibles_info_account.csv',
    'data/dex_info_account.csv',
    'data/exchanges_info.csv',
    'data/gaming_account_info.csv',
    'data/scatterplot_account_info.csv'
]
black_df = pd.DataFrame([], columns=['Address'])
for file in blackList:
    black_df = pd.concat([black_df, pd.DataFrame(pd.read_csv(file)['Address'])]).reset_index(drop=True)

In [9]:
black_df['Address'] = black_df['Address'].str.upper()

In [10]:
node_df, edge_df = filter(node_df, edge_df, black_df)

In [11]:
'''
indirizzi lvl 0
indirizzi lvl 1
transazioni da lvl 0 a lvl 0
transazioni da lvl 1 a lvl 0
transazioni self
transazioni self con 0 value
per address lvl 0, numero medio di transazioni ricevute e inviate
per address lvl 1, numero medio di transazioni inviate
'''
pass

In [12]:
'controllare se il logfile contiene tutti i chunk'
import math
chunk_df.shape[0] == math.ceil(data_df["block_number_remove"].max() / chunk_size)

True

In [13]:
'generazione del grafo'
import networkx as nx
graph = nx.MultiDiGraph()
nodeAttr = {}
edgeAttr = {}
for index, node in node_df.iterrows():
    nodeAttr[node['address']] = {k:v for k,v in node.items()}
    graph.add_node(node['address'])
for index, edge in edge_df.iterrows():
    nodeAttr[(edge['from'], edge['to'])] = {k:v for k,v in edge.items()}
    graph.add_edge(edge['from'], edge['to'])
nx.set_node_attributes(graph, nodeAttr)
nx.set_edge_attributes(graph, edgeAttr)

In [14]:
'controllare la completezza di address e transazioni'
print(len(graph.nodes) == node_df.shape[0])
print(len(graph.edges) == edge_df.shape[0])

True
True


In [15]:
'componenti connesse e componenti fortemente connesse'
ncc = nx.number_connected_components(graph.to_undirected())
cc = nx.connected_components(graph.to_undirected())
nscc = nx.number_strongly_connected_components(graph)
scc = nx.strongly_connected_components(graph)
print('number of connected components: {} components'.format(ncc))
print('number of strongly connected components: {} components'.format(nscc))
maxcc = len(max(cc, key=lambda x: len(x)))
maxscc = len(max(scc, key=lambda x: len(x)))
print('largest connected component: {} nodes'.format(maxcc))
print('largest strongly connected component: {} nodes'.format(maxscc))

number of connected components: 7929 components
number of strongly connected components: 31893 components
largest connected component: 6219 nodes
largest strongly connected component: 42 nodes


In [16]:
nx.write_graphml(graph, 'graph.graphml', prettyprint=4)

In [17]:
l = [(node, graph.out_degree(node)) for node in graph.nodes()]
sorted(l, key=lambda x: x[1], reverse=True)[:20]

[('0X27E9F4748A2EB776BE193A1F7DEC2BB6DAAFE9CF', 110),
 ('0XC9610BE2843F1618EDFEDD0860DC43551C727061', 101),
 ('0X077D360F11D220E4D5D831430C81C26C9BE7C4A4', 100),
 ('0XF60C2EA62EDBFE808163751DD0D8693DCB30019C', 79),
 ('0X47262B32A23B902A5083B3BE5E6A270A71BE83E0', 59),
 ('0X71CD1E9E6F834123CB482ECC456D121B0B2A1E3D', 55),
 ('0X912FD21D7A69678227FE6D08C64222DB41477BA0', 55),
 ('0X3A19F332C684A5B9DD6BA6F2378AF02D5A3D9A06', 53),
 ('0XA94E56EFC384088717BB6EDCCCEC289A72EC2381', 52),
 ('0X13B432914A996B0A48695DF9B2D701EDA45FF264', 51),
 ('0XEC610CEE0AA9B58D35D6BA876957D6C7A8E4E52F', 50),
 ('0XE721F4357508D19354B6E0700153CE21AC589ACC', 50),
 ('0X9E1A9C8202777B251A17BF288A2A0903EF5D9885', 44),
 ('0X05EE546C1A62F90D7ACBFFD6D846C9C54C7CF94C', 41),
 ('0XDD94821E85B3DC1C03E77283D1727FDB856E11AA', 38),
 ('0XE64C85059F9560FD475CC4D3DDE881334917D4EB', 35),
 ('0XBEE254D6AB5DBA9048E7C4750567298C08459CCC', 35),
 ('0X4C38B7C6992647525A7F5A3B64F65422D91650D3', 34),
 ('0XC7BE8A6BDE47F3FA80EBEBA7D889B62B1940A0

In [276]:
def getNumberOfRugpull(address, data_df, graph, NRugpullPerAddress):
    if NRugpullPerAddress[address] != None: 
        return NRugpullPerAddress[address].copy()
    df = pd.DataFrame({column : [] for column in data_df.columns})
    if graph.nodes[address]['attr']['level'] == 0:
        df = data_df[data_df['from']==address]
    for adjNode, edges in graph[address].items():
        for edge in edges.values():
            attr = edge['attr']
            if NRugpullPerAddress[adjNode] == None:
                rugpullAdjNode = getNumberOfRugpull(adjNode, data_df, graph, NRugpullPerAddress)
            else:
                rugpullAdjNode = NRugpullPerAddress[adjNode].copy()
            filteredRugpullAdjNode = rugpullAdjNode[rugpullAdjNode['block_number_remove']>=attr['blockNumber']]
            df = pd.concat([df, filteredRugpullAdjNode])
    return df.drop_duplicates(keep='last')

In [277]:
NRugpullPerAddress = {
    row['address'] : None 
    for _, row in node_df.iterrows()
}

In [278]:
no_outdegree = [x for x, y in l if y == 0]

for address in no_outdegree:
    NRugpullPerAddress['address'] = data_df[data_df['from']==address]

while True:
    todo = [k for k, v in NRugpullPerAddress.items() if type(v) == type(None)]
    if todo == []: break
    for address in todo:
        flag = False
        for adjNode, edges in graph[address].items():
            if type(NRugpullPerAddress[adjNode]) == type(None):
                flag = True
                break
        if flag == False:
            df = pd.DataFrame({column : [] for column in data_df.columns})
            for adjNode, edges in graph[address].items():
                for edge in edges.values():
                    attr = edge['attr']
                    rugpullAdjNode = NRugpullPerAddress[adjNode].copy()
                    filteredRugpullAdjNode = rugpullAdjNode[rugpullAdjNode['block_number_remove']>=attr['blockNumber']]
                    df = pd.concat([df, filteredRugpullAdjNode])
            NRugpullPerAddress[address] = df.drop_duplicates(keep='last').copy()
    if todo == [k for k, v in NRugpullPerAddress.items() if type(v) == type(None)]:
        break

KeyError: 'attr'

In [None]:
c=0
for v, k in NRugpullPerAddress.items():
    if type(k) == type(None):
        print(v)
        c+=1
x = [(k, v.shape) for k, v in NRugpullPerAddress.items() if type(v)!=type(None)]

0X0BCC7DDED058B1B144C6F358CCDD07C8F8602C9F
0XA6102FB9EC167CEF3AF3E78E698D9824443CC047
0X6CEB62A1890E11D799B5EE5C78107EF7D1B0A97C
0XD792E9501750517CB5C8453AAF8AF843617FE612
0XE487B407F5D6087CA7725ACA8B5DADF3E7FA59BC
0X7587D38C1862696D94FB21D0CF0AF8BC830660BA
0X30B848A64763CAB0B7413DE389D83BEA76C7D6BC
0X64B44F7A5AFBB04029EDB5C2D4B5A9A4E4A09553
0XD3A777281CBA8C3FCD39A96FD05FECEF1116CBE2
0XDF41F13B6ECAEB8A5CB86CF9875126DA3A35C536
0X6214451F754A3CA0040BB702165E3FB40E73AAB6
0XB210D9AFDE9CDA1B8AF0D260AD8602F5134F6CAC
0X6C433C451CDE0FCAB16404C160DCB1B856E66EFC
0X19C5A39E75AA20AE3FBF91A65249A953CD620CC4
0X1CA82F88DF1110B963BE96A9A5447139E799806D
0X986D1BA194137C9D57C226400ED9F2F90B7E53D7
0XB8ACCB48C199553903FDAC780C149B4DA8F396E9
0X921E973BB2E46D65EFE327A820D2EBD9DB8458A2
0X12BDBD520DE427B84AEC2851FD571A139C706E46
0XA939F596B98EB2663F7917AFAB5EFEECCB93A37A
0X108490B3DA05AC1C0632133EBC7F10135644E87D
0XDA2090BF156949FD3D977F3568DF30B0B3A6B21E
0X0C5E1736CAEFB270B399ECEFB703037243AC0177
0X21E706FE3

In [None]:
sorted(x, key=lambda x: x[1], reverse=False)


[('0X2F24E0809A0DA8E312B33AF3D3B4DC46C986CD8F', (0, 18)),
 ('0X21FB8E54A34F462C3642B5DC3E529206FDF7B324', (0, 18)),
 ('0XC5E18BCA150DC0E56C5F13EBBF2B2CC6D38518D6', (0, 18)),
 ('0XB5DD83253C302EA1603E3B0E539181AEE44BE30B', (0, 18)),
 ('0X7F3F6B62F5BC9122A8B2DF950290DE704FA300D1', (0, 18)),
 ('0XBE2DE04FF1D75680C2CFB531D566E1DEC8085FE7', (0, 18)),
 ('0XC0F615B11754B8572E63E8869D4AB801ED9E28CC', (0, 18)),
 ('0X245614142E7796D9F31D1784A0BD753441F5F441', (0, 18)),
 ('0XB9714A4A25EF8046A467C98C2C1899D9C70D1B70', (0, 18)),
 ('0X84BFC6929944157782B6FC39511F15C4B2D6E8EA', (0, 18)),
 ('0X60EAB3D61C54566AFAADED9BFAA4CE4415FFA2BB', (0, 18)),
 ('0XFCFD9C38828B7C7BD33BF021A2D51BF6F8E42B4A', (0, 18)),
 ('0X5DB5FD095F60AA1E952F5052CF4C1FE69F16FB96', (0, 18)),
 ('0XFD186FC49AC9C92A249C9F2F242577F9469F0558', (0, 18)),
 ('0XE0E29C58715282DAE0BAE0AA7C75CAC626D62B69', (0, 18)),
 ('0X52298BEE3B499AB0F51500101FE02E7A9B56933D', (0, 18)),
 ('0XB8652F6668B4522635ECBF4439178EB99EB34CA3', (0, 18)),
 ('0XD3FC65D14

In [18]:
''

NameError: name 'x' is not defined