In [1]:
import pandas as pd
from web3 import Web3
from web3.middleware import geth_poa_middleware

In [2]:
import os

# --- Run configuration -------------------------------------------------------
chain = 'eth'        # key into `data`: 'eth' or 'bsc'
query_mode = 'http'  # which endpoint of the chain to use: 'rpc' (IPC path) or 'http'
core_number = 25
chunk_size = 1000    # divisor used by the chunk-log completeness check
w3 = None            # placeholder; replaced by the client returned from get_w3()

# NOTE(review): this project id is committed in plain text and should be treated
# as leaked. Set INFURA_PROJECT_ID in the environment (and rotate the key); the
# literal is kept only as a fallback so existing runs keep working.
INFURA_PROJECT_ID = os.environ.get(
    'INFURA_PROJECT_ID', 'e0a4e987f3ff4f4fa9aa21bb08f09ef5'
)

# Per-chain endpoints and CSV locations.
data = {
    'eth': {
        'rpc': '/eth/eth_node/node/geth.ipc',
        'http': 'https://mainnet.infura.io/v3/' + INFURA_PROJECT_ID,
        'datafile': 'data/one_day_exit_scam_eth.csv',
        'edgefile': 'data/transactions_eth.csv',
        'nodefile': 'data/addresses_eth.csv',
        'logfile': 'data/chunk_log_eth.csv'
    },
    'bsc': {
        'rpc': '',  # no IPC endpoint configured for bsc
        'http': 'https://bsc-dataseed.binance.org/',
        'datafile': 'data/one_day_exit_scam_bsc.csv',
        'edgefile': 'data/transactions_bsc.csv',
        'nodefile': 'data/addresses_bsc.csv',
        'logfile': 'data/chunk_log_bsc.csv'
    }
}

In [3]:
def get_w3():
    """Build the Web3 client for the globally selected `chain` and `query_mode`.

    Reads the endpoint from ``data[chain][query_mode]`` and creates only the
    provider that is actually requested.

    Returns:
        A ``Web3`` instance, or ``None`` when no endpoint is configured for the
        selection (e.g. the empty bsc 'rpc' entry).

    Raises:
        KeyError: if `chain` is not a key of `data`, or `query_mode` is neither
            'rpc' nor 'http'.
    """
    endpoint = data[chain][query_mode]
    if query_mode == 'rpc':
        provider_factory = Web3.IPCProvider
    elif query_mode == 'http':
        provider_factory = Web3.HTTPProvider
    else:
        # `data[chain]` also holds file paths; only the two endpoint kinds are valid here.
        raise KeyError(query_mode)
    if not endpoint:
        return None

    client = Web3(provider_factory(endpoint))
    if chain == 'bsc':
        # inject() mutates the client and returns None, so it must not be chained
        # onto the constructor expression (doing so yields None instead of a client).
        client.middleware_onion.inject(geth_poa_middleware, layer=0)
    return client

In [4]:
# Replace the `w3` placeholder with the client built by get_w3() for the
# `chain` / `query_mode` chosen in the configuration cell.
w3 = get_w3()

In [5]:
def _read_configured_csv(kind):
    """Read the CSV registered under `kind` for the currently selected chain."""
    return pd.read_csv(data[chain][kind])


# Same four inputs, loaded in the same order as before.
data_df = _read_configured_csv('datafile')
chunk_df = _read_configured_csv('logfile')
edge_df = _read_configured_csv('edgefile')
node_df = _read_configured_csv('nodefile')

In [6]:
# English translation of the notes below (they read like a checklist of
# statistics to compute -- TODO confirm with the author):
#   - level-0 addresses
#   - level-1 addresses
#   - transactions from level 0 to level 0
#   - transactions from level 1 to level 0
#   - self transactions
#   - self transactions with 0 value
#   - per level-0 address, average number of received and sent transactions
#   - per level-1 address, average number of sent transactions
'''
indirizzi lvl 0
indirizzi lvl 1
transazioni da lvl 0 a lvl 0
transazioni da lvl 1 a lvl 0
transazioni self
transazioni self con 0 value
per address lvl 0, numero medio di transazioni ricevute e inviate
per address lvl 1, numero medio di transazioni inviate
'''
pass

In [7]:
'controllare se il logfile contiene tutti i chunk'
import math
# English: check whether the log file contains every chunk.
# The expected count is the highest `block_number_remove` divided by the chunk
# size, rounded up; the cell displays whether the log has exactly that many rows.
highest_block = data_df["block_number_remove"].max()
expected_chunks = math.ceil(highest_block / chunk_size)
len(chunk_df) == expected_chunks

True

In [8]:
'generazione del grafo'
import networkx as nx

# English: graph generation.
# One node per row of node_df (keyed by 'address') and one directed edge per row
# of edge_df ('from' -> 'to'); parallel edges are kept because the graph is a
# MultiDiGraph.
#
# Each row is stored whole under the 'attr' key of its node/edge. Previously the
# row dict was built and then thrown away, so later cells that read
# graph.nodes[...]['attr'] or edge['attr'] had nothing to read.
# Caveat: nested dict attributes cannot be written by the GraphML/GEXF writers;
# export a topology-only copy of the graph when serialising.
graph = nx.MultiDiGraph()
graph.add_nodes_from(
    (record['address'], {'attr': record})
    for record in node_df.to_dict('records')
)
graph.add_edges_from(
    (record['from'], record['to'], {'attr': record})
    for record in edge_df.to_dict('records')
)

In [9]:
'controllare la completezza di address e transazioni'
# English: check that every address and every transaction made it into the graph,
# i.e. node and edge counts match the row counts of the source tables.
nodes_complete = graph.number_of_nodes() == len(node_df)
edges_complete = graph.number_of_edges() == len(edge_df)
print(nodes_complete)
print(edges_complete)

True
True


In [10]:
'componenti connesse e componenti fortemente connesse'
# English: connected components and strongly connected components.
# The weakly connected components of a directed graph are exactly the connected
# components of its undirected version, so the two full undirected copies the
# cell used to build are unnecessary. Each decomposition is computed once and
# kept as a list, so the count and the largest size come from the same pass.
cc = list(nx.weakly_connected_components(graph))
scc = list(nx.strongly_connected_components(graph))
ncc = len(cc)
nscc = len(scc)
print('number of connected components: {} components'.format(ncc))
print('number of strongly connected components: {} components'.format(nscc))
# default=0 keeps the cell from crashing on an empty graph.
maxcc = max(map(len, cc), default=0)
maxscc = max(map(len, scc), default=0)
print('largest connected component: {} nodes'.format(maxcc))
print('largest strongly connected component: {} nodes'.format(maxscc))

number of connected components: 4096 components
number of strongly connected components: 32009 components
largest connected component: 14799 nodes
largest strongly connected component: 42 nodes


In [None]:
'insieme di nodi lvl 0 con 0 indegree'
# English: set of level-0 nodes with zero in-degree.
# Builds, per direction (in/out) and per node level (0/1), a histogram of how
# many nodes have degree 0, exactly 1, or more than 1.


def _degree_bucket(degree):
    """Map a node degree to its histogram key: '0', '1' or '>1'."""
    if degree == 0:
        return '0'
    if degree == 1:
        return '1'
    return '>1'


# NOTE(review): this rebinds `data`, which the earlier cells (and get_w3) use
# for the endpoint / file-path configuration. Cells that read data[chain] cannot
# be re-run after this one. The name is kept because the following cell prints
# and sums `data`.
data = {
    'degree_distribution': {
        direction: {
            'level': {
                level: {'0': 0, '1': 0, '>1': 0}
                for level in (0, 1)
            }
        }
        for direction in ('in_degree', 'out_degree')
    }
}

# Requires every node to carry its source row under the 'attr' key (with a
# 'level' entry of 0 or 1).
for address, attr in graph.nodes(data=True):
    level = attr['attr']['level']
    distribution = data['degree_distribution']
    in_bucket = _degree_bucket(graph.in_degree(address))
    out_bucket = _degree_bucket(graph.out_degree(address))
    distribution['in_degree']['level'][level][in_bucket] += 1
    distribution['out_degree']['level'][level][out_bucket] += 1

In [None]:
import pprint

# Show the nested histogram, then the grand total of all its counters.
pprint.pprint(data)
tot = sum(
    count
    for by_direction in data['degree_distribution'].values()
    for by_level in by_direction.values()
    for buckets in by_level.values()
    for count in buckets.values()
)
print(tot)

In [11]:
# Export the graph topology to GEXF and GraphML.
# Both writers accept only scalar attribute values and raise on anything else
# (e.g. a dict stored on a node or edge), so a structure-only copy is written:
# same nodes, same edges, same multi-edge keys, no attributes.
export_graph = nx.MultiDiGraph()
export_graph.add_nodes_from(graph.nodes)
export_graph.add_edges_from(graph.edges(keys=True))

# `prettyprint` is a boolean flag; the previous value 4 only worked because it is truthy.
nx.write_gexf(export_graph, 'graph.gexf', prettyprint=True)
nx.write_graphml(export_graph, 'graph.graphml', prettyprint=True)

In [None]:
def getNumberOfRugpull(address, data_df, graph, NRugpullPerAddress, _visiting=None):
    """Collect the rows of `data_df` attributable to `address` through the graph.

    The result is the union of:
      * the rows of `data_df` whose 'from' equals `address`, when the node's
        ``['attr']['level']`` is 0;
      * for every outgoing edge to a neighbour, the neighbour's own result
        restricted to rows whose 'block_number_remove' is >= the edge's
        ``['attr']['blockNumber']``.

    Args:
        address: node key in `graph`.
        data_df: DataFrame with at least the columns 'from' and
            'block_number_remove'.
        graph: object supporting ``graph.nodes[n]['attr']['level']`` and
            ``graph[n].items()`` yielding ``(neighbour, {key: edge_data})`` with
            ``edge_data['attr']['blockNumber']`` (e.g. a networkx MultiDiGraph).
        NRugpullPerAddress: memo mapping address -> DataFrame, or None when not
            yet computed. It is only read here; the caller stores the results.
        _visiting: internal; addresses on the current recursion path.

    Returns:
        A new DataFrame (never the memoised object itself), de-duplicated with
        ``keep='last'``. Empty, with the columns of `data_df`, when nothing applies.
    """
    # `is not None` is required: `DataFrame != None` is an element-wise comparison
    # whose truth value raises ValueError.
    cached = NRugpullPerAddress.get(address)
    if cached is not None:
        return cached.copy()

    if _visiting is None:
        _visiting = set()
    _visiting.add(address)
    try:
        frames = []
        if graph.nodes[address]['attr']['level'] == 0:
            frames.append(data_df[data_df['from'] == address])
        for adjNode, edges in graph[address].items():
            # Self-loops and cycles would otherwise recurse forever. Skipping a
            # node already on the path loses nothing: every filter is a
            # ">= blockNumber" cut, so a walk that revisits a node can only yield
            # a subset of the rows found by the same walk without the detour.
            if adjNode in _visiting:
                continue
            # The neighbour's result does not depend on the edge, so compute it
            # once and apply each parallel edge's block filter to it.
            rugpullAdjNode = getNumberOfRugpull(
                adjNode, data_df, graph, NRugpullPerAddress, _visiting
            )
            if rugpullAdjNode.empty:
                continue
            for edge in edges.values():
                blockNumber = edge['attr']['blockNumber']
                frames.append(
                    rugpullAdjNode[rugpullAdjNode['block_number_remove'] >= blockNumber]
                )
    finally:
        _visiting.discard(address)

    # Concatenate once, skipping empty pieces so they cannot distort the dtypes.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return data_df.iloc[0:0].copy()
    return pd.concat(frames).drop_duplicates(keep='last')

In [None]:
# Memo table: one entry per address in node_df, all initialised to None
# ("not computed yet").
NRugpullPerAddress = dict.fromkeys(node_df['address'])

In [None]:
# Fill the memo table address by address, in node_df order; entries stored by
# earlier iterations are visible to the later calls.
for address in node_df['address']:
    NRugpullPerAddress[address] = getNumberOfRugpull(
        address, data_df, graph, NRugpullPerAddress
    )