In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.pipeline import make_pipeline, FeatureUnion
from sklearn.preprocessing import PowerTransformer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from IPython.display import Image
import plotly.io as pio

# Default plotly renderer for every fig.show() in this notebook; chosen in the
# expectation that figures will also render on nbviewer (not verified here).
pio.renderers.default = "notebook_connected"
# Request the 'retina' figure format from the inline matplotlib backend.
%config InlineBackend.figure_format = 'retina'

In [22]:
# Transaction detail table and per-actor balances; the first CSV column is the index.
# (Append .sample(2000000) to the first read to work on a subset.)
df = pd.read_csv('../data/spk_trancation_detail_res.csv', index_col=[0])
df_actor = pd.read_csv('../data/actor_balances.csv', index_col=[0])

# Look up the sender's balance and keep only the columns used downstream.
df = (
    df.merge(df_actor, left_on='from_id', right_on='id')[
        ['from_id', 'to_id', 'value', 'balance']
    ]
    .rename(columns={'balance': 'balance_from'})
)

# Same lookup for the receiver.
df = (
    df.merge(df_actor, left_on='to_id', right_on='id')[
        ['from_id', 'to_id', 'value', 'balance_from', 'balance']
    ]
    .rename(columns={'balance': 'balance_to'})
)

# Balances are coerced to numbers (unparseable entries become NaN, then 0) and
# scaled by 1e-18 -- presumably a base-unit to whole-token conversion; confirm.
for balance_col in ('balance_from', 'balance_to'):
    df[balance_col] = (pd.to_numeric(df[balance_col], errors='coerce') * 1e-18).fillna(0)

# The balance table is no longer needed once both lookups are done.
del df_actor

In [25]:
# Miner list; the first CSV column becomes the index. A later cell joins it to
# the transactions on its 'miner_id' column.
miner_df = pd.read_csv('../data/miner_list.csv', index_col=[0])

In [4]:
# Build a directed graph from the transaction table, one edge per
# (from_id, to_id) pair, carrying 'value' as the edge attribute.
edgelist = df[['from_id', 'to_id', 'value']]
G = nx.from_pandas_edgelist(
    edgelist,
    source='from_id',
    target='to_id',
    edge_attr='value',
    create_using=nx.DiGraph,
)
print("Populated network")

# Size of each node's adjacency mapping as reported by G.adjacency().
num_neighs = {node: len(neighbours) for node, neighbours in G.adjacency()}
print('1 of 6 / Calculated number of neighbors')

# Degree-based centralities (total, incoming, outgoing).
centrality = nx.degree_centrality(G)
print('2 of 6 / Calculated centrality')

in_centrality = nx.in_degree_centrality(G)
print('3 of 6 / Calculated in-centrality')

out_centrality = nx.out_degree_centrality(G)
print('4 of 6 / Calculated out-centrality')

# Neighbourhood-level statistics.
avg_neigh_degree = nx.average_neighbor_degree(G)
print('5 of 6 / Calculated neighbor degree')

pagerank = nx.pagerank(G)
print('6 of 6 / PageRank')

Populated network
1 of 6 / Calculated number of neighbors
2 of 6 / Calculated centrality
3 of 6 / Calculated in-centrality
4 of 6 / Calculated out-centrality
5 of 6 / Calculated neighbor degree
6 of 6 / PageRank


In [33]:
# One single-column frame per graph metric, indexed by actor id.
df_num_neighs = pd.DataFrame.from_dict(num_neighs, orient='index', columns=['num_neighs'])
df_centrality = pd.DataFrame.from_dict(centrality, orient='index', columns=['centrality'])
df_in_centrality = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
df_out_centrality = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
df_avg_neigh_degree = pd.DataFrame.from_dict(avg_neigh_degree, orient='index', columns=['avg_neigh_degree'])
df_pagerank = pd.DataFrame.from_dict(pagerank, orient='index', columns=['pagerank'])

# Combine the metrics into one node-level table (index-on-index joins).
dataframe = (
    df_num_neighs
    .merge(df_centrality, left_index=True, right_index=True)
    .merge(df_in_centrality, left_index=True, right_index=True)
    .merge(df_out_centrality, left_index=True, right_index=True)
    .merge(df_avg_neigh_degree, left_index=True, right_index=True)
    .merge(df_pagerank, left_index=True, right_index=True)
)

# Per-actor money flow. The column is selected BEFORE aggregating so that only
# 'value' is summed; aggregating the whole frame would also try to sum the
# string id columns.
value_spent = df.groupby('from_id')['value'].sum().rename('value_spent')
value_received = df.groupby('to_id')['value'].sum().rename('value_received')

# Balance as seen on the receiving side of a transaction.
# NOTE(review): actors that never appear as `to_id` get 0 here even though
# their balance is available in `balance_from` -- confirm this is intended.
balance_to = df.groupby('to_id')['balance_to'].first()

# Left joins keep every graph node; actors with no matching rows get 0.
dataframe = (
    dataframe
    .join(value_spent, how='left')
    .join(value_received, how='left')
    .join(balance_to, how='left')
    .fillna(0)
)

# Later cells select features positionally with `dataframe.columns[0:-1]`, i.e.
# they rely on 'balance_to' being the last column.
assert list(dataframe.columns[-3:]) == ['value_spent', 'value_received', 'balance_to']

In [73]:
# Show every transaction whose receiver is actor 'f099'.
df.loc[df['to_id'] == 'f099']

Unnamed: 0,from_id,to_id,value,balance_from,balance_to
1685069,f01801087,f099,0.000000e+00,0.005235,3.377030e+07
1685070,f02407,f099,9.000000e-13,565.053205,3.377030e+07
1685071,f01541675,f099,0.000000e+00,79.587097,3.377030e+07
1685072,f0104106,f099,1.000000e-05,0.001294,3.377030e+07
1685073,f0702003,f099,0.000000e+00,60.348064,3.377030e+07
...,...,...,...,...,...
1685151,f01483348,f099,0.000000e+00,182.091629,3.377030e+07
1685152,f01098097,f099,0.000000e+00,54.285095,3.377030e+07
1685153,f01447536,f099,0.000000e+00,8.848242,3.377030e+07
1685154,f01045962,f099,0.000000e+00,2.398155,3.377030e+07


In [39]:
# Every column except the last one of `dataframe` is used as a model feature.
feature_cols = dataframe.columns[0:-1]

# Power-transform, standardise, then project onto two principal components.
pipeline = make_pipeline(PowerTransformer(), StandardScaler(), PCA(n_components=2))
pipeline.fit(dataframe[feature_cols])
data_pc = pipeline.transform(dataframe[feature_cols])

# Transactions whose receiver appears in the miner list.
df_miner = df.merge(miner_df, left_on='to_id', right_on='miner_id')

In [68]:
# Reproducible random sample of actors plus two actors that must always be
# present in the plot. DataFrame.append was removed in pandas 2.0, so the
# frames are combined with pd.concat instead.
dfc = pd.concat([
    dataframe.sample(3000, random_state=42),
    dataframe.loc[['f099', 'f036150']],
])
# If the random sample already contains one of the two actors it would
# otherwise be plotted twice; keep the first occurrence only.
dfc = dfc[~dfc.index.duplicated(keep='first')]

# Project the sampled actors with the already-fitted pipeline (all columns but the last).
dx = pipeline.transform(dfc[dfc.columns[0:-1]])

# reset_index() exposes the actor id as the 'index' column. The original inner
# merge against the full id list kept exactly these rows in this order, so it
# has been dropped.
df_out = dfc.reset_index()
df_out['PC1'] = dx[:, 0]
df_out['PC2'] = dx[:, 1]

# Display helpers. Column ORDER matters: the plotting cell picks hover columns by position.
df_out['value_spent_rd'] = np.round(df_out['value_spent'], 0)
df_out['value_received_rd'] = np.round(df_out['value_received'], 0)
df_out['log1p_spent'] = np.log1p(df_out['value_spent'])
df_out['log1p_received'] = np.log1p(df_out['value_received'])
df_out['log1p_balance'] = np.log1p(df_out['balance_to'])

# Quartile codes 0..3 mapped to labels; codes 2 and 3 both read 'High'.
# NOTE(review): with duplicates='drop' fewer than four bins may exist, in which
# case the codes shift downwards -- confirm the labels are still meaningful.
quartile_labels = {3: 'High', 2: 'High', 1: 'Medium', 0: 'Low'}
df_out['in_centrality_quartile'] = pd.qcut(
    df_out['in_centrality'], 4, labels=False, duplicates='drop'
).map(quartile_labels)
df_out['out_centrality_quartile'] = pd.qcut(
    df_out['out_centrality'], 4, labels=False, duplicates='drop'
).map(quartile_labels)
df_out['balance_rd'] = np.round(df_out['balance_to'], 0)

In [150]:
def _marker_style(color, size):
    """Marker settings shared by the initial trace and both restyle buttons."""
    return dict(
        line=dict(width=0),
        color=color,
        size=size,
        opacity=0.5,
    )


# Hover columns, picked by position in df_out; the hover template below
# indexes them as customdata[0] .. customdata[6].
hover_data = df_out[df_out.columns[[0, 1, 12, 13, 19, 17, 18]]]

hover_template = ''.join([
    '<b>Actor ID</b> %{customdata[0]}<br>',
    '<b>Neighbors</b> %{customdata[1]}<br>',
    '<b>Spent</b> %{customdata[2]}<br>',
    '<b>Received</b> %{customdata[3]}<br>',
    '<b>Balance</b> %{customdata[4]}<br>',
    '<b>In-centrality</b> %{customdata[5]}<br>',
    '<b>Out-centrality</b> %{customdata[6]}<br>',
    '<extra></extra>',
])

# Single scatter trace: initially coloured by received value, sized by balance.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_out['PC1'],
        y=df_out['PC2'],
        mode='markers',
        showlegend=False,
        customdata=hover_data,
        marker=_marker_style(
            color=df_out['log1p_received'],
            size=2*(df_out['log1p_balance']+1),
        ),
    )
)
fig.update_traces(hovertemplate=hover_template, selector=dict(type='scatter'))

# Two buttons that replace the whole marker dict of trace 0.
# "Size by balance" also switches the colour to the balance.
restyle_buttons = [
    dict(
        label="Size by balance",
        method="restyle",
        args=[
            "marker",
            _marker_style(
                color=df_out['log1p_balance'],
                size=2*(df_out['log1p_balance']+1),
            ),
            [0],
        ],
    ),
    dict(
        label="Size by spending",
        method="restyle",
        args=[
            "marker",
            _marker_style(
                color=df_out['log1p_received'],
                size=(df_out['log1p_spent']+1),
            ),
            [0],
        ],
    ),
]

fig.update_layout(
    title_text='Transaction network in PCA space',
    xaxis_title='Principal component 1',
    yaxis_title='Principal component 2',
    updatemenus=[dict(type="buttons", buttons=restyle_buttons)],
)

# Region labels placed at fixed coordinates in PCA space.
region_labels = [
    (5, 7.5, "Bridge"),
    (13, 3, "Hub"),
    (-2, -1, "Fringe"),
    (8, -2.5, "Outpost"),
]
for label_x, label_y, label_text in region_labels:
    fig.add_annotation(x=label_x, y=label_y, ax=1, ay=1, text=label_text, font=dict(size=14))

# Transparent paper background, then render inline and export a standalone page.
fig.update_layout(paper_bgcolor='rgba(0, 0, 0, 0)')
fig.show()


fig.write_html("network.html")

In [148]:
# --- Configuration for this figure -----------------------------------------
BACKGROUND_SAMPLE_SIZE = 5000   # random actors drawn as the grey background
MAX_MINERS_SHOWN = 200          # cap on distinct miners drawn
SAMPLE_SEED = 42                # fixed so the background is reproducible

# Quartile code -> label; codes 2 and 3 both read 'High'.
QUARTILE_LABELS = {3: 'High', 2: 'High', 1: 'Medium', 0: 'Low'}

# Columns exposed to the hover template as customdata[0] .. customdata[6].
HOVER_COLUMNS = [
    'index', 'num_neighs', 'value_spent_rd', 'value_received_rd',
    'balance_rd', 'in_centrality_quartile', 'out_centrality_quartile',
]


def build_plot_frame(nodes):
    """Project rows of `dataframe` into PCA space and add display columns.

    Parameters
    ----------
    nodes : pandas.DataFrame
        A non-empty selection of rows from `dataframe` (same columns, actor id
        as index).

    Returns
    -------
    pandas.DataFrame
        `nodes` with the actor id moved to an 'index' column, plus 'PC1'/'PC2'
        from the fitted `pipeline`, rounded and log1p-transformed value and
        balance columns, and centrality quartile labels.

    Notes
    -----
    NOTE(review): the quartiles are computed on the rows passed in, so labels
    are relative to that group rather than to the whole network, and a group
    with a single row may get NaN. Confirm whether network-wide quartiles were
    intended.
    """
    # All columns but the last are the features the pipeline was fitted on.
    projected = pipeline.transform(nodes[nodes.columns[0:-1]])
    out = nodes.reset_index()
    out['PC1'] = projected[:, 0]
    out['PC2'] = projected[:, 1]
    out['value_spent_rd'] = np.round(out['value_spent'], 0)
    out['value_received_rd'] = np.round(out['value_received'], 0)
    out['log1p_spent'] = np.log1p(out['value_spent'])
    out['log1p_received'] = np.log1p(out['value_received'])
    out['log1p_balance'] = np.log1p(out['balance_to'])
    for source_col in ('in_centrality', 'out_centrality'):
        out[source_col + '_quartile'] = pd.qcut(
            out[source_col], 4, labels=False, duplicates='drop'
        ).map(QUARTILE_LABELS)
    out['balance_rd'] = np.round(out['balance_to'], 0)
    return out


def add_actor_trace(figure, plot_df, name, color, hover_type=None):
    """Add one marker trace for a group of actors, sized by log1p(balance).

    `name` is the legend entry; `hover_type` is the text shown after "Type" in
    the hover box and defaults to `name`.
    """
    figure.add_trace(
        go.Scatter(
            x=plot_df['PC1'],
            y=plot_df['PC2'],
            mode='markers',
            name=name,
            meta=[name if hover_type is None else hover_type],
            customdata=plot_df[HOVER_COLUMNS],
            marker=dict(
                line=dict(width=0),
                color=color,
                size=plot_df['log1p_balance'],
            ),
        )
    )


# --- Actor groups -----------------------------------------------------------
blackhole = ['f099']
reserve = ['f028890', 'f027464', 'f027875', 'f028944', 'f028628',
           'f028096', 'f026882', 'f027257', 'f028437', 'f028366']
exchange_verified = ['f036150',  # bitfinex
                     'f084877', 'f0227189', 'f0227195',  # coinbase
                     'f034689',  # gate
                     'f033259',  # huobi
                     'f033544',  # kraken
                     'f042593',  # kucoin
                     'f047684',  # oke
                     'f0412624', 'f039682', 'f023933', 'f033348', 'f0130039', 'f099824'
                     ]
filecoin_foundation = ['f01259647']
# First 25 actors with balance_to > 1000 whose id contains 'f022'.
protocol_labs = [
    str(node_id)
    for node_id in dataframe[dataframe.balance_to > 1000].index.tolist()
    if 'f022' in str(node_id)
][0:25]

# df_miner holds one row per transaction, so its to_id column repeats each
# miner many times. Take distinct ids (first-seen order) before capping, so
# the trace shows up to MAX_MINERS_SHOWN different miners and only those rows
# are transformed.
miner_ids = pd.unique(df_miner['to_id'])[:MAX_MINERS_SHOWN]

# --- Figure -----------------------------------------------------------------
fig = go.Figure()

# Background: a seeded random sample of all actors, drawn semi-transparent.
df_out = build_plot_frame(dataframe.sample(BACKGROUND_SAMPLE_SIZE, random_state=SAMPLE_SEED))
add_actor_trace(fig, df_out, 'Original', 'rgba(0,0,0,0.5)', hover_type='Normal')

# Labelled groups, drawn on top in this order.
actor_groups = [
    ('Miners', miner_ids, 'limegreen'),
    ('Black hole', blackhole, 'black'),
    ('SAFT/Remainder', reserve, '#FF9005'),
    ('Exchange', exchange_verified, 'crimson'),
    ('Filecoin Foundation', filecoin_foundation, 'yellow'),
    ('Protocol Labs', protocol_labs, 'mediumblue'),
]
for group_name, group_ids, group_color in actor_groups:
    if len(group_ids) == 0:
        # Nothing to project for an empty group; skip it.
        continue
    group_frame = build_plot_frame(dataframe.loc[group_ids])
    if group_name == 'Miners':
        dfm_out = group_frame   # kept under its original name
    add_actor_trace(fig, group_frame, group_name, group_color)

fig.update_traces(hovertemplate='<b>Actor ID</b> %{customdata[0]}<br>' +
                                '<b>Type</b> %{meta[0]}<br>' +
                                '<b>Neighbors</b> %{customdata[1]}<br>' +
                                '<b>Spent</b> %{customdata[2]}<br>' +
                                '<b>Received</b> %{customdata[3]}<br>' +
                                '<b>Balance</b> %{customdata[4]}<br>' +
                                '<b>In-centrality</b> %{customdata[5]}<br>' +
                                '<b>Out-centrality</b> %{customdata[6]}<br>' +
                                '<extra></extra>',
                  selector=dict(type='scatter'))
fig.update_layout(title_text='Network actors by archetype<br>Sized by account balance',
                  xaxis_title='Principal component 1',
                  yaxis_title='Principal component 2',
                  paper_bgcolor='rgba(0, 0, 0, 0)')

# Region labels placed at fixed coordinates in PCA space.
for label_x, label_y, label_text in [(5, 7.5, "Bridge"), (13, 3, "Hub"),
                                     (-2, -1, "Fringe"), (8, -2.5, "Outpost")]:
    fig.add_annotation(x=label_x, y=label_y, ax=1, ay=1, text=label_text, font=dict(size=14))

fig.show()
fig.write_html("archetype_all.html")