In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import igraph
import csv

## read in data

In [None]:
# Load the two per-address result tables (graph-based and transaction-based,
# judging by the file names). The first CSV column becomes each frame's index.
addr_pca_kmeans_graph, addr_pca_kmeans = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/addresses_pca_kmeans_graph.csv',
        '../data/addresses_pca_kmeans.csv',
    )
)


In [None]:
# Key the transaction table by address so it can be joined on the index.
# The guard keeps this cell safe to re-run: once 'address' is the index it is no
# longer a column, and a second set_index('address') would raise KeyError.
if addr_pca_kmeans.index.name != 'address':
    addr_pca_kmeans = addr_pca_kmeans.set_index('address')

In [None]:
# Preview the first five rows of the graph-based table.
addr_pca_kmeans_graph.head(n=5)

In [None]:
# Number of rows (addresses) in the transaction-based table.
addr_pca_kmeans.shape[0]

In [None]:
# Give the graph table's PCA / cluster columns a "_graph" suffix so they do not
# clash with the same-named transaction columns when the tables are joined.
addr_pca_kmeans_graph = addr_pca_kmeans_graph.rename(
    {
        'pca_x_vals': 'pca_x_vals_graph',
        'pca_y_vals': 'pca_y_vals_graph',
        'clusters': 'clusters_graph',
    },
    axis='columns',
)

In [None]:
# Give the transaction table's PCA / cluster columns a "_tx" suffix so they do
# not clash with the same-named graph columns when the tables are joined.
addr_pca_kmeans = addr_pca_kmeans.rename(
    {
        'pca_x_vals': 'pca_x_vals_tx',
        'pca_y_vals': 'pca_y_vals_tx',
        'clusters': 'clusters_tx',
    },
    axis='columns',
)

## join graph clusters and tx clusters

In [None]:
# Index-on-index left join: every row of the transaction table is kept, and rows
# whose index value is missing from the graph table get NaN in the graph columns.
joined_df = addr_pca_kmeans.join(addr_pca_kmeans_graph, how='left')

In [None]:
# Preview the first five rows of the joined table.
joined_df.head(n=5)

In [None]:
# Show every column of the first joined row as a Series.
joined_df.iloc[0, :]

## overlap percentage

In [None]:
# Pairwise overlap between every transaction cluster and every graph cluster,
# reported as intersection-over-union (Jaccard index) in percent.
cluster_ids = range(4)

# Build each cluster's address set once instead of on every inner iteration.
tx_members = {
    cluster_id: set(joined_df[joined_df['clusters_tx'] == cluster_id].index)
    for cluster_id in cluster_ids
}
graph_members = {
    cluster_id: set(joined_df[joined_df['clusters_graph'] == cluster_id].index)
    for cluster_id in cluster_ids
}

for tx_index in cluster_ids:
    tx_set = tx_members[tx_index]
    for graph_index in cluster_ids:
        g_set = graph_members[graph_index]
        overlap = g_set & tx_set
        universe = g_set | tx_set
        # Two empty clusters have an empty union; report 0 % instead of dividing by zero.
        percent_overlap = (len(overlap)/len(universe))*100 if universe else 0.0
        print('Tx cluster:', tx_index, ', Graph cluster:', graph_index, 'Percent Overlap:', "%.2f" % percent_overlap,'%')
    print()
        

## cosine similarity

In [None]:
#use cosine similarity to find the most similar clusters between groups
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity


def _cluster_mean_vector(cluster_col, cluster_id):
    """Return one cluster's mean feature vector as a (1, n_features) array.

    Takes the rows of ``joined_df`` where ``cluster_col == cluster_id``, reads
    the 'mean' row of ``describe()`` and concatenates positions 0-6 and 11-18
    of it. Which columns those positions hold depends on the column order of
    ``joined_df`` -- TODO confirm they are the intended features.
    """
    cluster_means = joined_df[joined_df[cluster_col] == cluster_id].describe().loc['mean']
    return np.append(cluster_means.iloc[0:7], cluster_means.iloc[11:19]).reshape(1, -1)


# Each cluster's vector does not depend on the cluster it is compared with, so
# compute the eight vectors once instead of re-running describe() in the loop.
tx_vectors = [_cluster_mean_vector('clusters_tx', cluster_id) for cluster_id in range(4)]
graph_vectors = [_cluster_mean_vector('clusters_graph', cluster_id) for cluster_id in range(4)]

for tx_index in range(4):
    for graph_index in range(4):
        sim_score = cosine_similarity(graph_vectors[graph_index], tx_vectors[tx_index])

        print('Tx cluster:', tx_index, ', Graph cluster:', graph_index, 'Cosine similarity:', sim_score[0][0].round(4))
    print()
        

In [None]:
# Print the column means of cluster 0 under each clustering (tx first, then graph).
for cluster_col in ('clusters_tx', 'clusters_graph'):
    cluster_zero = joined_df[joined_df[cluster_col] == 0]
    print(cluster_zero.describe().loc['mean'])

# TX cluster comparisons

In [None]:
# Pie chart: share of addresses in each transaction-based cluster.
# value_counts() orders by size, which would hand the colours out by size rank.
# sort_index() orders the counts by cluster id, so colour i always belongs to
# cluster i and matches the 'Tx 0'..'Tx 3' colours used in the bar charts.
plt.figure(figsize=(15,10))
colors = ['orange', 'navy', 'lightblue', 'darkred']
tx_cluster_sizes = joined_df['clusters_tx'].value_counts().sort_index()
plt.pie(tx_cluster_sizes, labels=tx_cluster_sizes.index, colors=colors,autopct='%1.1f%%',shadow=True)
plt.title('Addresses per Cluster - Transaction')
plt.legend()
plt.show()


In [None]:
# Cluster ids in the order value_counts() reports them (largest cluster first).
joined_df['clusters_tx'].value_counts().keys()

In [None]:
# Grouped bar chart: mean of four statistics for each transaction cluster.
cols=['total_eth_sent', 'total_eth_recv', 'max_nonce', 'total_eth_trans']
x_axis = np.arange(len(cols))
plt.rc('font', size=20)
plt.figure(figsize=(15,10))

# (x offset, bar colour) for cluster ids 0..3, in that order.
bar_styles = [(-0.2, 'orange'), (0, 'navy'), (0.2, 'lightblue'), (0.4, 'darkred')]
for cluster_id, (offset, color) in enumerate(bar_styles):
    cluster_means = joined_df[joined_df['clusters_tx'] == cluster_id].describe().loc['mean']
    # Heights are the raw means. A "+1" offset is only needed in front of a log
    # transform, and none is applied here, so adding it would misreport every bar.
    # Positions 3-6 of the means are assumed to be the statistics named in
    # `cols` -- TODO confirm against joined_df's column order.
    heights = cluster_means.iloc[3:7].to_numpy()
    plt.bar(x=x_axis + offset, color=color, label='Tx {}'.format(cluster_id), width=0.2, height=heights)

plt.xlabel('Statistic')
plt.ylabel('Value')
plt.title('Mean Transaction Statistics by Cluster, Tx')
plt.xticks(x_axis, cols)
plt.legend()
plt.show()


In [None]:
# Grouped bar chart: mean send / receive counts for each transaction cluster.
cols=['send_count', 'receive_count']
x_axis = np.arange(len(cols))
plt.rc('font', size=20)
plt.figure(figsize=(15,10))

# (x offset, bar colour) for cluster ids 0..3, in that order.
bar_styles = [(-0.2, 'orange'), (0, 'navy'), (0.2, 'lightblue'), (0.4, 'darkred')]
for cluster_id, (offset, color) in enumerate(bar_styles):
    cluster_means = joined_df[joined_df['clusters_tx'] == cluster_id].describe().loc['mean']
    # Heights are the raw means. A "+1" offset is only needed in front of a log
    # transform, and none is applied here, so adding it would misreport every bar.
    # Positions 0-1 of the means are assumed to be the statistics named in
    # `cols` -- TODO confirm against joined_df's column order.
    heights = cluster_means.iloc[0:2].to_numpy()
    plt.bar(x=x_axis + offset, color=color, label='Tx {}'.format(cluster_id), width=0.2, height=heights)

plt.xlabel('Statistic')
plt.ylabel('Value')
plt.title('Mean Transaction Statistics by Cluster, Tx')
plt.xticks(x_axis, cols)
plt.legend()
plt.show()


In [None]:
# Grouped bar chart: mean gas cost for each transaction cluster.
cols=['avg_gas_cost']
x_axis = np.arange(len(cols))
plt.rc('font', size=20)
plt.figure(figsize=(15,10))

# (x offset, bar colour) for cluster ids 0..3, in that order.
bar_styles = [(-0.2, 'orange'), (0, 'navy'), (0.2, 'lightblue'), (0.4, 'darkred')]
for cluster_id, (offset, color) in enumerate(bar_styles):
    cluster_means = joined_df[joined_df['clusters_tx'] == cluster_id].describe().loc['mean']
    # Heights are the raw means. A "+1" offset is only needed in front of a log
    # transform, and none is applied here, so adding it would misreport every bar.
    # Position 2 of the means is assumed to be the statistic named in
    # `cols` -- TODO confirm against joined_df's column order.
    heights = cluster_means.iloc[2:3].to_numpy()
    plt.bar(x=x_axis + offset, color=color, label='Tx {}'.format(cluster_id), width=0.2, height=heights)

plt.xlabel('Statistic')
plt.ylabel('Value')
plt.title('Mean Transaction Statistics by Cluster, Tx')
plt.xticks(x_axis, cols)
plt.legend()
plt.show()


# Graph Cluster Comparisons

In [None]:
# Pie chart: share of addresses in each graph-based cluster.
# value_counts() orders by size, which would hand the colours out by size rank.
# sort_index() orders the counts by cluster id, so colour i always belongs to
# cluster i and matches the 'Graph 0'..'Graph 3' colours used in the bar charts.
plt.figure(figsize=(15,10))

colors = ['gold', 'hotpink', 'purple', 'green']
graph_cluster_sizes = joined_df['clusters_graph'].value_counts().sort_index()
plt.pie(graph_cluster_sizes, labels=graph_cluster_sizes.index, colors=colors,autopct='%1.1f%%',shadow=True)

plt.title('Addresses per Cluster - Graph')
plt.legend()
plt.show()


In [None]:
# Grouped bar chart: mean of four statistics for each graph cluster.
cols=['total_eth_sent', 'total_eth_recv', 'max_nonce', 'total_eth_trans']
x_axis = np.arange(len(cols))
plt.rc('font', size=20)
plt.figure(figsize=(15,10))

# (x offset, bar colour) for cluster ids 0..3, in that order.
bar_styles = [(-0.2, 'gold'), (0, 'hotpink'), (0.2, 'purple'), (0.4, 'green')]
for cluster_id, (offset, color) in enumerate(bar_styles):
    cluster_means = joined_df[joined_df['clusters_graph'] == cluster_id].describe().loc['mean']
    # Heights are the raw means. A "+1" offset is only needed in front of a log
    # transform, and none is applied here, so adding it would misreport every bar.
    # Positions 3-6 of the means are assumed to be the statistics named in
    # `cols` -- TODO confirm against joined_df's column order.
    heights = cluster_means.iloc[3:7].to_numpy()
    plt.bar(x=x_axis + offset, color=color, label='Graph {}'.format(cluster_id), width=0.2, height=heights)

plt.xlabel('Statistic')
plt.ylabel('Value')
plt.title('Mean Transaction Statistics by Cluster, Graph')
plt.xticks(x_axis, cols)
plt.legend()
plt.show()


In [None]:
# Grouped bar chart: mean send / receive counts for each graph cluster.
cols=['send_count', 'receive_count']
x_axis = np.arange(len(cols))
plt.rc('font', size=20)
plt.figure(figsize=(15,10))

# (x offset, bar colour) for cluster ids 0..3, in that order.
bar_styles = [(-0.2, 'gold'), (0, 'hotpink'), (0.2, 'purple'), (0.4, 'green')]
for cluster_id, (offset, color) in enumerate(bar_styles):
    cluster_means = joined_df[joined_df['clusters_graph'] == cluster_id].describe().loc['mean']
    # Heights are the raw means. A "+1" offset is only needed in front of a log
    # transform, and none is applied here, so adding it would misreport every bar.
    # Positions 0-1 of the means are assumed to be the statistics named in
    # `cols` -- TODO confirm against joined_df's column order.
    heights = cluster_means.iloc[0:2].to_numpy()
    plt.bar(x=x_axis + offset, color=color, label='Graph {}'.format(cluster_id), width=0.2, height=heights)

plt.xlabel('Statistic')
plt.ylabel('Value')
plt.title('Mean Transaction Statistics by Cluster, Graph')
plt.xticks(x_axis, cols)
plt.legend()
plt.show()


In [None]:
# Grouped bar chart: mean gas cost for each graph cluster.
cols=['avg_gas_cost']
x_axis = np.arange(len(cols))
plt.rc('font', size=20)
plt.figure(figsize=(15,10))

# (x offset, bar colour) for cluster ids 0..3, in that order.
bar_styles = [(-0.2, 'gold'), (0, 'hotpink'), (0.2, 'purple'), (0.4, 'green')]
for cluster_id, (offset, color) in enumerate(bar_styles):
    cluster_means = joined_df[joined_df['clusters_graph'] == cluster_id].describe().loc['mean']
    # Heights are the raw means. A "+1" offset is only needed in front of a log
    # transform, and none is applied here, so adding it would misreport every bar.
    # Position 2 of the means is assumed to be the statistic named in
    # `cols` -- TODO confirm against joined_df's column order.
    heights = cluster_means.iloc[2:3].to_numpy()
    plt.bar(x=x_axis + offset, color=color, label='Graph {}'.format(cluster_id), width=0.2, height=heights)

plt.xlabel('Statistic')
plt.ylabel('Value')
plt.title('Mean Transaction Statistics by Cluster, Graph')
plt.xticks(x_axis, cols)
plt.legend()
plt.show()


# Label Density

In [None]:
# Load the labelled-address table and key it by address in one chained step.
labeled_addr = (
    pd.read_csv('../data/top_10k_addr_only.csv', index_col=0)
    .set_index('address')
)

In [None]:
# Print the number of labelled addresses, then preview them. The preview must be
# the cell's last expression: a bare expression anywhere else is evaluated and
# discarded, so nothing would be shown.
print(len(labeled_addr))
labeled_addr.head()

In [None]:
# Attach the address labels with an index-on-index left join. The guard keeps
# this cell safe to re-run: joining a second time would raise ValueError because
# the 'label' column would overlap with the one added by the first run.
if 'label' not in joined_df.columns:
    joined_df = joined_df.join(labeled_addr)

In [None]:
# Preview the joined table now that the label column is attached.
joined_df.head(n=5)

In [None]:
# Number of addresses per label; rows without a label (NaN) are not counted.
joined_df['label'].value_counts(dropna=True)

In [None]:
# For each cluster, find what percentage of the addresses carrying each label fall in that cluster.


### Tx cluster label density

In [None]:
# For each transaction cluster, list how many of its addresses carry each label.
for cluster_id in range(4):
    in_cluster = joined_df['clusters_tx'] == cluster_id
    print('Tx cluster', cluster_id)
    print(joined_df.loc[in_cluster, 'label'].value_counts())
    print()

In [None]:
'''
Tx 0 contains:
whale                  100%
exchange               93%
dex                     0%
token contract          0%
bridge                  0%
liquidity pool          0%
marketplace             0%
contract deployer       100%
otc                     100%
celsius                 100%
investment fund         100%
user proxy contract     0%
eth2                    0%
mining                  100%
proxy                   0%

Tx 1 contains:
whale                  0%
exchange               3%
dex                     100%
token contract          100%
bridge                  100%
liquidity pool          75%
marketplace             100%
contract deployer       0%
otc                     0%
celsius                 0%
investment fund         0%
user proxy contract     100%
eth2                    100%
mining                  0%
proxy                   100%

Tx 2 contains:
whale                  
exchange               
dex                     
token contract          
bridge                  
liquidity pool          25%
marketplace             
contract deployer       
otc                     
celsius                 
investment fund         
user proxy contract     
eth2                    
mining                  
proxy                   

Tx 3 contains:
whale                  
exchange               3%
dex                     
token contract          
bridge                  
liquidity pool          
marketplace             
contract deployer       
otc                     
celsius                 
investment fund         
user proxy contract     
eth2                    
mining                  
proxy                   
'''

### Graph cluster label density

In [None]:
# For each graph cluster, list how many of its addresses carry each label.
for cluster_id in range(4):
    in_cluster = joined_df['clusters_graph'] == cluster_id
    print('Graph cluster', cluster_id)
    print(joined_df.loc[in_cluster, 'label'].value_counts())
    print()

In [None]:
'''
Graph 0
whale                  13%
exchange               7%
dex                     100%
token contract          100%
bridge                  100%
liquidity pool          75%
marketplace             100%
contract deployer       0%
otc                     0%
celsius                 0%
investment fund         0%
user proxy contract     100%
eth2                    100%
mining                  0%
proxy                   100%

Graph 1
whale                  23%
exchange               41%
dex                     
token contract          
bridge                  
liquidity pool          25%
marketplace             
contract deployer       100%
otc                     
celsius                 
investment fund         100%
user proxy contract     
eth2                    
mining                  100%
proxy                   

Graph 2
whale                  30%
exchange               51%
dex                     
token contract          
bridge                  
liquidity pool          
marketplace             
contract deployer       
otc                     100%
celsius                 
investment fund         
user proxy contract     
eth2                    
mining                  
proxy                   

Graph 3
whale                  30%
exchange               
dex                     
token contract          
bridge                  
liquidity pool          
marketplace             
contract deployer       
otc                     
celsius                 100%
investment fund         
user proxy contract     
eth2                    
mining                  
proxy                   
'''

## label heatmap, only prelabeled addresses

In [None]:
import seaborn as sns

# Heatmap of how the pre-labelled addresses are spread over the clusters
# (rows = labels, columns = clusters). The counts are hard-coded, presumably
# copied from the label-density output above -- re-check them if the data changes.
label_names = ['whale', 'exchange', 'dex', 'token contract', 'bridge', 'liquidity pool']
label_df = pd.DataFrame(
    {
        'Tx 0': [30, 27, 0, 0, 0, 0],
        'Tx 1': [0, 1, 5, 5, 5, 3],
        'Tx 2': [0, 0, 0, 0, 0, 1],
        'Tx 3': [0, 1, 0, 0, 0, 0],
        'Graph 0': [4, 2, 5, 5, 5, 3],
        # Graph 1 holds 7 whales and 12 exchanges. With the two numbers the other
        # way round, the whale row summed to 30 over the Tx columns but 35 over the
        # Graph columns (exchange: 29 vs 24). The label-density notes in this
        # notebook also give Graph 1 23% of whales and 41% of exchanges.
        'Graph 1': [7, 12, 0, 0, 0, 1],
        'Graph 2': [9, 15, 0, 0, 0, 0],
        'Graph 3': [10, 0, 0, 0, 0, 0],
    },
    index=pd.Index(label_names, name='label'),
)

# Set the font size before any text is created: the title's size is fixed when
# plt.title() runs, so a later plt.rc() would leave it at the previous size.
plt.rc('font', size=30)
plt.figure(figsize=(15,10))
plt.title('Labeled Address Heatmap')
p = sns.heatmap(label_df, annot=True, fmt='d')
p.set_xlabel('Cluster')
p.set_ylabel('Label')
plt.show()

# compare cluster scores

In [None]:
# Load the clustering-score tables for the transaction and graph feature sets;
# the first CSV column becomes each frame's index.
tx_m4_df, graph_m4_df = [
    pd.read_csv(scores_path, index_col=0)
    for scores_path in ('../data/tx_m4_scores.csv', '../data/graph_m4_scores.csv')
]

In [None]:
# Number of silhouette scores stored (one per row of the score table).
tx_m4_df['sil'].size

In [None]:
#plot all three scores
plt.figure(figsize=(10,10))
plt.rc('font', size=16)
plt.plot(range(2,12), tx_m4_df['sil'], label='Tx',c='red')
plt.plot(range(2,12), graph_m4_df['sil'], label='Graph',c='blue')

plt.title('Number of Clusters vs. Silhouette Score')
plt.xticks(ticks=range(2, 12))
plt.xlabel('Number of Clusters')
plt.ylabel('Silhouette Score')
plt.legend()
plt.grid(visible=True,axis='x')
plt.grid(visible=True,axis='y')
plt.show()

plt.figure(figsize=(10,10))
plt.plot(range(2,12), tx_m4_df['ch'], label='Tx',c='red')
plt.plot(range(2,12), graph_m4_df['ch'], label='Graph',c='blue')

plt.title('Number of Clusters vs. Calinski-Harabasz Score')
plt.legend()
plt.xlabel('Number of Clusters')
plt.ylabel('Calinski-Harabasz Score')
plt.xticks(ticks=range(2, 11))
plt.grid(visible=True,axis='x')
plt.grid(visible=True,axis='y')
plt.show()

plt.figure(figsize=(10,10))
plt.plot(range(2,12), tx_m4_df['db'], label='Tx',c='red')
plt.plot(range(2,12), graph_m4_df['db'], label='Graph',c='blue')

plt.title('Number of Clusters vs. Davies-Bouldin Score')
plt.legend()
plt.xlabel('Number of Clusters')
plt.ylabel('Davies-Bouldin Score')
plt.xticks(ticks=range(2, 11))
plt.grid(visible=True,axis='x')
plt.grid(visible=True,axis='y')
plt.show()

# Plot original scatter plots with labels colored and in legend

In [None]:
# Scatter of the transaction-based PCA coordinates with labelled addresses highlighted.
plt.rc('font', size=20)
plt.figure(figsize=(10,10))

# Grey backdrop: every address is drawn here, labelled or not.
plt.scatter(joined_df['pca_x_vals_tx'], joined_df['pca_y_vals_tx'], label='unlabeled',c='lightgrey',alpha=0.5)

# Labelled addresses are drawn on top, one colour per label, in this order.
label_colors = [
    ('exchange', 'hotpink'),
    ('whale', 'gold'),
    ('token contract', 'navy'),
    ('dex', 'limegreen'),
    ('bridge', 'black'),
    ('liquidity pool', 'red'),
]
for label_name, color in label_colors:
    has_label = joined_df['label'] == label_name
    plt.scatter(
        joined_df.loc[has_label, 'pca_x_vals_tx'],
        joined_df.loc[has_label, 'pca_y_vals_tx'],
        label=label_name,
        c=color,
        alpha=0.5,
    )

plt.legend()
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Method 4, Tx statistics, Labels')
plt.show()

In [None]:
# Scatter of the graph-based PCA coordinates with labelled addresses highlighted.
plt.rc('font', size=20)
plt.figure(figsize=(10,10))

# Grey backdrop: every address is drawn here, labelled or not.
plt.scatter(joined_df['pca_x_vals_graph'], joined_df['pca_y_vals_graph'], label='unlabeled',c='lightgrey',alpha=0.5)

# Labelled addresses are drawn on top, one colour per label, in this order.
label_colors = [
    ('exchange', 'hotpink'),
    ('whale', 'gold'),
    ('token contract', 'navy'),
    ('dex', 'limegreen'),
    ('bridge', 'black'),
    ('liquidity pool', 'red'),
]
for label_name, color in label_colors:
    has_label = joined_df['label'] == label_name
    plt.scatter(
        joined_df.loc[has_label, 'pca_x_vals_graph'],
        joined_df.loc[has_label, 'pca_y_vals_graph'],
        label=label_name,
        c=color,
        alpha=0.5,
    )

plt.legend()
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Method 4, Graph statistics, Labels')
plt.show()

# Get top 5 addresses from each cluster by total eth trans

In [None]:
# Row labels for the per-cluster count lists (tx0..tx3, g0..g3); the order here
# fixes the meaning of each position in those lists.
lbl = [
    'whale',
    'whale temp',
    'exchange',
    'dex',
    'token contract',
    'proxy',
    'liquidity pool',
    'nft marketplace',
]

In [None]:
# Transaction cluster 0, ordered by total ETH transferred (largest first).
tx0_df = joined_df.loc[joined_df['clusters_tx'] == 0].sort_values(by='total_eth_trans', ascending=False)
print(tx0_df.shape[0])
print(tx0_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
# NOTE(review): exchange is 17 here, but the pre-labelled heatmap lists 27
# exchanges in Tx 0 -- confirm which number is right.
tx0 = [30,0,17,0,0,0,0,0]
# Top 10: binance, ftx, coinbase, coinbase, coinbase, coinbase, binance, binance, OKEx exchange, binance


In [None]:
# Transaction cluster 1, ordered by total ETH transferred (largest first).
tx1_df = joined_df.loc[joined_df['clusters_tx'] == 1].sort_values(by='total_eth_trans', ascending=False)
print(tx1_df.shape[0])
print(tx1_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
tx1 = [2,7,1,5,5,0,3,0]
# Top 10: 3 whale temp accounts, 1 loan marketplace, 1 whale, whale temp, whale temp, whale temp, whale, whale temp

In [None]:
# Transaction cluster 2, ordered by total ETH transferred (largest first).
tx2_df = joined_df.loc[joined_df['clusters_tx'] == 2].sort_values(by='total_eth_trans', ascending=False)
print(tx2_df.shape[0])
print(tx2_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
tx2 = [0,0,0,2,3,1,2,2]
# Top 10: OpenSea, uniswap Dex, weth token contract, looks rare nft, 1inch dex, uniswap token contract, aave, 0x proxy, compound token contract, lido liquidity pool

In [None]:
# Transaction cluster 3, ordered by total ETH transferred (largest first).
tx3_df = joined_df.loc[joined_df['clusters_tx'] == 3].sort_values(by='total_eth_trans', ascending=False)
print(tx3_df.shape[0])
print(tx3_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
tx3 = [4,3,3,0,0,0,0,0]
# Top 10: binance, whale burner, bitfinex exchange, whale, kucoin exchange, whale temp, whale, whale, whale temp, whale

In [None]:
# Graph cluster 0, ordered by total ETH transferred (largest first).
g0_df = joined_df.loc[joined_df['clusters_graph'] == 0].sort_values(by='total_eth_trans', ascending=False)
print(g0_df.shape[0])
print(g0_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
g0 = [4,2,6,5,5,0,3,0]
# Top 10: binance, binance, binance, binance, binance, gemini, whale, whale temp, whale temp, whale

In [None]:
# Graph cluster 1, ordered by total ETH transferred (largest first).
g1_df = joined_df.loc[joined_df['clusters_graph'] == 1].sort_values(by='total_eth_trans', ascending=False)
print(g1_df.shape[0])
print(g1_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
# NOTE(review): the label-density notes give Graph 1 23% of whales and 41% of
# exchanges, so the whale (12) and exchange (7) counts may be swapped -- confirm.
g1 = [12,2,7,2,2,1,1,2]
# Top 10: opensea, uniswap dex, token contract, look rare, 1 inch dex, uniswap token contract, whale temp, aave weth gateway, 0x exchange proxy, whale temp

In [None]:
# Graph cluster 2, ordered by total ETH transferred (largest first).
g2_df = joined_df.loc[joined_df['clusters_graph'] == 2].sort_values(by='total_eth_trans', ascending=False)
print(g2_df.shape[0])
print(g2_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
g2 = [9,0,15,0,0,0,0,0]
# Top 10: binance, ftx, coinbase, coinbase, coinbase, coinbase, OKEx exchange, whale, OTC, bitfinex

In [None]:
# Graph cluster 3, ordered by total ETH transferred (largest first).
g3_df = joined_df.loc[joined_df['clusters_graph'] == 3].sort_values(by='total_eth_trans', ascending=False)
print(g3_df.shape[0])
print(g3_df.head(n=10))
# Hand-entered label counts for this cluster, in `lbl` order.
g3 = [10,6,0,0,0,0,0,0]
# Top 10: whale temp, whale temp, whale temp, whale temp, whale temp, whale temp, whale, whale, whale, whale

# cluster label heatmap, prelabeled addresses and top addresses per cluster

In [None]:
# Assemble the label-by-cluster count table in one constructor call: one column
# per cluster, rows indexed by the names in `lbl` (index named 'label').
toplbl_df = pd.DataFrame(
    {
        'Tx 0': tx0,
        'Tx 1': tx1,
        'Tx 2': tx2,
        'Tx 3': tx3,
        'Graph 0': g0,
        'Graph 1': g1,
        'Graph 2': g2,
        'Graph 3': g3,
    },
    index=pd.Index(lbl, name='label'),
)

In [None]:
# Heatmap of the label counts per cluster held in toplbl_df.
# Set the font size before any text is created: the title's size is fixed when
# plt.title() runs, so a later plt.rc() would leave it at the previous size.
plt.rc('font', size=30)
plt.figure(figsize=(15,10))
plt.title('Labeled Addresses per Cluster')
p = sns.heatmap(toplbl_df, annot=True, fmt='d')
p.set_xlabel('Cluster')
p.set_ylabel('Label')
plt.show()