In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

# Folder that holds the dataset CSV files; a relative path, so it is resolved
# against the notebook's current working directory.
DATA_DIR = "elliptic_dataset"

# Let wide frames show up to 60 columns before pandas elides the middle ones.
pd.set_option("display.max_columns", 60)

# `p` is the data-folder handle used to build file paths from here on.
p = Path(DATA_DIR)
assert p.exists(), f"Missing: {p}"

# Alphabetical list of the CSV file names sitting directly inside the folder
# (no recursion; the ".csv" suffix match is case-sensitive).
files = sorted(entry.name for entry in p.iterdir() if entry.suffix == ".csv")
files

['AddrAddr_edgelist.csv',
 'AddrTx_edgelist.csv',
 'TxAddr_edgelist.csv',
 'txs_classes.csv',
 'txs_edgelist.csv',
 'txs_features.csv',
 'wallets_classes.csv',
 'wallets_features.csv',
 'wallets_features_classes_combined.csv']

In [3]:
# Load every CSV table from the data folder `p` into its own DataFrame.
# Each read uses pandas defaults, so column dtypes are inferred by pandas.

# Transaction-level tables: features, class labels, and the transaction edge list.
tx_feat   = pd.read_csv(p / "txs_features.csv")
tx_cls    = pd.read_csv(p / "txs_classes.csv")
tx_edges  = pd.read_csv(p / "txs_edgelist.csv")

# Wallet-level tables: features and class labels.
wal_feat  = pd.read_csv(p / "wallets_features.csv")
wal_cls   = pd.read_csv(p / "wallets_classes.csv")

# Pre-combined wallet features + classes file (per its file name).
wal_combined = pd.read_csv(p / "wallets_features_classes_combined.csv")

# Remaining edge lists; what each one links is inferred from the file names
# only (address/address, address/tx, tx/address) -- TODO confirm against the
# dataset documentation.
addr_addr = pd.read_csv(p / "AddrAddr_edgelist.csv")
addr_tx   = pd.read_csv(p / "AddrTx_edgelist.csv")
tx_addr   = pd.read_csv(p / "TxAddr_edgelist.csv")

# One-line-per-group summary of (rows, columns) for every loaded table.
# read_csv always returns a DataFrame here, so wal_combined needs no None
# guard (the previews below use it unguarded as well).
print("Shapes:")
print("tx_feat", tx_feat.shape, "| tx_cls", tx_cls.shape, "| tx_edges", tx_edges.shape)
print("wal_feat", wal_feat.shape, "| wal_cls", wal_cls.shape, "| wal_combined", wal_combined.shape)
print("addr_addr", addr_addr.shape, "| addr_tx", addr_tx.shape, "| tx_addr", tx_addr.shape)

# Transaction previews. value_counts(dropna=False) also counts missing labels,
# so an unlabeled row would appear as its own bucket instead of being dropped.
display(tx_feat.head())
display(tx_cls['class'].value_counts(dropna=False).rename("tx_class_counts"))
display(tx_edges.head())

# Wallet previews, same layout as the transaction previews above.
display(wal_feat.head())
display(wal_cls['class'].value_counts(dropna=False).rename("wallet_class_counts"))
display(wal_combined.head())


Shapes:
tx_feat (203769, 184) | tx_cls (203769, 2) | tx_edges (234355, 2)
wal_feat (1268260, 57) | wal_cls (822942, 2) | wal_combined (1268260, 58)
addr_addr (2868964, 2) | addr_tx (477117, 2) | tx_addr (837124, 2)


Unnamed: 0,txId,Time step,Local_feature_1,Local_feature_2,Local_feature_3,Local_feature_4,Local_feature_5,Local_feature_6,Local_feature_7,Local_feature_8,Local_feature_9,Local_feature_10,Local_feature_11,Local_feature_12,Local_feature_13,Local_feature_14,Local_feature_15,Local_feature_16,Local_feature_17,Local_feature_18,Local_feature_19,Local_feature_20,Local_feature_21,Local_feature_22,Local_feature_23,Local_feature_24,Local_feature_25,Local_feature_26,Local_feature_27,Local_feature_28,...,Aggregate_feature_60,Aggregate_feature_61,Aggregate_feature_62,Aggregate_feature_63,Aggregate_feature_64,Aggregate_feature_65,Aggregate_feature_66,Aggregate_feature_67,Aggregate_feature_68,Aggregate_feature_69,Aggregate_feature_70,Aggregate_feature_71,Aggregate_feature_72,in_txs_degree,out_txs_degree,total_BTC,fees,size,num_input_addresses,num_output_addresses,in_BTC_min,in_BTC_max,in_BTC_mean,in_BTC_median,in_BTC_total,out_BTC_min,out_BTC_max,out_BTC_mean,out_BTC_median,out_BTC_total
0,3321,1,-0.169615,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.160199,-0.166062,-0.049707,-0.162507,-0.028741,-0.035391,-0.042955,-0.013282,-0.042183,-0.16877,-0.171416,-0.172277,-1.373657,-1.37146,-0.139663,-0.148869,-0.080147,-0.155604,-0.010763,-0.012107,-0.139665,...,0.185492,-0.003773,-0.562664,-0.577099,-0.50008,0.241128,0.241406,-0.098889,-0.08749,-0.084674,-0.140597,1.5197,1.521399,1.0,0.0,0.533972,0.0001,225.0,1.0,2.0,0.534072,0.534072,0.534072,0.534072,0.534072,0.166899,0.367074,0.266986,0.266986,0.533972
1,11108,1,-0.137586,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.127429,-0.133751,-0.049707,-0.129773,-0.028741,-0.035391,-0.042955,-0.013282,-0.003952,-0.13856,-0.139821,-0.134358,0.887058,0.884557,-0.139564,-0.148805,-0.080147,-0.15552,-0.010763,-0.012107,-0.139566,...,0.185492,-0.216814,-0.605631,-0.562153,-0.600999,-0.979074,-0.978556,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792,1.0,1.0,5.611778,0.0001,225.0,1.0,2.0,5.611878,5.611878,5.611878,5.611878,5.611878,0.586194,5.025584,2.805889,2.805889,5.611778
2,51816,1,-0.170103,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.160699,-0.166555,-0.049707,-0.163006,-0.028741,-0.035391,-0.042955,-0.013282,-0.036613,-0.169668,-0.172904,-0.172855,-1.373657,-1.37146,-0.139731,-0.148912,-0.080147,-0.155661,-0.010763,-0.012107,-0.139732,...,0.185492,-0.216814,-0.617907,-0.577099,-0.613614,0.241128,0.241406,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792,1.0,1.0,0.456508,0.0001,226.0,1.0,2.0,0.456608,0.456608,0.456608,0.456608,0.456608,0.2279902,0.228518,0.228254,0.228254,0.456508
3,68869,1,-0.114267,-0.184668,-1.201369,0.028105,-0.043875,-0.113002,0.547008,-0.161652,-0.118555,0.300047,-0.145947,2.017758,1.189967,-0.042955,-0.013282,0.054659,-0.118754,-0.121849,-0.106751,-1.373657,-1.37146,-0.139302,-0.148638,-0.080147,-0.155297,-0.010763,-0.012107,-0.139303,...,0.185492,-0.216814,-0.611769,-0.569626,-0.607306,-0.979074,-0.978556,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792,0.0,1.0,9.3088,0.0001,853.0,3.0,2.0,0.3089,8.0,3.102967,1.0,9.3089,1.229,8.0798,4.6544,4.6544,9.3088
4,89273,1,5.202107,-0.210553,-1.756361,-0.12197,260.090707,-0.113002,-0.061584,5.335864,5.252974,-0.049707,5.327423,-0.028741,-0.035391,265.263236,-0.013282,-0.057401,0.096439,-0.167593,-0.175293,-0.474922,-1.37146,1.828567,1.107041,-0.080147,1.512162,-0.010763,-0.012107,1.828864,...,0.084615,-0.216814,4.010246,1.25863,0.982479,0.118347,0.091066,-0.098889,0.854508,-0.066727,-0.150067,-0.08076,-0.070977,1.0,288.0,852.16468,0.0,445268.0,1.0,13107.0,852.16468,852.16468,852.16468,852.16468,852.16468,1.3e-07,41.264036,0.065016,0.000441,852.16468


class
3    157205
2     42019
1      4545
Name: tx_class_counts, dtype: int64

Unnamed: 0,txId1,txId2
0,230425980,5530458
1,232022460,232438397
2,230460314,230459870
3,230333930,230595899
4,232013274,232029206


Unnamed: 0,address,Time step,num_txs_as_sender,num_txs_as receiver,first_block_appeared_in,last_block_appeared_in,lifetime_in_blocks,total_txs,first_sent_block,first_received_block,num_timesteps_appeared_in,btc_transacted_total,btc_transacted_min,btc_transacted_max,btc_transacted_mean,btc_transacted_median,btc_sent_total,btc_sent_min,btc_sent_max,btc_sent_mean,btc_sent_median,btc_received_total,btc_received_min,btc_received_max,btc_received_mean,btc_received_median,fees_total,fees_min,fees_max,fees_mean,fees_median,fees_as_share_total,fees_as_share_min,fees_as_share_max,fees_as_share_mean,fees_as_share_median,blocks_btwn_txs_total,blocks_btwn_txs_min,blocks_btwn_txs_max,blocks_btwn_txs_mean,blocks_btwn_txs_median,blocks_btwn_input_txs_total,blocks_btwn_input_txs_min,blocks_btwn_input_txs_max,blocks_btwn_input_txs_mean,blocks_btwn_input_txs_median,blocks_btwn_output_txs_total,blocks_btwn_output_txs_min,blocks_btwn_output_txs_max,blocks_btwn_output_txs_mean,blocks_btwn_output_txs_median,num_addr_transacted_multiple,transacted_w_address_total,transacted_w_address_min,transacted_w_address_max,transacted_w_address_mean,transacted_w_address_median
0,111112TykSw72ztDN2WJger4cynzWYC5w,25,0.0,1.0,439586.0,439586.0,0.0,1.0,0.0,439586.0,1.0,0.010628,0.010628,0.010628,0.010628,0.010628,0.0,0.0,0.0,0.0,0.0,0.010628,0.010628,0.010628,0.010628,0.010628,0.007042,0.007042,0.007042,0.007042,0.007042,1.2e-05,1.2e-05,1.2e-05,1.2e-05,1.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,1.0,1.0,1.0,1.0
1,1111DAYXhoxZx2tsRnzimfozo783x1yC2,25,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
2,1111DAYXhoxZx2tsRnzimfozo783x1yC2,29,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
3,1111DAYXhoxZx2tsRnzimfozo783x1yC2,39,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
4,1111DAYXhoxZx2tsRnzimfozo783x1yC2,39,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0


class
3    557588
2    251088
1     14266
Name: wallet_class_counts, dtype: int64

Unnamed: 0,address,Time step,class,num_txs_as_sender,num_txs_as receiver,first_block_appeared_in,last_block_appeared_in,lifetime_in_blocks,total_txs,first_sent_block,first_received_block,num_timesteps_appeared_in,btc_transacted_total,btc_transacted_min,btc_transacted_max,btc_transacted_mean,btc_transacted_median,btc_sent_total,btc_sent_min,btc_sent_max,btc_sent_mean,btc_sent_median,btc_received_total,btc_received_min,btc_received_max,btc_received_mean,btc_received_median,fees_total,fees_min,fees_max,fees_mean,fees_median,fees_as_share_total,fees_as_share_min,fees_as_share_max,fees_as_share_mean,fees_as_share_median,blocks_btwn_txs_total,blocks_btwn_txs_min,blocks_btwn_txs_max,blocks_btwn_txs_mean,blocks_btwn_txs_median,blocks_btwn_input_txs_total,blocks_btwn_input_txs_min,blocks_btwn_input_txs_max,blocks_btwn_input_txs_mean,blocks_btwn_input_txs_median,blocks_btwn_output_txs_total,blocks_btwn_output_txs_min,blocks_btwn_output_txs_max,blocks_btwn_output_txs_mean,blocks_btwn_output_txs_median,num_addr_transacted_multiple,transacted_w_address_total,transacted_w_address_min,transacted_w_address_max,transacted_w_address_mean,transacted_w_address_median
0,111112TykSw72ztDN2WJger4cynzWYC5w,25,2,0.0,1.0,439586.0,439586.0,0.0,1.0,0.0,439586.0,1.0,0.010628,0.010628,0.010628,0.010628,0.010628,0.0,0.0,0.0,0.0,0.0,0.010628,0.010628,0.010628,0.010628,0.010628,0.007042,0.007042,0.007042,0.007042,0.007042,1.2e-05,1.2e-05,1.2e-05,1.2e-05,1.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,1.0,1.0,1.0,1.0
1,1111DAYXhoxZx2tsRnzimfozo783x1yC2,25,3,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
2,1111DAYXhoxZx2tsRnzimfozo783x1yC2,29,3,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
3,1111DAYXhoxZx2tsRnzimfozo783x1yC2,39,3,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
4,1111DAYXhoxZx2tsRnzimfozo783x1yC2,39,3,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,439589.0,6.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.0,0.0,0.0,0.0,0.0,0.273046,0.0039,0.133777,0.034131,0.014352,0.002371,0.000122,0.00058,0.000296,0.000242,0.002217,0.000121,0.000523,0.000277,0.000237,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,0.0,0.0,0.0,0.0,46370.0,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
