In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import graspologic

In [None]:
# https://www.kaggle.com/datasets/ellipticco/elliptic-data-set?resource=download

# The elliptic data set maps bitcoin transfers between entities, both licit and illicit.
# There are 203,769 nodes and 234,355 edges. Nodes are entities and edges are transactions.
# Among the nodes, 2% (4545) are illicit and 21% (42019) are licit. The rest are unknown.
# There are 166 features for each node.

# READ THIS: TIME STEPS
# The first feature is the time step for that node. This represents a single strongly-connected component of transactions.
# Each transaction within a timestep appeared within 3 hours of each other.
# Each time step has no edges with each other.
# Therefore it makes sense to analyze the data from each time step, rather than as a whole. 

# The next 93 features give information about the transactions made by that node (fees, volume, averages, etc).
# The last 72 features are aggregated from adjacent nodes.


In [2]:
# reading data
df_features = pd.read_csv("./elliptic/elliptic_txs_features.csv", header=None)
df_classes= pd.read_csv("./elliptic/elliptic_txs_classes.csv")
df_edgelist = pd.read_csv("./elliptic/elliptic_txs_edgelist.csv")

# renaming columns
df_classes.loc[df_classes['class'] == '1', 'class'] = "illicit"
df_classes.loc[df_classes['class'] == '2', 'class'] = "licit"

df_features.columns = ["id", "time step"] + [f"local_feat_{i}" for i in range(93)] + [f"agg_feat_{i}" for i in range(72)]
df_classes.columns = ["id", "class"]

# adding class data
df = pd.merge(df_features, df_classes, how="inner", on="id")
second_column = df.pop('class')
df.insert(1, 'class', second_column)
df.head()

Unnamed: 0,id,class,time step,local_feat_0,local_feat_1,local_feat_2,local_feat_3,local_feat_4,local_feat_5,local_feat_6,...,agg_feat_62,agg_feat_63,agg_feat_64,agg_feat_65,agg_feat_66,agg_feat_67,agg_feat_68,agg_feat_69,agg_feat_70,agg_feat_71
0,230425980,unknown,1,-0.171469,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,...,-0.562153,-0.600999,1.46133,1.461369,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792
1,5530458,unknown,1,-0.171484,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,...,0.947382,0.673103,-0.979074,-0.978556,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792
2,232022460,unknown,1,-0.172107,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,...,0.670883,0.439728,-0.979074,-0.978556,-0.098889,-0.106715,-0.131155,-0.183671,-0.120613,-0.119792
3,232438397,licit,1,0.163054,1.96379,-0.646376,12.409294,-0.063725,9.782742,12.414558,...,-0.577099,-0.613614,0.241128,0.241406,1.072793,0.08553,-0.131155,0.677799,-0.120613,-0.119792
4,230460314,unknown,1,1.011523,-0.081127,-1.201369,1.153668,0.333276,1.312656,-0.061584,...,-0.511871,-0.400422,0.517257,0.579382,0.018279,0.277775,0.326394,1.29375,0.178136,0.179117


In [3]:
g = nx.from_pandas_edgelist(
    df_edgelist,
    source="txId1",
    target="txId2",
    create_using=nx.DiGraph,
)

In [4]:
def plot(i):
    time_step_i = df.loc[(df['time step'] == i), 'id']
    time_step_i = df_edgelist.loc[df_edgelist['txId1'].isin(time_step_i)]
    g = nx.from_pandas_edgelist(time_step_i, source = 'txId1', target = 'txId2', create_using = nx.DiGraph())

In [6]:
from graspologic.utils import is_fully_connected

for i in range (1,50):
    time_step_i = df.loc[(df['time step'] == i), 'id']
    time_step_i = df_edgelist.loc[df_edgelist['txId1'].isin(time_step_i)]
    g = nx.from_pandas_edgelist(time_step_i, source = 'txId1', target = 'txId2', create_using = nx.DiGraph())
    if(is_fully_connected(g) == True):
        print("Time Step ", i, "is strongly connected.")
    else:
        if(nx.is_weakly_connected(g) == True):
            print("Time Step ", i, "is strongly connected.")
        else:
            print("Time Step ", i, "isn't connected.")


Time Step  1 is strongly connected.
Time Step  2 is strongly connected.
Time Step  3 is strongly connected.
Time Step  4 is strongly connected.
Time Step  5 is strongly connected.
Time Step  6 is strongly connected.
Time Step  7 is strongly connected.
Time Step  8 is strongly connected.
Time Step  9 is strongly connected.
Time Step  10 is strongly connected.
Time Step  11 is strongly connected.
Time Step  12 is strongly connected.
Time Step  13 is strongly connected.
Time Step  14 is strongly connected.
Time Step  15 is strongly connected.
Time Step  16 is strongly connected.
Time Step  17 is strongly connected.
Time Step  18 is strongly connected.
Time Step  19 is strongly connected.
Time Step  20 is strongly connected.
Time Step  21 is strongly connected.
Time Step  22 is strongly connected.
Time Step  23 is strongly connected.
Time Step  24 is strongly connected.
Time Step  25 is strongly connected.
Time Step  26 is strongly connected.
Time Step  27 is strongly connected.
Time Step 