This notebook constructs daily directed transaction graphs for each stablecoin based on cleaned ERC20 transfer data. Each node represents a wallet address, and each directed edge represents a token transfer between addresses, weighted by the transferred amount.


In [None]:
import os
import pickle
import pandas as pd
import networkx as nx
from tqdm import tqdm
from glob import glob
from datetime import datetime

# --- Paths used throughout the notebook ---------------------------------
# Cleaned ERC20 transfer table (input) and the two output locations:
# one for the pickled daily graphs, one for the node-level metric tables.
input_file = '../data/cleaned/token_transfers_V3.0.0_cleaned.parquet'
output_dir = '../data/graphs/daily/'
metrics_dir = '../data/graphs/metrics'

# Create both output directories up front; an already existing directory is fine.
for _out_path in (output_dir, metrics_dir):
    os.makedirs(_out_path, exist_ok=True)

# --- Load the cleaned transfers -----------------------------------------
df = pd.read_parquet(input_file)

# Quick sanity check: column overview first, then a preview of the first rows
# (the bare last expression is what the cell displays).
df.info()
df.head()

This cell iterates over each stablecoin and each date in the dataset to generate directed, weighted transaction graphs. Each graph represents the token transfers between wallets on a specific day and is saved as a pickled `.gpickle` file for later network analysis.

In [None]:
# Build one directed, weighted graph per (stablecoin, date) pair and pickle it
# to `output_dir` as '<stablecoin>_<date>.gpickle'.
for stablecoin in df['stablecoin'].unique():
    df_stablecoin = df[df['stablecoin'] == stablecoin]

    for date in df_stablecoin['date'].unique():
        df_day = df_stablecoin[df_stablecoin['date'] == date]

        # Sum all transfers between the same (sender, receiver) pair in one
        # vectorised pass instead of accumulating row by row with iterrows().
        # sort=False keeps pairs in order of first appearance, so edges are
        # inserted in the same order as a row-wise loop would insert them.
        # observed=True avoids empty groups if the address columns are categorical.
        # NOTE(review): with pandas defaults, rows whose address is missing are
        # left out of the grouping and missing values are skipped by sum() —
        # confirm the cleaned data contains neither.
        edge_weights = (
            df_day
            .groupby(['from_address', 'to_address'], sort=False, observed=True)['value']
            .sum()
        )

        G = nx.DiGraph()
        # Each entry is ((from_address, to_address), total_value); the total is
        # stored under the 'weight' edge attribute.
        G.add_weighted_edges_from(
            (u, v, w) for (u, v), w in edge_weights.items()
        )

        # Save graph with Pickle
        filename = f"{stablecoin}_{date}.gpickle"
        path = os.path.join(output_dir, filename)
        with open(path, 'wb') as f:
            pickle.dump(G, f)

This cell loads each daily transaction graph per stablecoin, computes node-level metrics (in-degree, out-degree, PageRank, and degree centrality), and stores the results in one Parquet file per stablecoin.

In [None]:
# List the .gpickle files in the daily-graph directory.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# processing order (and therefore the row order of the output) reproducible.
gpickle_files = sorted(f for f in os.listdir(output_dir) if f.endswith('.gpickle'))

# Group files per stablecoin. File names have the form '<stablecoin>_<date>.gpickle';
# the stablecoin is everything before the first underscore.
coin_files = {}
for file in gpickle_files:
    if '_' in file:
        coin = file.split('_', 1)[0]
        coin_files.setdefault(coin, []).append(file)

# Length of the file extension, used to slice the date out of the file name.
suffix_len = len('.gpickle')

# Compute node-level metrics for every daily graph, one output file per stablecoin.
for coin, files in coin_files.items():
    records = []

    for file in tqdm(files, desc=f'Processing {coin}'):
        path = os.path.join(output_dir, file)

        # Load graph.
        # NOTE: pickle.load can execute arbitrary code; only load graph files
        # from a trusted source.
        with open(path, 'rb') as f:
            G = pickle.load(f)

        # Strip exactly one '<coin>_' prefix and one '.gpickle' suffix; the
        # remainder is the date part of the file name.
        date = file[len(coin) + 1:-suffix_len]

        # Metrics (each call returns a value for every node in G)
        in_degrees = dict(G.in_degree())
        out_degrees = dict(G.out_degree())
        pagerank = nx.pagerank(G, alpha=0.85)
        degree_centrality = nx.degree_centrality(G)

        # One record per (date, token, address).
        records.extend(
            {
                'date': date,
                'token': coin,
                'address': node,
                'in_degree': in_degrees[node],
                'out_degree': out_degrees[node],
                'pagerank': pagerank[node],
                'degree_centrality': degree_centrality[node],
            }
            for node in G.nodes()
        )

    # Save metrics as Parquet
    df_metrics = pd.DataFrame(records)
    output_file = os.path.join(metrics_dir, f'{coin}_metrics.parquet')
    df_metrics.to_parquet(output_file, index=False)
    print(f'Saved: {output_file}')


This cell loads all token-specific metric files, merges them into a single DataFrame, and stores the combined result in Parquet format to facilitate global comparative analysis.

In [None]:
# Merge the per-token metric files into a single Parquet file.
combined_name = 'all_metrics.parquet'

# The glob pattern '*_metrics.parquet' also matches the combined output file
# once it exists. Exclude it, otherwise re-running this cell would read the
# previous combined result back in and duplicate every row.
# Sorting gives a deterministic concatenation order.
metric_files = sorted(
    path
    for path in glob(os.path.join(metrics_dir, '*_metrics.parquet'))
    if os.path.basename(path) != combined_name
)

if not metric_files:
    # pd.concat would raise a ValueError on an empty list anyway; fail with a
    # message that says what is actually missing.
    raise ValueError(f"No '*_metrics.parquet' files found in {metrics_dir}")

# Read into a list without rebinding the global `df`, which still holds the
# cleaned transfer table used by the earlier cells.
dfs = [pd.read_parquet(path) for path in metric_files]

df_all = pd.concat(dfs, ignore_index=True)
df_all.to_parquet(os.path.join(metrics_dir, combined_name), index=False)
print("Saved")
