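This notebook constructs daily directed transaction graphs for each stablecoin from cleaned ERC20 transfer data. Each node represents a wallet address, and each directed edge represents the token transfers from one address to another on that day, weighted by the total amount transferred (multiple transfers between the same pair of addresses are summed into a single edge). The notebook then computes per-address metrics (in-degree, out-degree, PageRank, degree centrality) for every daily graph, merges them into a single CSV, and looks up the metrics snapshot for the date of a market event.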


In [None]:
import pandas as pd
import networkx as nx
import os
import pickle

# Input: the cleaned ERC20 transfer table. The columns read below are
# 'stablecoin', 'date', 'from_address', 'to_address' and 'value'.
# Output: one pickled nx.DiGraph per (stablecoin, date) pair.
input_file = '../data/cleaned/token_transfers_cleaned.csv'
output_dir = '../data/graphs/daily'
os.makedirs(output_dir, exist_ok=True)

df = pd.read_csv(input_file)

# Build one directed graph per token and day.
# A single groupby visits each (stablecoin, date) slice once instead of
# re-filtering the whole frame for every token and every day. sort=False keeps
# groups in first-appearance order. Rows whose token or date is missing are
# skipped (groupby drops NaN keys), so no '<token>_nan.gpickle' file is written.
for (stablecoin, date), df_day in df.groupby(['stablecoin', 'date'], sort=False):
    # Collapse repeated transfers between the same ordered pair of addresses
    # into one edge whose weight is the summed 'value'. This replaces the
    # row-by-row iterrows() accumulation with one vectorised aggregation.
    # sort=False preserves the order in which each pair first appears, so
    # nodes are inserted into the graph in the same order as before.
    # Rows with a missing sender or receiver are dropped by the groupby.
    edge_weights = (
        df_day
        .groupby(['from_address', 'to_address'], sort=False)['value']
        .sum()
    )

    G = nx.DiGraph()
    G.add_weighted_edges_from(
        (sender, receiver, total)
        for (sender, receiver), total in edge_weights.items()
    )

    # Save graph with pickle. The '<token>_<date>.gpickle' naming is what the
    # metrics cell parses to recover the token and the date.
    filename = f"{stablecoin}_{date}.gpickle"
    path = os.path.join(output_dir, filename)
    with open(path, 'wb') as f:
        pickle.dump(G, f)


In [None]:
from tqdm import tqdm

# Reads every daily graph pickle and writes one per-node metrics CSV per
# stablecoin ('<token>_metrics.csv').
input_dir = '../data/graphs/daily'
output_dir = '../data/graphs/metrics'
os.makedirs(output_dir, exist_ok=True)

# List .gpickle files. os.listdir returns entries in arbitrary order, so the
# listing is sorted to make the row order of the output CSVs reproducible
# (within a token this is chronological when dates are ISO 'YYYY-MM-DD').
gpickle_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.gpickle'))

# Group files per stablecoin: the token is the filename part before the first
# underscore, so this assumes token names themselves contain no underscore.
token_files = {}
for file in gpickle_files:
    if '_' in file:
        token = file.split('_')[0]
        token_files.setdefault(token, []).append(file)

# Loop over stablecoins
for token, files in token_files.items():
    records = []

    for file in tqdm(files, desc=f'Processing {token}'):
        path = os.path.join(input_dir, file)

        # Load graph with pickle.
        # NOTE: pickle.load can execute arbitrary code; only load files that
        # were produced by this notebook's own graph-building cell.
        with open(path, 'rb') as f:
            G = pickle.load(f)

        # The filename is '<token>_<date>.gpickle'; slice out the date part
        # exactly instead of str.replace, which removes every occurrence.
        date = file[len(token) + 1:-len('.gpickle')]

        # The metrics (each call returns one value per node of G)
        in_degrees = dict(G.in_degree())
        out_degrees = dict(G.out_degree())
        pagerank = nx.pagerank(G, alpha=0.85)
        degree_centrality = nx.degree_centrality(G)

        # One record per (date, address); the frame is built once per token
        # from this list rather than grown row by row.
        records.extend(
            {
                'date': date,
                'address': node,
                'in_degree': in_degrees[node],
                'out_degree': out_degrees[node],
                'pagerank': pagerank[node],
                'degree_centrality': degree_centrality[node],
            }
            for node in G.nodes()
        )

    # Save metrics per stablecoin
    df_metrics = pd.DataFrame(records)
    df_metrics.to_csv(os.path.join(output_dir, f'{token}_metrics.csv'), index=False)


Processing DAI: 100%|██████████| 10/10 [00:02<00:00,  4.67it/s]
Processing PAX: 100%|██████████| 10/10 [00:00<00:00, 95.10it/s]
Processing USDC: 100%|██████████| 10/10 [00:02<00:00,  4.72it/s]
Processing USDT: 100%|██████████| 10/10 [00:03<00:00,  2.62it/s]
Processing UST: 100%|██████████| 10/10 [00:00<00:00, 83.23it/s]
Processing WLUNA: 100%|██████████| 10/10 [00:00<00:00, 98.02it/s]


In [None]:
from glob import glob

# Merges the per-token metrics CSVs into one file, tagging each row with the
# token parsed from its source filename.
input_dir = '../data/graphs/metrics'
output_file = '../data/graphs/all_metrics.csv'

# Find all metric files. glob gives no ordering guarantee, so sort the paths
# to make the row order of the merged CSV reproducible across runs.
metric_files = sorted(glob(os.path.join(input_dir, '*_metrics.csv')))

# Fail with an explicit message instead of pandas' "No objects to concatenate"
# when the metrics cell has not been run yet or wrote nothing.
if not metric_files:
    raise FileNotFoundError(f"No '*_metrics.csv' files found in {input_dir}")

# Store the DataFrames. The token is the filename part before the first
# underscore ('<token>_metrics.csv'). Built with a comprehension so the global
# `df` holding the raw transfers is not overwritten by this cell.
dfs = [
    pd.read_csv(file).assign(token=os.path.basename(file).split('_')[0])
    for file in metric_files
]

# Concatenate the DataFrames
df_all = pd.concat(dfs, ignore_index=True)

df_all.to_csv(output_file, index=False)
print(f"Salvo em: {output_file}")


Salvo em: ../data/graphs/all_metrics.csv


In [None]:
import pandas as pd
from datetime import datetime

# Load the events
event_df = pd.read_csv('../data/event_data.csv', encoding='latin1')


# Convert the Unix timestamp (seconds) to a calendar date (YYYY-MM-DD)
event_df['date'] = pd.to_datetime(event_df['timestamp'], unit='s').dt.date

# Preview
print(event_df.head())

# Load the metrics
metrics_df = pd.read_csv('../data/graphs/all_metrics.csv')
metrics_df['date'] = pd.to_datetime(metrics_df['date']).dt.date  # same type as event_df['date']

# The event file and the graph data label some assets differently: the events
# use 'ustc' and 'lunaterra', while the graphs use 'UST' and 'WLUNA'. Without a
# translation those events can never match a metrics row.
# NOTE(review): these aliases are assumed from the label lists — TODO confirm.
EVENT_TO_METRIC_TOKEN = {'USTC': 'UST', 'LUNATERRA': 'WLUNA'}

# Select the first event as an example
event = event_df.iloc[0]
event_label = event['stablecoin'].upper()  # e.g. 'usdc' -> 'USDC'
event_token = EVENT_TO_METRIC_TOKEN.get(event_label, event_label)
event_date = event['date']


# Normalise label case on both sides, and keep the translated label in its own
# column so the original 'stablecoin' values stay available.
metrics_df['token'] = metrics_df['token'].str.upper()
event_df['stablecoin'] = event_df['stablecoin'].str.upper()
event_df['token'] = event_df['stablecoin'].replace(EVENT_TO_METRIC_TOKEN)

# Filter metrics by the event's token and date
snapshot = metrics_df[(metrics_df['token'] == event_token) & (metrics_df['date'] == event_date)]

print(snapshot.head())

# An empty snapshot is otherwise silent; say whether the token is missing
# altogether or the event date falls outside the days that have graphs.
if snapshot.empty:
    token_dates = metrics_df.loc[metrics_df['token'] == event_token, 'date'].dropna()
    if token_dates.empty:
        print(f"No metrics found for token {event_token!r}.")
    else:
        print(
            f"No metrics for {event_token} on {event_date}; "
            f"available dates run from {token_dates.min()} to {token_dates.max()}."
        )

                                               event   timestamp      type  \
0  BlackRock and Fidelity Back USDC in $400 Milli...  1649721600  positive   
1  Terra UST takes over BUSD to become third larg...  1650412800  positive   
2  LARGE amounts of UST selling on ANCHOR (approx...  1651881600  negative   
3  UST depegs LFG deploys assets to defend peg (7...  1651968000  negative   
4    UST Depegs again to 35 cents LUNA keeps falling  1652054400  negative   

  stablecoin        date  
0       usdc  2022-04-12  
1       ustc  2022-04-20  
2       ustc  2022-05-07  
3       ustc  2022-05-08  
4       ustc  2022-05-09  
Empty DataFrame
Columns: [date, address, in_degree, out_degree, pagerank, degree_centrality, token]
Index: []


In [17]:
# Sanity check: list the distinct asset labels on each side of the comparison
# (metrics tokens vs. event stablecoins) so naming mismatches are visible.
metric_token_labels = metrics_df['token'].unique()
print(metric_token_labels)
event_stablecoin_labels = event_df['stablecoin'].unique()
print(event_stablecoin_labels)


['DAI' 'PAX' 'USDC' 'USDT' 'UST']
['usdc' 'ustc' 'lunaterra' 'usdt' 'unknown' 'dai']
