In [2]:

# %%
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# %% [markdown]
# ## 1. Load and Inspect Data  
# Load the CSV into a DataFrame and take a quick look at its shape and columns.

# %%
# Location of the transfers CSV, relative to this notebook; edit to match your layout.
data_path = "../data/pepe-transfers.csv"
df = pd.read_csv(data_path)

# Quick sanity summary: (rows, columns) tuple, then the column names.
print(f"Data shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

# Last expression of the cell: rich display of the first five rows.
df.head(5)

# %% [markdown]
# ## 2. Basic Data Checks  
# - Check for missing values  
# - Inspect top senders/receivers

# %%
# Per-column tally of null entries
null_counts = df.isna().sum()
print(null_counts)

# Most frequent sending and receiving addresses
# (value_counts sorts by frequency, head() keeps the first five)
for heading, address_col in (("Top 5 senders:", 'from_address'),
                             ("Top 5 receivers:", 'to_address')):
    print(heading, df[address_col].value_counts().head(), sep="\n")

# %% [markdown]
# ## 3. Build the Unweighted Graph  
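# We add each sender→receiver pair as a directed edge. Note that `nx.DiGraph` keeps at most one edge per ordered pair, so repeated transfers between the same two addresses collapse into a single edge: degrees count distinct counterparties, not transactions (use `nx.MultiDiGraph` or an edge weight if transaction counts are needed).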

# %%
# Build the directed transfer graph in one vectorized call instead of looping
# over df.iterrows(), which materializes a Series per row and is very slow on
# hundreds of thousands of transfers. The resulting graph is the same.
#
# NOTE: nx.DiGraph stores at most one edge per (sender, receiver) pair, so
# repeated transfers between the same two addresses collapse into a single
# edge; number_of_edges() is the count of distinct ordered pairs.
G = nx.from_pandas_edgelist(
    df,
    source='from_address',
    target='to_address',
    create_using=nx.DiGraph,
)
print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

# %% [markdown]
# ## 4. Initial Network Statistics  
# Compute basic stats: density, components.

# %%
# Report each summary statistic of G as "<label> <value>", in a fixed order:
# edge density first, then the weakly / strongly connected component counts.
for description, metric in (
    ("Density:", nx.density),
    ("Number of weakly connected components:", nx.number_weakly_connected_components),
    ("Number of strongly connected components:", nx.number_strongly_connected_components),
):
    print(description, metric(G))

# %% [markdown]
# ## Next Steps  
# 1. Plot degree distributions  
# 2. Compute centralities  
# 3. Explore clustering and assortativity


Data shape: (800955, 10)
Columns: ['block_number', 'timestamp', 'date', 'from_address', 'to_address', 'value_token', 'value_base', 'transaction_index', 'log_index', 'transaction_hash']
block_number         0
timestamp            0
date                 0
from_address         0
to_address           0
value_token          0
value_base           0
transaction_index    0
log_index            0
transaction_hash     0
dtype: int64
Top 5 senders:
from_address
0xa43fe16908251ee70ef74718545e4fe6c5ccec9f    159319
0x11950d141ecb863f01007add7d1a342041227b58    129394
0x74de5d4fcbf63e00296fd95d33236b9794016631     75357
0xf239009a101b6b930a527deaab6961b6e7dec8a6     63499
0xe66b31678d6c16e9ebf358268a790b763c133750     14345
Name: count, dtype: int64
Top 5 receivers:
to_address
0xa43fe16908251ee70ef74718545e4fe6c5ccec9f    159319
0x11950d141ecb863f01007add7d1a342041227b58    129394
0x74de5d4fcbf63e00296fd95d33236b9794016631     75357
0xf239009a101b6b930a527deaab6961b6e7dec8a6     63499
0xe66b31678d6