# Step 1: List all pools

In [8]:
# 📊 Data Handling
import pandas as pd
import numpy as np
import requests

# 📈 Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# ⚙️ Preprocessing & Scaling
from datetime import timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# 📈 Analysis
import scipy.stats as stats
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
  
# 🧠 Modeling
from sklearn.cluster import KMeans, DBSCAN
import hdbscan
from sklearn.metrics import silhouette_score

In [9]:
# Load the pool list (malformed CSV rows are skipped) and order it by
# COUNT(TX_HASH), largest first.
df = (
    pd.read_csv('all_pools.csv', on_bad_lines='skip')
    .sort_values(by='COUNT(TX_HASH)', ascending=False)
)

In [10]:
# Peek at the first row, i.e. the pool with the largest COUNT(TX_HASH) after the descending sort.
df.head(1)

Unnamed: 0,POOL_NAME,PLATFORM,COUNT(TX_HASH)
19361,WETH-USDT 100 1 UNI-V3 LP,uniswap-v3,1448252.0


In [11]:
# Number of rows (pools) per PLATFORM value.
df['PLATFORM'].value_counts()

uniswap-v2        45398
uniswap-v3         7748
uniswap-v4         3908
sushiswap          1874
dodo-v2             782
balancer            476
pancakeswap-v2      438
shibaswap           418
curve               357
hashflow-v3         353
pancakeswap-v3      260
woofi               137
kyberswap-v1         27
verse                22
kyberswap-v2         17
fraxswap             11
dodo-v1              10
trader-joe-v2         1
maverick              1
Name: PLATFORM, dtype: int64

In [12]:
# Assign a coarse `pool_type` label to pools whose POOL_NAME contains a known
# token-pair substring (case-insensitive).
#
# Rules are applied top to bottom, so when two patterns match the same pool the
# LATER rule wins. Keep the order when adding entries.
MAJOR, STABLE, OTHER = "Major Token Pair", "Stable Pair", "Other"

pool_type_rules = [
    # Major token pairs
    ("WETH-USDT", MAJOR),
    ("USDC-WETH", MAJOR),
    ("ETH-USDC", MAJOR),
    ("WBTC-WETH", MAJOR),
    ("WBTC-USDC", MAJOR),
    ("WBTC-USDT", MAJOR),
    ("tBTC-WBTC", MAJOR),
    # Stablecoin pairs
    ("USDC-USDT", STABLE),
    ("crvUSD/USDC", STABLE),
    ("crvUSD/USDT", STABLE),
    ("USDe-USDT", STABLE),
    ("DAI-USDT", STABLE),
    ("DAI-USDC", STABLE),
    ("USDe-USDC", STABLE),
    # Everything below was labelled by hand, one pool at a time
    ("KEKIUS", OTHER),
    ("WHITE-USDC", OTHER),
    ("9279", OTHER),
    ("WBTC-cbBTC", OTHER),
    ("USDC-SEI", OTHER),
    ("PLUME-USDC", OTHER),
    ("DOLO-USDC", OTHER),
    ("PROMPT-USDC", OTHER),
    ("VATAN-USDT", OTHER),
    ("UNI-USDT", OTHER),
    ("CGPT-USDT", OTHER),
    ("LINK-USDC", OTHER),
    ("SWFTC-USDT", OTHER),
    ("WBTC-BADGER", OTHER),
    ("USUALX-USUAL", STABLE),
    ("ENA-USDC", OTHER),
    ("RAD-USDC", OTHER),
    ("PAXG-USDC", OTHER),
    ("USDT-MOCA", OTHER),
    ("UNI-AAVE", OTHER),
    ("ZND-USDT", OTHER),
    ("OMIKAMI-USDC", OTHER),
    ("USDC-STG", OTHER),
    ("deUSD-USDC", STABLE),
    ("USR-USDC", OTHER),
    ("WBTC-LBTC", MAJOR),
    ("FRAX-USDC", STABLE),
    ("AAVE-USDC", OTHER),
    ("cvxCrv/Crv", OTHER),
    ("USD0-USDC", STABLE),
    ("SHFL-USDC", OTHER),
    ("EUROC-USDC", STABLE),
    ("FXS-FRAX", OTHER),
    ("USD0-USDT", STABLE),
    ("OBOL-USDT", OTHER),
    ("DRAGONX-TITANX", OTHER),
    ("VEE-USDT", OTHER),
    ("MMX-USDT", OTHER),
    ("AXL-USDC", OTHER),
    ("KERNEL-USDT", OTHER),
    ("TKX-USDT", OTHER),
]

for pattern, label in pool_type_rules:
    # regex=False: pool names are literal text, so characters such as "." or
    # "+" in a pattern must not be interpreted as regular-expression syntax.
    matches = df["POOL_NAME"].str.contains(pattern, case=False, na=False, regex=False)
    df.loc[matches, "pool_type"] = label

In [13]:
# Second batch of hand-written `pool_type` labels (case-insensitive substring
# match on POOL_NAME). Rules run in order; a later match overrides an earlier one.
#
# NOTE(review): short patterns match as suffixes of longer names. For example
# "SD-USDC" also matches "deUSD-USDC", so any such pool is relabelled "Other"
# here regardless of an earlier "Stable Pair" label - confirm this is intended.
STABLE, OTHER = "Stable Pair", "Other"

pool_type_rules = [
    ("SD-USDC", OTHER),
    ("CPOOL-USDC", OTHER),
    ("USDC-MYTH", OTHER),
    ("G-USDC", OTHER),
    ("USDT-USDC", STABLE),
    ("DOGE-USDT", OTHER),
    ("CAH-USDT", OTHER),
    ("NEAR-USDC", OTHER),
    ("ENS-USDT", OTHER),
    ("ALI-USDC", OTHER),
    ("TARA-USDT", OTHER),
    ("VOLT-TITANX", OTHER),
    ("msUSD/FRAXBP", STABLE),
    ("0x315...784f-USDC", OTHER),
    ("MASK-USDC", OTHER),
]

for pattern, label in pool_type_rules:
    # regex=False makes the match literal. With the default regex matching, the
    # dots in "0x315...784f-USDC" each matched ANY character instead of a literal ".".
    matches = df["POOL_NAME"].str.contains(pattern, case=False, na=False, regex=False)
    df.loc[matches, "pool_type"] = label

In [14]:
# Third batch of hand-written `pool_type` labels (case-insensitive substring
# match on POOL_NAME). Rules run in order; a later match overrides an earlier one.
STABLE, OTHER = "Stable Pair", "Other"

pool_type_rules = [
    ("GHO-USDC", STABLE),
    ("LINK-USDT", OTHER),
    ("RLB-USDC", OTHER),
    ("ENA-sENA", OTHER),
    ("HEX-USDC", OTHER),
    ("GASP-USDT", OTHER),
    ("RLB-USDT", OTHER),
    ("TRX-USDC", OTHER),
    ("PROPC-USDT", OTHER),
    ("TEL-GMRT", OTHER),
    ("USD1-USDC", STABLE),
    ("USDT-USDf", STABLE),
    ("BITCOIN-SPX", OTHER),
    ("33APU-33PEPE-33SPX", OTHER),
    ("USDC-cbBTC", OTHER),
    ("NUMI-USDT", OTHER),
    ("INV-DOLA-DBR", OTHER),
    ("wA7A5-USDT", OTHER),
    ("PEAS-DAI", OTHER),
    ("USDT-TRU", OTHER),
]

for pattern, label in pool_type_rules:
    # regex=False: treat each pattern as literal text rather than a regular expression.
    matches = df["POOL_NAME"].str.contains(pattern, case=False, na=False, regex=False)
    df.loc[matches, "pool_type"] = label

In [15]:
# Fourth batch of hand-written labels: every pool whose POOL_NAME contains one
# of these substrings (case-insensitive) is labelled "Other", overriding any
# label assigned earlier.
other_pool_patterns = [
    "ELX-USDC",
    "XAUt-USDT",
    "USDC-CRV",
    "BKN-USDT",
    "DHN-USDT",
    "ID-USDC",
    "PTC-USDT",
    "cvxCRV-USDC",
    "USDC-SHELL",
    "BLZ-USDC",
    "UNI-WBTC",
    "ASTO-USDC",
    "AAVE-USDT",
    "50COW-50GNO",
    "OZK-USDT",
    "OX-SPX",
    "WAXP-USDT",
    "VR-USDT",
    "IQ-FRAX",
    "REZ-USDC",
    "WBTC-SolvBTC",
    "X28-TITANX",
    "WHITE-USDT",
]

for pattern in other_pool_patterns:
    # regex=False: treat each pattern as literal text rather than a regular expression.
    matches = df["POOL_NAME"].str.contains(pattern, case=False, na=False, regex=False)
    df.loc[matches, "pool_type"] = "Other"

In [16]:
# First three pools that no labelling rule has matched yet.
df.loc[df['pool_type'].isna()].head(3)

Unnamed: 0,POOL_NAME,PLATFORM,COUNT(TX_HASH),pool_type
25700,ETH-USDT 500 10 UNI-V4 LP,uniswap-v4,213905.0,
27459,DAI-WETH 500 10 UNI-V3 LP,uniswap-v3,202293.0,
39877,WETH-SPX,uniswap-v2,178729.0,


In [17]:
# Row count per assigned pool_type label (value_counts leaves out unlabelled NaN rows by default).
df['pool_type'].value_counts()

Other               488
Stable Pair         216
Major Token Pair    151
Name: pool_type, dtype: int64

# Step 2: Filter all pools that belong to Curve

In [19]:
# Pools on the "curve" platform that were labelled "Stable Pair".
on_curve = df['PLATFORM'].eq("curve")
is_stable_pair = df['pool_type'].eq("Stable Pair")
df[on_curve & is_stable_pair]

Unnamed: 0,POOL_NAME,PLATFORM,COUNT(TX_HASH),pool_type
2479,DAI-USDC-USDT,curve,66308.0,Stable Pair
21297,Curve.fi Factory Plain Pool: crvUSD/USDC,curve,63614.0,Stable Pair
20912,Curve.fi Factory Plain Pool: crvUSD/USDT,curve,61180.0,Stable Pair
23227,Curve.fi Pool: DAI-USDC-USDT-sUSD,curve,21424.0,Stable Pair
36014,FRAX-USDC,curve,18776.0,Stable Pair
27482,Curve.fi Factory USD Metapool: msUSD/FRAXBP,curve,13354.0,Stable Pair
32184,RAI-DAI-3Crv-USDC-USDT,curve,2313.0,Stable Pair
42769,aDAI-aUSDT-DAI-USDC-aUSDC-USDT,curve,1054.0,Stable Pair
27088,GUSD-DAI-3Crv-USDC-USDT,curve,846.0,Stable Pair
23622,Curve.fi Pool: cDAI-cUSDC-USDT,curve,586.0,Stable Pair


In [2]:
# query for main metrics
#
# Pulls swap metrics for Curve pools (swap count, volume, slippage, a
# swap-notional figure named avg_tvl_usd, and price volatility) for swaps with
# AMOUNT_IN_USD > 100 between 2025-05-28 and 2025-06-04, into `df_c`.
#
# NOTE(review): every CTE groups by the raw BLOCK_TIMESTAMP as well as
# block_hour, so the result has one row per pool per timestamp rather than one
# row per pool per hour - confirm that this granularity is intended.

import os

from flipside import Flipside

# The API key is read from the environment so it is never committed with the
# notebook. The key that used to be hardcoded here should be considered leaked
# and rotated.
flipside = Flipside(os.environ["FLIPSIDE_API_KEY"], "https://api-v2.flipsidecrypto.xyz")

sql = """
-- 🔧 محاسبه تمام متریک‌ها برای استخرهای Curve روی Ethereum
WITH base_swaps AS (
  SELECT
    DATE_TRUNC('hour', s.BLOCK_TIMESTAMP) AS block_hour,
    s.POOL_NAME,
    s.SYMBOL_IN,
    s.SYMBOL_OUT,
    s.AMOUNT_IN_USD,
    s.AMOUNT_OUT_USD,
    s.AMOUNT_IN,
    s.AMOUNT_OUT,
    s.PLATFORM,
    s.BLOCK_TIMESTAMP as BLOCK_TIMESTAMP
  FROM ethereum.defi.ez_dex_swaps s
  WHERE LOWER(s.PLATFORM) = 'curve'
    AND s.BLOCK_TIMESTAMP BETWEEN TIMESTAMP '2025-05-28' AND TIMESTAMP '2025-06-04'
    AND s.AMOUNT_IN_USD > 100
),

slippage_data AS (
  SELECT
BLOCK_TIMESTAMP,
    block_hour,
    POOL_NAME,
    AMOUNT_IN_USD,
    AMOUNT_OUT_USD,
    (AMOUNT_OUT_USD / AMOUNT_IN_USD) AS price_usd,
    (ABS(AMOUNT_IN_USD - AMOUNT_OUT_USD) / AMOUNT_IN_USD) * 100 AS slippage_perc
  FROM base_swaps
),

price_volatility AS (
  SELECT
    pool_name,
BLOCK_TIMESTAMP,
block_hour,
    MAX(price_usd) AS max_price,
    MIN(price_usd) AS min_price,
    (MAX(price_usd) - MIN(price_usd)) / NULLIF(AVG(price_usd), 0) * 100 AS volatility_perc
  FROM slippage_data
  GROUP BY 1,2,3
),

volume_stats AS (
  SELECT
    POOL_NAME,
BLOCK_TIMESTAMP,
block_hour,
    COUNT(*) AS swap_count,
    SUM(AMOUNT_IN_USD + AMOUNT_OUT_USD) AS total_volume_usd,
    MIN(AMOUNT_IN_USD + AMOUNT_OUT_USD) AS min_depth_usd,
    AVG(slippage_perc) AS avg_slippage,
    MAX(slippage_perc) AS max_slippage
  FROM slippage_data
  GROUP BY 1,2,3
),

tvl_data AS (
  SELECT
    s.POOL_NAME,
s.BLOCK_TIMESTAMP,
s.block_hour,
    ROUND(AVG(s.AMOUNT_IN * p0.PRICE + s.AMOUNT_OUT * p1.PRICE), 2) AS avg_tvl_usd
  FROM base_swaps s
  LEFT JOIN ethereum.price.ez_prices_hourly p0 on s.SYMBOL_IN = p0.SYMBOL 
and DATE_TRUNC('hour', s.BLOCK_TIMESTAMP)= DATE_TRUNC('hour', p0.HOUR)
LEFT JOIN ethereum.price.ez_prices_hourly p1
    ON s.SYMBOL_OUT = p1.SYMBOL 
and DATE_TRUNC('hour', s.BLOCK_TIMESTAMP)= DATE_TRUNC('hour', p1.HOUR)
  GROUP BY 1,2,3
)

-- ✅ Final Merge
SELECT
v.BLOCK_TIMESTAMP,
v.block_hour,
  v.POOL_NAME,
  v.swap_count,
  v.total_volume_usd,
  v.min_depth_usd,
  v.avg_slippage,
  v.max_slippage,
  tvl.avg_tvl_usd,
  ROUND(v.total_volume_usd / NULLIF(tvl.avg_tvl_usd, 0), 2) AS volume_tvl_ratio,
  vol.volatility_perc
FROM volume_stats v
LEFT JOIN tvl_data tvl
  ON LOWER(TRIM(v.POOL_NAME)) = LOWER(TRIM(tvl.POOL_NAME))
 AND v.BLOCK_TIMESTAMP = tvl.BLOCK_TIMESTAMP
 AND v.block_hour = tvl.block_hour
LEFT JOIN price_volatility vol
  ON v.POOL_NAME = vol.pool_name
 AND v.BLOCK_TIMESTAMP = vol.BLOCK_TIMESTAMP
 AND v.block_hour = vol.block_hour
ORDER BY volume_tvl_ratio DESC;
"""
# The final joins match on the time keys as well as the pool name. All three
# CTEs are grouped by (pool, BLOCK_TIMESTAMP, block_hour); joining on the pool
# name alone paired every volume row with every TVL/volatility row of the same
# pool and multiplied the result.

query_result_set = flipside.query(sql)
df_c = pd.DataFrame(query_result_set.records)


In [None]:
# query for TVL
#
# One row per Curve swap (AMOUNT_IN_USD > 100, 2025-05-28 to 2025-06-04) with
# the swap's priced notional (AVG_TVL_USD), the out/in USD ratio (price_usd) and
# slippage percentage, loaded into `df_c_tvl`.

import os

# The API key is read from the environment so it is never committed with the
# notebook. The key that used to be hardcoded here should be considered leaked
# and rotated.
flipside = Flipside(os.environ["FLIPSIDE_API_KEY"], "https://api-v2.flipsidecrypto.xyz")

# p1 prices AMOUNT_OUT, so it is joined on SYMBOL_OUT. It was previously joined
# on SYMBOL_IN, which valued the outgoing amount at the incoming token's price.
sql = """
SELECT
s.BLOCK_TIMESTAMP as BLOCK_TIMESTAMP,
DATE_TRUNC('hour', s.BLOCK_TIMESTAMP) AS BLOCK_HOUR,
s.POOL_NAME as POOL_NAME, 
round((s.AMOUNT_IN * p0.PRICE + s.AMOUNT_OUT * p1.PRICE),2) AS AVG_TVL_USD,
round((s.AMOUNT_OUT_USD / s.AMOUNT_IN_USD),2) as price_usd,
round((ABS(s.AMOUNT_IN_USD - s.AMOUNT_OUT_USD) / s.AMOUNT_IN_USD)*100,4) as slippage_perc
from ethereum.defi.ez_dex_swaps s
left join ethereum.price.ez_prices_hourly p0 on s.SYMBOL_IN = p0.SYMBOL 
and DATE_TRUNC('hour', s.BLOCK_TIMESTAMP)= DATE_TRUNC('hour', p0.HOUR)
left join ethereum.price.ez_prices_hourly p1 on s.SYMBOL_OUT = p1.SYMBOL 
and  DATE_TRUNC('hour', s.BLOCK_TIMESTAMP)= DATE_TRUNC('hour', p1.HOUR)
where s.BLOCK_TIMESTAMP between '2025-05-28' AND TIMESTAMP '2025-06-04'
 and s.AMOUNT_IN_USD > 100
and LOWER(s.PLATFORM) = 'curve'

"""

query_result_set = flipside.query(sql)
df_c_tvl = pd.DataFrame(query_result_set.records)


In [23]:
# temporary
# Load both frames from local CSV files (presumably saved exports of the two
# queries above - confirm). The TVL-derived columns are removed from the main
# metrics frame.
df_c = (
    pd.read_csv('df_main_metrics.csv', on_bad_lines='skip')
    .drop(columns=['AVG_TVL_USD', 'VOLUME_TVL_RATIO'])
)
df_c_tvl = pd.read_csv('df_tvl.csv', on_bad_lines='skip')

In [37]:
# extract date & hour
# Clean both time columns (drop BOM characters and surrounding whitespace) and
# parse them; values that cannot be parsed become NaT.
for time_col in ('BLOCK_TIMESTAMP', 'BLOCK_HOUR'):
    cleaned = df_c[time_col].astype(str).str.replace('\ufeff', '').str.strip()
    df_c[time_col] = pd.to_datetime(cleaned, errors='coerce')

# Calendar date comes from the swap timestamp, hour-of-day from the truncated hour.
df_c['DATE_ONLY'] = df_c['BLOCK_TIMESTAMP'].dt.date
df_c['HOUR_ONLY'] = df_c['BLOCK_HOUR'].dt.hour

# Move the two derived columns to the front, then discard the source columns.
# NOTE: this cell cannot be re-run on its own output because the source columns are gone.
leading = ['DATE_ONLY', 'HOUR_ONLY']
cols = leading + [name for name in df_c.columns if name not in leading]
df_c = df_c[cols].drop(columns=['BLOCK_TIMESTAMP', 'BLOCK_HOUR'])

In [39]:
# extract date & hour
# Clean both time columns (drop BOM characters and surrounding whitespace) and
# parse them; values that cannot be parsed become NaT.
for time_col in ('BLOCK_TIMESTAMP', 'BLOCK_HOUR'):
    cleaned = df_c_tvl[time_col].astype(str).str.replace('\ufeff', '').str.strip()
    df_c_tvl[time_col] = pd.to_datetime(cleaned, errors='coerce')

# Calendar date comes from the swap timestamp, hour-of-day from the truncated hour.
df_c_tvl['DATE_ONLY'] = df_c_tvl['BLOCK_TIMESTAMP'].dt.date
df_c_tvl['HOUR_ONLY'] = df_c_tvl['BLOCK_HOUR'].dt.hour

# Move the two derived columns to the front, then discard the source columns.
# NOTE: this cell cannot be re-run on its own output because the source columns are gone.
leading = ['DATE_ONLY', 'HOUR_ONLY']
cols = leading + [name for name in df_c_tvl.columns if name not in leading]
df_c_tvl = df_c_tvl[cols].drop(columns=['BLOCK_TIMESTAMP', 'BLOCK_HOUR'])

In [38]:
# Preview the main metrics frame after the date/hour split.
df_c.head()

Unnamed: 0,DATE_ONLY,HOUR_ONLY,POOL_NAME,SWAP_COUNT,TOTAL_VOLUME_USD,MIN_DEPTH_USD,AVG_SLIPPAGE,MAX_SLIPPAGE,VOLATILITY_PERC
0,2025-05-28,9.0,Curve.fi Factory Plain Pool: frxETH/WETH,1.0,1488.82,1488.82,0.09399,0.09399,0.0
1,2025-05-30,0.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,8157.66,8157.66,0.057353,0.057353,0.171377
2,2025-05-29,16.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,16002.57,16002.57,0.05038,0.05038,0.171377
3,2025-05-30,10.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,2.0,104496.35,52207.15,0.173977,0.193923,0.171377
4,2025-05-31,11.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2016.25,2016.25,0.040678,0.040678,0.171377


In [22]:
# Number of metric rows per pool.
df_c['POOL_NAME'].value_counts()

Curve.fi Pool: DAI-USDC-USDT-sUSD           74345
Curve.fi Factory Plain Pool: frxETH/WETH    23226
Curve.fi Factory Plain Pool: cvxCRV          2429
Name: POOL_NAME, dtype: int64

In [40]:
# Preview the TVL frame after the date/hour split.
df_c_tvl.head()

Unnamed: 0,DATE_ONLY,HOUR_ONLY,POOL_NAME,AVG_TVL_USD,PRICE_USD,SLIPPAGE_PERC
0,2025-05-30,6.0,Curve.fi Factory Plain Pool: 3EURpool,533.9,0.99,0.7385
1,2025-06-01,4.0,WBTC-WETH-USDT,3394.95,0.99,0.5587
2,2025-06-01,4.0,WBTC-WETH-USDT,3394.94,0.99,0.5587
3,2025-06-01,4.0,DAI-USDC-USDT,70410.19,1.0,0.0307
4,2025-06-01,4.0,DAI-USDC-USDT,70219.05,1.0,0.0307


In [25]:
# Number of TVL rows per pool.
df_c_tvl['POOL_NAME'].value_counts()

WBTC-WETH-USDT                               9916
-OGN-OETH                                    6500
DAI-USDC-USDT                                5603
Curve.fi Pool: DAI-USDC-USDT-sUSD            5125
WETH-CRV-crvUSD                              4096
                                             ... 
rETH-WETH                                       1
Curve.fi Factory Plain Pool: crvUSD/PYUSD       1
FPI-FRAX                                        1
Curve.fi Factory Plain Pool: rETH/wstETH        1
Curve.fi Factory USD Metapool: REUSD/3Crv       1
Name: POOL_NAME, Length: 147, dtype: int64

In [63]:
# Spot check: TVL rows for the sUSD pool on 2025-06-01 at hour 12.
# DATE_ONLY is converted to text so it can be compared with a date string.
df_c_tvl['DATE_ONLY'] = df_c_tvl['DATE_ONLY'].astype(str)
pool_match = df_c_tvl['POOL_NAME'].eq("Curve.fi Pool: DAI-USDC-USDT-sUSD")
date_match = df_c_tvl['DATE_ONLY'].eq('2025-06-01')
hour_match = df_c_tvl['HOUR_ONLY'].eq(12.0)
df_c_tvl[pool_match & date_match & hour_match]

Unnamed: 0,DATE_ONLY,HOUR_ONLY,POOL_NAME,AVG_TVL_USD,PRICE_USD,SLIPPAGE_PERC
29182,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1949.36,1.0,0.0665
47536,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,,1.0,0.0289
47583,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,,1.0,0.0026


In [62]:
# Spot check: main-metric rows for the sUSD pool on 2025-06-01 at hour 12.
# DATE_ONLY is converted to text so it can be compared with a date string.
# NOTE(review): the rows shown in the saved output repeat and differ only in
# VOLATILITY_PERC, which looks like a join fan-out upstream - confirm.
df_c['DATE_ONLY'] = df_c['DATE_ONLY'].astype(str)
pool_match = df_c['POOL_NAME'].eq("Curve.fi Pool: DAI-USDC-USDT-sUSD")
date_match = df_c['DATE_ONLY'].eq('2025-06-01')
hour_match = df_c['HOUR_ONLY'].eq(12.0)
df_c[pool_match & date_match & hour_match]

Unnamed: 0,DATE_ONLY,HOUR_ONLY,POOL_NAME,SWAP_COUNT,TOTAL_VOLUME_USD,MIN_DEPTH_USD,AVG_SLIPPAGE,MAX_SLIPPAGE,VOLATILITY_PERC
28,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.171377
318,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.171377
392,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.845399
436,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.845399
686,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.171377
...,...,...,...,...,...,...,...,...,...
98453,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.190249
98767,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.190249
98786,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.079199
99258,2025-06-01,12.0,Curve.fi Pool: DAI-USDC-USDT-sUSD,1.0,2009.87,2009.87,0.028862,0.028862,0.312569


In [64]:
# Column dtypes and non-null counts for the TVL frame.
df_c_tvl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49020 entries, 0 to 49019
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   DATE_ONLY      49020 non-null  object 
 1   HOUR_ONLY      49019 non-null  float64
 2   POOL_NAME      49019 non-null  object 
 3   AVG_TVL_USD    43314 non-null  float64
 4   PRICE_USD      48917 non-null  float64
 5   SLIPPAGE_PERC  48917 non-null  float64
dtypes: float64(4), object(2)
memory usage: 2.2+ MB
