In [1]:
%load_ext autoreload
%autoreload 2

import plotly.io as pio
pio.renderers.default = 'iframe'

In [2]:
import pandas as pd
import plotly.express as px
import numpy as np
import pandas as pd
import re
from datetime import timedelta
import plotly.express as px

from op_analytics.datasources.defillama.dataaccess import DefiLlama

import urllib3
import warnings
# Render floats with three decimal places in DataFrame/Series output.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# Silence urllib3 warnings.
urllib3.disable_warnings()
# NOTE(review): this suppresses every Python warning for the rest of the session,
# including pandas deprecation warnings — confirm that is intended.
warnings.filterwarnings("ignore")

In [117]:
# Regexes applied case-insensitively (via re.search) to chain names; any match
# excludes the row from the analysis.
PATTERNS_TO_FILTER = [
    # Suffix-style pseudo-chains, e.g. "<chain>-borrowed".
    "-borrowed",
    "-vesting",
    "-staking",
    "-pool2",
    "-treasury",
    "-cex",
    # Exact pseudo-chain names.
    "^treasury$",
    "^borrowed$",
    "^staking$",
    # "^pool2$" used to be listed twice; the duplicate was removed.
    # NOTE(review): every other suffix above has an exact-match twin except
    # "vesting" — confirm whether "^vesting$" was the intended second entry.
    "^pool2$",
    "polygon-bridge-&-staking",
    ".*-cex$",
]

# Protocol categories that are dropped entirely.
CATEGORIES_TO_FILTER = ["CEX", "Chain"]

# Chains grouped by their alignment label; list order fixes the row order of alignment_df.
_OP_STACK_FORKS = [
    "Metis", "Blast", "Mantle", "Zircuit", "RSS3", "Rollux", "Ancient8", "Manta",
]
_OP_CHAINS = [
    "Cyber", "Mint", "Ham", "Polynomial", "Lisk", "BOB", "Mode", "World Chain",
    "Base", "Kroma", "Boba", "Fraxtal", "Optimism", "Shape", "Zora", "Ink", "Swell",
]

# chain name -> alignment label
alignment_dict = {
    **dict.fromkeys(_OP_STACK_FORKS, "OP Stack fork"),
    **dict.fromkeys(_OP_CHAINS, "OP Chain"),
}

# Two-column lookup frame used to left-join the alignment onto the TVL rows.
alignment_df = pd.DataFrame(
    {
        "chain": list(alignment_dict.keys()),
        "alignment": list(alignment_dict.values()),
    }
)

# (token_category, symbols) groups, in the order their rows appear in token_data.
_TOKEN_GROUPS = [
    # Native Assets
    ("Native Asset", ["ETH", "WETH", "SOL"]),
    # Wrapped Assets
    ("Wrapped Assets", ["WBTC", "CBBTC", "MBTC"]),
    # ETH Liquid Staking
    ("Liquid Staking", [
        "STETH", "WSTETH", "SFRXETH", "RETH", "METH", "CBETH", "SWETH", "FRXETH",
        "ETHX", "LSETH", "OETH", "SUPEROETHB", "WSUPEROETHB", "TETH", "OSETH",
    ]),
    # ETH Liquid Restaking
    ("Liquid Restaking", [
        "EETH", "WEETH", "RSETH", "EZETH", "RSWETH", "CMETH", "WRSETH",
        "WEETH.BASE", "LBTC", "EBTC", "AGETH", "SWBTC",
    ]),
    # Solana Liquid Staking
    ("Liquid Staking", [
        "MSOL", "JUPSOL", "BNSOL", "LAINESOL", "STSOL", "STRONGSOL", "HUBSOL",
        "PATHSOL", "STEPSOL", "EDGESOL", "JITOSOL", "DSOL", "BONKSOL", "VSOL", "HSOL",
    ]),
    # Solana Liquid Restaking
    ("Liquid Restaking", ["SSOL", "BBSOL"]),
    # Stablecoins
    ("Stablecoins", [
        "USDC", "USDT", "FDUSD", "PYUSD", "TUSD", "DAI", "USDE", "USDD", "FRAX",
        "EURC", "AGEUR", "USDS", "USDB", "DOLA", "SUSDE", "USD0++", "USD0", "SUSD",
        "CRVUSD", "USDC+", "USDZ", "STAR", "USDBC", "USD+", "CDXUSD", "HYUSD",
        "AXLEUROC", "EURS",
    ]),
]

# One record per token symbol, flattened from the groups above.
token_data = [
    {"token": symbol, "token_category": category}
    for category, symbols in _TOKEN_GROUPS
    for symbol in symbols
]

# Lookup frame with upper-cased symbols, used to label TVL rows by token category.
token_categories = pd.DataFrame(token_data).assign(
    token=lambda frame: frame["token"].str.upper()
)


# Maps a raw protocol category to the coarser bucket stored in
# `protocol_category_mapped`. Categories missing from this dict keep their
# original name (see the fallback applied right after the `.map` call).
# NOTE(review): both "Liquid Staking" and "Staking/Liquid Staking" appear as
# target buckets — confirm that two separate buckets are intended.
# NOTE(review): "CEX" and "Chain" are also listed in CATEGORIES_TO_FILTER, so
# their entries here presumably never apply to the filtered data — verify.
mapping = {
    "Dexes": "Dexes",
    "Liquidity manager": "Yield",
    "Derivatives": "Derivatives",
    "Yield Aggregator": "Yield",
    "Indexes": "Yield",
    "Bridge": "Bridge",
    "Leveraged Farming": "Yield",
    "Cross Chain": "Bridge",
    "CDP": "Lending",
    "Farm": "Yield",
    "Options": "Other Trading",
    "DCA Tools": "Other Trading",
    "Services": "TradFi/Fintech",
    "Chain": "TradFi/Fintech",
    "Privacy": "TradFi/Fintech",
    "RWA": "TradFi/Fintech",
    "Payments": "TradFi/Fintech",
    "Launchpad": "TradFi/Fintech",
    "Synthetics": "Derivatives",
    "SoFi": "TradFi/Fintech",
    "Prediction Market": "Other Trading",
    "Token Locker": "Yield",
    "Yield Lottery": "Yield",
    "Algo-Stables": "Stablecoins",
    "DEX Aggregator": "Dexes",
    "Liquid Restaking": "Restaking/Liquid Restaking",
    "Governance Incentives": "Yield",
    "Restaking": "Restaking/Liquid Restaking",
    "Liquid Staking": "Liquid Staking",
    "Uncollateralized Lending": "Lending",
    "Managed Token Pools": "Other Trading",
    "Insurance": "TradFi/Fintech",
    "NFT Marketplace": "Other Trading",
    "NFT Lending": "Lending",
    "Options Vault": "Other Trading",
    "NftFi": "Other Trading",
    "Basis Trading": "Other Trading",
    "Bug Bounty": "TradFi/Fintech",
    "OTC Marketplace": "Other Trading",
    "Reserve Currency": "Stablecoins",
    "Gaming": "Other",
    "AI Agents": "TradFi/Fintech",
    "Treasury Manager": "TradFi/Fintech",
    "CDP Manager": "Lending",
    "Decentralized Stablecoin": "Stablecoins",
    "Restaked BTC": "Restaking/Liquid Restaking",
    "RWA Lending": "Lending",
    "Staking Pool": "Staking/Liquid Staking",
    "CeDeFi": "TradFi/Fintech",
    "Staking": "Staking/Liquid Staking",
    "Oracle": "Other",
    "Ponzi": "Other",
    "Anchor BTC": "Other",
    "Decentralized BTC": "Other",
    "CEX": "Other",
    "Lending": "Lending"
}


In [118]:
from op_analytics.coreutils.duckdb_inmem.client import init_client
from op_analytics.coreutils.duckdb_inmem.localcopy import dump_local_copy, load_local_copy
from op_analytics.datasources.defillama.dataaccess import DefiLlama

# duckdb_client = init_client()
# Create the in-memory DuckDB context; `client` is the handle the SQL cells below query through.
ctx = init_client()
client = ctx.client

- Pull this data fresh; it should be okay to leave the protocol metadata date as-is.
- I would use "2024-11-30" as your latest date, since we ran into a few data issues with more recent data.
- Make sure your secrets are up to date; Pedro updated them on Dec 2nd to work with GCS.
- There could be lingering data issues, but Pedro addressed a bunch of them today.

In [119]:
# Read the protocol/token TVL dataset from 2025-01-07 onward; the returned value
# is interpolated into the query below as the relation to select from.
view1 = DefiLlama.PROTOCOLS_TOKEN_TVL.read(min_date="2025-01-07")

# Pull the TVL columns used downstream into a pandas DataFrame.
df_protocol_tvl = client.sql(
f"""
SELECT
    dt,
    protocol_slug,
    chain,
    token,
    app_token_tvl,
    app_token_tvl_usd
FROM {view1}
""").to_df()

[2m2025-01-10 14:41:39[0m [[32m[1minfo     [0m] [1mReading data from 'defillama/protocols_token_tvl_v1' with filters min_date=2025-01-07, max_date=None, date_range_spec=None[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m174[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:41:39[0m [[32m[1minfo     [0m] [1mquerying markers for 'defillama/protocols_token_tvl_v1' DateFilter(min_date=datetime.date(2025, 1, 7), max_date=None, datevals=None)[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m101[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:41:39[0m [[32m[1minfo     [0m] [1m14 markers found              [0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m115[0m [36mmax_dt[0m=[35m2025-01-10[0m [36mmin_dt[0m=[35m2025-01-07[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:41:39[0m [[32m[1minfo     [0m] [1m4 distinct paths              [0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m12

In [120]:
# Read the protocol metadata dataset from 2025-01-09 onward; the returned value
# is interpolated into the query below as the relation to select from.
view2 = DefiLlama.PROTOCOLS_METADATA.read(min_date="2025-01-09")

# Select the descriptive columns and convert the textual misrepresented_tokens
# flag to 0/1; any value other than 'True' becomes 0.
# NOTE(review): `dt` is not selected, so if several metadata dates are returned a
# protocol whose attributes changed would appear more than once — verify.
df_metadata = client.sql(
f"""
SELECT 
    protocol_name,
    protocol_slug,
    protocol_category,
    parent_protocol,
    CASE WHEN misrepresented_tokens = 'True' THEN 1
        WHEN misrepresented_tokens = 'False' THEN 0
        ELSE 0
    END AS misrepresented_tokens
FROM {view2}
""").to_df()

[2m2025-01-10 14:41:42[0m [[32m[1minfo     [0m] [1mReading data from 'defillama/protocols_metadata_v1' with filters min_date=2025-01-09, max_date=None, date_range_spec=None[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m174[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:41:42[0m [[32m[1minfo     [0m] [1mquerying markers for 'defillama/protocols_metadata_v1' DateFilter(min_date=datetime.date(2025, 1, 9), max_date=None, datevals=None)[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m101[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:41:42[0m [[32m[1minfo     [0m] [1m3 markers found               [0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m115[0m [36mmax_dt[0m=[35m2025-01-10[0m [36mmin_dt[0m=[35m2025-01-10[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:41:42[0m [[32m[1minfo     [0m] [1m1 distinct paths              [0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m125

In [121]:

# duckdb_client = DefiLlama.PROTOCOLS_TOKEN_TVL.read(min_date="2024-10-01")

# print(duckdb_client.sql("DESCRIBE protocols_token_tvl_v1"))

In [122]:
# duckdb_client = DefiLlama.PROTOCOLS_METADATA.read(min_date="2024-12-03")

# df_metadata = duckdb_client.sql(
# """
# SELECT 
#     *
# FROM protocols_metadata_v1
# """).to_df()

In [123]:
df_metadata

Unnamed: 0,protocol_name,protocol_slug,protocol_category,parent_protocol,misrepresented_tokens
0,0.exchange,0.exchange,Dexes,0.exchange,0
1,01,01,Derivatives,01,0
2,0vix,0vix,Lending,0vix,0
3,0x,0x,DEX Aggregator,0x,0
4,0x Aggregator,0x-aggregator,DEX Aggregator,0x-aggregator,0
...,...,...,...,...,...
5356,Zunami Protocol,zunami-protocol,Yield Aggregator,zunami-protocol,1
5357,Zyberswap AMM,zyberswap-amm,Dexes,zyberswap,1
5358,ZyberSwap Stableswap,zyberswap-stableswap,Dexes,zyberswap,0
5359,Zyberswap V3,zyberswap-v3,Dexes,zyberswap,0


In [124]:
# De-duplicate both inputs before joining: an ongoing data upload issue produces repeated rows.
tvl_rows = df_protocol_tvl.drop_duplicates()
metadata_rows = df_metadata.drop_duplicates()

# Keep every TVL row and attach protocol metadata where the slug matches.
df_all = tvl_rows.merge(metadata_rows, how="left", on="protocol_slug")


In [125]:
# Merge data and join alignment and token categories; rows without an entry
# in either lookup are labelled "Other".
df_all = pd.merge(df_all, alignment_df, on="chain", how="left")
df_all["alignment"] = df_all["alignment"].fillna("Other")

# token_categories holds upper-cased symbols, so join on an upper-cased copy of
# `token`. A case-sensitive join would send any mixed-case symbol to "Other".
# The `token` column itself is left untouched here.
df_all = (
    df_all.assign(_token_key=df_all["token"].str.upper())
    .merge(
        token_categories.rename(columns={"token": "_token_key"}),
        on="_token_key",
        how="left",
    )
    .drop(columns="_token_key")
)
df_all["token_category"] = df_all["token_category"].fillna("Other")


In [126]:
# Chain level misrepresented tokens
# Evaluated on the day before the latest date present in df_all.
snapshot_dt = df_all["dt"].max() - pd.Timedelta(days=1)
misrep_keys = ["protocol_slug", "chain", "misrepresented_tokens"]

df_misrep = (
    df_all.loc[df_all["dt"] == snapshot_dt, misrep_keys + ["token"]]
    .groupby(misrep_keys)
    .agg(
        token_count=("token", "nunique"),
        has_usdt=("token", lambda x: 1 if "USDT" in x.values else 0),
    )
    .reset_index()
)

# Flag a (protocol, chain) pair when the protocol is marked as misrepresenting
# tokens and USDT is the only token reported for it on that chain.
only_usdt = (df_misrep["token_count"] == 1) & (df_misrep["has_usdt"] == 1)
df_misrep["chain_misrepresented_tokens"] = (
    (df_misrep["misrepresented_tokens"] == 1) & only_usdt
).astype(int)

# Bring the flag back onto every TVL row of that (protocol, chain) pair.
flag_cols = ["protocol_slug", "chain", "chain_misrepresented_tokens"]
df_all = df_all.merge(df_misrep[flag_cols], on=["protocol_slug", "chain"], how="left")

In [127]:
# remove protocols and chains

def matches_filter_pattern(s, patterns=None):
    """Return True when ``s`` matches any of the filter regexes, ignoring case.

    Args:
        s: String to test; this notebook applies it to chain names.
        patterns: Optional iterable of regex strings. When omitted, the
            module-level ``PATTERNS_TO_FILTER`` list is used.

    Returns:
        bool: True if ``re.search`` finds at least one pattern in ``s``.
    """
    if patterns is None:
        patterns = PATTERNS_TO_FILTER
    return any(re.search(pattern, s, re.IGNORECASE) for pattern in patterns)

# Chain values are coerced to str so the regex helper can be applied to every row.
df_all["chain"] = df_all["chain"].astype(str)

protocol_key_cols = ["chain", "protocol_slug", "protocol_category"]
df_chain_protocol = df_all[protocol_key_cols].drop_duplicates()

# A (chain, protocol) pair is excluded when any one of these conditions holds.
chain_is_filtered = df_chain_protocol["chain"].apply(matches_filter_pattern)
slug_is_polygon_bridge = df_chain_protocol["protocol_slug"] == "polygon-bridge-&-staking"
slug_is_cex = df_chain_protocol["protocol_slug"].str.endswith("-cex")
category_is_filtered = df_chain_protocol["protocol_category"].isin(CATEGORIES_TO_FILTER)

df_chain_protocol["protocol_filters"] = (
    chain_is_filtered | slug_is_polygon_bridge | slug_is_cex | category_is_filtered
).astype(int)

# small subset for analysis, actual logic will include more (all?) chains
# df_chain_protocol["chains_to_keep"] = (
#     # (df_all.alignment.isin(["OP Chain", "OP Stack Fork"]) |
#      df_all.chain.isin(["Ethereum", "Base", "Optimism", "Arbitrum", "Solana", "Polygon", "Sui", "Binance", "Avalanche", "Tron"])
#     ).astype(int)

# filter_mask = (df_chain_protocol.protocol_filters == 0) & (df_chain_protocol.chains_to_keep == 1)
filter_mask = df_chain_protocol["protocol_filters"] == 0

# Inner join keeps only the TVL rows whose (chain, protocol, category) key survived the filter.
df_filtered = df_all.merge(
    df_chain_protocol.loc[filter_mask, protocol_key_cols],
    on=protocol_key_cols,
    how="inner",
)



In [128]:
# misc data processing
# Normalise types and labels in one pass: parse dates, strip the "parent#"
# prefix, upper-case token symbols, and default missing token categories.
df_filtered = df_filtered.assign(
    dt=pd.to_datetime(df_filtered["dt"]),
    parent_protocol=df_filtered["parent_protocol"].str.replace("parent#", ""),
    token=df_filtered["token"].str.upper(),
    token_category=df_filtered["token_category"].fillna("Other"),
)

# Rows flagged as misrepresented get their own bucket; all others keep their token category.
is_misrepresented = df_filtered["chain_misrepresented_tokens"] == 1
df_filtered["token_category_misrep"] = np.where(
    is_misrepresented,
    "Misrepresented TVL",
    df_filtered["token_category"],
)

In [129]:
# Map raw categories to coarse buckets; categories absent from `mapping` keep their raw name.
raw_category = df_filtered["protocol_category"]
df_filtered["protocol_category_mapped"] = raw_category.map(mapping, na_action="ignore").fillna(raw_category)


In [130]:
df_filtered

Unnamed: 0,dt,protocol_slug,chain,token,app_token_tvl,app_token_tvl_usd,protocol_name,protocol_category,parent_protocol,misrepresented_tokens,alignment,token_category,chain_misrepresented_tokens,token_category_misrep,protocol_category_mapped
0,2025-01-07,0.exchange,Avalanche,WAVAX,2383.305,104341.087,0.exchange,Dexes,0.exchange,0,Other,Other,0.000,Other,Dexes
1,2025-01-07,0.exchange,Binance,BUSD,5170.337,5193.611,0.exchange,Dexes,0.exchange,0,Other,Other,0.000,Other,Dexes
2,2025-01-07,0.exchange,Binance,WBNB,41.540,30328.799,0.exchange,Dexes,0.exchange,0,Other,Other,0.000,Other,Dexes
3,2025-01-07,0.exchange,Polygon,WPOL,2497.214,1315.619,0.exchange,Dexes,0.exchange,0,Other,Other,0.000,Other,Dexes
4,2025-01-07,01,Solana,ETH,0.134,492.525,01,Derivatives,01,0,Other,Native Asset,0.000,Native Asset,Derivatives
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296609,2025-01-10,zyberswap-v3,Optimism,USDC,0.000,0.000,Zyberswap V3,Dexes,zyberswap,0,OP Chain,Stablecoins,0.000,Stablecoins,Dexes
296610,2025-01-10,zyberswap-v3,Optimism,USDC.E,256.522,256.511,Zyberswap V3,Dexes,zyberswap,0,OP Chain,Other,0.000,Other,Dexes
296611,2025-01-10,zyberswap-v3,Optimism,USDT,101.071,101.052,Zyberswap V3,Dexes,zyberswap,0,OP Chain,Stablecoins,0.000,Stablecoins,Dexes
296612,2025-01-10,zyberswap-v3,Optimism,WBTC,0.000,0.001,Zyberswap V3,Dexes,zyberswap,0,OP Chain,Wrapped Assets,0.000,Wrapped Assets,Dexes


In [131]:
# Inspect the rows for Base on 2025-01-08.
is_base = df_filtered["chain"] == "Base"
is_jan_8 = df_filtered["dt"] == "2025-01-08"
df_filtered[is_base & is_jan_8]



Unnamed: 0,dt,protocol_slug,chain,token,app_token_tvl,app_token_tvl_usd,protocol_name,protocol_category,parent_protocol,misrepresented_tokens,alignment,token_category,chain_misrepresented_tokens,token_category_misrep,protocol_category_mapped
75740,2025-01-08,9mm-v3,Base,AERO,0.000,0.000,9mm V3,Dexes,9mm,0,OP Chain,Other,0.000,Other,Dexes
75741,2025-01-08,9mm-v3,Base,AYB,44609400.908,16.048,9mm V3,Dexes,9mm,0,OP Chain,Other,0.000,Other,Dexes
75742,2025-01-08,9mm-v3,Base,BASE,39628.814,0.327,9mm V3,Dexes,9mm,0,OP Chain,Other,0.000,Other,Dexes
75743,2025-01-08,9mm-v3,Base,BASEAI,0.000,0.000,9mm V3,Dexes,9mm,0,OP Chain,Other,0.000,Other,Dexes
75744,2025-01-08,9mm-v3,Base,BASED,7934.199,3.716,9mm V3,Dexes,9mm,0,OP Chain,Other,0.000,Other,Dexes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150278,2025-01-08,zerolend,Base,USDZ,9649.862,9373.287,ZeroLend,Lending,zerolend,0,OP Chain,Stablecoins,0.000,Stablecoins,Lending
150279,2025-01-08,zerolend,Base,WETH,19.111,64602.438,ZeroLend,Lending,zerolend,0,OP Chain,Native Asset,0.000,Native Asset,Lending
150280,2025-01-08,zerolend,Base,WSUPEROETHB,26.113,91771.074,ZeroLend,Lending,zerolend,0,OP Chain,Liquid Staking,0.000,Liquid Staking,Lending
150281,2025-01-08,zerolend,Base,XUSDZ,883861.766,874114.538,ZeroLend,Lending,zerolend,0,OP Chain,Other,0.000,Other,Lending


In [132]:
from op_analytics.datapipeline.models.code.defillama_filter_logic import process_defillama_data

In [145]:
polars_df = process_defillama_data()

[2m2025-01-10 14:42:20[0m [[32m[1minfo     [0m] [1mReading data from 'defillama/protocols_token_tvl_v1' with filters min_date=2025-01-07, max_date=None, date_range_spec=None[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m174[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:42:20[0m [[32m[1minfo     [0m] [1mquerying markers for 'defillama/protocols_token_tvl_v1' DateFilter(min_date=datetime.date(2025, 1, 7), max_date=None, datevals=None)[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m101[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:42:20[0m [[32m[1minfo     [0m] [1m14 markers found              [0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m115[0m [36mmax_dt[0m=[35m2025-01-10[0m [36mmin_dt[0m=[35m2025-01-07[0m [36mprocess[0m=[35m77273[0m
[2m2025-01-10 14:42:20[0m [[32m[1minfo     [0m] [1m4 distinct paths              [0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m12

In [146]:
df_tvl_breakdown = polars_df.df_tvl_breakdown.to_pandas()

In [148]:
# Rename the flag column — presumably to match the column name used in
# df_tvl_breakdown for the comparison further down; verify against that frame.
df_filtered = df_filtered.rename(columns={"chain_misrepresented_tokens": "protocol_misrepresented_tokens"})

In [168]:
# Drop the mapped-category column from df_filtered (in place).
# NOTE(review): later cells in this notebook still filter on
# `protocol_category_mapped`, so they fail if run after this cell — confirm the
# intended execution order.
del df_filtered["protocol_category_mapped"]

In [195]:
# eval_cols = [
#     "protocol_name",
#     "protocol_slug", 
#     "protocol_category",
#     "parent_protocol", 
#     "misrepresented_tokens",
#     "dt",
#     "chain",
#     "token",
#     "alignment",
#     "token_category",
#     "token_category_misrep"
# ]

# merged = pd.merge(
#     df_tvl_breakdown,
#     df_filtered,
    
#     on=eval_cols,
#     how="left",
#      suffixes=("_polars", "_pandas")

    
# )


In [190]:
# Put both frames into a deterministic row order (sorted by every column, fresh 0..n-1 index)
# so they can be compared position by position.
df_filtered = df_filtered.sort_values(by=df_filtered.columns.tolist(), ignore_index=True)
df_tvl_breakdown = df_tvl_breakdown.sort_values(by=df_tvl_breakdown.columns.tolist(), ignore_index=True)

In [193]:
# DataFrame.compare needs both frames to carry identical row and column labels;
# the result lists only the cells that differ, so an empty frame means the two match.
df_diff = df_filtered.compare(df_tvl_breakdown)

In [194]:
df_diff

In [16]:
# df_filtered.loc[(df_filtered.chains_to_keep == 0), "chain"] = "Other"

In [17]:
# df_filtered["chain_list"] = np.where((df_filtered.chains_to_keep == 1), df_filtered["chain"], "Other")

In [27]:
# Share of Dexes / Other Trading / Bridge TVL held by each selected chain on 2024-12-01.
# The per-chain totals are computed once and reused for the denominator
# (previously the whole filter + groupby expression was written out twice).
trading_tvl_by_chain = (
    df_filtered[
        (df_filtered.dt == "2024-12-01")
        & (df_filtered.chain.isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        # & (df_filtered.parent_protocol != "hyperliquid-bridge")
        & (df_filtered.protocol_category_mapped.isin(["Dexes", "Other Trading", "Bridge"]))
    ]
    .groupby(["chain"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

trading_tvl_by_chain.app_token_tvl_usd / trading_tvl_by_chain.app_token_tvl_usd.sum()

0   0.238
1   0.243
2   0.004
3   0.026
4   0.072
5   0.368
6   0.048
Name: app_token_tvl_usd, dtype: float64

In [20]:
# Dexes / Other Trading / Bridge TVL per selected chain on 2024-12-01,
# excluding the hyperliquid-bridge parent protocol.
# The bucket label is "Bridge" (as produced by `mapping`); "Bridges" never matched.
(
    df_filtered[
        (df_filtered.dt == "2024-12-01")
        & (df_filtered.chain.isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        & (df_filtered.parent_protocol != "hyperliquid-bridge")
        & (df_filtered.protocol_category_mapped.isin(["Dexes", "Other Trading", "Bridge"]))
    ]
    .groupby(["chain"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

Unnamed: 0,chain,app_token_tvl_usd


In [41]:
# Dexes TVL per selected chain on 2024-12-17.
dex_rows = df_filtered.loc[
    (df_filtered["dt"] == "2024-12-17")
    & df_filtered["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"])
    # & (df_filtered.protocol_category != "bridge")
    & (df_filtered["protocol_category_mapped"] == "Dexes")
]
dex_rows.groupby(["chain", "protocol_category_mapped"]).agg({"app_token_tvl_usd": "sum"}).reset_index()

Unnamed: 0,chain,protocol_category_mapped,app_token_tvl_usd
0,Arbitrum,Dexes,720462286.383
1,Base,Dexes,2503992219.176
2,Mode,Dexes,51636266.715
3,Optimism,Dexes,196826360.359
4,Polygon,Dexes,549874609.646
5,Solana,Dexes,3128994751.676
6,Sui,Dexes,498772487.454


In [27]:
# "Trading" bucket TVL per selected chain on 2024-12-17, excluding hyperliquid-bridge.
# The original cell did not parse: the isin([...]) call was missing its closing "])".
(
    df_filtered[
        (df_filtered.dt == "2024-12-17")
        & (df_filtered.chain.isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        # & (df_filtered.protocol_category != "bridge")
        & (df_filtered.parent_protocol != "hyperliquid-bridge")
        # NOTE(review): "Trading" is not a value produced by the `mapping` dict in
        # this notebook (it has "Dexes" and "Other Trading") — confirm the label.
        & (df_filtered.protocol_category_mapped.isin(["Trading"]))
    ]
    .groupby(["chain", "protocol_category_mapped"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

Unnamed: 0,chain,protocol_category_mapped,app_token_tvl_usd
0,Arbitrum,Trading,937320940.106
1,Base,Trading,2615259500.487
2,Mode,Trading,52108964.819
3,Optimism,Trading,276903353.16
4,Polygon,Trading,760481074.986
5,Solana,Trading,3358823176.915
6,Sui,Trading,518573388.066


In [18]:
# Plot token category TVL breakdown

In [28]:
# TVL per (chain, token category) on 2024-12-01 for the selected chains,
# excluding the hyperliquid-bridge parent protocol.
token_rows = df_filtered.loc[
    (df_filtered["dt"] == "2024-12-01")
    & df_filtered["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"])
    & (df_filtered["parent_protocol"] != "hyperliquid-bridge")
]
token_df = (
    token_rows.groupby(["chain", "token_category_misrep"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

In [29]:
token_df

Unnamed: 0,chain,token_category_misrep,app_token_tvl_usd
0,Arbitrum,Liquid Restaking,605152548.266
1,Arbitrum,Liquid Staking,314468163.996
2,Arbitrum,Misrepresented TVL,190526957.531
3,Arbitrum,Native Asset,590499331.821
4,Arbitrum,Other,970543161.039
5,Arbitrum,Stablecoins,593552659.665
6,Arbitrum,Wrapped Assets,715707513.573
7,Base,Liquid Restaking,212133537.763
8,Base,Liquid Staking,861815599.066
9,Base,Misrepresented TVL,188497866.454


In [30]:
# Total stablecoin TVL across the selected chains.
token_df.loc[token_df["token_category_misrep"] == "Stablecoins", "app_token_tvl_usd"].sum()

np.float64(3296418946.2979503)

In [31]:
# Each chain's share of the stablecoin TVL; the stablecoin slice is selected once and reused.
stablecoin_tvl = token_df.loc[token_df.token_category_misrep == "Stablecoins", "app_token_tvl_usd"]
stablecoin_tvl / stablecoin_tvl.sum()

5    0.180
12   0.330
18   0.001
25   0.045
31   0.053
38   0.346
42   0.044
Name: app_token_tvl_usd, dtype: float64

In [32]:
# Stablecoin rows of token_df, one per chain.
token_df.loc[token_df["token_category_misrep"] == "Stablecoins"]

Unnamed: 0,chain,token_category_misrep,app_token_tvl_usd
5,Arbitrum,Stablecoins,593552659.665
12,Base,Stablecoins,1088042464.117
18,Mode,Stablecoins,4854630.004
25,Optimism,Stablecoins,149664390.7
31,Polygon,Stablecoins,174435409.704
38,Solana,Stablecoins,1140313227.652
42,Sui,Stablecoins,145556164.455


In [33]:
# token_category_order = [
#     "Native Asset", 
#     "Liquid Staking", 
#     "Liquid Restaking", 
#     "Stablecoins", 
#     "Wrapped Assets", 
#     "Other", 
#     "Misrepresented TVL"
# ]

# fig = px.bar(
#     token_df,
#     x="chain",
#     y="percentage",
#     color="token_category_misrep",
#     title="Token Category Breakdown by Chain (Percentage)",
#     labels={
#         "percentage": "Percentage (%)",
#         "chain": "Chain",
#         "token_category_misrep": "Token Category"
#     },
#     category_orders={
#         "chain": order,
#         "token_category_misrep": token_category_order
#     },
#     barmode="stack"
# )

# fig.update_layout(
#     template="plotly_white",
#     xaxis_title="Chain",
#     yaxis_title="Percentage (%)",
#     legend_title_text="Token Category",
#     margin=dict(t=50, l=25, r=25, b=50),
# )

# fig.show()


In [34]:
def get_flow_df(df, target_date, agg_cols, return_tokens=False):
    """Compare TVL on ``target_date`` with every date up to and including it.

    Token amounts from each earlier date are re-valued at the target date's
    implied USD rate (``app_token_tvl_usd / app_token_tvl``), so the "adjusted"
    previous TVL differs from the target TVL only through token-amount changes.

    Args:
        df: Frame with ``dt``, ``token``, ``app_token_tvl``,
            ``app_token_tvl_usd`` and every column named in ``agg_cols``.
        target_date: Date (anything ``pd.to_datetime`` accepts) to compare against.
        agg_cols: List of columns to aggregate by.
        return_tokens: When True, keep one output row per token as well.

    Returns:
        DataFrame with one row per ``dt`` + ``agg_cols`` (+ ``token``), holding the
        target, previous and price-adjusted previous USD TVL, the token count,
        net flow / net TVL change in USD, and both as percent changes
        (infinite or undefined percentages are reported as 0).
    """
    target_date = pd.to_datetime(target_date)
    token_keys = agg_cols + ["token"]
    value_cols = ["app_token_tvl", "app_token_tvl_usd"]
    inf = float("inf")

    # Target-date snapshot per (agg_cols, token), with its implied USD price.
    snapshot = (
        df.loc[df["dt"] == target_date, token_keys + value_cols]
        .groupby(token_keys, as_index=False)
        .sum()
    )
    implied_price = snapshot["app_token_tvl_usd"] / snapshot["app_token_tvl"]
    snapshot["usd_conversion_rate"] = implied_price.replace([inf, -inf], 0).fillna(0)

    # Every date up to and including the target, at the same grain.
    history = (
        df.loc[df["dt"] <= target_date, ["dt"] + token_keys + value_cols]
        .groupby(["dt"] + token_keys, as_index=False)
        .sum()
    )

    # Pair each snapshot row with all of its historical rows.
    flows = snapshot.merge(
        history,
        how="left",
        on=token_keys,
        suffixes=("", "_previous"),
    )
    for previous_col in ("app_token_tvl_previous", "app_token_tvl_usd_previous"):
        flows[previous_col] = flows[previous_col].fillna(0)
    # Historical token amount valued at the target-date price.
    flows["app_token_tvl_usd_previous_adjusted"] = (
        flows["app_token_tvl_previous"] * flows["usd_conversion_rate"]
    ).fillna(0)

    group_cols = ["dt"] + agg_cols
    if return_tokens:
        group_cols = group_cols + ["token"]

    summary = flows.groupby(group_cols, as_index=False).agg(
        app_tvl_usd_target=("app_token_tvl_usd", "sum"),
        app_tvl_usd_previous=("app_token_tvl_usd_previous", "sum"),
        app_tvl_usd_previous_adjusted=("app_token_tvl_usd_previous_adjusted", "sum"),
        app_token_count=("token", "nunique"),
    )

    # Absolute changes: price-neutral flow vs. raw TVL change.
    summary["net_flow_usd"] = (
        summary["app_tvl_usd_target"] - summary["app_tvl_usd_previous_adjusted"]
    )
    summary["net_change_tvl"] = (
        summary["app_tvl_usd_target"] - summary["app_tvl_usd_previous"]
    )

    # Relative changes in percent.
    summary["flow_percent_change"] = (
        summary["net_flow_usd"] / summary["app_tvl_usd_previous_adjusted"] * 100
    )
    summary["tvl_percent_change"] = (
        summary["net_change_tvl"] / summary["app_tvl_usd_previous"] * 100
    )

    # Division by a zero base yields inf/NaN; report those as 0.
    for pct_col in ("flow_percent_change", "tvl_percent_change"):
        summary[pct_col] = summary[pct_col].replace([inf, -inf], 0).fillna(0)

    return summary

In [35]:
token_category_flow_df = get_flow_df(df_filtered, "2024-12-01", ["chain", "token_category_misrep"])

In [36]:
# Token-category flows for Optimism measured against 2024-11-18.
is_nov_18 = token_category_flow_df["dt"] == "2024-11-18"
is_optimism = token_category_flow_df["chain"] == "Optimism"
token_category_flow_df[is_nov_18 & is_optimism]

Unnamed: 0,dt,chain,token_category_misrep,app_tvl_usd_target,app_tvl_usd_previous,app_tvl_usd_previous_adjusted,app_token_count,net_flow_usd,net_change_tvl,flow_percent_change,tvl_percent_change
22508,2024-11-18,Optimism,Liquid Restaking,48252739.452,43115926.281,52155770.212,4,-3903030.76,5136813.172,-7.483,11.914
22509,2024-11-18,Optimism,Liquid Staking,158009824.906,134996470.629,163041711.872,6,-5031886.965,23013354.278,-3.086,17.047
22510,2024-11-18,Optimism,Misrepresented TVL,161817420.437,200294687.074,200239704.254,1,-38422283.818,-38477266.638,-19.188,-19.21
22511,2024-11-18,Optimism,Native Asset,126625685.268,103692252.248,125749077.691,2,876607.576,22933433.019,0.697,22.117
22512,2024-11-18,Optimism,Other,172325242.162,150392320.445,187746061.733,258,-15420819.571,21932921.717,-8.214,14.584
22513,2024-11-18,Optimism,Stablecoins,149664390.7,123482572.143,123392916.838,10,26271473.862,26181818.557,21.291,21.203
22514,2024-11-18,Optimism,Wrapped Assets,79380579.731,74215963.423,79749559.117,1,-368979.386,5164616.308,-0.463,6.959


In [37]:
flow_df = get_flow_df(df_filtered, "2024-12-01", ["chain", "parent_protocol"])

In [38]:

def plot_top_protocols_over_time(flow_df, date_diff, chain, top_n=10):
    """Plot the price-adjusted TVL history of the top protocols on one chain.

    Protocols are ranked by ``net_flow_usd`` on the date ``date_diff`` days
    before the latest ``dt`` in ``flow_df``; the ``top_n`` largest are plotted
    over all of their dates as a line chart.

    Args:
        flow_df: Frame with ``dt``, ``chain``, ``parent_protocol``,
            ``net_flow_usd`` and ``app_tvl_usd_previous_adjusted`` columns.
        date_diff: Look-back window in days, counted from the latest ``dt``.
        chain: Chain whose protocols are plotted.
        top_n: Number of protocols to keep.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    latest_dt = pd.to_datetime(flow_df["dt"].max())
    lookback_label = (latest_dt - pd.Timedelta(days=date_diff)).strftime("%Y-%m-%d")

    on_chain = flow_df.loc[flow_df["chain"] == chain]
    lookback_rows = on_chain.loc[on_chain["dt"] == lookback_label]

    # Rank by net flow on the look-back date, then pull the full history of the leaders.
    leaders = lookback_rows.nlargest(top_n, "net_flow_usd")["parent_protocol"]
    leader_history = on_chain.loc[on_chain["parent_protocol"].isin(leaders)]

    axis_labels = {
        "app_tvl_usd_previous_adjusted": "Previous Adjusted TVL (USD)",
        "dt": "Date",
        "parent_protocol": "Protocol",
    }
    fig = px.line(
        leader_history,
        x="dt",
        y="app_tvl_usd_previous_adjusted",
        color="parent_protocol",
        title=f"Growth of Top {top_n} Protocols on {chain} in Last {date_diff} Days",
        labels=axis_labels,
    )

    # The axis titles set here take precedence over the `labels` mapping above.
    fig.update_layout(
        template="plotly_white",
        xaxis_title="Date",
        yaxis_title="Target Date Adjusted TVL (USD)",
        margin={"t": 50, "l": 25, "r": 25, "b": 50},
    )

    fig.show()


In [39]:
plot_top_protocols_over_time(flow_df, 30, "Base", 10)

In [40]:
# Make some treemaps

In [41]:
def plot_nested_protocol_breakdown(data, date, chain, date_diff=90):
    """
    Treemap of one chain's TVL nested as protocol category > protocol > token category.

    Tiles are sized by `app_token_tvl_usd` on `date` and coloured by the percent
    change versus the value `date_diff` days earlier. The input frame is not modified.

    Parameters:
        data (pd.DataFrame): Needs 'dt', 'chain', 'protocol_category',
            'parent_protocol', 'token_category' and 'app_token_tvl_usd'.
        date (str or datetime): Date whose TVL sizes the tiles.
        chain (str): Chain to plot.
        date_diff (int): Look-back in days for the comparison value (default 90).

    Returns:
        pd.DataFrame: Rows for `chain` on `date` with TVL >= 10,000, plus
        'app_token_tvl_usd_previous' and 'percent_change' (clipped to +/-500).
    """
    join_keys = ["protocol_category", "parent_protocol", "token_category"]

    # Filter to the chain first, then parse dates on that copy only: the caller's
    # frame keeps its original `dt` column and the conversion touches fewer rows.
    chain_data = data[data["chain"] == chain]
    chain_data = chain_data.assign(dt=pd.to_datetime(chain_data["dt"]))

    target_date = pd.to_datetime(date)
    # The comparison date is a calendar day (midnight), as before.
    previous_date = (target_date - pd.Timedelta(days=date_diff)).normalize()

    filtered_data = chain_data[
        (chain_data["dt"] == target_date) & (chain_data["app_token_tvl_usd"] >= 10_000)
    ]
    previous_data = chain_data[chain_data["dt"] == previous_date]

    merged_data = filtered_data.merge(
        previous_data[join_keys + ["app_token_tvl_usd"]],
        on=join_keys,
        suffixes=("", "_previous"),
        how="left",
    )

    # Rows with no earlier value get a tiny placeholder so the division below is
    # defined. Assign the result back: calling fillna(inplace=True) on a selected
    # column is chained assignment and does nothing under pandas Copy-on-Write.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme changes (e.g. from the 0.01 placeholder).
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "protocol_category", "parent_protocol", "token_category"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data
    

In [42]:
# Daily USD TVL per chain / protocol category / protocol / token category.
# The grouping uses `token_category_misrep`, exposed downstream as `token_category`.
protocol_breakdown = (
    df_filtered
    .groupby(["dt", "chain", "protocol_category", "parent_protocol", "token_category_misrep"])[["app_token_tvl_usd"]]
    .sum()
    .reset_index()
    .rename(columns={"token_category_misrep": "token_category"})
)

In [43]:
# Solana treemap for 2024-11-20, coloured by change versus 30 days earlier.
plot_df = plot_nested_protocol_breakdown(
    protocol_breakdown,
    date="2024-11-20",
    chain="Solana",
    date_diff=30,
)

In [44]:
# Same aggregation as protocol_breakdown, with the individual token as the innermost level.
protocol_token_breakdown = (
    df_filtered
    .groupby(["dt", "chain", "protocol_category", "parent_protocol", "token_category_misrep", "token"])[["app_token_tvl_usd"]]
    .sum()
    .reset_index()
    .rename(columns={"token_category_misrep": "token_category"})
)

In [45]:
def plot_nested_protocol_token_breakdown(data, date, chain, date_diff=90):
    """
    Treemap of one chain's TVL nested as protocol category > protocol > token category > token.

    Tiles are sized by `app_token_tvl_usd` on `date` and coloured by the percent
    change versus the value `date_diff` days earlier. The input frame is not modified.

    Parameters:
        data (pd.DataFrame): Needs 'dt', 'chain', 'protocol_category',
            'parent_protocol', 'token_category', 'token' and 'app_token_tvl_usd'.
        date (str or datetime): Date whose TVL sizes the tiles.
        chain (str): Chain to plot.
        date_diff (int): Look-back in days for the comparison value (default 90).

    Returns:
        pd.DataFrame: Rows for `chain` on `date` with TVL >= 10,000, plus
        'app_token_tvl_usd_previous' and 'percent_change' (clipped to +/-500).
    """
    join_keys = ["protocol_category", "parent_protocol", "token_category", "token"]

    # Filter to the chain first, then parse dates on that copy only: the caller's
    # frame keeps its original `dt` column and the conversion touches fewer rows.
    chain_data = data[data["chain"] == chain]
    chain_data = chain_data.assign(dt=pd.to_datetime(chain_data["dt"]))

    target_date = pd.to_datetime(date)
    # The comparison date is a calendar day (midnight), as before.
    previous_date = (target_date - pd.Timedelta(days=date_diff)).normalize()

    filtered_data = chain_data[
        (chain_data["dt"] == target_date) & (chain_data["app_token_tvl_usd"] >= 10_000)
    ]
    previous_data = chain_data[chain_data["dt"] == previous_date]

    merged_data = filtered_data.merge(
        previous_data[join_keys + ["app_token_tvl_usd"]],
        on=join_keys,
        suffixes=("", "_previous"),
        how="left",
    )

    # Rows with no earlier value get a tiny placeholder so the division below is
    # defined. Assign the result back: calling fillna(inplace=True) on a selected
    # column is chained assignment and does nothing under pandas Copy-on-Write.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme changes (e.g. from the 0.01 placeholder).
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "protocol_category", "parent_protocol", "token_category", "token"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data

In [47]:
# Flow table restricted to stablecoin rows, grouped by chain / protocol category / protocol.
net_flows_stables = get_flow_df(
    df_filtered.loc[df_filtered["token_category_misrep"].eq("Stablecoins")],
    "2024-12-01",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

In [48]:


def plot_treemap_with_date_diff(df, date_diff, column_list, value_col="app_tvl_usd_target"):
    """
    Plots a treemap for the date that lies `date_diff` days before the latest date in `df`.

    Tiles are sized by `value_col` and coloured by 'flow_percent_change'.
    The input DataFrame is not modified.

    Parameters:
        df (pd.DataFrame): Needs 'dt', 'flow_percent_change', `value_col` and
            every column named in `column_list`.
        date_diff (int): The number of days to subtract from the max date to determine the target date.
        column_list (list): List of columns to use as treemap layers (hierarchy).
        value_col (str): Column to use for treemap values. Default is "app_tvl_usd_target".

    Returns:
        None: Displays the treemap, or prints a message when there is nothing to plot.
    """
    # Parse dates into a separate Series instead of writing back into `df`:
    # callers pass filtered slices, and the caller's frame should stay untouched.
    dates = pd.to_datetime(df["dt"])

    # Calculate the target date
    max_date = dates.max()
    if pd.isna(max_date):
        # An empty input has no latest date (NaT), which cannot be formatted below.
        print("No data available: the input DataFrame contains no dates.")
        return
    target_date = max_date - pd.Timedelta(days=date_diff)

    # Filter the DataFrame for the target date
    on_target = dates == target_date
    filtered_df = df.loc[on_target].copy()

    # Check if the filtered DataFrame is empty
    if filtered_df.empty:
        print(f"No data available for the date {target_date.strftime('%Y-%m-%d')}.")
        return

    # Hand plotly the parsed dates, as the previous implementation did.
    filtered_df["dt"] = dates.loc[on_target]

    # Create the treemap
    fig = px.treemap(
        filtered_df,
        path=column_list,  # Hierarchy, outermost level first
        values=value_col,  # Tile size
        color="flow_percent_change",  # Tile colour
        color_continuous_scale="RdBu",
        range_color=[-200, 200],
        title=f"Treemap for {target_date.strftime('%Y-%m-%d')} (Layered by {', '.join(column_list)})",
        width=800,
        height=800,
    )

    # Customize layout
    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        template="plotly_white",
    )

    fig.show()


In [49]:
# Stablecoin flows excluding bridge protocols, then a treemap over rows above 10k USD.
net_flows_stables = get_flow_df(
    df_filtered.loc[
        df_filtered["token_category_misrep"].eq("Stablecoins")
        & df_filtered["protocol_category"].ne("Bridge")
    ],
    "2024-12-15",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

plot_treemap_with_date_diff(
    net_flows_stables.loc[net_flows_stables["app_tvl_usd_target"].gt(10_000)],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [50]:
# Stablecoin flows with bridges included; the optional extra filters stay disabled.
net_flows_stables = get_flow_df(
    df_filtered.loc[
        df_filtered["token_category_misrep"].eq("Stablecoins")
        # & df_filtered["protocol_category"].ne("Bridge")
        # & df_filtered["chain"].ne("Ethereum")
    ],
    "2024-12-15",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

plot_treemap_with_date_diff(
    net_flows_stables.loc[net_flows_stables["app_tvl_usd_target"].gt(10_000)],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [255]:
# ETH / WETH flows by chain and protocol category.
# NOTE(review): the result is still stored in `net_flows_stables` although it holds
# ETH/WETH rows here - consider a dedicated name once downstream uses are checked.
net_flows_stables = get_flow_df(
    df_filtered.loc[df_filtered["token"].isin(["ETH", "WETH"])],
    "2024-12-17",
    ["chain", "protocol_category"],
    return_tokens=True,
)

plot_treemap_with_date_diff(
    net_flows_stables.loc[net_flows_stables["app_tvl_usd_target"].gt(10_000)],
    date_diff=30,
    column_list=["chain", "protocol_category", "token"],
)

In [252]:
# ETHFI rows on 2024-12-17, largest USD value first.
df_filtered.loc[
    df_filtered["token"].eq("ETHFI")
    & (df_filtered["dt"] == "2024-12-17")
    # & df_filtered["parent_protocol"].eq("ether-fi")
].sort_values("app_token_tvl_usd", ascending=False)

Unnamed: 0,protocol_name,protocol_slug,protocol_category,parent_protocol,misrepresented_tokens,dt,chain,token,app_token_tvl,app_token_tvl_usd,alignment,token_category,chain_misrepresented_tokens,chains_to_keep,token_category_misrep,protocol_category_mapped,chain_list,chains_filtered
2917445,Karak,karak,Restaking,subseaprotocol,0,2024-12-17,Ethereum,ETHFI,6909886.016,20176867.166,Other,Other,0.0,1,Other,Restaking/Liquid Restaking,Ethereum,Ethereum
1821867,Symbiotic,symbiotic,Restaking,symbiotic,0,2024-12-17,Ethereum,ETHFI,5676661.482,16064951.994,Other,Other,0.0,1,Other,Restaking/Liquid Restaking,Ethereum,Ethereum
2917466,Karak,karak,Restaking,subseaprotocol,0,2024-12-17,Arbitrum,ETHFI,2129564.87,6218329.421,Other,Other,0.0,1,Other,Restaking/Liquid Restaking,Arbitrum,Arbitrum
1795093,Uniswap V3,uniswap-v3,Dexes,uniswap,0,2024-12-17,Ethereum,ETHFI,579072.559,1638775.341,Other,Other,0.0,1,Other,Dexes,Ethereum,Ethereum
7131610,Arrakis V2,arrakis-v2,Liquidity manager,arrakis-finance,0,2024-12-17,Ethereum,ETHFI,298833.608,845699.109,Other,Other,0.0,1,Other,Yield,Ethereum,Ethereum
1794041,Uniswap V3,uniswap-v3,Dexes,uniswap,0,2024-12-17,Arbitrum,ETHFI,111770.117,315191.73,Other,Other,0.0,1,Other,Dexes,Arbitrum,Arbitrum
5576148,Camelot V3,camelot-v3,Dexes,camelot,0,2024-12-17,Arbitrum,ETHFI,74380.387,209752.69,Other,Other,0.0,1,Other,Dexes,Arbitrum,Arbitrum
7131644,Arrakis V2,arrakis-v2,Liquidity manager,arrakis-finance,0,2024-12-17,Arbitrum,ETHFI,63871.052,180116.366,Other,Other,0.0,1,Other,Yield,Arbitrum,Arbitrum
7386697,Curve LlamaLend,curve-llamalend,Lending,curve-finance,0,2024-12-17,Ethereum,ETHFI,19305.135,56370.994,Other,Other,0.0,1,Other,Lending,Ethereum,Ethereum
7852541,DeGate,degate,Dexes,degate,0,2024-12-17,Ethereum,ETHFI,7795.941,22764.147,Other,Other,0.0,1,Other,Dexes,Ethereum,Ethereum


In [52]:
# Optimism flows excluding bridge protocols, then a treemap over rows above 10k USD.
net_flows = get_flow_df(
    df_filtered.loc[
        df_filtered["chain"].isin(["Optimism"])
        & df_filtered["protocol_category"].ne("Bridge")
    ],
    "2024-12-03",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

plot_treemap_with_date_diff(
    net_flows.loc[net_flows["app_tvl_usd_target"].gt(10_000)],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [53]:


def plot_stacked_tvl_over_time(df, chain_name, cat_col):
    """
    Stacked bar chart of each category's percentage share of a chain's TVL per date.

    Only the 10 categories with the largest share on the latest date get a legend entry.

    Parameters:
        df (pd.DataFrame): Needs 'dt', 'chain', 'app_token_tvl_usd' and the `cat_col` column.
        chain_name (str): The chain to filter for (e.g., 'Ethereum').
        cat_col (str): Column whose values define the stacked categories.

    Returns:
        None: Displays the plot, or prints a message when the chain has no rows.
    """
    chain_rows = df.loc[df["chain"] == chain_name]
    if chain_rows.empty:
        print(f"No data available for chain: {chain_name}")
        return

    # TVL per (date, category), then each category's share of that date's total.
    grouped = chain_rows.groupby(["dt", cat_col])["app_token_tvl_usd"].sum().reset_index()
    date_totals = grouped.groupby("dt")["app_token_tvl_usd"].transform("sum")
    grouped["percentage"] = 100 * grouped["app_token_tvl_usd"] / date_totals

    # Ten largest categories on the most recent date.
    latest_date = grouped["dt"].max()
    latest_ranked = grouped.loc[grouped["dt"] == latest_date].sort_values(
        by="percentage", ascending=False
    )
    legend_categories = latest_ranked[cat_col].head(10).tolist()

    fig = px.bar(
        grouped,
        x="dt",
        y="percentage",
        color=cat_col,
        labels={"percentage": "Percentage of Total TVL", cat_col: "Category"},
        title=f"Percentage of Total TVL by Category Over Time for {chain_name}",
        template="plotly_white",
        width=900,
        height=600,
    )
    fig.update_layout(barmode="stack", xaxis_title="Date", yaxis_title="Percentage of Total TVL")

    # Every category stays in the chart; only the legend is trimmed.
    fig.for_each_trace(
        lambda tr: tr.update(showlegend=tr.name in legend_categories)
    )

    fig.show()


# Example usage:
# plot_stacked_tvl_over_time(your_dataframe, 'Ethereum', 'parent_protocol')


In [54]:
# Ethereum TVL share by protocol category, all dates and categories.
plot_stacked_tvl_over_time(df_filtered, chain_name="Ethereum", cat_col="protocol_category")

In [55]:

def plot_stacked_tvl_over_time(df, chain_name, cat_col):
    """
    Stacked, gap-free bar chart of each category's percentage share of a chain's TVL per date.

    The 10 categories with the largest share on the latest date are listed in the
    legend and passed to plotly as the category order (largest first). The title
    is positioned close to the plot area.

    Parameters:
        df (pd.DataFrame): Needs 'dt', 'chain', 'app_token_tvl_usd' and the `cat_col` column.
        chain_name (str): The chain to filter for (e.g., 'Ethereum').
        cat_col (str): Column whose values define the stacked categories.

    Returns:
        None: Displays the plot, or prints a message when the chain has no rows.
    """
    chain_rows = df.loc[df["chain"] == chain_name]
    if chain_rows.empty:
        print(f"No data available for chain: {chain_name}")
        return

    # TVL per (date, category), then each category's share of that date's total.
    grouped = chain_rows.groupby(["dt", cat_col])["app_token_tvl_usd"].sum().reset_index()
    date_totals = grouped.groupby("dt")["app_token_tvl_usd"].transform("sum")
    grouped["percentage"] = 100 * grouped["app_token_tvl_usd"] / date_totals

    # Ten largest categories on the most recent date, largest first.
    latest_date = grouped["dt"].max()
    latest_ranked = grouped.loc[grouped["dt"] == latest_date].sort_values(
        by="percentage", ascending=False
    )
    leading_categories = latest_ranked[cat_col].head(10).tolist()

    fig = px.bar(
        grouped,
        x="dt",
        y="percentage",
        color=cat_col,
        labels={"percentage": "Percentage of Total TVL", cat_col: "Category"},
        title=f"Percentage of Total TVL by Category Over Time for {chain_name}",
        template="plotly_white",
        width=900,
        height=600,
        category_orders={cat_col: leading_categories},
    )

    # Stack the bars edge to edge and pull the title towards the plot area.
    title_position = {"y": 0.95, "x": 0.4, "xanchor": "center", "yanchor": "top"}
    fig.update_layout(
        barmode="stack",
        xaxis_title="Date",
        yaxis_title="Percentage of Total TVL",
        bargap=0,
        title=title_position,
        margin={"t": 50},
    )

    # No outlines, so adjacent bars read as one continuous area.
    fig.update_traces(marker_line_width=0)

    # Every category stays in the chart; only the legend is trimmed.
    fig.for_each_trace(
        lambda tr: tr.update(showlegend=tr.name in leading_categories)
    )

    fig.show()


In [56]:
# Ethereum TVL share by protocol category up to 2024-10-15, bridges excluded.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        (df_filtered["dt"] <= "2024-10-15")
        & df_filtered["protocol_category"].ne("Bridge")
    ],
    chain_name="Ethereum",
    cat_col="protocol_category",
)

In [57]:
# Ethereum stablecoin TVL share by token, bridges excluded.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        df_filtered["protocol_category"].ne("Bridge")
        & df_filtered["token_category"].eq("Stablecoins")
    ],
    chain_name="Ethereum",
    cat_col="token",
)

In [58]:
# Share of Ethereum stablecoin TVL per token over time (bridges excluded).
# This is an inline variant of plot_stacked_tvl_over_time with a custom title and legend label.
df_chain = df_filtered.loc[
    df_filtered["protocol_category"].ne("Bridge")
    & df_filtered["token_category"].eq("Stablecoins")
    & df_filtered["chain"].eq("Ethereum")
]

# TVL per (date, token), then each token's share of that date's total.
grouped = df_chain.groupby(["dt", "token"])["app_token_tvl_usd"].sum().reset_index()
total_per_date = grouped.groupby("dt")["app_token_tvl_usd"].transform("sum")
grouped["percentage"] = 100 * grouped["app_token_tvl_usd"] / total_per_date

# Ten largest tokens on the most recent date, largest first.
max_date = grouped["dt"].max()
top_categories = (
    grouped.loc[grouped["dt"] == max_date]
    .sort_values(by="percentage", ascending=False)["token"]
    .head(10)
    .tolist()
)

fig = px.bar(
    grouped,
    x="dt",
    y="percentage",
    color="token",
    labels={"percentage": "Percentage of Total TVL", "token": "Stablecoin"},
    title="Percentage of Stablecoin TVL (locked in contracts) for Ethereum",
    template="plotly_white",
    width=900,
    height=600,
    category_orders={"token": top_categories},
)

# Stack the bars edge to edge and pull the title towards the plot area.
fig.update_layout(
    barmode="stack",
    xaxis_title="Date",
    yaxis_title="Percentage of Total TVL",
    bargap=0,
    title={"y": 0.95, "x": 0.4, "xanchor": "center", "yanchor": "top"},
    margin={"t": 50},
)

# No bar outlines.
fig.update_traces(marker_line_width=0)

# Every token stays in the chart; only the legend is trimmed to the top ten.
fig.for_each_trace(
    lambda tr: tr.update(showlegend=tr.name in top_categories)
)

fig.show()

In [59]:
import pandas as pd
import plotly.express as px

# Fixed colour per stablecoin; the key order is also the stacking order.
color_mapping = {
    "USDE": "black",
    "SUSDE": "#6b6b6b",
    "USDT": "#26a17b",
    "USDC": "#2775ca",
    "USD0++": "forestgreen",
    "DAI": "#febe44",
    "USDS": "gold",
    "FRAX": "lightgrey",
    "CRVUSD": "purple",
    "USDZ": "pink",
}

# Stacking order, written out explicitly so it can be changed independently of the colours.
stablecoin_order = [
    "USDE",
    "SUSDE",
    "USDT",
    "USDC",
    "USD0++",
    "DAI",
    "USDS",
    "FRAX",
    "CRVUSD",
    "USDZ",
]

# Ethereum stablecoin rows for the listed tokens, excluding bridges and the ethena protocol.
df_chain = df_filtered.loc[
    df_filtered["protocol_category"].ne("Bridge")
    & df_filtered["token_category"].eq("Stablecoins")
    & df_filtered["chain"].eq("Ethereum")
    & df_filtered["token"].isin(stablecoin_order)
    & df_filtered["parent_protocol"].ne("ethena")
]

# TVL per (date, token), then each token's share of that date's total.
grouped = df_chain.groupby(["dt", "token"])["app_token_tvl_usd"].sum().reset_index()
total_per_date = grouped.groupby("dt")["app_token_tvl_usd"].transform("sum")
grouped["percentage"] = 100 * grouped["app_token_tvl_usd"] / total_per_date

# Ten largest tokens on the most recent date (kept for inspection; the chart below
# orders by `stablecoin_order` instead).
max_date = grouped["dt"].max()
top_categories = (
    grouped.loc[grouped["dt"] == max_date]
    .sort_values(by="percentage", ascending=False)["token"]
    .head(10)
    .tolist()
)

fig = px.bar(
    grouped,
    x="dt",
    y="percentage",
    color="token",
    color_discrete_map=color_mapping,
    labels={"percentage": "Percentage of Total TVL", "token": "Stablecoin"},
    title="Percentage of Stablecoin TVL (locked in contracts) for Ethereum",
    template="plotly_white",
    width=900,
    height=600,
    category_orders={"token": stablecoin_order},
)

# Stack the bars edge to edge, pull the title towards the plot, place the legend to the right.
fig.update_layout(
    barmode="stack",
    xaxis_title="Date",
    yaxis_title="Percentage of Total TVL",
    bargap=0,
    title={"y": 0.95, "x": 0.4, "xanchor": "center", "yanchor": "top"},
    margin={"t": 50},
    legend={
        "title": "Stablecoin",
        "yanchor": "top",
        "y": 1.05,
        "xanchor": "left",
        "x": 1.02,
    },
)

# No bar outlines.
fig.update_traces(marker_line_width=0)

fig.show()


In [60]:
# SUSDE rows on Ethereum for 2024-12-15, largest USD value first.
df_filtered.loc[
    df_filtered["token"].eq("SUSDE")
    & df_filtered["chain"].eq("Ethereum")
    & (df_filtered["dt"] == "2024-12-15")
].sort_values("app_token_tvl_usd", ascending=False)

Unnamed: 0,protocol_name,protocol_slug,protocol_category,parent_protocol,misrepresented_tokens,dt,chain,token,app_token_tvl,app_token_tvl_usd,alignment,token_category,chain_misrepresented_tokens,token_category_misrep,protocol_category_mapped
33914,AAVE V3,aave-v3,Lending,aave,0,2024-12-15,Ethereum,SUSDE,1040000000.0,1182637086.045,Other,Stablecoins,0.0,Stablecoins,Lending
1486007,Morpho Blue,morpho-blue,Lending,morpho,0,2024-12-15,Ethereum,SUSDE,159820051.552,181739538.518,Other,Stablecoins,0.0,Stablecoins,Lending
1489147,Symbiotic,symbiotic,Restaking,symbiotic,0,2024-12-15,Ethereum,SUSDE,91004933.344,103486354.986,Other,Stablecoins,0.0,Stablecoins,Restaking/Liquid Restaking
2501333,Fluid Lending,fluid-lending,Lending,fluid,0,2024-12-15,Ethereum,SUSDE,66179186.594,75255731.145,Other,Stablecoins,0.0,Stablecoins,Lending
4923946,Contango V2,contango-v2,Derivatives,contango,0,2024-12-15,Ethereum,SUSDE,48244419.264,54861191.746,Other,Stablecoins,0.0,Stablecoins,Derivatives
4060249,Derive V2,derive-v2,Derivatives,lyra,0,2024-12-15,Ethereum,SUSDE,35961447.556,40893597.642,Other,Stablecoins,0.0,Stablecoins,Derivatives
2456003,Mellow LRT,mellow-lrt,Liquid Restaking,mellow-protocol,0,2024-12-15,Ethereum,SUSDE,30817180.821,35043789.352,Other,Stablecoins,0.0,Stablecoins,Restaking/Liquid Restaking
4076460,Upshift,upshift,Managed Token Pools,upshift,0,2024-12-15,Ethereum,SUSDE,27471435.548,31239171.621,Other,Stablecoins,0.0,Stablecoins,Other Trading
1992379,Curve DEX,curve-dex,Dexes,curve-finance,0,2024-12-15,Ethereum,SUSDE,25768885.114,29303114.617,Other,Stablecoins,0.0,Stablecoins,Dexes
4047117,Gearbox,gearbox,Leveraged Farming,gearbox,1,2024-12-15,Ethereum,SUSDE,21129011.723,24026290.707,Other,Stablecoins,0.0,Stablecoins,Yield


In [61]:
# Ethereum TVL share by protocol category up to 2024-10-15, bridges excluded
# (same inputs as the earlier protocol-category chart).
plot_stacked_tvl_over_time(
    df_filtered.loc[
        (df_filtered["dt"] <= "2024-10-15")
        & df_filtered["protocol_category"].ne("Bridge")
    ],
    chain_name="Ethereum",
    cat_col="protocol_category",
)

In [62]:
todays_date = "2024-12-01"
exclude_categories = [
    "Chain",
    "CEX",
    "Infrastructure",
    "Staking Pool",
    "Liquid Staking",
    "RWA",
    "CeDeFi",
    "Basis Trading",
]

# Snapshot for the chosen date, bridges excluded.
today_df = df_filtered.loc[
    (df_filtered["dt"] == todays_date)
    & df_filtered["protocol_category"].ne("Bridge")
]

# USD TVL per token category summed over all rows aligned as "OP Chain".
op_chain_totals = (
    today_df.loc[today_df["alignment"].eq("OP Chain")]
    .groupby("token_category")["app_token_tvl_usd"]
    .sum()
)

# One-row frame labelled 'Superchain', with a 'Grand Total' column across categories.
op_chain_row = op_chain_totals.to_frame().T
op_chain_row.index = pd.Index(["Superchain"], name="chain")
op_chain_row["Grand Total"] = op_chain_row.sum(axis=1)

# Chain x token-category pivot (excluded protocol categories removed), with
# 'Grand Total' margins on both axes.
today_tvl_pivot = today_df.loc[
    ~today_df["protocol_category"].isin(exclude_categories)
].pivot_table(
    values="app_token_tvl_usd",
    index="chain",
    columns="token_category",
    aggfunc="sum",
    margins=True,
    margins_name="Grand Total",
)


In [63]:
import plotly.graph_objects as go

In [64]:

# Chains shown in the chart.
chains_to_include = ["Solana", "Arbitrum", "Base", "Optimism", "Mode", "Sui", "Polygon"]

# Turn `chain` into a column, order by overall TVL, keep the selected chains.
pivot_table_reset = today_tvl_pivot.reset_index().sort_values(by="Grand Total", ascending=False)
pivot_filtered = pivot_table_reset.loc[pivot_table_reset["chain"].isin(chains_to_include)]

# Token categories to stack, in stacking order.
categories_to_plot = [
    "Native Asset",
    "Liquid Staking",
    "Liquid Restaking",
    "Stablecoins",
    "Wrapped Assets",
    "Other",
]
pivot_filtered = pivot_filtered.set_index("chain")[categories_to_plot]

# One bar trace per token category, stacked per chain.
fig = go.Figure()
for category in categories_to_plot:
    fig.add_trace(
        go.Bar(name=category, x=pivot_filtered.index, y=pivot_filtered[category])
    )

fig.update_layout(
    title="USD Value by Chain and Token Category",
    xaxis_title="Chain",
    yaxis_title="Total USD Value",
    legend_title="Token Category",
    barmode="stack",
    template="plotly_white",
    width=900,
    height=600,
    # yaxis_type="log",
)

# fig.update_axes(type="log")
fig.show()


In [65]:


def plot_stablecoin_tokens_by_chain(df, date, token_category="Stablecoins", exclude_protocol="Bridge", min_tvl=100000):
    """
    Plots a stacked bar chart of TVL per token, per chain, for one token category
    on a specific date, excluding one protocol category.

    Only (chain, token) pairs whose summed TVL is at least `min_tvl` are shown.
    Chains are ordered by their total plotted TVL and tokens by their aggregate
    plotted TVL across all chains, both largest first.

    Parameters:
        df (pd.DataFrame): DataFrame containing:
                           - 'dt' (datetime or date string): dates
                           - 'chain' (str): chain name
                           - 'token_category_misrep' (str): token category label
                           - 'protocol_category' (str): protocol category label
                           - 'token' (str): token identifier
                           - 'app_token_tvl_usd' (float): raw TVL values
        date (str or datetime): The date to filter on (e.g., '2024-12-01').
        token_category (str): The token category to filter for (default: 'Stablecoins').
        exclude_protocol (str): The protocol category to exclude (default: 'Bridge').
        min_tvl (float): Minimum summed TVL for a (chain, token) pair to be included (default: 100000).

    Returns:
        None: Displays a stacked bar chart, or prints a message when nothing matches.
    """
    # Apply the category filters first so the date parsing below only touches
    # the rows that can still match.
    category_rows = df[
        (df["token_category_misrep"] == token_category)
        & (df["protocol_category"] != exclude_protocol)
    ]

    # Parse both sides before comparing: a 'dt' column of 'YYYY-MM-DD' strings
    # never equals a Timestamp, which used to make the filter silently match nothing.
    target_date = pd.to_datetime(date)
    selected = category_rows[pd.to_datetime(category_rows["dt"]) == target_date]

    if selected.empty:
        print(f"No data available for the given filters on {date}.")
        return

    # Group by chain and token, summing the TVL
    grouped = selected.groupby(["chain", "token"]).app_token_tvl_usd.sum().reset_index()

    # Drop (chain, token) pairs below the threshold
    grouped = grouped[grouped["app_token_tvl_usd"] >= min_tvl]
    if grouped.empty:
        print(f"No tokens meet the minimum TVL requirement of {min_tvl} USD on {date}.")
        return

    # Determine the order of chains from largest to smallest total TVL
    chain_totals = (
        grouped.groupby("chain")
        .app_token_tvl_usd.sum()
        .reset_index()
        .sort_values("app_token_tvl_usd", ascending=False)
    )
    chain_order = chain_totals["chain"].tolist()

    # Order tokens by their global aggregate TVL across all chains
    token_totals = (
        grouped.groupby("token")
        .app_token_tvl_usd.sum()
        .reset_index()
        .sort_values("app_token_tvl_usd", ascending=False)
    )
    token_order = token_totals["token"].tolist()

    # Create the stacked bar chart
    fig = px.bar(
        grouped,
        x="chain",
        y="app_token_tvl_usd",
        color="token",
        title=f"Total {token_category} TVL by Token Across Chains on {date}",
        labels={"chain": "Chain", "app_token_tvl_usd": "TVL (USD)", "token": "Token"},
        category_orders={"chain": chain_order, "token": token_order},  # Ordering chains and tokens by global totals
        template="plotly_white",
        width=900,
        height=600
    )

    # Stacked bars with a small gap between chains
    fig.update_layout(
        barmode="stack",
        bargap=0.1,
        xaxis_title="",
        yaxis_title="TVL (USD)"
    )

    fig.update_traces(marker_line_width=0)

    fig.show()

# Example usage:
# plot_stablecoin_tokens_by_chain(df_filtered, '2024-12-01')


In [66]:
# Stablecoin TVL per token on 2024-12-01 for a fixed set of chains.
plot_stablecoin_tokens_by_chain(
    df_filtered.loc[
        df_filtered["chain"].isin(
            ["Ethereum", "Solana", "Arbitrum", "Base", "Optimism", "Mode", "Sui", "Polygon"]
        )
    ],
    date="2024-12-01",
)

In [67]:
# Ethereum stablecoin TVL share by token, bridges excluded; the date cap stays disabled.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        df_filtered["protocol_category"].ne("Bridge")
        # & (df_filtered["dt"] <= "2024-10-15")
        & df_filtered["token_category"].eq("Stablecoins")
    ],
    chain_name="Ethereum",
    cat_col="token",
)

In [None]:
# Each chain's share of the combined Dexes / Other Trading / Bridge TVL on 2024-12-01.
# The per-chain aggregate is built once and divided by its own sum via `pipe`,
# instead of spelling out and computing the identical filter + groupby twice.
# The result keeps the positional index 0..n-1; groupby emits chains in sorted
# order, so the rows follow the alphabetical order of the chains present.
(
    df_filtered[
        (df_filtered.dt == "2024-12-01")
        & (df_filtered.chain.isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        # & (df_filtered.parent_protocol != "hyperliquid-bridge")
        & (df_filtered.protocol_category_mapped.isin(["Dexes", "Other Trading", "Bridge"]))
    ]
    .groupby(["chain"]).agg({"app_token_tvl_usd": "sum"})
    .reset_index()
    .app_token_tvl_usd
    .pipe(lambda tvl: tvl / tvl.sum())
)

In [96]:
# Daily USD TVL per chain for rows in the "Dexes" protocol category, from 2024-01-01 onwards.
df_dex = (
    df_filtered.loc[
        (df_filtered["dt"] >= "2024-01-01")
        # & df_filtered["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"])
        & df_filtered["protocol_category"].eq("Dexes")
        # & df_filtered["protocol_category_mapped"].isin(["Dexes"])
    ]
    .groupby(["dt", "chain"])[["app_token_tvl_usd"]]
    .sum()
    .reset_index()
)

In [97]:
# Display the per-date, per-chain DEX TVL table.
df_dex

Unnamed: 0,dt,chain,app_token_tvl_usd
0,2024-01-01,Arbitrum,708569848.933
1,2024-01-01,Avalanche,208142634.501
2,2024-01-01,Base,181478293.118
3,2024-01-01,Ethereum,6252826447.046
4,2024-01-01,Optimism,275570157.789
...,...,...,...
3535,2024-12-19,Other,8574723612.791
3536,2024-12-19,Polygon,59031718.476
3537,2024-12-19,Solana,3010144515.996
3538,2024-12-19,Sui,382095550.983


In [98]:
import pandas as pd
import plotly.express as px

# Each chain's share of total DEX TVL per date.
total_per_date = df_dex.groupby("dt")["app_token_tvl_usd"].transform("sum")
df_dex = df_dex.assign(
    percentage=100 * df_dex["app_token_tvl_usd"] / total_per_date
).sort_values(by="dt")

# Smooth each chain's share with a 3-row rolling mean (rows are date-ordered;
# the first rows of a chain average over fewer points).
df_dex["percentage_3d_avg"] = df_dex.groupby("chain")["percentage"].transform(
    lambda share: share.rolling(window=3, min_periods=1).mean()
)

# Chains ordered by their smoothed share on the latest date, largest first.
max_date = df_dex["dt"].max()
latest_share = (
    df_dex.loc[df_dex["dt"] == max_date]
    .groupby("chain")["percentage_3d_avg"]
    .sum()
)
chain_order = latest_share.sort_values(ascending=False).index.tolist()

fig = px.bar(
    df_dex,
    x="dt",
    y="percentage_3d_avg",
    color="chain",
    labels={"percentage_3d_avg": "Percentage of Total TVL", "chain": "Chain"},
    title="DEX Marketshare of TVL Over Time",
    template="plotly_white",
    width=900,
    height=600,
    category_orders={"chain": chain_order},
)

# Stack the bars edge to edge, pull the title towards the plot, place the legend to the right.
fig.update_layout(
    barmode="stack",
    xaxis_title="Date",
    yaxis_title="Percentage of Total TVL",
    bargap=0,
    title={"y": 0.95, "x": 0.4, "xanchor": "center", "yanchor": "top"},
    margin={"t": 50},
    legend={
        "title": "Chain",
        "yanchor": "top",
        "y": 1.05,
        "xanchor": "left",
        "x": 1.02,
    },
)

# Solid bars without outlines.
fig.update_traces(marker={"line": {"width": 0}}, opacity=1)

fig.show()


In [106]:
# Mean of the "Dexes" rows' app_token_tvl_usd per chain for 2024-12-01 <= dt < 2024-12-17.
# NOTE(review): this rebinds `df_dex` to a frame without a `dt` column, while the
# next cell reads df_dex["dt"]; on a top-to-bottom run that lookup would fail.
# Consider storing this result under a separate name.
df_dex = (
    df_filtered.loc[
        (df_filtered["dt"] >= "2024-12-01")
        & (df_filtered["dt"] < "2024-12-17")
        # & df_filtered["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"])
        & df_filtered["protocol_category"].eq("Dexes")
        # & df_filtered["protocol_category_mapped"].isin(["Dexes"])
    ]
    .groupby(["chain"])[["app_token_tvl_usd"]]
    .mean()
    .reset_index()
)

In [104]:
import pandas as pd
import plotly.express as px

# Parse dt so the .dt accessor can be used on it.
df_dex["dt"] = pd.to_datetime(df_dex["dt"])

# Bucket every row into its calendar month, stored as the timestamp of the
# start of that month.
df_dex["month"] = (
    df_dex["dt"]
    .dt.to_period("M")
    .dt.to_timestamp()
)

# Total app_token_tvl_usd per (month, chain).
df_monthly = df_dex.groupby(["month", "chain"], as_index=False).agg(
    {"app_token_tvl_usd": "sum"}
)

# Each chain's share of its month's total, in percent.
total_per_month = (
    df_monthly
    .groupby("month")["app_token_tvl_usd"]
    .transform("sum")
)
df_monthly["percentage"] = 100 * df_monthly["app_token_tvl_usd"] / total_per_month

# Latest month present in the aggregated frame.
max_month = df_monthly["month"].max()

# Legend / stacking order: chains ranked by their share in the latest month,
# largest first.
chain_order = (
    df_monthly.loc[df_monthly["month"].eq(max_month)]
    .groupby("chain")["percentage"]
    .sum()
    .sort_values(ascending=False)
    .index
    .tolist()
)

# Stacked bar chart of each chain's monthly share, one bar per month, with the
# chains ordered as computed in chain_order.
fig = px.bar(
    data_frame=df_monthly,
    x="month",
    y="percentage",
    color="chain",
    category_orders={"chain": chain_order},
    labels={"percentage": "Percentage of Total TVL", "chain": "Chain"},
    title="DEX Marketshare of TVL Over Time (Monthly Aggregated)",
    template="plotly_white",
    width=900,
    height=600,
)

# Stack the bars flush against each other, position the title, and place the
# legend just outside the right edge of the plot area.
fig.update_layout(
    barmode="stack",
    bargap=0,
    xaxis_title="Month",
    yaxis_title="Percentage of Total TVL",
    title={"x": 0.4, "y": 0.95, "xanchor": "center", "yanchor": "top"},
    margin={"t": 50},
    legend={
        "title": "Chain",
        "x": 1.02,
        "y": 1.05,
        "xanchor": "left",
        "yanchor": "top",
    },
)

# Fully opaque bars with no outline, so adjacent bars read as solid bands.
fig.update_traces(opacity=1, marker={"line": {"width": 0}})

fig.show()


In [208]:
# Chains kept under their own name when building `chains_filtered`; every other
# chain is pooled into "Others". The order is the one printed by the
# "app_token_tvl_usd > 50_000_000 for 2024-12-01" query elsewhere in this
# notebook.
# NOTE(review): both "ZKsync Era" and "zkSync Era" are listed - presumably the
# source data contains both spellings; confirm before de-duplicating.
top_chains = [
    "Ethereum", "Solana", "Base", "Binance", "Hyperliquid",
    "Tron", "Pulse", "Arbitrum", "Polygon", "Sui",
    "Cronos", "Avalanche", "TON", "Cardano", "Mantle",
    "Aptos", "Blast", "Thorchain", "Optimism", "Ronin",
    "xDai", "BOB", "Taiko", "Fantom", "Osmosis",
    "Klaytn", "Starknet", "Sei", "Linea", "Near",
    "Stacks", "Bitcoin", "HydraDX", "ZKsync Era", "Ripple",
    "zkSync Era", "Elrond", "EOS",
]

In [209]:
# Collapse the long tail of chains: a row keeps its own chain name when that
# chain is in top_chains, and is labelled "Others" otherwise.
df_filtered["chains_filtered"] = np.where(
    df_filtered["chain"].isin(top_chains),
    df_filtered["chain"],
    "Others",
)

In [210]:
# app_token_tvl_usd summed per (dt, chains_filtered) over the DEX rows of
# df_filtered with 2024-01-01 <= dt < 2024-12-17.
df_dex = (
    df_filtered
    .loc[
        lambda rows: (rows["dt"] >= "2024-01-01")
        & (rows["dt"] < "2024-12-17")
        & (rows["protocol_category"] == "Dexes")
    ]
    .groupby(["dt", "chains_filtered"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

In [None]:
# app_token_tvl_usd summed per (dt, chains_filtered) over the DEX rows of
# df_filtered with 2024-01-01 <= dt < 2024-12-17.
#
# The category filter previously read `df_filtered. == "Dexes"` (column name
# deleted), which is a SyntaxError and stops the notebook from running or being
# exported as a script. It is restored to `protocol_category`, the column used
# by the executed cell that builds this same frame.
df_dex = (
    df_filtered[
        (df_filtered.dt >= "2024-01-01")
        & (df_filtered.dt < "2024-12-17")
        & (df_filtered.protocol_category == "Dexes")
        # Alternatives that were left switched off in this cell:
        # & (df_filtered.chain.isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        # & (df_filtered.chain != "Hyperliquid")
        # & (df_filtered.protocol_category_mapped.isin(["Dexes"]))
    ]
    .groupby(["dt", "chains_filtered"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

In [222]:
# Parse dt so the .dt accessor can be used on it.
df_dex["dt"] = pd.to_datetime(df_dex["dt"])

# Bucket every row into its calendar month, stored as the timestamp of the
# start of that month.
df_dex["month"] = (
    df_dex["dt"]
    .dt.to_period("M")
    .dt.to_timestamp()
)

# Mean app_token_tvl_usd per (month, chains_filtered). Note this is a mean over
# the rows of df_dex in each month, not a sum.
df_monthly = df_dex.groupby(["month", "chains_filtered"], as_index=False).agg(
    {"app_token_tvl_usd": "mean"}
)

# Sum of those monthly means across all chain groups, broadcast back to every
# row of its month.
total_per_month = (
    df_monthly
    .groupby("month")["app_token_tvl_usd"]
    .transform("sum")
)

# Share of the month's total as a fraction in [0, 1] (not multiplied by 100).
df_monthly["percentage"] = df_monthly["app_token_tvl_usd"].div(total_per_month)



In [216]:
# Wide table of monthly app_token_tvl_usd: one row per chains_filtered value,
# one column per month.
df_pivot_values = pd.pivot(
    df_monthly,
    values="app_token_tvl_usd",
    index="chains_filtered",
    columns="month",
)

In [221]:
# Export the monthly TVL table; cells missing from the pivot are written as 0.
(
    df_pivot_values
    .fillna(0)
    .to_csv("dex_tvl_by_month.csv")
)

In [223]:
# Wide table of each group's monthly share (the `percentage` column): one row
# per chains_filtered value, one column per month.
df_pivot_pct = pd.pivot(
    df_monthly,
    values="percentage",
    index="chains_filtered",
    columns="month",
)

In [224]:
# Export the monthly share table; cells missing from the pivot are written as 0.
(
    df_pivot_pct
    .fillna(0)
    .to_csv("dex_pct_by_month.csv")
)

In [207]:
# Chains whose 2024-12-01 row exceeds 50,000,000 app_token_tvl_usd, listed from
# largest to smallest.
# NOTE(review): this reads a `chain` column, whereas the cell that builds
# df_monthly from `chains_filtered` produces no such column - confirm which
# version of df_monthly this cell is meant to run against.
(
    df_monthly
    .loc[
        lambda monthly: (monthly["month"] == "2024-12-01")
        & (monthly["app_token_tvl_usd"] > 50_000_000)
    ]
    .sort_values(by="app_token_tvl_usd", ascending=False)
    ["chain"]
    .to_list()
)

['Ethereum',
 'Solana',
 'Base',
 'Binance',
 'Hyperliquid',
 'Tron',
 'Pulse',
 'Arbitrum',
 'Polygon',
 'Sui',
 'Cronos',
 'Avalanche',
 'TON',
 'Cardano',
 'Mantle',
 'Aptos',
 'Blast',
 'Thorchain',
 'Optimism',
 'Ronin',
 'xDai',
 'BOB',
 'Taiko',
 'Fantom',
 'Osmosis',
 'Klaytn',
 'Starknet',
 'Sei',
 'Linea',
 'Near',
 'Stacks',
 'Bitcoin',
 'HydraDX',
 'ZKsync Era',
 'Ripple',
 'zkSync Era',
 'Elrond',
 'EOS']

In [185]:
# The 20 chains with the largest app_token_tvl_usd among the 2024-12-01 rows,
# largest first.
# NOTE(review): relies on df_monthly having a `chain` column - confirm against
# the cell that currently builds df_monthly.
top_20_chains = (
    df_monthly
    .loc[lambda monthly: monthly["month"] == "2024-12-01"]
    .sort_values(by="app_token_tvl_usd", ascending=False)
    .head(20)
    ["chain"]
    .to_list()
)

In [191]:
# Distinct values of the `chain` column, in order of first appearance.
df_monthly["chain"].unique()

array(['Others', 'Aptos', 'Arbitrum', 'Avalanche', 'Base', 'Binance',
       'Cardano', 'Cronos', 'Ethereum', 'Mantle', 'Optimism', 'Polygon',
       'Pulse', 'Ronin', 'Solana', 'Sui', 'TON', 'Thorchain', 'Tron',
       'Blast', 'Hyperliquid'], dtype=object)

In [192]:
# All 2024-12-01 rows of df_monthly, largest app_token_tvl_usd first.
(
    df_monthly
    .loc[lambda monthly: monthly["month"] == "2024-12-01"]
    .sort_values(by="app_token_tvl_usd", ascending=False)
)

Unnamed: 0,month,chain,app_token_tvl_usd,percentage
2756,2024-12-01,Ethereum,8684545940.047,33.065
2889,2024-12-01,Solana,3194823131.403,12.164
2699,2024-12-01,Base,2335181983.952,8.891
2703,2024-12-01,Binance,2073465433.676,7.894
2787,2024-12-01,Hyperliquid,1261499911.652,4.803
...,...,...,...,...
2689,2024-12-01,Others,21.204,0.000
2706,2024-12-01,Others,16.504,0.000
2891,2024-12-01,Others,0.242,0.000
2944,2024-12-01,Others,0.010,0.000


In [175]:
# Sanity check on Hyperliquid: total app_token_tvl_usd per protocol_category
# for the rows dated 2024-12-17.
(
    df_filtered
    .loc[
        lambda rows: (rows["chain"] == "Hyperliquid")
        & (rows["dt"] == "2024-12-17")
    ]
    .groupby("protocol_category")["app_token_tvl_usd"]
    .sum()
)

protocol_category
Derivatives    327256527.222
Dexes         3113639573.734
Name: app_token_tvl_usd, dtype: float64

In [138]:
# Each row's share of the frame-wide app_token_tvl_usd total, in percent.
df_dex_grouped["percentage"] = (
    df_dex_grouped["app_token_tvl_usd"]
    .div(df_dex_grouped["app_token_tvl_usd"].sum())
    .mul(100)
)

In [139]:
# Show the grouped frame ranked by share, largest first.
df_dex_grouped.sort_values(
    by="percentage",
    ascending=False,
)

Unnamed: 0,chain,app_token_tvl_usd,percentage
4,Ethereum,8684545940.047,34.099
6,Other,6096921074.441,23.939
8,Solana,3194823131.403,12.544
2,Base,2335181983.952,9.169
3,Binance,2073465433.676,8.141
10,Tron,920027836.966,3.612
0,Arbitrum,730337288.815,2.868
7,Polygon,494576877.015,1.942
9,Sui,460826297.772,1.809
1,Avalanche,294815985.987,1.158


In [243]:
# Snapshot for dt == 2024-12-01: app_token_tvl_usd summed per (dt, chain) for
# seven hand-picked chains, restricted to rows whose protocol_category_mapped
# is "Dexes".
df_dex = (
    df_filtered
    .loc[
        lambda rows: (rows["dt"] == "2024-12-01")
        & (rows["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        & (rows["protocol_category_mapped"].isin(["Dexes"]))
    ]
    .groupby(["dt", "chain"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)


In [244]:
# Each row's share of the summed app_token_tvl_usd across df_dex, in percent.
df_dex["percentage"] = (
    df_dex["app_token_tvl_usd"]
    .div(df_dex["app_token_tvl_usd"].sum())
    .mul(100)
)

In [245]:
# Display df_dex (the bare name is the cell's last expression, so it is rendered).
df_dex

Unnamed: 0,dt,chain,app_token_tvl_usd,percentage
0,2024-12-01,Arbitrum,727525952.703,9.903
1,2024-12-01,Base,2239911078.378,30.489
2,2024-12-01,Mode,40801474.535,0.555
3,2024-12-01,Optimism,176681101.6,2.405
4,2024-12-01,Polygon,463262659.629,6.306
5,2024-12-01,Solana,3258362438.19,44.351
6,2024-12-01,Sui,440186500.75,5.992


### Bridges

In [240]:
# Snapshot for dt == 2024-12-01: app_token_tvl_usd summed per (dt, chain) for
# seven hand-picked chains, restricted to rows whose protocol_category_mapped
# is "Bridge".
df_bridge = (
    df_filtered
    .loc[
        lambda rows: (rows["dt"] == "2024-12-01")
        & (rows["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        & (rows["protocol_category_mapped"] == "Bridge")
    ]
    .groupby(["dt", "chain"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)


In [241]:
# Each row's share of the summed app_token_tvl_usd across df_bridge, in percent.
df_bridge["percentage"] = (
    df_bridge["app_token_tvl_usd"]
    .div(df_bridge["app_token_tvl_usd"].sum())
    .mul(100)
)

In [242]:
# Display df_bridge (the bare name is the cell's last expression, so it is rendered).
df_bridge

Unnamed: 0,dt,chain,app_token_tvl_usd,percentage
0,2024-12-01,Arbitrum,1476657994.373,76.852
1,2024-12-01,Base,66630653.405,3.468
2,2024-12-01,Mode,136104.398,0.007
3,2024-12-01,Optimism,69700318.151,3.628
4,2024-12-01,Polygon,92787342.313,4.829
5,2024-12-01,Solana,214566466.007,11.167
6,2024-12-01,Sui,955867.53,0.05


In [236]:
# Snapshot for dt == 2024-12-01, restricted to seven hand-picked chains and to
# rows whose protocol_category_mapped is "Dexes", but summed per
# (dt, chains_filtered) rather than per chain - so any selected chain that
# chains_filtered labels "Others" shows up under that label.
df_dex = (
    df_filtered
    .loc[
        lambda rows: (rows["dt"] == "2024-12-01")
        & (rows["chain"].isin(["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]))
        & (rows["protocol_category_mapped"].isin(["Dexes"]))
    ]
    .groupby(["dt", "chains_filtered"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)


In [237]:
# Display df_dex (the bare name is the cell's last expression, so it is rendered).
df_dex

Unnamed: 0,dt,chains_filtered,app_token_tvl_usd
0,2024-12-01,Arbitrum,727525952.703
1,2024-12-01,Base,2239911078.378
2,2024-12-01,Optimism,176681101.6
3,2024-12-01,Others,40801474.535
4,2024-12-01,Polygon,463262659.629
5,2024-12-01,Solana,3258362438.19
6,2024-12-01,Sui,440186500.75
