In [1]:
%load_ext autoreload
%autoreload 2

import plotly.io as pio
pio.renderers.default = 'iframe'

In [2]:
import pandas as pd
import plotly.express as px
import numpy as np
import pandas as pd
import re
from datetime import timedelta
import plotly.express as px
import plotly.graph_objects as go

from op_analytics.cli.subcommands.pulls.defillama.dataaccess import DefiLlama

import urllib3
import warnings
# Render floats with three decimal places in pandas output.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# Silence urllib3 warnings.
urllib3.disable_warnings()
# NOTE(review): this ignores every Python warning from here on, including any
# pandas FutureWarning / SettingWithCopyWarning raised by later cells.
warnings.filterwarnings("ignore")

In [3]:
# Regex patterns tested with `re.search(..., re.IGNORECASE)` by `matches_filter_pattern`.
# `re.search` matches anywhere in the string, so a pattern without ^/$ anchors is a
# substring match, not a full match.
PATTERNS_TO_FILTER = [
    "-borrowed",
    "-vesting",
    "-staking",
    "-pool2",
    "-treasury",
    "-cex",
    "^treasury$",
    "^borrowed$",
    "^staking$",
    "^pool2$",
    # Unanchored: matches any string containing this slug (also caught by "-staking").
    "polygon-bridge-&-staking",
    # Anything ending with -cex (also caught by "-cex").
    ".*-cex$",
]

# Protocol categories that are filtered out of the analysis.
CATEGORIES_TO_FILTER = ["CEX", "Chain"]

# Chains grouped by alignment label; the dict below keeps forks first, then OP Chains.
_OP_STACK_FORK_CHAINS = [
    "Metis",
    "Blast",
    "Mantle",
    "Zircuit",
    "RSS3",
    "Rollux",
    "Ancient8",
    "Manta",
]
_OP_CHAINS = [
    "Cyber",
    "Mint",
    "Ham",
    "Polynomial",
    "Lisk",
    "BOB",
    "Mode",
    "World Chain",
    "Base",
    "Kroma",
    "Boba",
    "Fraxtal",
    "Optimism",
    "Shape",
    "Zora",
]

# chain name -> alignment label
alignment_dict = {
    **dict.fromkeys(_OP_STACK_FORK_CHAINS, "OP Stack fork"),
    **dict.fromkeys(_OP_CHAINS, "OP Chain"),
}

# Two-column lookup frame used to join the alignment label onto rows by chain.
alignment_df = pd.DataFrame(
    {
        "chain": list(alignment_dict.keys()),
        "alignment": list(alignment_dict.values()),
    }
)

# Token symbol -> token category. Each symbol must appear exactly once: this table is
# left-joined onto the TVL rows by `token`, so a repeated symbol would duplicate every
# matching TVL row and double-count its value.
token_data = [
    {"token": "ETH", "token_category": "Native Asset"},
    {"token": "WETH", "token_category": "Native Asset"},
    {"token": "SOL", "token_category": "Native Asset"},
    {"token": "wBTC", "token_category": "Wrapped Assets"},
    {"token": "cbBTC", "token_category": "Wrapped Assets"},
    {"token": "MBTC", "token_category": "Wrapped Assets"},

    {"token": "stETH", "token_category": "Liquid Staking"},
    {"token": "wstETH", "token_category": "Liquid Staking"},
    {"token": "eETH", "token_category": "Liquid Restaking"},
    {"token": "weETH", "token_category": "Liquid Restaking"},
    {"token": "sfrxETH", "token_category": "Liquid Staking"},
    {"token": "rETH", "token_category": "Liquid Staking"},
    {"token": "mETH", "token_category": "Liquid Staking"},
    {"token": "rsETH", "token_category": "Liquid Restaking"},
    {"token": "cbETH", "token_category": "Liquid Staking"},
    {"token": "ezETH", "token_category": "Liquid Restaking"},
    {"token": "rswETH", "token_category": "Liquid Restaking"},
    {"token": "swETH", "token_category": "Liquid Staking"},
    {"token": "frxETH", "token_category": "Liquid Staking"},
    {"token": "ETHX", "token_category": "Liquid Staking"},
    {"token": "lsETH", "token_category": "Liquid Staking"},
    {"token": "oETH", "token_category": "Liquid Staking"},
    {"token": "EBTC", "token_category": "Liquid Restaking"},
    {"token": "LBTC", "token_category": "Liquid Restaking"},
    {"token": "SUPEROETHB", "token_category": "Liquid Staking"},
    {"token": "WSUPEROETHB", "token_category": "Liquid Staking"},
    {"token": "TETH", "token_category": "Liquid Staking"},
    {"token": "OSETH", "token_category": "Liquid Staking"},
    {"token": "cmETH", "token_category": "Liquid Restaking"},
    {"token": "WRSETH", "token_category": "Liquid Restaking"},
    {"token": "WEETH.BASE", "token_category": "Liquid Restaking"},

    {"token": "USDC", "token_category": "Stablecoins"},
    {"token": "USDT", "token_category": "Stablecoins"},
    {"token": "FDUSD", "token_category": "Stablecoins"},
    {"token": "PYUSD", "token_category": "Stablecoins"},
    {"token": "TUSD", "token_category": "Stablecoins"},
    {"token": "DAI", "token_category": "Stablecoins"},
    {"token": "USDE", "token_category": "Stablecoins"},
    {"token": "USDD", "token_category": "Stablecoins"},
    {"token": "FRAX", "token_category": "Stablecoins"},
    {"token": "EURC", "token_category": "Stablecoins"},
    {"token": "AGEUR", "token_category": "Stablecoins"},
    {"token": "USDS", "token_category": "Stablecoins"},
    {"token": "USDB", "token_category": "Stablecoins"},
    {"token": "DOLA", "token_category": "Stablecoins"},
    {"token": "SUSDE", "token_category": "Stablecoins"},
    {"token": "USD0++", "token_category": "Stablecoins"},
    {"token": "USD0", "token_category": "Stablecoins"},
    {"token": "SUSD", "token_category": "Stablecoins"},
    {"token": "CRVUSD", "token_category": "Stablecoins"},
    {"token": "USDC+", "token_category": "Stablecoins"},
    {"token": "USDZ", "token_category": "Stablecoins"},
    {"token": "STAR", "token_category": "Stablecoins"},
    {"token": "USDBC", "token_category": "Stablecoins"},
    {"token": "USD+", "token_category": "Stablecoins"},
    {"token": "CDXUSD", "token_category": "Stablecoins"},
    {"token": "HYUSD", "token_category": "Stablecoins"},
    {"token": "EURS", "token_category": "Stablecoins"},
    {"token": "AXLEUROC", "token_category": "Stablecoins"},

    # Solana Liquid staking
    {"token": "MSOL", "token_category": "Liquid Staking"},
    {"token": "JUPSOL", "token_category": "Liquid Staking"},
    {"token": "BNSOL", "token_category": "Liquid Staking"},
    {"token": "SSOL", "token_category": "Liquid Restaking"},
    {"token": "BBSOL", "token_category": "Liquid Restaking"},
    {"token": "LAINESOL", "token_category": "Liquid Staking"},
    {"token": "STSOL", "token_category": "Liquid Staking"},
    {"token": "STRONGSOL", "token_category": "Liquid Staking"},
    {"token": "HUBSOL", "token_category": "Liquid Staking"},
    {"token": "PATHSOL", "token_category": "Liquid Staking"},
    {"token": "STEPSOL", "token_category": "Liquid Staking"},
    {"token": "EDGESOL", "token_category": "Liquid Staking"},
    {"token": "JITOSOL", "token_category": "Liquid Staking"},
    {"token": "DSOL", "token_category": "Liquid Staking"},
    {"token": "BONKSOL", "token_category": "Liquid Staking"},
    {"token": "VSOL", "token_category": "Liquid Staking"},
    {"token": "HSOL", "token_category": "Liquid Staking"},
    # Layer 2 tokens are intentionally left uncategorised for now:
    # {"token": "ARB", "token_category": "Layer 2 Token"},
    # {"token": "OP", "token_category": "Layer 2 Token"},
    # {"token": "MODE", "token_category": "Layer 2 Token"},
]

token_categories = pd.DataFrame(token_data)

# Symbols are compared in upper case.
token_categories["token"] = token_categories["token"].str.upper()

# Fail fast if an edit to the list above reintroduces a repeated symbol.
assert token_categories["token"].is_unique, "duplicate token symbol in token_data"


# Reporting bucket -> the protocol categories that roll up into it.
# NOTE(review): "Liquid Staking" is its own bucket while "Staking" and "Staking Pool"
# land in "Staking/Liquid Staking" — confirm this split is intended.
_CATEGORIES_BY_BUCKET = {
    "Trading": [
        "Dexes",
        "Bridge",
        "Cross Chain",
        "Options",
        "DCA Tools",
        "Prediction Market",
        "DEX Aggregator",
        "Managed Token Pools",
        "NFT Marketplace",
        "Options Vault",
        "NftFi",
        "Basis Trading",
        "OTC Marketplace",
    ],
    "Yield": [
        "Liquidity manager",
        "Yield Aggregator",
        "Indexes",
        "Leveraged Farming",
        "Farm",
        "Token Locker",
        "Yield Lottery",
        "Governance Incentives",
    ],
    "Derivatives": ["Derivatives", "Synthetics"],
    "Lending": [
        "CDP",
        "Uncollateralized Lending",
        "NFT Lending",
        "CDP Manager",
        "RWA Lending",
        "Lending",
    ],
    "TradFi/Fintech": [
        "Services",
        "Chain",
        "Privacy",
        "RWA",
        "Payments",
        "Launchpad",
        "SoFi",
        "Insurance",
        "Bug Bounty",
        "AI Agents",
        "Treasury Manager",
        "CeDeFi",
    ],
    "Stablecoins": ["Algo-Stables", "Reserve Currency", "Decentralized Stablecoin"],
    "Restaking/Liquid Restaking": ["Liquid Restaking", "Restaking", "Restaked BTC"],
    "Liquid Staking": ["Liquid Staking"],
    "Staking/Liquid Staking": ["Staking Pool", "Staking"],
    "Other": ["Gaming", "Oracle", "Ponzi", "Anchor BTC", "Decentralized BTC", "CEX"],
}

# Flattened lookup: protocol category -> reporting bucket.
mapping = {
    category: bucket
    for bucket, categories in _CATEGORIES_BY_BUCKET.items()
    for category in categories
}


- Pull this data fresh; it should be okay to leave the protocol metadata date as-is.
- I would use "2024-11-30" as your latest date; we ran into a few data issues with more recent data.
- Make sure your secrets are up to date; Pedro updated them on Dec 2nd to work with GCS.
- There could be lingering data issues, but Pedro addressed a bunch today.

In [4]:
# Read the token-level TVL dataset with min_date="2023-12-01"; no upper date bound is passed.
duckdb_client = DefiLlama.PROTOCOLS_TOKEN_TVL.read(min_date="2023-12-01")

# Select the date, protocol, chain, token and TVL (token units and USD) columns from
# protocols_token_tvl_v1 and materialise the result with .to_df().
df_protocol_tvl = duckdb_client.sql(
"""
SELECT
    dt,
    protocol_slug,
    chain,
    token,
    app_token_tvl,
    app_token_tvl_usd
FROM protocols_token_tvl_v1
""").to_df()

[2m2024-12-06 13:33:14[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m
[2m2024-12-06 13:33:14[0m [[32m[1mdebug    [0m] [1mloaded vault: 17 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m76[0m
[2m2024-12-06 13:33:14[0m [[32m[1minfo     [0m] [1mquerying markers for 'protocols_token_tvl_v1' DateFilter(min_date=datetime.date(2023, 12, 1), max_date=None, datevals=None)[0m [36mfilename[0m=[35mdataaccess.py[0m [36mlineno[0m=[35m203[0m
[2m2024-12-06 13:33:14[0m [[32m[1mdebug    [0m] [1mconnecting to OPLABS Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m26[0m
[2m2024-12-06 13:33:15[0m [[32m[1mdebug    [0m] [1minitialized OPLABS Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m38[0m
[2m2024-12-06 13:33:15[0m [[32m[1minfo     [0m] [1m384 markers found             [0m [36mfile

In [7]:
# Read protocol metadata with min_date="2024-12-03"; this rebinds `duckdb_client`.
duckdb_client = DefiLlama.PROTOCOLS_METADATA.read(min_date="2024-12-03")

# misrepresented_tokens is compared against the strings 'True' / 'False' and converted
# to a 1/0 flag; any other value falls through to 0.
# NOTE(review): no date column is selected, so if the read spans several days the same
# protocol_slug may appear more than once with differing fields — confirm.
df_metadata = duckdb_client.sql(
"""
SELECT 
    protocol_name,
    protocol_slug,
    protocol_category,
    parent_protocol,
    CASE WHEN misrepresented_tokens = 'True' THEN 1
        WHEN misrepresented_tokens = 'False' THEN 0
        ELSE 0
    END AS misrepresented_tokens
FROM protocols_metadata_v1
""").to_df()

[2m2024-12-06 13:35:09[0m [[32m[1minfo     [0m] [1mquerying markers for 'protocols_metadata_v1' DateFilter(min_date=datetime.date(2024, 12, 3), max_date=None, datevals=None)[0m [36mfilename[0m=[35mdataaccess.py[0m [36mlineno[0m=[35m203[0m
[2m2024-12-06 13:35:09[0m [[32m[1minfo     [0m] [1m1 markers found               [0m [36mfilename[0m=[35mdataaccess.py[0m [36mlineno[0m=[35m217[0m
[2m2024-12-06 13:35:09[0m [[32m[1minfo     [0m] [1m1 distinct paths              [0m [36mfilename[0m=[35mdataaccess.py[0m [36mlineno[0m=[35m223[0m
[2m2024-12-06 13:35:11[0m [[32m[1minfo     [0m] [1mregistered view 'protocols_metadata_v1' using 1 parquet paths[0m [36mfilename[0m=[35mdataaccess.py[0m [36mlineno[0m=[35m233[0m
┌────────────────────────┐
│          name          │
│        varchar         │
├────────────────────────┤
│ protocols_metadata_v1  │
│ protocols_token_tvl_v1 │
└────────────────────────┘



In [8]:
# Exact duplicate rows are removed from both inputs first (ongoing data upload issue).
metadata_unique = df_metadata.drop_duplicates()
tvl_unique = df_protocol_tvl.drop_duplicates()

# Left join from metadata: every metadata row is kept, with its TVL rows attached by slug.
df_all = metadata_unique.merge(tvl_unique, on="protocol_slug", how="left")


In [9]:
# Merge data and join alignment and token categories

# Drop results of a previous run so re-executing this cell does not produce
# alignment_x / alignment_y style columns.
df_all = df_all.drop(columns=["alignment", "token_category"], errors="ignore")

# token_categories holds upper-cased symbols, so normalise the join key before merging;
# otherwise any mixed-case symbol falls through to "Other".
df_all["token"] = df_all["token"].str.upper()

df_all = pd.merge(df_all, alignment_df, on="chain", how="left")
df_all["alignment"] = df_all["alignment"].fillna("Other")  # chains not in alignment_df
df_all = pd.merge(df_all, token_categories, on="token", how="left")
df_all["token_category"] = df_all["token_category"].fillna("Other")  # tokens not in token_categories


In [10]:
# Chain level misrepresented tokens, evaluated on the day before the latest date in df_all.
snapshot_day = df_all["dt"].max() - pd.Timedelta(days=1)
snapshot = df_all.loc[
    df_all.dt == snapshot_day,
    ["protocol_slug", "chain", "misrepresented_tokens", "token"],
]

# Per (protocol, chain, flag): how many distinct tokens there are and whether USDT is one of them.
df_misrep = (
    snapshot
    .groupby(["protocol_slug", "chain", "misrepresented_tokens"])
    .agg(
        token_count=("token", "nunique"),
        has_usdt=("token", lambda tokens: int("USDT" in tokens.values)),
    )
    .reset_index()
)

# Flag a (protocol, chain) when the protocol is marked misrepresented and its only token is USDT.
protocol_is_flagged = df_misrep["misrepresented_tokens"] == 1
single_token = df_misrep["token_count"] == 1
token_is_usdt = df_misrep["has_usdt"] == 1
df_misrep["chain_misrepresented_tokens"] = (
    protocol_is_flagged & single_token & token_is_usdt
).astype(int)

# Attach the flag to every row of the same (protocol, chain); unmatched rows get NaN.
df_all = df_all.merge(
    df_misrep[["protocol_slug", "chain", "chain_misrepresented_tokens"]],
    on=["protocol_slug", "chain"],
    how="left",
)

In [11]:
# remove protocols and chains

def matches_filter_pattern(s):
    """Return True when any regex in PATTERNS_TO_FILTER is found in `s` (case-insensitive)."""
    for pattern in PATTERNS_TO_FILTER:
        if re.search(pattern, s, re.IGNORECASE):
            return True
    return False

# Missing chains become the string "nan" so the regex filter can be applied to every row.
df_all["chain"] = df_all["chain"].astype(str)

df_chain_protocol = df_all[["chain", "protocol_slug", "protocol_category"]].drop_duplicates()

# 1 = exclude: chain name matches a filter pattern, slug is a known bridge/CEX entry,
# or the protocol category is in CATEGORIES_TO_FILTER.
df_chain_protocol["protocol_filters"] = (
    df_chain_protocol["chain"].apply(matches_filter_pattern)
    | (df_chain_protocol["protocol_slug"] == "polygon-bridge-&-staking")
    | df_chain_protocol["protocol_slug"].str.endswith("-cex")
    | df_chain_protocol.protocol_category.isin(CATEGORIES_TO_FILTER)
).astype(int)

# small subset for analysis, actual logic will include more (all?) chains
# The alignment labels must match alignment_dict exactly (note the lower-case "f" in
# "OP Stack fork"), otherwise the fork chains are silently excluded.
# The mask is built from df_all restricted to df_chain_protocol's own row labels, so the
# row correspondence is explicit rather than relying on index alignment at assignment.
rows_for_pairs = df_all.loc[df_chain_protocol.index]
df_chain_protocol["chains_to_keep"] = (
    rows_for_pairs["alignment"].isin(["OP Chain", "OP Stack fork"])
    | rows_for_pairs["chain"].isin(["Ethereum", "Arbitrum", "Solana", "Polygon", "Sui"])
).astype(int)

filter_mask = (df_chain_protocol.protocol_filters == 0) & (df_chain_protocol.chains_to_keep == 1)

# Keep only rows whose (chain, protocol, category) combination passed both checks.
df_filtered = pd.merge(
    df_all,
    df_chain_protocol[filter_mask][["chain", "protocol_slug", "protocol_category"]],
    on=["chain", "protocol_slug", "protocol_category"],
    how="inner",
)



In [12]:
# misc data processing: parse dates, strip the "parent#" prefix, upper-case tokens,
# and default the token category to "Other".
df_filtered = df_filtered.assign(
    dt=pd.to_datetime(df_filtered["dt"]),
    parent_protocol=df_filtered["parent_protocol"].str.replace("parent#", ""),
    token=df_filtered["token"].str.upper(),
    token_category=df_filtered["token_category"].fillna("Other"),
)

# Rows flagged as chain-level misrepresented get their own category label;
# all other rows keep their token category.
is_misrepresented = df_filtered.chain_misrepresented_tokens == 1
df_filtered["token_category_misrep"] = np.where(
    is_misrepresented,
    "Misrepresented TVL",
    df_filtered.token_category,
)

In [14]:
# Look each protocol category up in `mapping`; categories that are not in it keep their original label.
category_bucket = df_filtered["protocol_category"].map(mapping, na_action="ignore")
df_filtered["protocol_category_mapped"] = category_bucket.where(
    category_bucket.notna(), df_filtered["protocol_category"]
)


In [None]:
# Plot token category TVL breakdown

In [187]:
# USD TVL per chain and token category on 2024-12-01 for a fixed set of chains,
# leaving out rows whose parent protocol is the hyperliquid bridge.
snapshot_chains = ["Base", "Optimism", "Mode", "Solana", "Arbitrum", "Sui", "Polygon"]
on_snapshot_day = df_filtered.dt == "2024-12-01"
in_snapshot_chains = df_filtered.chain.isin(snapshot_chains)
not_hyperliquid_bridge = df_filtered.parent_protocol != "hyperliquid-bridge"

token_df = (
    df_filtered[on_snapshot_day & in_snapshot_chains & not_hyperliquid_bridge]
    .groupby(["chain", "token_category_misrep"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
)

In [192]:
# Stacking / legend order for the token categories.
token_category_order = [
    "Native Asset", 
    "Liquid Staking", 
    "Liquid Restaking", 
    "Stablecoins", 
    "Wrapped Assets", 
    "Other", 
    "Misrepresented TVL"
]

# Derive the inputs the chart needs in this cell so it runs on a fresh kernel:
# each category's share of its chain's total TVL ...
chain_totals = token_df.groupby("chain")["app_token_tvl_usd"].transform("sum")
token_pct_df = token_df.assign(percentage=100 * token_df["app_token_tvl_usd"] / chain_totals)

# ... and the x-axis chain order. Assumption: largest total TVL first — replace with an
# explicit list if a fixed ordering is preferred.
order = (
    token_df.groupby("chain")["app_token_tvl_usd"]
    .sum()
    .sort_values(ascending=False)
    .index
    .tolist()
)

fig = px.bar(
    token_pct_df,
    x="chain",
    y="percentage",
    color="token_category_misrep",
    title="Token Category Breakdown by Chain (Percentage)",
    labels={
        "percentage": "Percentage (%)",
        "chain": "Chain",
        "token_category_misrep": "Token Category"
    },
    category_orders={
        "chain": order,
        "token_category_misrep": token_category_order
    },
    barmode="stack"
)

fig.update_layout(
    template="plotly_white",
    xaxis_title="Chain",
    yaxis_title="Percentage (%)",
    legend_title_text="Token Category",
    margin=dict(t=50, l=25, r=25, b=50),
)

fig.show()


In [81]:
def get_flow_df(df, target_date, agg_cols, return_tokens=False):
    """
    Compare TVL on `target_date` with every date up to and including it.

    Token balances on each earlier date are re-valued at the target date's implied
    USD-per-token rate, so the difference to the target-date TVL reflects balance
    changes rather than price changes.

    Parameters:
        df (pd.DataFrame): Rows with 'dt', 'token', 'app_token_tvl', 'app_token_tvl_usd'
            and every column named in `agg_cols`.
        target_date: Anything `pd.to_datetime` accepts.
        agg_cols (list): Grouping columns (must be a list; it is concatenated with lists).
        return_tokens (bool): When True, 'token' is kept as an output grouping column.

    Returns:
        pd.DataFrame: One row per ('dt', *agg_cols[, 'token']) with target, previous and
        rate-adjusted previous USD TVL, token count, net flow / net change and their
        percentage forms (inf and NaN percentages are reported as 0).
        Only tokens present on the target date are included.
    """
    target_ts = pd.to_datetime(target_date)
    value_cols = ["app_token_tvl", "app_token_tvl_usd"]
    token_keys = agg_cols + ["token"]

    def _zero_if_not_finite(series):
        # +/-inf and NaN (division by zero, 0/0) are reported as 0.
        return series.replace([float("inf"), -float("inf")], 0).fillna(0)

    # Target-date totals per token, plus the implied USD value of one token unit.
    target_totals = (
        df.loc[df.dt == target_ts, token_keys + value_cols]
        .groupby(token_keys, as_index=False)
        .sum()
    )
    target_totals["usd_conversion_rate"] = _zero_if_not_finite(
        target_totals.app_token_tvl_usd / target_totals.app_token_tvl
    )

    # Totals per token for every date up to and including the target date.
    history_totals = (
        df.loc[df.dt <= target_ts, ["dt"] + token_keys + value_cols]
        .groupby(["dt"] + token_keys, as_index=False)
        .sum()
    )

    # One row per (target token, historical date); historical columns get "_previous".
    flows = target_totals.merge(
        history_totals,
        on=token_keys,
        how="left",
        suffixes=("", "_previous"),
    )
    for previous_col in ("app_token_tvl_previous", "app_token_tvl_usd_previous"):
        flows[previous_col] = flows[previous_col].fillna(0)
    flows["app_token_tvl_usd_previous_adjusted"] = (
        flows.app_token_tvl_previous * flows.usd_conversion_rate
    ).fillna(0)

    # Roll the token-level rows up to the requested grain.
    if return_tokens:
        group_cols = ["dt"] + agg_cols + ["token"]
    else:
        group_cols = ["dt"] + agg_cols
    summary = flows.groupby(group_cols, as_index=False).agg(
        app_tvl_usd_target=("app_token_tvl_usd", "sum"),
        app_tvl_usd_previous=("app_token_tvl_usd_previous", "sum"),
        app_tvl_usd_previous_adjusted=("app_token_tvl_usd_previous_adjusted", "sum"),
        app_token_count=("token", "nunique"),
    )

    # Absolute changes: rate-adjusted (flow) and raw (TVL).
    summary["net_flow_usd"] = summary.app_tvl_usd_target - summary.app_tvl_usd_previous_adjusted
    summary["net_change_tvl"] = summary.app_tvl_usd_target - summary.app_tvl_usd_previous

    # Percentage changes relative to the respective previous value.
    summary["flow_percent_change"] = (
        summary.net_flow_usd / summary.app_tvl_usd_previous_adjusted * 100
    )
    summary["tvl_percent_change"] = (
        summary.net_change_tvl / summary.app_tvl_usd_previous * 100
    )
    summary["flow_percent_change"] = _zero_if_not_finite(summary.flow_percent_change)
    summary["tvl_percent_change"] = _zero_if_not_finite(summary.tvl_percent_change)

    return summary

In [82]:
# Flows per (chain, parent_protocol) measured against 2024-12-01, one row per date up to it.
flow_df = get_flow_df(df_filtered, "2024-12-01", ["chain", "parent_protocol"])

In [83]:


def plot_top_protocols_over_time(flow_df, date_diff, chain, top_n=10):
    """
    Line chart of rate-adjusted TVL over time for a chain's top protocols by net flow.

    The protocols are ranked by `net_flow_usd` on the row dated `date_diff` days before
    the latest date in `flow_df`; the chart then shows `app_tvl_usd_previous_adjusted`
    for those protocols across every date available for the chain.

    Parameters:
        flow_df (pd.DataFrame): Needs 'dt', 'chain', 'parent_protocol', 'net_flow_usd'
            and 'app_tvl_usd_previous_adjusted' columns.
        date_diff (int): Days before the latest date used as the ranking date.
        chain (str): Chain to plot.
        top_n (int): Number of protocols to show.

    Returns:
        None: Displays the figure, or prints a message when there is nothing to rank.
    """
    max_date = pd.to_datetime(flow_df["dt"].max())
    previous_date = max_date - pd.Timedelta(days=date_diff)

    chain_df = flow_df[flow_df["chain"] == chain]
    # Compare as timestamps so the match works whether 'dt' holds datetimes or date strings.
    n_day_df = chain_df[pd.to_datetime(chain_df["dt"]) == previous_date]

    # Without rows on the ranking date there are no protocols to select.
    if n_day_df.empty:
        print(f"No data available for chain {chain} on {previous_date.strftime('%Y-%m-%d')}.")
        return

    top_protocols = n_day_df.nlargest(top_n, "net_flow_usd")["parent_protocol"]
    top_protocols_df = chain_df[chain_df["parent_protocol"].isin(top_protocols)]

    # One label for the plotted series, used for both the hover text and the axis title.
    y_label = "Target Date Adjusted TVL (USD)"

    fig = px.line(
        top_protocols_df,
        x="dt",
        y="app_tvl_usd_previous_adjusted",
        color="parent_protocol",
        title=f"Growth of Top {top_n} Protocols on {chain} in Last {date_diff} Days",
        labels={"app_tvl_usd_previous_adjusted": y_label, "dt": "Date", "parent_protocol": "Protocol"},
    )

    fig.update_layout(
        template="plotly_white",
        xaxis_title="Date",
        yaxis_title=y_label,
        margin=dict(t=50, l=25, r=25, b=50),
    )

    fig.show()


In [158]:
# Base: top 10 protocols, ranked on the date 30 days before the latest date in flow_df.
plot_top_protocols_over_time(flow_df, 30, "Base", 10)

In [13]:
# Make some treemaps

In [33]:
def plot_nested_protocol_breakdown(data, date, chain, date_diff=90):
    """
    Treemap of a chain's TVL by protocol category > parent protocol > token category.

    Box size is the USD TVL on `date`; colour is the percentage change against the TVL
    `date_diff` days earlier.

    Parameters:
        data (pd.DataFrame): Needs 'dt', 'chain', 'protocol_category', 'parent_protocol',
            'token_category' and 'app_token_tvl_usd' columns. It is not modified.
        date: Target date, anything `pd.to_datetime` accepts.
        chain (str): Chain to plot.
        date_diff (int): Days between the comparison date and the target date.

    Returns:
        pd.DataFrame: The target-date rows (TVL >= 10,000 USD) with
        'app_token_tvl_usd_previous' and 'percent_change' columns added.
    """
    # Parse dates on a new frame so the caller's DataFrame is left untouched.
    data = data.assign(dt=pd.to_datetime(data["dt"]))
    target_date = pd.to_datetime(date)
    previous_date = target_date - pd.Timedelta(days=date_diff)

    on_chain = data["chain"] == chain
    filtered_data = data[
        (data["dt"] == target_date) & on_chain & (data["app_token_tvl_usd"] >= 10_000)
    ]
    previous_data = data[(data["dt"] == previous_date) & on_chain]

    join_keys = ["protocol_category", "parent_protocol", "token_category"]
    merged_data = filtered_data.merge(
        previous_data[join_keys + ["app_token_tvl_usd"]],
        on=join_keys,
        suffixes=("", "_previous"),
        how="left",
    )

    # Positions with no row on the comparison date get a tiny baseline instead of NaN.
    # Assigning the result back (rather than an in-place fillna on the selected column)
    # updates the frame under pandas Copy-on-Write as well.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme values (including the huge ratios produced by the 0.01 baseline).
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "protocol_category", "parent_protocol", "token_category"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data
    

In [34]:
# USD TVL summed per date, chain, protocol category, parent protocol and token category;
# the misrepresentation-aware category column is exposed as "token_category".
breakdown_keys = ["dt", "chain", "protocol_category", "parent_protocol", "token_category_misrep"]
protocol_breakdown = (
    df_filtered
    .groupby(breakdown_keys)
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
    .rename(columns={"token_category_misrep": "token_category"})
)

In [35]:
# Solana treemap for 2024-11-20, coloured by change against the date 30 days earlier.
plot_df = plot_nested_protocol_breakdown(protocol_breakdown, "2024-11-20", "Solana", 30)

In [22]:
# Same aggregation as the protocol breakdown, with the individual token as an extra level.
token_breakdown_keys = [
    "dt",
    "chain",
    "protocol_category",
    "parent_protocol",
    "token_category_misrep",
    "token",
]
protocol_token_breakdown = (
    df_filtered
    .groupby(token_breakdown_keys)
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
    .rename(columns={"token_category_misrep": "token_category"})
)

In [23]:
def plot_nested_protocol_token_breakdown(data, date, chain, date_diff=90):
    """
    Treemap of a chain's TVL by protocol category > parent protocol > token category > token.

    Box size is the USD TVL on `date`; colour is the percentage change against the TVL
    `date_diff` days earlier.

    Parameters:
        data (pd.DataFrame): Needs 'dt', 'chain', 'protocol_category', 'parent_protocol',
            'token_category', 'token' and 'app_token_tvl_usd' columns. It is not modified.
        date: Target date, anything `pd.to_datetime` accepts.
        chain (str): Chain to plot.
        date_diff (int): Days between the comparison date and the target date.

    Returns:
        pd.DataFrame: The target-date rows (TVL >= 10,000 USD) with
        'app_token_tvl_usd_previous' and 'percent_change' columns added.
    """
    # Parse dates on a new frame so the caller's DataFrame is left untouched.
    data = data.assign(dt=pd.to_datetime(data["dt"]))
    target_date = pd.to_datetime(date)
    previous_date = target_date - pd.Timedelta(days=date_diff)

    on_chain = data["chain"] == chain
    filtered_data = data[
        (data["dt"] == target_date) & on_chain & (data["app_token_tvl_usd"] >= 10_000)
    ]
    previous_data = data[(data["dt"] == previous_date) & on_chain]

    join_keys = ["protocol_category", "parent_protocol", "token_category", "token"]
    merged_data = filtered_data.merge(
        previous_data[join_keys + ["app_token_tvl_usd"]],
        on=join_keys,
        suffixes=("", "_previous"),
        how="left",
    )

    # Positions with no row on the comparison date get a tiny baseline instead of NaN.
    # Assigning the result back (rather than an in-place fillna on the selected column)
    # updates the frame under pandas Copy-on-Write as well.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme values (including the huge ratios produced by the 0.01 baseline).
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "protocol_category", "parent_protocol", "token_category", "token"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data

In [9]:
# Base token-level treemap for 2024-12-01, coloured by change against the date 7 days earlier.
plot_df = plot_nested_protocol_token_breakdown(protocol_token_breakdown, "2024-12-01", "Base", 7)

In [161]:
# Stablecoin-only flows against 2024-12-01, kept at token level (return_tokens=True).
# NOTE(review): `net_flows_stables` is reassigned by later cells with different filters.
net_flows_stables = get_flow_df(
    df_filtered[df_filtered.token_category_misrep == "Stablecoins"],
    "2024-12-01", 
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True
)

In [78]:


def plot_treemap_with_date_diff(df, date_diff, column_list, value_col="app_tvl_usd_target"):
    """
    Plots a treemap of the rows dated `date_diff` days before the latest date in `df`.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the data; needs 'dt',
            'flow_percent_change', `value_col` and every column in `column_list`.
            It is not modified.
        date_diff (int): The number of days to subtract from the max date to determine the target date.
        column_list (list): List of columns to use as treemap layers (hierarchy).
        value_col (str): Column to use for treemap values. Default is "app_tvl_usd_target".

    Returns:
        None: Displays the treemap, or prints a message when there are no rows to plot.
    """
    # An empty frame has no max date to offset from.
    if df.empty:
        print("No data available.")
        return

    # Parse dates into a separate Series instead of writing into `df`, which callers
    # typically pass as a filtered slice of another frame.
    dates = pd.to_datetime(df["dt"])

    # Calculate the target date
    target_date = dates.max() - pd.Timedelta(days=date_diff)

    # Filter the DataFrame for the target date
    filtered_df = df[dates == target_date]

    # Check if the filtered DataFrame is empty
    if filtered_df.empty:
        print(f"No data available for the date {target_date.strftime('%Y-%m-%d')}.")
        return

    # Create the treemap
    fig = px.treemap(
        filtered_df,
        path=column_list,  # hierarchy, outermost level first
        values=value_col,  # box size
        color="flow_percent_change",  # box colour
        color_continuous_scale="RdBu",
        range_color=[-200, 200],
        title=f"Treemap for {target_date.strftime('%Y-%m-%d')} (Layered by {', '.join(column_list)})",
        width=800,
        height=800,
    )

    # Customize layout
    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        template="plotly_white",
    )

    fig.show()


In [86]:
# Stablecoin flows against 2024-12-03 at token level, with Bridge protocols left out.
stablecoin_rows = df_filtered.token_category_misrep == "Stablecoins"
non_bridge_rows = df_filtered.protocol_category != "Bridge"
net_flows_stables = get_flow_df(
    df_filtered[stablecoin_rows & non_bridge_rows],
    "2024-12-03",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

# Treemap for the date 30 days before the latest one, limited to rows above 10k USD target TVL.
material_rows = net_flows_stables.app_tvl_usd_target > 10_000
plot_treemap_with_date_diff(
    net_flows_stables[material_rows],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [92]:
# Stablecoin flows against 2024-12-03 at token level, without Bridge protocols and without Ethereum.
stablecoin_rows = df_filtered.token_category_misrep == "Stablecoins"
non_bridge_rows = df_filtered.protocol_category != "Bridge"
non_ethereum_rows = df_filtered.chain != "Ethereum"
net_flows_stables = get_flow_df(
    df_filtered[stablecoin_rows & non_bridge_rows & non_ethereum_rows],
    "2024-12-03",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

# Treemap for the date 30 days before the latest one, limited to rows above 10k USD target TVL.
material_rows = net_flows_stables.app_tvl_usd_target > 10_000
plot_treemap_with_date_diff(
    net_flows_stables[material_rows],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [90]:
# Native Asset flows against 2024-12-03 at token level, with Bridge protocols left out.
# NOTE(review): the result is stored in `net_flows_stables` although it holds Native Asset
# rows — consider a dedicated name once no other cell depends on this one.
native_asset_rows = df_filtered.token_category_misrep == "Native Asset"
non_bridge_rows = df_filtered.protocol_category != "Bridge"
net_flows_stables = get_flow_df(
    df_filtered[native_asset_rows & non_bridge_rows],
    "2024-12-03",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

# Treemap for the date 30 days before the latest one, limited to rows above 10k USD target TVL.
material_rows = net_flows_stables.app_tvl_usd_target > 10_000
plot_treemap_with_date_diff(
    net_flows_stables[material_rows],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [242]:
# All-token flows on Optimism against 2024-12-03, with Bridge protocols left out.
optimism_rows = df_filtered.chain.isin(["Optimism"])
non_bridge_rows = df_filtered.protocol_category != "Bridge"
net_flows = get_flow_df(
    df_filtered[optimism_rows & non_bridge_rows],
    "2024-12-03",
    ["chain", "protocol_category", "parent_protocol"],
    return_tokens=True,
)

# Treemap for the date 30 days before the latest one, limited to rows above 10k USD target TVL.
material_rows = net_flows.app_tvl_usd_target > 10_000
plot_treemap_with_date_diff(
    net_flows[material_rows],
    date_diff=30,
    column_list=["chain", "protocol_category", "parent_protocol", "token"],
)

In [23]:


# NOTE(review): a later cell defines plot_stacked_tvl_over_time again; whichever
# definition ran last is the one in effect.
def plot_stacked_tvl_over_time(df, chain_name, cat_col, top_n=10):
    """
    Plots each category's percentage of total TVL over time for one chain as stacked bars.

    Every category is drawn; only the `top_n` categories with the largest share on the
    last available date get a legend entry.

    Parameters:
        df (pd.DataFrame): The DataFrame containing 'dt', 'chain', the category column
            and 'app_token_tvl_usd'.
        chain_name (str): The name of the chain to filter for (e.g., 'Ethereum').
        cat_col (str): The name of the column representing categories.
        top_n (int): Number of categories shown in the legend. Default is 10.

    Returns:
        None: Displays a stacked bar plot, or prints a message if the chain has no rows.
    """
    # Filter the DataFrame for the specified chain
    df_chain = df[df["chain"] == chain_name]

    # Check if there is data for the specified chain
    if df_chain.empty:
        print(f"No data available for chain: {chain_name}")
        return

    # Group by date and category and sum the TVLs
    grouped = df_chain.groupby(["dt", cat_col]).app_token_tvl_usd.sum().reset_index()

    # Total TVL per date, aligned row-by-row with `grouped`
    total_per_date = grouped.groupby("dt").app_token_tvl_usd.transform("sum")

    # Calculate percentage
    grouped["percentage"] = 100 * grouped["app_token_tvl_usd"] / total_per_date

    # Determine the maximum date
    max_date = grouped["dt"].max()

    # Categories with the largest share on the maximum date
    top_categories = set(
        grouped[grouped["dt"] == max_date]
        .sort_values(by="percentage", ascending=False)
        .head(top_n)[cat_col]
    )

    # Plot using Plotly Express
    fig = px.bar(
        grouped,
        x="dt",
        y="percentage",
        color=cat_col,
        labels={"percentage": "Percentage of Total TVL", cat_col: "Category"},
        title=f"Percentage of Total TVL by Category Over Time for {chain_name}",
        template="plotly_white",
        width=900,
        height=600,
    )
    fig.update_layout(barmode="stack", xaxis_title="Date", yaxis_title="Percentage of Total TVL")

    # Show a legend entry only for the top categories
    fig.for_each_trace(
        lambda trace: trace.update(showlegend=trace.name in top_categories)
    )

    fig.show()


# Example usage:
# plot_stacked_tvl_over_time(your_dataframe, 'Ethereum', 'parent_protocol')


In [25]:
# Ethereum: share of TVL per protocol category over time.
plot_stacked_tvl_over_time(df_filtered, "Ethereum", "protocol_category")

In [51]:

def plot_stacked_tvl_over_time(df, chain_name, cat_col, top_n=10):
    """
    Plot each category's percentage share of one chain's TVL over time as stacked bars.

    Rows for ``chain_name`` are summed per (date, category); each value is then
    expressed as a percentage of that date's total. The ``top_n`` categories with
    the largest share on the last available date are listed first in the category
    order (largest first) and are the only ones that get a legend entry; every
    other category is still drawn. Bars are rendered without gaps or outlines and
    the title is positioned close to the plot area.

    Parameters:
        df (pd.DataFrame): The DataFrame containing 'dt', 'chain', the category
                           column named by ``cat_col``, and 'app_token_tvl_usd'.
        chain_name (str): The name of the chain to filter for (e.g., 'Ethereum').
        cat_col (str): The name of the column representing categories.
        top_n (int): How many categories (ranked by share on the last date) to
                     order first and show in the legend. Defaults to 10.

    Returns:
        None: Displays a stacked bar plot. When ``df`` has no rows for
        ``chain_name`` a message is printed and nothing is plotted.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Keep only the requested chain; bail out early when there is nothing to plot.
    df_chain = df[df["chain"] == chain_name]
    if df_chain.empty:
        print(f"No data available for chain: {chain_name}")
        return

    # TVL per (date, category), then each row's share of its date's total.
    grouped = df_chain.groupby(["dt", cat_col]).app_token_tvl_usd.sum().reset_index()
    total_per_date = grouped.groupby("dt").app_token_tvl_usd.transform("sum")
    grouped["percentage"] = 100 * grouped["app_token_tvl_usd"] / total_per_date

    # Rank categories by their share on the most recent date.
    latest_rows = grouped[grouped["dt"] == grouped["dt"].max()]
    top_categories = (
        latest_rows.sort_values(by="percentage", ascending=False)
        .head(top_n)[cat_col]
        .tolist()
    )

    fig = px.bar(
        grouped,
        x="dt",
        y="percentage",
        color=cat_col,
        labels={"percentage": "Percentage of Total TVL", cat_col: "Category"},
        title=f"Percentage of Total TVL by Category Over Time for {chain_name}",
        template="plotly_white",
        width=900,
        height=600,
        # Only the top categories are pinned; the rest keep their default order.
        category_orders={cat_col: top_categories},
    )

    fig.update_layout(
        barmode="stack",
        xaxis_title="Date",
        yaxis_title="Percentage of Total TVL",
        bargap=0,  # no horizontal gap between neighbouring bars
        # Title anchored near the top of the figure, slightly left of centre.
        title=dict(y=0.95, x=0.4, xanchor="center", yanchor="top"),
        # Smaller top margin so the title sits close to the plot area.
        margin=dict(t=50),
    )

    # Drop bar outlines so stacked segments do not show white seams.
    fig.update_traces(marker_line_width=0)

    # Trace names are strings, so compare against the stringified category values
    # (keeps the legend filter working when cat_col holds non-string values).
    legend_names = {str(category) for category in top_categories}
    fig.for_each_trace(lambda trace: trace.update(showlegend=(trace.name in legend_names)))

    fig.show()


In [52]:
# Ethereum category mix, limited to dates on or before 2024-10-15 and with the
# "Bridge" protocol category removed.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        (df_filtered["protocol_category"] != "Bridge") & (df_filtered["dt"] <= "2024-10-15")
    ],
    "Ethereum",
    "protocol_category",
)

In [54]:
# Arbitrum category mix, limited to dates on or before 2024-10-15 and with the
# "Bridge" protocol category removed.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        (df_filtered["protocol_category"] != "Bridge") & (df_filtered["dt"] <= "2024-10-15")
    ],
    "Arbitrum",
    "protocol_category",
)

In [56]:
# Base category mix, limited to dates on or before 2024-10-15 and with the
# "Bridge" protocol category removed.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        (df_filtered["protocol_category"] != "Bridge") & (df_filtered["dt"] <= "2024-10-15")
    ],
    "Base",
    "protocol_category",
)

In [74]:
# Snapshot date and the protocol categories left out of the per-chain pivot.
todays_date = "2024-12-01"
exclude_categories = [
    "Chain",
    "CEX",
    "Infrastructure",
    "Staking Pool",
    "Liquid Staking",
    "RWA",
    "CeDeFi",
    "Basis Trading",
]

# Every row for the snapshot date, minus anything categorised as a bridge.
today_df = df_filtered.loc[
    (df_filtered["protocol_category"] != "Bridge") & (df_filtered["dt"] == todays_date)
]

# TVL per token category, summed over all rows whose alignment is "OP Chain".
# NOTE(review): this is computed from today_df directly, so it still includes the
# protocol categories in exclude_categories that the pivot below drops — confirm
# whether the two are meant to be comparable.
op_chain_totals = (
    today_df.loc[today_df["alignment"] == "OP Chain"]
    .groupby("token_category")["app_token_tvl_usd"]
    .sum()
)

# Lay the totals out as a single row labelled "Superchain" under an index named "chain".
op_chain_row = op_chain_totals.to_frame().T
op_chain_row.index = pd.Index(["Superchain"], name="chain")

# Row-wise sum across all token categories.
op_chain_row["Grand Total"] = op_chain_row.sum(axis=1)

# chain x token_category TVL matrix for the non-excluded protocol categories;
# margins=True appends a "Grand Total" row and column.
today_tvl_pivot = today_df.loc[
    ~today_df["protocol_category"].isin(exclude_categories)
].pivot_table(
    index="chain",
    columns="token_category",
    values="app_token_tvl_usd",
    aggfunc="sum",
    margins=True,
    margins_name="Grand Total",
)


In [76]:

# Chains shown on the chart.
chains_to_include = ["Solana", "Arbitrum", "Base", "Optimism", "Mode", "Sui", "Polygon"]

# Turn the `chain` index into a regular column, rank rows by grand total
# (largest first), then keep only the selected chains.
pivot_table_reset = today_tvl_pivot.reset_index().sort_values(by="Grand Total", ascending=False)
pivot_filtered = pivot_table_reset[pivot_table_reset["chain"].isin(chains_to_include)]

# Token categories to draw, in stacking order; everything else is dropped.
categories_to_plot = ["Native Asset", "Liquid Staking", "Liquid Restaking", "Stablecoins", "Wrapped Assets", "Other"]
pivot_filtered = pivot_filtered.set_index("chain").loc[:, categories_to_plot]

# One bar trace per token category; barmode="stack" below piles them per chain.
fig = go.Figure()
for category in categories_to_plot:
    fig.add_trace(
        go.Bar(name=category, x=pivot_filtered.index, y=pivot_filtered[category])
    )

# Titles, size and theme.
# Hint: for a log-scaled y-axis, add yaxis_type="log" to this call.
fig.update_layout(
    template="plotly_white",
    barmode="stack",
    title="USD Value by Chain and Token Category",
    xaxis_title="Chain",
    yaxis_title="Total USD Value",
    legend_title="Token Category",
    width=900,
    height=600,
)

fig.show()


In [116]:


def plot_stablecoin_tokens_by_chain(df, date, token_category="Stablecoins", exclude_protocol="Bridge", min_tvl=100000):
    """
    Plot a stacked bar chart of TVL per token, one bar per chain, for a single date.

    Rows are kept when their 'token_category_misrep' equals ``token_category``,
    their 'protocol_category' differs from ``exclude_protocol`` and their 'dt'
    falls on ``date``. TVL is then summed per (chain, token). Chains are ordered
    by their total TVL and tokens by their aggregate TVL across all chains, both
    largest first.

    Parameters:
        df (pd.DataFrame): DataFrame containing:
                           - 'dt': dates (datetime values or date strings that
                             ``pd.to_datetime`` can parse)
                           - 'chain' (str): chain name
                           - 'token_category_misrep' (str): token category label
                           - 'protocol_category' (str): protocol category label
                           - 'token' (str): token identifier
                           - 'app_token_tvl_usd' (float): raw TVL values
        date (str or datetime): The date to filter on (e.g., '2024-12-01').
        token_category (str): The token category to filter for (default: 'Stablecoins').
        exclude_protocol (str): The protocol category to exclude (default: 'Bridge').
        min_tvl (float): Minimum TVL for a (chain, token) pair to be drawn
                         (default: 100000). The threshold is applied to each
                         chain/token sum, not to a token's total across chains.

    Returns:
        None: Displays a stacked bar chart. When no rows match the filters, or no
        (chain, token) pair reaches ``min_tvl``, a message is printed instead.
    """
    # Put both sides of the date comparison on a datetime footing. Comparing a
    # column of date *strings* with a Timestamp matches nothing, which would be
    # reported as "no data" even when rows for that date exist.
    target_date = pd.to_datetime(date)
    row_dates = pd.to_datetime(df["dt"])

    selected = df[
        (df["token_category_misrep"] == token_category)
        & (df["protocol_category"] != exclude_protocol)
        & (row_dates == target_date)
    ]

    if selected.empty:
        print(f"No data available for the given filters on {date}.")
        return

    # TVL per (chain, token).
    grouped = selected.groupby(["chain", "token"]).app_token_tvl_usd.sum().reset_index()

    # Drop (chain, token) pairs whose summed TVL is below the threshold.
    grouped = grouped[grouped["app_token_tvl_usd"] >= min_tvl]
    if grouped.empty:
        print(f"No tokens meet the minimum TVL requirement of {min_tvl} USD on {date}.")
        return

    # Chains from largest to smallest total TVL (after the threshold filter).
    chain_order = (
        grouped.groupby("chain")["app_token_tvl_usd"]
        .sum()
        .sort_values(ascending=False)
        .index.tolist()
    )

    # Tokens from largest to smallest aggregate TVL across all chains.
    token_order = (
        grouped.groupby("token")["app_token_tvl_usd"]
        .sum()
        .sort_values(ascending=False)
        .index.tolist()
    )

    fig = px.bar(
        grouped,
        x="chain",
        y="app_token_tvl_usd",
        color="token",
        title=f"Total {token_category} TVL by Token Across Chains on {date}",
        labels={"chain": "Chain", "app_token_tvl_usd": "TVL (USD)", "token": "Token"},
        category_orders={"chain": chain_order, "token": token_order},
        template="plotly_white",
        width=900,
        height=600,
    )

    # Stacked bars with a small gap between chains; the x-axis title is blanked
    # because the chain names label the bars already.
    fig.update_layout(
        barmode="stack",
        bargap=0.1,
        xaxis_title="",
        yaxis_title="TVL (USD)",
    )

    # Drop bar outlines so stacked segments do not show white seams.
    fig.update_traces(marker_line_width=0)

    fig.show()

# Example usage:
# plot_stablecoin_tokens_by_chain(df_filtered, '2024-12-01')


In [118]:
# Token-level TVL chart for 2024-12-01 (default token category and filters),
# restricted to a hand-picked set of chains.
plot_stablecoin_tokens_by_chain(
    df_filtered[
        df_filtered["chain"].isin(
            ["Ethereum", "Solana", "Arbitrum", "Base", "Optimism", "Mode", "Sui", "Polygon"]
        )
    ],
    "2024-12-01",
)

In [124]:
# Ethereum stablecoin mix by token, with the "Bridge" protocol category removed.
# No date cap is applied here; add `& (df_filtered["dt"] <= "2024-10-15")` to the
# mask to limit the range.
plot_stacked_tvl_over_time(
    df_filtered.loc[
        (df_filtered["token_category"] == "Stablecoins")
        & (df_filtered["protocol_category"] != "Bridge")
    ],
    "Ethereum",
    "token",
)