In [3]:
%load_ext autoreload
%autoreload 2

import plotly.io as pio
pio.renderers.default = 'iframe'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import pandas as pd
import plotly.express as px
from datetime import datetime, timedelta
import numpy as np
import os
import sys
import shutil
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from op_analytics.coreutils.gcpauth import get_credentials
from op_analytics.cli.subcommands.pulls.defillama.protocols import pull_protocol_tvl
from google.cloud import bigquery
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from op_analytics.coreutils.threads import run_concurrently

from op_analytics.cli.subcommands.pulls.app import defillama_protocol_tvl
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.cloud import bigquery
import pandas as pd
from tqdm import tqdm
import re
from textwrap import dedent


import urllib3
import warnings
# Render floats with three decimal places in pandas output.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# Turn off urllib3's warnings.
urllib3.disable_warnings()
# NOTE(review): this ignores *every* Python warning from here on, including
# pandas FutureWarnings about deprecated usage later in the notebook.
warnings.filterwarnings("ignore")

In [202]:
# Regular expressions used by `matches_filter_pattern` (via `re.search`,
# case-insensitive). Un-anchored entries therefore match anywhere in the
# string; only the `^...$` entries require the whole string to match.
PATTERNS_TO_FILTER = [
    "-borrowed",
    "-vesting",
    "-staking",
    "-pool2",
    "-treasury",
    "-cex",
    "^treasury$",
    "^borrowed$",
    "^staking$",
    "^pool2$",  # was listed twice; the duplicate entry has been removed
    "polygon-bridge-&-staking",  # specific slug (un-anchored, so a substring match)
    ".*-cex$",  # anything ending with -cex
]

# Protocol categories that are excluded from the analysis.
CATEGORIES_TO_FILTER = ["CEX", "Chain"]

# Chains grouped by their alignment label. The order inside each tuple (forks
# first, then OP Chains) is the row order of `alignment_df`.
_OP_STACK_FORK_CHAINS = (
    "Metis",
    "Blast",
    "Mantle",
    "Zircuit",
    "RSS3",
    "Rollux",
    "Ancient8",
    "Manta",
)
_OP_CHAIN_CHAINS = (
    "Cyber",
    "Mint",
    "Ham",
    "Polynomial",
    "Lisk",
    "BOB",
    "Mode",
    "World Chain",
    "Base",
    "Kroma",
    "Boba",
    "Fraxtal",
    "Optimism",
    "Shape",
    "Zora",
)

# chain name -> alignment label
alignment_dict = {
    **dict.fromkeys(_OP_STACK_FORK_CHAINS, "OP Stack fork"),
    **dict.fromkeys(_OP_CHAIN_CHAINS, "OP Chain"),
}

# Same mapping as a two-column frame so it can be merged on "chain".
alignment_df = pd.DataFrame(
    {
        "chain": list(alignment_dict.keys()),
        "alignment": list(alignment_dict.values()),
    }
)

# Hand-maintained token -> category lookup. Symbols are written in their usual
# casing here and upper-cased below, so lookups must use upper-case symbols.
token_data = [
    {"token": "ETH", "token_category": "Native Asset"},
    {"token": "WETH", "token_category": "Native Asset"},
    {"token": "SOL", "token_category": "Native Asset"},
    {"token": "wBTC", "token_category": "Wrapped Assets"},
    {"token": "cbBTC", "token_category": "Wrapped Assets"},
    {"token": "MBTC", "token_category": "Wrapped Assets"},

    {"token": "stETH", "token_category": "Liquid Staking"},
    {"token": "wstETH", "token_category": "Liquid Staking"},
    {"token": "eETH", "token_category": "Liquid Restaking"},
    {"token": "weETH", "token_category": "Liquid Restaking"},
    {"token": "sfrxETH", "token_category": "Liquid Staking"},
    {"token": "rETH", "token_category": "Liquid Staking"},
    {"token": "mETH", "token_category": "Liquid Staking"},
    {"token": "rsETH", "token_category": "Liquid Restaking"},
    {"token": "cbETH", "token_category": "Liquid Staking"},
    {"token": "ezETH", "token_category": "Liquid Restaking"},
    {"token": "rswETH", "token_category": "Liquid Restaking"},
    {"token": "swETH", "token_category": "Liquid Staking"},
    {"token": "frxETH", "token_category": "Liquid Staking"},
    {"token": "ETHX", "token_category": "Liquid Staking"},
    {"token": "lsETH", "token_category": "Liquid Staking"},
    {"token": "oETH", "token_category": "Liquid Staking"},
    {"token": "EBTC", "token_category": "Liquid Restaking"},
    {"token": "LBTC", "token_category": "Liquid Restaking"},
    {"token": "SUPEROETHB", "token_category": "Liquid Staking"},
    {"token": "WSUPEROETHB", "token_category": "Liquid Staking"},
    {"token": "TETH", "token_category": "Liquid Staking"},
    {"token": "OSETH", "token_category": "Liquid Staking"},
    {"token": "cmETH", "token_category": "Liquid Restaking"},
    {"token": "WRSETH", "token_category": "Liquid Restaking"},
    {"token": "WEETH.BASE", "token_category": "Liquid Restaking"},

    {"token": "USDC", "token_category": "Stablecoins"},
    {"token": "USDT", "token_category": "Stablecoins"},
    {"token": "FDUSD", "token_category": "Stablecoins"},
    {"token": "PYUSD", "token_category": "Stablecoins"},
    {"token": "TUSD", "token_category": "Stablecoins"},
    {"token": "DAI", "token_category": "Stablecoins"},
    {"token": "USDE", "token_category": "Stablecoins"},
    {"token": "USDD", "token_category": "Stablecoins"},
    {"token": "FRAX", "token_category": "Stablecoins"},
    {"token": "EURC", "token_category": "Stablecoins"},
    {"token": "AGEUR", "token_category": "Stablecoins"},
    {"token": "USDS", "token_category": "Stablecoins"},
    {"token": "USDB", "token_category": "Stablecoins"},
    {"token": "DOLA", "token_category": "Stablecoins"},
    {"token": "SUSDE", "token_category": "Stablecoins"},
    {"token": "USD0++", "token_category": "Stablecoins"},
    {"token": "USD0", "token_category": "Stablecoins"},
    {"token": "SUSD", "token_category": "Stablecoins"},
    {"token": "CRVUSD", "token_category": "Stablecoins"},
    {"token": "USDC+", "token_category": "Stablecoins"},
    {"token": "USDZ", "token_category": "Stablecoins"},
    {"token": "STAR", "token_category": "Stablecoins"},
    {"token": "USDBC", "token_category": "Stablecoins"},
    {"token": "USD+", "token_category": "Stablecoins"},
    {"token": "CDXUSD", "token_category": "Stablecoins"},
    {"token": "HYUSD", "token_category": "Stablecoins"},
    {"token": "EURS", "token_category": "Stablecoins"},
    {"token": "AXLEUROC", "token_category": "Stablecoins"},


    # Solana Liquid staking
    {"token": "MSOL", "token_category": "Liquid Staking"},
    {"token": "JUPSOL", "token_category": "Liquid Staking"},
    {"token": "BNSOL", "token_category": "Liquid Staking"},
    {"token": "SSOL", "token_category": "Liquid Restaking"},
    {"token": "BBSOL", "token_category": "Liquid Restaking"},
    {"token": "LAINESOL", "token_category": "Liquid Staking"},
    {"token": "STSOL", "token_category": "Liquid Staking"},
    {"token": "STRONGSOL", "token_category": "Liquid Staking"},
    {"token": "HUBSOL", "token_category": "Liquid Staking"},
    {"token": "PATHSOL", "token_category": "Liquid Staking"},
    {"token": "STEPSOL", "token_category": "Liquid Staking"},
    {"token": "EDGESOL", "token_category": "Liquid Staking"},
    {"token": "JITOSOL", "token_category": "Liquid Staking"},
    {"token": "DSOL", "token_category": "Liquid Staking"},
    {"token": "BONKSOL", "token_category": "Liquid Staking"},
    {"token": "VSOL", "token_category": "Liquid Staking"},
    {"token": "HSOL", "token_category": "Liquid Staking"},
    # {"token": "ARB", "token_category": "Layer 2 Token"},
    # {"token": "OP", "token_category": "Layer 2 Token"},
    # {"token": "MODE", "token_category": "Layer 2 Token"},
]

token_categories = pd.DataFrame(token_data)

token_categories["token"] = token_categories["token"].str.upper()

# This frame is used as the right side of a left merge on "token". A repeated
# symbol (STAR used to be listed twice) would duplicate every matching TVL row
# and double-count it, so keep exactly one row per upper-cased symbol.
token_categories = (
    token_categories
    .drop_duplicates(subset="token", keep="first")
    .reset_index(drop=True)
)

In [6]:
def generate_weekly_date_ranges(start_date: str, end_date: str) -> list[tuple[str, str]]:
    """
    Split the inclusive span [start_date, end_date] into consecutive windows.

    Both arguments are "YYYY-MM-DD" strings. Each window covers at most seven
    calendar days; the last one is cut short so it never runs past `end_date`.
    Returns a list of (window_start, window_end) string pairs in the same
    format, or an empty list when `start_date` is after `end_date`.
    """
    fmt = "%Y-%m-%d"
    cursor = datetime.strptime(start_date, fmt)
    last_day = datetime.strptime(end_date, fmt)
    six_days = timedelta(days=6)
    one_day = timedelta(days=1)

    windows: list[tuple[str, str]] = []
    while not cursor > last_day:
        window_end = cursor + six_days
        if window_end > last_day:
            # Final, partial window: stop exactly on the requested end date.
            window_end = last_day
        windows.append((cursor.strftime(fmt), window_end.strftime(fmt)))
        cursor = window_end + one_day

    return windows

In [7]:
def run_big_query_concurrently(client, base_query, date_ranges, max_workers: int | None = None):
    
    max_workers = max_workers or 4

    query_template = """
    {base_query}
    WHERE dt BETWEEN '{start_date}' AND '{end_date}'
    """


    def fetch_chunk(start_date, end_date):
        query = query_template.format(base_query=base_query, start_date=start_date, end_date=end_date)
        query_job = client.query(query)
        return query_job.to_dataframe()

    dataframes = []
    with ThreadPoolExecutor() as executor:
        futures = {executor.submit(fetch_chunk, start, end): (start, end) for start, end in date_ranges}

        with tqdm(total=len(futures), desc="Fetching Data") as pbar:
            for future in as_completed(futures):
                dataframes.append(future.result())
                pbar.update(1)

    return pd.concat(dataframes, ignore_index=True)




In [6]:
# client = bigquery.Client(credentials=get_credentials())
# date_range = generate_weekly_date_ranges("2024-08-20", "2024-11-20")

# query = """
# SELECT 
#     tvl.protocol_slug,
#     tvl.chain,
#     m.protocol_name,
#     m.parent_protocol,
#     m.protocol_category,
#     tvl.dt,
#     tvl.token,
#     tvl.app_token_tvl,
#     tvl.app_token_tvl_usd
# FROM `oplabs-tools-data.uploads_api.defillama_protocols_token_tvl` tvl 
# LEFT JOIN `oplabs-tools-data.uploads_api.defillama_protocols_metadata` m 
#     ON tvl.protocol_slug = m.protocol_slug
# """

# df_bq = run_big_query_concurrently(client, query, date_range, max_workers=8)


In [8]:
# ProtocolTvlDataframes = pull_protocol_tvl()

In [8]:
# df_all = pd.merge(
#     ProtocolTvlDataframes.metadata_df.to_pandas(), 
#     ProtocolTvlDataframes.app_token_tvl_df.to_pandas(), 
#     how="inner",
#     on="protocol_slug"
# )

In [9]:
import duckdb as db

In [203]:
# The CSV read below was produced by this (now disabled) export:
# df_all.to_csv("defillama_raw_protocol_token_tvl_2024-11-26.csv", index=False)
q = dedent(
    """
    SELECT
        dt,
        protocol_name,
        protocol_slug,
        protocol_category,
        parent_protocol,
        chain,
        CASE WHEN misrepresented_tokens = 'True' THEN 1
             WHEN misrepresented_tokens = 'False' THEN 0
             ELSE 0
        END AS misrepresented_tokens_flag,
        token,
        app_token_tvl,
        app_token_tvl_usd
    FROM 'defillama_raw_protocol_token_tvl_2024-11-26.csv'
    WHERE dt >= '2024-08-01'
    """
)

df_all = db.query(q).to_df()

# Tag each chain with its alignment label; unlisted chains become "Other".
df_all = pd.merge(df_all, alignment_df, on="chain", how="left")
df_all["alignment"] = df_all["alignment"].fillna("Other")

# `token_categories` holds upper-cased symbols, so normalise the join key on
# this side too; otherwise mixed-case symbols silently fall into "Other".
df_all["token"] = df_all["token"].str.upper()

# The lookup must be unique per token: a repeated symbol on the right side of
# a left merge duplicates the matching TVL rows and double-counts them.
df_all = pd.merge(
    df_all,
    token_categories.drop_duplicates(subset="token"),
    on="token",
    how="left",
)
df_all["token_category"] = df_all["token_category"].fillna("Other")



In [204]:
# Chain level misrepresented tokens
# Per (protocol, chain, protocol-level flag): how many distinct tokens are
# reported and whether USDT is one of them.
df_misrep = (
    df_all
    [["protocol_slug", "chain", "misrepresented_tokens_flag", "token"]]
    .groupby(["protocol_slug", "chain", "misrepresented_tokens_flag"])
    .agg(
        token_count=("token", "nunique"),
        has_usdt=("token", lambda x: 1 if "USDT" in x.values else 0)
    )
    .reset_index()
)

# A (protocol, chain) pair is flagged when the protocol is marked as
# misrepresenting tokens AND its only reported token on that chain is USDT.
df_misrep["chain_misrepresented_tokens_flag"] = (
    (df_misrep["misrepresented_tokens_flag"] == 1) 
    & (df_misrep["token_count"] == 1) 
    & (df_misrep["has_usdt"] == 1)
).astype(int)

# The merge key below is (protocol_slug, chain), which is coarser than the
# groupby key above. Collapse to one row per merge key (flagged if any group
# is flagged) so the merge can never duplicate rows of df_all.
chain_misrep_flags = (
    df_misrep
    .groupby(["protocol_slug", "chain"], as_index=False)
    ["chain_misrepresented_tokens_flag"]
    .max()
)

# Drop a flag column left over from a previous run of this cell; otherwise the
# merge would produce "_x"/"_y" suffixed columns and later cells would fail.
df_all = pd.merge(
    df_all.drop(columns=["chain_misrepresented_tokens_flag"], errors="ignore"),
    chain_misrep_flags,
    on=["protocol_slug", "chain"],
    how="left",
    validate="many_to_one",
)

In [205]:
# remove protocols and chains

def matches_filter_pattern(s, patterns=None):
    """Return True when `s` matches at least one filter regex.

    Args:
        s: string to test. Non-string values (e.g. NaN) return False instead
            of raising inside `re.search`.
        patterns: iterable of regular expressions; defaults to the module-level
            `PATTERNS_TO_FILTER` when omitted.

    Matching uses `re.search` with `re.IGNORECASE`, so un-anchored patterns
    match anywhere in `s`.
    """
    if not isinstance(s, str):
        return False
    if patterns is None:
        patterns = PATTERNS_TO_FILTER
    return any(re.search(pattern, s, re.IGNORECASE) for pattern in patterns)

df_all["chain"] = df_all["chain"].astype(str)

# One row per (chain, protocol). `alignment` comes from a merge on chain, so
# carrying it along does not change which rows survive de-duplication.
df_chain_protocol = df_all[
    ["chain", "protocol_slug", "protocol_category", "alignment"]
].drop_duplicates()

# 1 = exclude this chain/protocol pair.
df_chain_protocol["protocol_filters"] = (
    df_chain_protocol["chain"].apply(matches_filter_pattern)
    | (df_chain_protocol["protocol_slug"] == "polygon-bridge-&-staking")
    | df_chain_protocol["protocol_slug"].str.endswith("-cex")
    | df_chain_protocol.protocol_category.isin(CATEGORIES_TO_FILTER)
).astype(int)

# small subset for analysis, actual logic will include more (all?) chains
# The label must be spelled exactly as in `alignment_dict` ("OP Stack fork");
# the previous "OP Stack Fork" never matched, so every fork chain was dropped.
# The mask is built from df_chain_protocol itself rather than from df_all, so
# it no longer depends on implicit index alignment between the two frames.
df_chain_protocol["chains_to_keep"] = (
    df_chain_protocol["alignment"].isin(["OP Chain", "OP Stack fork"])
    | df_chain_protocol["chain"].isin(["Ethereum", "Arbitrum", "Solana"])
).astype(int)

filter_mask = (df_chain_protocol.protocol_filters == 0) & (df_chain_protocol.chains_to_keep == 1)

# Keep only the rows of df_all whose (chain, protocol, category) survived.
df_filtered = pd.merge(
    df_all,
    df_chain_protocol[filter_mask][["chain", "protocol_slug", "protocol_category"]],
    on=["chain", "protocol_slug", "protocol_category"],
    how="inner",
)



In [206]:
# Normalise the filtered frame in one pass. `assign` evaluates its arguments
# in order, so the last step sees the already-filled token_category.
df_filtered = df_filtered.assign(
    dt=lambda frame: pd.to_datetime(frame["dt"]),
    parent_protocol=lambda frame: frame["parent_protocol"].str.replace("parent#", ""),
    token=lambda frame: frame["token"].str.upper(),
    token_category=lambda frame: frame["token_category"].fillna("Other"),
    # Rows on flagged chain/protocol pairs get their own bucket instead of
    # their token's category.
    token_category_misrep=lambda frame: np.where(
        frame["chain_misrepresented_tokens_flag"] == 1,
        "Misrepresented TVL",
        frame["token_category"],
    ),
)

In [None]:
# Make some treemaps

In [209]:
# USD TVL summed per day, chain, token category and protocol category. The
# misrepresentation-aware category is exposed under the name "token_category".
token_category_breakdown = (
    df_filtered
    .groupby(
        ["dt", "chain", "token_category_misrep", "protocol_category"],
        as_index=False,
    )
    .agg(app_token_tvl_usd=("app_token_tvl_usd", "sum"))
    .rename(columns={"token_category_misrep": "token_category"})
)

In [210]:
# Make sure the date column holds pandas datetimes.
token_category_breakdown = token_category_breakdown.assign(
    dt=pd.to_datetime(token_category_breakdown["dt"])
)

In [211]:
import pandas as pd
import plotly.express as px


def plot_nested_token_category_breakdown(data, date, chain, date_diff=90):
    """Show a treemap of one chain's TVL by token category and protocol category.

    Box size is the USD TVL on `date`; colour is the percent change against the
    value `date_diff` days earlier.

    Args:
        data: frame with columns dt, chain, token_category, protocol_category
            and app_token_tvl_usd. It is not modified.
        date: target date (anything `pd.to_datetime` accepts).
        chain: value of the "chain" column to plot.
        date_diff: number of days between the comparison date and `date`.

    Returns:
        The frame passed to the treemap: target-date rows with at least
        10,000 USD TVL, plus `app_token_tvl_usd_previous` and `percent_change`.
    """
    # Parse dates into a local Series instead of writing back into `data`, so
    # the caller's frame is left untouched.
    dt = pd.to_datetime(data["dt"])
    target_date = pd.to_datetime(date)
    # Normalised to midnight, matching the former "%Y-%m-%d" string comparison.
    previous_date = (target_date - pd.Timedelta(days=date_diff)).normalize()
    on_chain = data["chain"] == chain

    # filter to 10k min TVL for current date
    current_mask = (dt == target_date) & on_chain & (data["app_token_tvl_usd"] >= 10_000)
    filtered_data = data.loc[current_mask].assign(dt=dt[current_mask])

    # no lower bound
    previous_data = data.loc[(dt == previous_date) & on_chain]

    merged_data = filtered_data.merge(
        previous_data[["token_category", "protocol_category", "app_token_tvl_usd"]],
        on=["token_category", "protocol_category"],
        suffixes=("", "_previous"),
        how="left",
    )

    # Groups with no row on the comparison date get a tiny baseline so the
    # percent change is defined (it is then capped below). Assign the result
    # back: `fillna(..., inplace=True)` on a column selection does not update
    # the frame under pandas Copy-on-Write.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # cap percent changes
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-1000, upper=1000)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "token_category", "protocol_category"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=600,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    # Show plot
    fig.show()

    return merged_data


In [221]:
plot_df = plot_nested_token_category_breakdown(token_category_breakdown, "2024-11-20", "Base", 30)

In [214]:
# USD TVL summed per day, chain, token category, protocol category and token.
# The misrepresentation-aware category is exposed as "token_category".
token_breakdown = (
    df_filtered
    .groupby(
        ["dt", "chain", "token_category_misrep", "protocol_category", "token"],
        as_index=False,
    )
    .agg(app_token_tvl_usd=("app_token_tvl_usd", "sum"))
    .rename(columns={"token_category_misrep": "token_category"})
)

In [278]:
def plot_nested_token_breakdown(data, date, chain, date_diff=90):
    """Show a treemap of one chain's TVL by token category, protocol category and token.

    Box size is the USD TVL on `date`; colour is the percent change against the
    value `date_diff` days earlier.

    Args:
        data: frame with columns dt, chain, token_category, protocol_category,
            token and app_token_tvl_usd. It is not modified.
        date: target date (anything `pd.to_datetime` accepts).
        chain: value of the "chain" column to plot.
        date_diff: number of days between the comparison date and `date`.

    Returns:
        The frame passed to the treemap: target-date rows with at least
        10,000 USD TVL, plus `app_token_tvl_usd_previous` and `percent_change`.
    """
    # Parse dates into a local Series instead of writing back into `data`, so
    # the caller's frame is left untouched.
    dt = pd.to_datetime(data["dt"])
    target_date = pd.to_datetime(date)
    # Normalised to midnight, matching the former "%Y-%m-%d" string comparison.
    previous_date = (target_date - pd.Timedelta(days=date_diff)).normalize()
    on_chain = data["chain"] == chain

    # Current snapshot: only rows with at least 10k USD of TVL.
    current_mask = (dt == target_date) & on_chain & (data["app_token_tvl_usd"] >= 10_000)
    filtered_data = data.loc[current_mask].assign(dt=dt[current_mask])

    # Comparison snapshot: no lower bound.
    previous_data = data.loc[(dt == previous_date) & on_chain]

    merged_data = filtered_data.merge(
        previous_data[["token_category", "protocol_category", "token", "app_token_tvl_usd"]],
        on=["token_category", "protocol_category", "token"],
        suffixes=("", "_previous"),
        how="left",
    )

    # Groups with no row on the comparison date get a tiny baseline so the
    # percent change is defined (it is then capped below). Assign the result
    # back: `fillna(..., inplace=True)` on a column selection does not update
    # the frame under pandas Copy-on-Write.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme changes.
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "token_category", "protocol_category", "token"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data

In [292]:
# Rebuild the per-token breakdown from the current df_filtered: USD TVL summed
# per day, chain, token category, protocol category and token.
token_breakdown = (
    df_filtered
    .groupby(
        ["dt", "chain", "token_category_misrep", "protocol_category", "token"],
        as_index=False,
    )
    .agg(app_token_tvl_usd=("app_token_tvl_usd", "sum"))
    .rename(columns={"token_category_misrep": "token_category"})
)

In [293]:
plot_df = plot_nested_token_breakdown(token_breakdown, "2024-11-20", "Base", 30)

In [304]:
# USD TVL summed per day, chain, protocol category, parent protocol and token
# category. `groupby` drops rows whose key is null, so a protocol without a
# parent_protocol would silently vanish from the breakdown; fall back to the
# protocol's own slug in that case.
# NOTE(review): confirm whether parent_protocol can actually be null upstream.
protocol_breakdown = (
    df_filtered
    .assign(parent_protocol=lambda frame: frame["parent_protocol"].fillna(frame["protocol_slug"]))
    .groupby(["dt", "chain", "protocol_category", "parent_protocol", "token_category_misrep"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
    .rename(columns={"token_category_misrep": "token_category"})
)

In [305]:
def plot_nested_protocol_breakdown(data, date, chain, date_diff=90):
    """Show a treemap of one chain's TVL by protocol category, parent protocol and token category.

    Box size is the USD TVL on `date`; colour is the percent change against the
    value `date_diff` days earlier.

    Args:
        data: frame with columns dt, chain, protocol_category, parent_protocol,
            token_category and app_token_tvl_usd. It is not modified.
        date: target date (anything `pd.to_datetime` accepts).
        chain: value of the "chain" column to plot.
        date_diff: number of days between the comparison date and `date`.

    Returns:
        The frame passed to the treemap: target-date rows with at least
        10,000 USD TVL, plus `app_token_tvl_usd_previous` and `percent_change`.
    """
    # Parse dates into a local Series instead of writing back into `data`, so
    # the caller's frame is left untouched.
    dt = pd.to_datetime(data["dt"])
    target_date = pd.to_datetime(date)
    # Normalised to midnight, matching the former "%Y-%m-%d" string comparison.
    previous_date = (target_date - pd.Timedelta(days=date_diff)).normalize()
    on_chain = data["chain"] == chain

    # Current snapshot: only rows with at least 10k USD of TVL.
    current_mask = (dt == target_date) & on_chain & (data["app_token_tvl_usd"] >= 10_000)
    filtered_data = data.loc[current_mask].assign(dt=dt[current_mask])

    # Comparison snapshot: no lower bound.
    previous_data = data.loc[(dt == previous_date) & on_chain]

    merged_data = filtered_data.merge(
        previous_data[["protocol_category", "parent_protocol", "token_category", "app_token_tvl_usd"]],
        on=["protocol_category", "parent_protocol", "token_category"],
        suffixes=("", "_previous"),
        how="left",
    )

    # Groups with no row on the comparison date get a tiny baseline so the
    # percent change is defined (it is then capped below). Assign the result
    # back: `fillna(..., inplace=True)` on a column selection does not update
    # the frame under pandas Copy-on-Write.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme changes.
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "protocol_category", "parent_protocol", "token_category"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data
    

In [308]:
plot_df = plot_nested_protocol_breakdown(protocol_breakdown, "2024-11-20", "Solana", 30)

In [309]:
# USD TVL summed per day, chain, protocol category, parent protocol, token
# category and token. `groupby` drops rows whose key is null, so a protocol
# without a parent_protocol would silently vanish from the breakdown; fall
# back to the protocol's own slug in that case.
# NOTE(review): confirm whether parent_protocol can actually be null upstream.
protocol_token_breakdown = (
    df_filtered
    .assign(parent_protocol=lambda frame: frame["parent_protocol"].fillna(frame["protocol_slug"]))
    .groupby(["dt", "chain", "protocol_category", "parent_protocol", "token_category_misrep", "token"])
    .agg({"app_token_tvl_usd": "sum"})
    .reset_index()
    .rename(columns={"token_category_misrep": "token_category"})
)

In [310]:
def plot_nested_protocol_token_breakdown(data, date, chain, date_diff=90):
    """Show a treemap of one chain's TVL by protocol category, parent protocol, token category and token.

    Box size is the USD TVL on `date`; colour is the percent change against the
    value `date_diff` days earlier.

    Args:
        data: frame with columns dt, chain, protocol_category, parent_protocol,
            token_category, token and app_token_tvl_usd. It is not modified.
        date: target date (anything `pd.to_datetime` accepts).
        chain: value of the "chain" column to plot.
        date_diff: number of days between the comparison date and `date`.

    Returns:
        The frame passed to the treemap: target-date rows with at least
        10,000 USD TVL, plus `app_token_tvl_usd_previous` and `percent_change`.
    """
    # Parse dates into a local Series instead of writing back into `data`, so
    # the caller's frame is left untouched.
    dt = pd.to_datetime(data["dt"])
    target_date = pd.to_datetime(date)
    # Normalised to midnight, matching the former "%Y-%m-%d" string comparison.
    previous_date = (target_date - pd.Timedelta(days=date_diff)).normalize()
    on_chain = data["chain"] == chain

    # Current snapshot: only rows with at least 10k USD of TVL.
    current_mask = (dt == target_date) & on_chain & (data["app_token_tvl_usd"] >= 10_000)
    filtered_data = data.loc[current_mask].assign(dt=dt[current_mask])

    # Comparison snapshot: no lower bound.
    previous_data = data.loc[(dt == previous_date) & on_chain]

    merged_data = filtered_data.merge(
        previous_data[["protocol_category", "parent_protocol", "token_category", "token", "app_token_tvl_usd"]],
        on=["protocol_category", "parent_protocol", "token_category", "token"],
        suffixes=("", "_previous"),
        how="left",
    )

    # Groups with no row on the comparison date get a tiny baseline so the
    # percent change is defined (it is then capped below). Assign the result
    # back: `fillna(..., inplace=True)` on a column selection does not update
    # the frame under pandas Copy-on-Write.
    merged_data["app_token_tvl_usd_previous"] = merged_data["app_token_tvl_usd_previous"].fillna(0.01)

    merged_data["percent_change"] = (
        (merged_data["app_token_tvl_usd"] - merged_data["app_token_tvl_usd_previous"])
        / merged_data["app_token_tvl_usd_previous"]
    ) * 100

    # Cap extreme changes.
    merged_data["percent_change"] = merged_data["percent_change"].clip(lower=-500, upper=500)

    fig = px.treemap(
        merged_data,
        path=[px.Constant("Total"), "protocol_category", "parent_protocol", "token_category", "token"],
        values="app_token_tvl_usd",
        color="percent_change",
        color_continuous_scale="RdBu",
        range_color=[-100, 100],
        title=f"{chain}: Token Category <> App TVL Last {date_diff} Days",
        width=800,
        height=800,
    )

    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

    fig.show()

    return merged_data

In [315]:
plot_df = plot_nested_protocol_token_breakdown(protocol_token_breakdown, "2024-11-20", "Solana", 30)