In [8]:
# Import the class from the Python file (module)
import pandas as pd
import matplotlib.pyplot as plt
import os
# from dotenv import load_dotenv
# from pathlib import Path
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from BinanceClient import BinanceClient
import numpy as np
from typing import Final
import joblib
from BatchFeatures import BatchFeatures
from datetime import datetime, timedelta
%matplotlib widget

## Load pair df

In [46]:
import os
from datetime import datetime, timedelta, timezone

def interval_slug(s: str) -> str:
    return s.strip().replace(" ", "").replace("/", "").lower()

def make_db_name(pair: str, interval: str, weeks: int) -> str:
    return f"{pair}_{interval_slug(interval)}_{weeks}weeks.db"

def load_or_fetch_pair_df(pair: str, interval: str, weeks: int) -> tuple[str, "pd.DataFrame"]:
    db_name = make_db_name(pair, interval, weeks)
    db_path = "./db/" + db_name

    print(f"[{pair}] DB: {db_path}")

    binance_client = BinanceClient(db_path)
    binance_client.set_interval(interval)

    df = None

    if os.path.exists(db_path):
        df = binance_client.fetch_data_from_db(pair)
        if df is not None and not df.empty:
            print(f"[{pair}] Loaded {len(df):,} rows from DB.")
        else:
            df = None

    if df is None:
        print(f"[{pair}] No usable DB data found -> fetching from Binance...")

        api_secret = os.getenv("BINANCE_SECRET_KEY")
        api_key = os.getenv("BINANCE_API_KEY")
        binance_client.make(api_key, api_secret)

        server_time = binance_client.get_server_time()
        end_dt = datetime.fromtimestamp(server_time["serverTime"] / 1000, tz=timezone.utc)
        start_dt = end_dt - timedelta(weeks=weeks)

        start_ms = int(start_dt.timestamp() * 1000)
        end_ms = int(end_dt.timestamp() * 1000)

        data = binance_client.fetch_data(pair, start_ms, end_ms)
        if data is None or data.empty:
            raise RuntimeError(f"[{pair}] No data returned from Binance for the requested range.")

        binance_client.store_data_to_db(pair, data)

        df = binance_client.fetch_data_from_db(pair)
        if df is None or df.empty:
            raise RuntimeError(f"[{pair}] Data fetched/stored but DB load returned empty.")

        print(f"[{pair}] Fetched + stored + loaded {len(df):,} rows.")

    df = df.sort_index()
    return db_path, df


## Load COINS, then align timestamps

In [47]:
import pandas as pd

interval = "5m"
weeks = 52
# pairs = ["BTCUSDT", "ETHUSDT"]

symbol = [
    "BTCUSDT","ETHUSDT","BNBUSDT","SOLUSDT","XRPUSDT",
    "ADAUSDT","DOGEUSDT","TRXUSDT","LTCUSDT","LINKUSDT"
]
paths = {}
dfs = {}

for sym in symbol:
    db_path, df = load_or_fetch_pair_df(sym, interval, weeks)
    paths[sym] = db_path
    dfs[sym] = df


Trades: 3549
Avg net: -0.0016076996955678494
Total return: -0.9984135006536289
exit_reason
TIME                      2212
SL                         659
TP                         651
SL_and_TP_same_bar->SL      27
Name: count, dtype: int64


In [7]:
import pandas as pd
import numpy as np

def detect_volume_events(
    df: pd.DataFrame,
    symbol: str,
    vol_win: int = 144,          # 12 hours on 5m
    impulse_k: int = 12,         # 60 min impulse
    rvol_thresh: float = 6.0,    # strict
    impulse_thresh: float = 0.04,# +4% over impulse_k
    lookahead: int = 24,         # 2 hours forward path
    cooldown: int = 12,          # avoid logging same burst repeatedly (60 min)
):
    """
    Logs candidate 'flow shock' events:
      - RVOL spike relative to rolling median
      - Positive impulse over last impulse_k bars
    Then measures forward path stats over lookahead bars.
    """
    d = df.copy().sort_index()
    d = d[["open","high","low","close","volume"]].dropna()

    vol_med = d["volume"].rolling(vol_win).median()
    rvol = d["volume"] / vol_med
    impulse = d["close"] / d["close"].shift(impulse_k) - 1.0

    out = []
    i = 0
    n = len(d)

    while i < n - lookahead:
        if (rvol.iloc[i] >= rvol_thresh) and (impulse.iloc[i] >= impulse_thresh):
            px0 = float(d["close"].iloc[i])
            ts0 = d.index[i]

            future = d["close"].iloc[i+1:i+1+lookahead]
            fmax = float(future.max())
            fmin = float(future.min())
            max_fwd_return = fmax / px0 - 1.0
            max_drawdown = fmin / px0 - 1.0

            # retrace from the peak within the lookahead window
            # find peak time then worst after that peak
            peak_idx = future.values.argmax()
            peak_px = float(future.iloc[peak_idx])
            after_peak = future.iloc[peak_idx:]  # includes peak bar
            trough_after_peak = float(after_peak.min())
            max_retrace = trough_after_peak / peak_px - 1.0  # negative means retrace

            # time to max retrace (bars after event)
            trough_idx = after_peak.values.argmin()
            time_to_max_retrace_bars = int(peak_idx + trough_idx + 1)

            out.append({
                "symbol": symbol,
                "event_ts": ts0,
                "close_event": px0,
                "rvol": float(rvol.iloc[i]),
                "impulse": float(impulse.iloc[i]),
                "max_fwd_return": max_fwd_return,
                "max_drawdown": max_drawdown,
                "max_retrace": max_retrace,
                "time_to_max_retrace_bars": time_to_max_retrace_bars,
            })

            i += cooldown  # skip ahead so we don't log every bar of the same burst
        else:
            i += 1

    return pd.DataFrame(out)


In [15]:
# Convert list of DataFrames into a single DataFrame
events = pd.concat(events, ignore_index=True)

type(events), events.head()


(pandas.core.frame.DataFrame,
     symbol            event_ts  close_event       rvol   impulse  \
 0  BTCUSDT 2025-03-02 16:15:00     89370.23  35.169505  0.049267   
 1  BTCUSDT 2025-03-02 17:20:00     92623.03  24.089593  0.040878   
 2  BTCUSDT 2025-04-07 14:15:00     80243.10  12.829945  0.040335   
 3  BTCUSDT 2025-04-09 17:20:00     80744.00  27.602926  0.040441   
 4  BTCUSDT 2025-10-10 22:15:00    113213.78   6.726021  0.088853   
 
    max_fwd_return  max_drawdown  max_retrace  time_to_max_retrace_bars  
 0        0.058518     -0.004590    -0.012570                        23  
 1        0.021344     -0.001134    -0.022009                        18  
 2       -0.010208     -0.030371    -0.020371                        11  
 3        0.026891      0.000949    -0.013591                        19  
 4        0.012397     -0.012771    -0.024860                        24  )

In [16]:
events["symbol"].value_counts()


symbol
DOGEUSDT    37
LINKUSDT    31
LTCUSDT     28
XRPUSDT     27
ADAUSDT     25
SOLUSDT     22
ETHUSDT     17
BTCUSDT      5
BNBUSDT      4
TRXUSDT      2
Name: count, dtype: int64

In [17]:
events.groupby("symbol")[[
    "rvol",
    "impulse",
    "max_fwd_return",
    "max_drawdown",
    "max_retrace",
    "time_to_max_retrace_bars"
]].median()


Unnamed: 0_level_0,rvol,impulse,max_fwd_return,max_drawdown,max_retrace,time_to_max_retrace_bars
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ADAUSDT,11.166297,0.046968,0.020119,-0.011202,-0.019602,21.0
BNBUSDT,9.286704,0.047665,0.016355,-0.008791,-0.021239,17.5
BTCUSDT,24.089593,0.040878,0.021344,-0.00459,-0.020371,19.0
DOGEUSDT,12.303964,0.045536,0.01054,-0.013062,-0.017995,18.0
ETHUSDT,15.074065,0.046413,0.020715,-0.00719,-0.017215,20.0
LINKUSDT,15.380846,0.045759,0.016698,-0.016935,-0.025541,23.0
LTCUSDT,16.809842,0.047733,0.015907,-0.016874,-0.026367,21.0
SOLUSDT,12.07943,0.048086,0.017302,-0.00802,-0.017296,21.0
TRXUSDT,119.576384,0.048816,-0.000174,-0.016803,-0.016635,18.5
XRPUSDT,11.07238,0.045867,0.01272,-0.01054,-0.032066,19.0


## Get all Binance coin pairs

In [22]:
import requests
import pandas as pd

BINANCE_REST = "https://api.binance.com"

def get_spot_usdt_symbols():
    """All Spot symbols that trade against USDT and are currently TRADING."""
    info = requests.get(f"{BINANCE_REST}/api/v3/exchangeInfo", timeout=20).json()
    syms = []
    for s in info["symbols"]:
        if s.get("status") != "TRADING":
            continue
        if s.get("isSpotTradingAllowed") is not True:
            continue
        if s.get("quoteAsset") != "USDT":
            continue

        sym = s["symbol"]

        # Exclude leveraged tokens & some common non-spot-like tickers
        bad_substrings = ["UPUSDT", "DOWNUSDT", "BULLUSDT", "BEARUSDT", "3LUSDT", "3SUSDT", "5LUSDT", "5SUSDT"]
        if any(sym.endswith(x) for x in bad_substrings):
            continue

        syms.append(sym)
    return sorted(set(syms))

def rank_symbols_by_quote_volume(symbols):
    """Return DataFrame of symbols with 24h quoteVolume (USDT) sorted desc."""
    tickers = requests.get(f"{BINANCE_REST}/api/v3/ticker/24hr", timeout=20).json()
    # Build a map for fast lookup
    wanted = set(symbols)

    rows = []
    for t in tickers:
        sym = t.get("symbol")
        if sym not in wanted:
            continue
        # quoteVolume is in quoteAsset units, here USDT
        qv = float(t.get("quoteVolume", 0.0))
        rows.append({
            "symbol": sym,
            "quoteVolumeUSDT_24h": qv,
            "lastPrice": float(t.get("lastPrice", 0.0)),
            "priceChangePercent": float(t.get("priceChangePercent", 0.0)),
            "count": int(t.get("count", 0)),  # trade count 24h
        })

    df = pd.DataFrame(rows)
    df = df.sort_values("quoteVolumeUSDT_24h", ascending=False).reset_index(drop=True)
    return df

def get_top_usdt_pairs(n=100, min_quote_vol_usdt=None):
    """Top-N by 24h quote volume; optionally filter by minimum quote volume."""
    syms = get_spot_usdt_symbols()
    ranked = rank_symbols_by_quote_volume(syms)

    if min_quote_vol_usdt is not None:
        ranked = ranked[ranked["quoteVolumeUSDT_24h"] >= float(min_quote_vol_usdt)].copy()

    top = ranked.head(n).copy()
    return top, ranked


In [23]:
top100, ranked_all = get_top_usdt_pairs(n=100)
pairs = top100["symbol"].tolist()

len(pairs)


100

In [24]:
interval = "5m"
weeks = 52

paths = {}
dfs = {}

for sym in pairs:
    db_path, df = load_or_fetch_pair_df(sym, interval, weeks)
    paths[sym] = db_path
    dfs[sym] = df
    dfs[sym] = df


[BTCUSDT] DB: ./db/BTCUSDT_5m_52weeks.db
[BTCUSDT] Loaded 104,832 rows from DB.
[ETHUSDT] DB: ./db/ETHUSDT_5m_52weeks.db
[ETHUSDT] Loaded 104,832 rows from DB.
[USDCUSDT] DB: ./db/USDCUSDT_5m_52weeks.db
[USDCUSDT] Loaded 104,832 rows from DB.
[SOLUSDT] DB: ./db/SOLUSDT_5m_52weeks.db
[SOLUSDT] Loaded 104,832 rows from DB.
[USD1USDT] DB: ./db/USD1USDT_5m_52weeks.db
[USD1USDT] Loaded 71,475 rows from DB.
[FOGOUSDT] DB: ./db/FOGOUSDT_5m_52weeks.db
[FOGOUSDT] Loaded 2,907 rows from DB.
[XRPUSDT] DB: ./db/XRPUSDT_5m_52weeks.db
[XRPUSDT] Loaded 104,832 rows from DB.
[FDUSDUSDT] DB: ./db/FDUSDUSDT_5m_52weeks.db
[FDUSDUSDT] Loaded 104,832 rows from DB.
[ZKPUSDT] DB: ./db/ZKPUSDT_5m_52weeks.db
[ZKPUSDT] Loaded 5,212 rows from DB.
[PAXGUSDT] DB: ./db/PAXGUSDT_5m_52weeks.db
[PAXGUSDT] Loaded 104,832 rows from DB.
[BNBUSDT] DB: ./db/BNBUSDT_5m_52weeks.db
[BNBUSDT] Loaded 104,832 rows from DB.
[DOGEUSDT] DB: ./db/DOGEUSDT_5m_52weeks.db
[DOGEUSDT] Loaded 104,832 rows from DB.
[ZECUSDT] DB: ./db/ZECUS

In [30]:
events = []

for sym, df in dfs.items():
    ev = detect_volume_events(df, sym)
    events.append(ev)

# Convert list of DataFrames into a single DataFrame
events = pd.concat(events, ignore_index=True)

type(events), events.head()


(pandas.core.frame.DataFrame,
     symbol            event_ts  close_event       rvol   impulse  \
 0  BTCUSDT 2025-03-02 16:15:00     89370.23  35.169505  0.049267   
 1  BTCUSDT 2025-03-02 17:20:00     92623.03  24.089593  0.040878   
 2  BTCUSDT 2025-04-07 14:15:00     80243.10  12.829945  0.040335   
 3  BTCUSDT 2025-04-09 17:20:00     80744.00  27.602926  0.040441   
 4  BTCUSDT 2025-10-10 22:15:00    113213.78   6.726021  0.088853   
 
    max_fwd_return  max_drawdown  max_retrace  time_to_max_retrace_bars  
 0        0.058518     -0.004590    -0.012570                        23  
 1        0.021344     -0.001134    -0.022009                        18  
 2       -0.010208     -0.030371    -0.020371                        11  
 3        0.026891      0.000949    -0.013591                        19  
 4        0.012397     -0.012771    -0.024860                        24  )

In [31]:
events["symbol"].value_counts()


symbol
GPSUSDT       151
RESOLVUSDT    149
PENGUUSDT     148
EIGENUSDT     138
ZENUSDT       125
             ... 
SENTUSDT        2
USDEUSDT        1
FDUSDUSDT       1
PAXGUSDT        1
EURUSDT         1
Name: count, Length: 96, dtype: int64

In [36]:
pd.set_option("display.max_rows", None)

events.groupby("symbol")[[
    "rvol",
    "impulse",
    "max_fwd_return",
    "max_drawdown",
    "max_retrace",
    "time_to_max_retrace_bars"
]].median()


Unnamed: 0_level_0,rvol,impulse,max_fwd_return,max_drawdown,max_retrace,time_to_max_retrace_bars
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0GUSDT,11.131891,0.059992,0.020443,-0.034392,-0.055723,20.0
2ZUSDT,10.094747,0.048046,0.018098,-0.026769,-0.036083,22.5
AAVEUSDT,12.506137,0.045298,0.011208,-0.016182,-0.018994,22.0
ADAUSDT,11.166297,0.046968,0.020119,-0.011202,-0.019602,21.0
AGLDUSDT,18.488468,0.046296,0.010791,-0.027433,-0.034789,21.0
APTUSDT,9.349846,0.045948,0.017019,-0.009754,-0.014911,24.0
ARBUSDT,9.142668,0.046468,0.026306,-0.013493,-0.019068,23.0
ASTERUSDT,9.455093,0.049093,0.014449,-0.02224,-0.036796,21.5
ASTRUSDT,15.946426,0.046077,0.018137,-0.0157,-0.034222,22.5
AUCTIONUSDT,15.548623,0.049861,0.031636,-0.026619,-0.041237,22.0


In [43]:
import numpy as np
import pandas as pd

def backtest_digest_fade_v2(
    df: pd.DataFrame,
    events_sym: pd.DataFrame,
    *,
    fee_rate: float = 0.00075,     # 0.075% per side
    impulse_k: int = 12,           # must match detector
    wait_bars: int = 10,
    max_hold_bars: int = 12,
    retrace_frac: float = 0.40,
    cont_frac: float = 0.30,       # require some continuation before entry
    tp_frac: float = 0.50,
    sl_frac: float = 0.50,
    conservative_same_bar: bool = True
) -> pd.DataFrame:
    """
    Long-only backtest aligned with your detector.

    Required df cols: open, high, low, close
    Required events cols: event_ts, close_event, impulse
    """

    df = df.sort_index().copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError("df.index must be a DatetimeIndex")

    required_df = {"open","high","low","close"}
    if not required_df.issubset(df.columns):
        raise ValueError(f"df missing columns: {required_df - set(df.columns)}")

    required_ev = {"event_ts","close_event","impulse"}
    if not required_ev.issubset(events_sym.columns):
        raise ValueError(f"events missing columns: {required_ev - set(events_sym.columns)}")

    ev = events_sym.copy()
    ev["event_ts"] = pd.to_datetime(ev["event_ts"])

    # map ts -> bar index (nearest previous bar)
    idx = df.index
    def ts_to_i(ts):
        return int(idx.searchsorted(ts, side="right") - 1)

    trades = []
    used_entries = set()  # avoid overlapping entries (simple safeguard)

    for _, row in ev.iterrows():
        t0_idx = ts_to_i(row["event_ts"])
        if t0_idx < 0 or t0_idx >= len(df) - 2:
            continue

        px0 = float(row["close_event"])
        impulse = float(row["impulse"])
        if not np.isfinite(px0) or px0 <= 0: 
            continue
        if not np.isfinite(impulse) or impulse <= 0:
            continue

        # Past-only "event high/low" consistent with impulse window
        lo0 = max(0, t0_idx - impulse_k + 1)
        event_high = float(df["high"].iloc[lo0:t0_idx+1].max())
        event_low  = float(df["low"].iloc[lo0:t0_idx+1].min())

        # Wait
        start = t0_idx + wait_bars
        if start >= len(df) - 1:
            continue

        # Entry threshold: retrace >= retrace_frac * impulse from event_high
        entry_threshold = event_high * (1.0 - retrace_frac * impulse)

        # Find first bar where close <= threshold (after wait), then enter next bar open
        entry_signal_idx = None
        for j in range(start, len(df) - 1):
            if float(df["close"].iloc[j]) <= entry_threshold:
                entry_signal_idx = j
                break
        if entry_signal_idx is None:
            continue

        entry_idx = entry_signal_idx + 1
        if entry_idx >= len(df):
            continue

        if entry_idx in used_entries:
            continue

        entry_price = float(df["open"].iloc[entry_idx])
        if not np.isfinite(entry_price) or entry_price <= 0:
            continue

        # Continuation filter: require some run-up between t0 and entry_signal
        max_hi = float(df["high"].iloc[t0_idx:entry_signal_idx+1].max())
        max_fwd_so_far = (max_hi / px0) - 1.0
        if max_fwd_so_far < cont_frac * impulse:
            continue

        # TP/SL relative to entry (consistent units, execution-friendly)
        tp_price = entry_price * (1.0 + tp_frac * impulse)
        sl_price = entry_price * (1.0 - sl_frac * impulse)

        deadline = min(entry_idx + max_hold_bars, len(df) - 1)

        exit_idx = None
        exit_price = None
        reason = None

        for k in range(entry_idx, deadline + 1):
            hi = float(df["high"].iloc[k])
            lo = float(df["low"].iloc[k])

            hit_tp = hi >= tp_price
            hit_sl = lo <= sl_price

            if hit_tp and hit_sl:
                if conservative_same_bar:
                    exit_idx, exit_price, reason = k, sl_price, "SL_and_TP_same_bar->SL"
                else:
                    exit_idx, exit_price, reason = k, tp_price, "SL_and_TP_same_bar->TP"
                break
            elif hit_sl:
                exit_idx, exit_price, reason = k, sl_price, "SL"
                break
            elif hit_tp:
                exit_idx, exit_price, reason = k, tp_price, "TP"
                break

        if exit_idx is None:
            exit_idx = deadline
            exit_price = float(df["close"].iloc[exit_idx])
            reason = "TIME"

        gross = (exit_price / entry_price) - 1.0
        net = gross - 2.0 * fee_rate

        trades.append({
            "t0_time": df.index[t0_idx],
            "entry_time": df.index[entry_idx],
            "exit_time": df.index[exit_idx],
            "t0_idx": t0_idx,
            "entry_idx": entry_idx,
            "exit_idx": exit_idx,
            "close_event": px0,
            "impulse": impulse,
            "event_high_past": event_high,
            "event_low_past": event_low,
            "entry_threshold": entry_threshold,
            "max_fwd_so_far": max_fwd_so_far,
            "entry_price": entry_price,
            "tp_price": tp_price,
            "sl_price": sl_price,
            "exit_price": exit_price,
            "gross_ret": gross,
            "net_ret": net,
            "exit_reason": reason,
            "hold_bars": exit_idx - entry_idx,
        })

        used_entries.add(entry_idx)

    return pd.DataFrame(trades)


In [44]:
all_trades = []

for sym in pairs:
    df = dfs[sym]
    ev = events.query("symbol == @sym").copy()  # assumes events has 'symbol'
    if ev.empty or df.empty:
        continue

    tr = backtest_digest_fade_from_events_schema(df, ev)
    tr["symbol"] = sym
    all_trades.append(tr)

trades = pd.concat(all_trades, ignore_index=True)

print("Trades:", len(trades))
print("Avg net:", trades["net_ret"].mean())
print("Total return:", (1 + trades["net_ret"]).prod() - 1)
print(trades["exit_reason"].value_counts())


Trades: 3849
Avg net: -0.0013462173136468464
Total return: -0.9980881244257173
exit_reason
TIME                      2217
TP                         875
SL                         753
SL_and_TP_same_bar->SL       4
Name: count, dtype: int64


In [45]:
trades.describe()

Unnamed: 0,t0_time,entry_time,exit_time,t0_idx,entry_idx,exit_idx,close_event,impulse,event_high_proxy,entry_threshold,entry_price,tp_price,sl_price,exit_price,gross_ret,net_ret,hold_bars
count,3849,3849,3849,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0,3849.0
mean,2025-08-17 17:47:20.374123264,2025-08-18 03:46:44.208885504,2025-08-18 04:32:35.494933504,45894.746168,46014.625617,46023.796571,163.648514,0.068056,171.184472,168.004536,165.209931,169.013572,161.406289,166.220221,0.000154,-0.001346,9.170953
min,2025-01-28 05:55:00,2025-01-28 06:50:00,2025-01-28 07:50:00,184.0,195.0,203.0,4e-06,0.04,4e-06,4e-06,4e-06,4e-06,3e-06,4e-06,-0.114352,-0.115852,0.0
25%,2025-05-01 08:35:00,2025-05-02 00:15:00,2025-05-02 01:05:00,16093.0,16179.0,16183.0,0.1322,0.043274,0.140398,0.136351,0.1326,0.136563,0.127512,0.132651,-0.017149,-0.018649,6.0
50%,2025-09-15 14:10:00,2025-09-17 19:10:00,2025-09-17 19:30:00,40982.0,40998.0,41010.0,0.766,0.048024,0.810202,0.787224,0.763,0.789324,0.731178,0.75669,-0.001637,-0.003137,12.0
75%,2025-11-19 21:20:00,2025-11-20 17:50:00,2025-11-20 18:30:00,74405.0,74441.0,74453.0,4.09,0.059617,4.414676,4.260679,4.105,4.248258,3.908488,4.069742,0.020073,0.018573,12.0
max,2026-01-26 08:30:00,2026-01-26 09:25:00,2026-01-26 10:10:00,104803.0,104826.0,104827.0,92623.03,1.99777,96409.297501,94832.880103,93410.92,95320.157533,91501.682467,93735.522224,0.399922,0.398422,12.0
std,,,,31831.19724,31853.072907,31853.021437,3386.953876,0.107644,3537.519458,3474.589013,3427.878609,3504.193915,3351.576986,3455.24654,0.023883,0.023883,4.067182


In [40]:
events.describe()

Unnamed: 0,event_ts,close_event,rvol,impulse,max_fwd_return,max_drawdown,max_retrace,time_to_max_retrace_bars
count,5343,5343.0,5343.0,5343.0,5343.0,5343.0,5343.0,5343.0
mean,2025-08-17 08:04:37.035373568,148.424758,29.081735,0.066565,0.03302,-0.024115,-0.036187,19.712708
min,2025-01-28 00:00:00,4e-06,6.000611,0.04,-0.200719,-0.41747,-0.43229,2.0
25%,2025-05-02 11:10:00,0.13915,7.757492,0.043105,0.00352,-0.035931,-0.049174,17.0
50%,2025-09-19 13:25:00,0.7887,11.392142,0.048016,0.017977,-0.018755,-0.029079,22.0
75%,2025-11-17 02:15:00,3.8285,22.099205,0.059557,0.043624,-0.006106,-0.01489,24.0
max,2026-01-26 08:30:00,113213.78,4502.710521,2.494949,1.130753,0.139531,0.0,24.0
std,,3266.721559,115.465879,0.112654,0.056112,0.029282,0.032736,5.1135


In [42]:
events.head()

Unnamed: 0,symbol,event_ts,close_event,rvol,impulse,max_fwd_return,max_drawdown,max_retrace,time_to_max_retrace_bars
0,BTCUSDT,2025-03-02 16:15:00,89370.23,35.169505,0.049267,0.058518,-0.00459,-0.01257,23
1,BTCUSDT,2025-03-02 17:20:00,92623.03,24.089593,0.040878,0.021344,-0.001134,-0.022009,18
2,BTCUSDT,2025-04-07 14:15:00,80243.1,12.829945,0.040335,-0.010208,-0.030371,-0.020371,11
3,BTCUSDT,2025-04-09 17:20:00,80744.0,27.602926,0.040441,0.026891,0.000949,-0.013591,19
4,BTCUSDT,2025-10-10 22:15:00,113213.78,6.726021,0.088853,0.012397,-0.012771,-0.02486,24
