# Notebook — Supply & Demand (Sam Seiden) Quantized Scalping
**Scope (v0.1):**
- Input: XAUUSD **M1** OHLCV CSV (UTC)
- HTF: **M5** (zone discovery)
- LTF: **M1** (limit fill + SL/TP simulation)
- Entry: **limit order** at zone level
- Conservative intrabar: **SL-first** when TP & SL both touched inside one M1 candle

> Notebook ini dibuat **step-by-step** dan deterministik (tanpa look-ahead).  
> Silakan jalankan cell dari atas ke bawah.


## 0) Setup
- Pastikan file CSV M1 tersedia.
- Format CSV sesuai fungsi `load_ohlcv()` kamu:
  - skip header 1 baris
  - kolom: `date,time,open,high,low,close,volume`
  - date format `YYYY.MM.DD`, time `HH:MM`


In [None]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import Optional, List, Dict, Tuple

pd.set_option("display.max_columns", 50)
pd.set_option("display.width", 140)


## 1) Load data (M1)
Gunakan fungsi loader yang kamu berikan (UTC).

In [None]:
def load_ohlcv(path: str) -> pd.DataFrame:
    """
    Read an OHLCV CSV file into a frame indexed by UTC timestamp.

    The first line of the file is skipped (label row). The remaining rows
    are read as the columns date, time, open, high, low, close, volume.
    ``date`` and ``time`` are parsed together with the format
    ``%Y.%m.%d %H:%M`` and treated as UTC.

    Returns a DataFrame sorted by, and indexed on, ``timestamp`` with the
    columns open, high, low, close, volume.
    """
    columns = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']
    raw = pd.read_csv(
        path,
        header=None,
        names=columns,
        dtype={'date': str, 'time': str},
        skiprows=1,
    )

    # Glue date and time into one string so both are parsed in a single pass.
    stamp_text = raw['date'] + ' ' + raw['time']
    raw['timestamp'] = pd.to_datetime(stamp_text, format='%Y.%m.%d %H:%M', utc=True)

    return (
        raw.drop(columns=['date', 'time'])
        .sort_values('timestamp')
        .reset_index(drop=True)
        .set_index('timestamp')
    )

# TODO: replace with the path to your own M1 CSV file
CSV_PATH = "XAUUSD_M1.csv"

# Load the M1 bars, then preview the first/last rows and the frame shape.
df_m1 = load_ohlcv(CSV_PATH)
df_m1.head(), df_m1.tail(), df_m1.shape


## 2) Audit data M1 (kualitas & gap)
Kita cek:
- index monotonic
- duplikat timestamp
- gap > 1 menit (data bolong)

In [None]:
def audit_m1(df: pd.DataFrame) -> Dict[str, int]:
    """
    Sanity-check the timestamp index of an M1 frame and summarise it.

    Raises ValueError when the index is not monotonic increasing or when it
    contains duplicate timestamps.

    Returns a dict with ``rows`` (row count), ``gaps_gt_1m`` (number of
    consecutive-timestamp steps larger than one minute) and ``start`` /
    ``end`` (first and last timestamp rendered as strings).
    """
    stamps = df.index
    if not stamps.is_monotonic_increasing:
        raise ValueError("Index timestamp tidak monotonic increasing. Perlu sort.")

    n_dup = int(stamps.duplicated().sum())
    if n_dup:
        raise ValueError(f"Duplicate timestamps: {n_dup}. Harus dibersihkan dulu.")

    # Step between each bar and the previous one; the first bar has none.
    steps = stamps.to_series().diff().dropna()
    one_minute = pd.Timedelta(minutes=1)

    report = {"rows": int(len(df))}
    report["gaps_gt_1m"] = int((steps > one_minute).sum())
    report["start"] = str(stamps.min())
    report["end"] = str(stamps.max())
    return report

# Run the index audit on the loaded M1 frame (raises on unsorted/duplicate timestamps).
audit_m1(df_m1)


## 3) Resample HTF M5 (MT5-compatible)
Rule:
- `label='right', closed='right'`
- Candle M5 timestamp = waktu **close** interval 5 menit

In [None]:
def resample_m5(df_m1: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate M1 bars into 5-minute bars.

    Bins are right-closed and right-labelled, so each output row is stamped
    with the end of its 5-minute interval. Open/close take the first/last
    value, high/low the max/min, and volume is summed. Rows containing any
    missing value (e.g. intervals with no M1 bars) are dropped.
    """
    how = dict(open="first", high="max", low="min", close="last", volume="sum")
    buckets = df_m1.resample("5min", label="right", closed="right")
    bars = buckets.agg(how)
    return bars.dropna()

# Build the M5 frame from the M1 bars, then preview head/tail and shape.
df_m5 = resample_m5(df_m1)
df_m5.head(), df_m5.tail(), df_m5.shape


## 4) ATR (untuk normalisasi base & departure)
Kita pakai ATR klasik (Wilder) pada M5.

In [None]:
def compute_atr(df: pd.DataFrame, length: int = 14) -> pd.Series:
    """
    Average True Range of ``df`` smoothed with alpha = 1 / length.

    True range per row is the largest of |high - low|, |high - prev close|
    and |low - prev close|; on the first row, where no previous close
    exists, the missing terms are skipped. The exponential average is
    recursive (``adjust=False``) and the first ``length - 1`` outputs are
    NaN because of ``min_periods=length``.
    """
    prior_close = df["close"].shift(1)
    candidates = pd.DataFrame({
        "bar_range": (df["high"] - df["low"]).abs(),
        "from_prior_to_high": (df["high"] - prior_close).abs(),
        "from_prior_to_low": (df["low"] - prior_close).abs(),
    })
    # Row-wise max skips NaN, so the first row falls back to high - low.
    true_range = candidates.max(axis=1)

    # Wilder's smoothing
    smoother = true_range.ewm(alpha=1 / length, adjust=False, min_periods=length)
    return smoother.mean()

# ATR length passed to compute_atr (counted in M5 rows).
ATR_LEN = 14
# Store the ATR as a column on the M5 frame so zone detection can read it.
df_m5["atr"] = compute_atr(df_m5, ATR_LEN)
# Preview the first 20 rows that have no missing value (i.e. ATR is available).
df_m5[["open","high","low","close","atr"]].dropna().head(20)


## 5) Spec v0.1 — Zone Detection (M5)
Kita quantize pola:
- Demand: **Drop–Base–Rally (DBR)**
- Supply: **Rally–Base–Drop (RBD)**

Definisi ringkas:
- Base = `base_len` candle dengan `base_range <= k_base_atr * ATR`
- Departure = `dep_len` candle setelah base yang breakout + impulsif:
  - `departure_range >= k_dep_atr * ATR`
  - `body_pct >= min_body_pct`
  - breakout: mode `"close"` atau `"wick"`

Boundary zona:
- `"proximal_distal"` (default) ala Sam Seiden-ish
- `"wick_wick"` (lebih lebar)


In [None]:
@dataclass
class ZoneParams:
    """Parameters for M5 zone detection and for the zone lifecycle used in the M1 simulation."""
    # Number of consecutive M5 candles that make up the base.
    base_len: int = 2
    # A base is accepted when its high-low range is <= k_base_atr * ATR.
    k_base_atr: float = 1.0
    # Number of candles immediately after the base that form the departure.
    dep_len: int = 1
    # The last departure candle must have a range >= k_dep_atr * ATR.
    k_dep_atr: float = 1.2
    # Minimum |close - open| / (high - low) of the last departure candle.
    min_body_pct: float = 0.6
    # Which price of the departure candle must clear the base: its close or its wick.
    breakout_mode: str = "close"  # "close" or "wick"
    # How zone edges are derived from the base candles.
    zone_boundary: str = "proximal_distal"  # "proximal_distal" or "wick_wick"
    # NOTE(review): not read by any function visible in this file — confirm whether it is still needed.
    fresh_only: bool = True
    # Order lifetime: simulate_trades_m1 expires an order 5 * max_zone_age_m5 minutes after created_time.
    max_zone_age_m5: int = 48
    # > 0: simulate_trades_m1 disables the order once the M1 touch count exceeds this value.
    # 0: no touch limit is applied by simulate_trades_m1 (intended meaning: fresh zones only).
    max_tests: int = 0

def body_pct(o, h, l, c, eps=1e-12):
    """Return the candle body |c - o| as a fraction of its range h - l.

    The range is floored at ``eps`` so a zero-range candle does not divide by zero.
    """
    span = h - l
    denom = eps if eps > span else span
    return abs(c - o) / denom

def detect_zones_m5(m5: pd.DataFrame, params: ZoneParams) -> pd.DataFrame:
    """
    Scan M5 candles for base + departure patterns and return them as zones.

    For every start position the function takes ``params.base_len`` candles
    as the base and the following ``params.dep_len`` candles as the
    departure. A zone is emitted when:

    - the ATR on the last base candle is finite and positive,
    - base high-low range <= ``k_base_atr`` * ATR,
    - the LAST departure candle has range >= ``k_dep_atr`` * ATR and
      body fraction >= ``min_body_pct``,
    - that candle breaks out of the base (by close or by wick, per
      ``breakout_mode``).

    An upward breakout yields a "demand" zone, a downward one a "supply"
    zone. In "wick" mode a candle that pierces both sides is classified as
    demand. Rows with a missing ``atr`` are dropped before scanning.
    Windows slide by one candle, so zones may overlap.

    Returns a DataFrame with the columns
      zone_id, type, created_time, base_start, base_end, departure_end,
      zone_low, zone_high, distal, proximal,
      base_len, dep_len, k_base_atr, k_dep_atr, min_body_pct,
      breakout_mode, zone_boundary
    (the same columns, with zero rows, when nothing is found).

    Raises ValueError for an unknown ``breakout_mode`` / ``zone_boundary``
    or when ``base_len`` / ``dep_len`` is smaller than 1.

    NOTE: lifecycle (fresh/test) is evaluated in the M1 simulation stage so
    that it stays deterministic.
    """
    columns = [
        "zone_id", "type", "created_time", "base_start", "base_end", "departure_end",
        "zone_low", "zone_high", "distal", "proximal",
        "base_len", "dep_len", "k_base_atr", "k_dep_atr", "min_body_pct",
        "breakout_mode", "zone_boundary",
    ]

    # Validate the configuration once, up front, so a bad value is reported
    # even when no candidate happens to reach the branch that uses it.
    if params.breakout_mode not in ("close", "wick"):
        raise ValueError("breakout_mode must be 'close' or 'wick'")
    if params.zone_boundary not in ("proximal_distal", "wick_wick"):
        raise ValueError("zone_boundary must be 'proximal_distal' or 'wick_wick'")
    if params.base_len < 1 or params.dep_len < 1:
        raise ValueError("base_len and dep_len must be >= 1")

    # Require ATR available
    m5 = m5.dropna(subset=["atr"])
    n = len(m5)
    idx = m5.index

    window = params.base_len + params.dep_len
    out = []

    # The last valid start is n - window, whose departure ends on the final
    # candle (index n - 1). v0.1 steps by one candle to maximise samples.
    for i in range(0, n - window + 1):
        base_slice = m5.iloc[i:i + params.base_len]
        atr_here = base_slice["atr"].iloc[-1]
        if not np.isfinite(atr_here) or atr_here <= 0:
            continue

        base_high = float(base_slice["high"].max())
        base_low = float(base_slice["low"].min())

        # Base validity: the base must be tight relative to ATR.
        if base_high - base_low > params.k_base_atr * atr_here:
            continue

        # For simplicity v0.1: only the LAST departure candle is checked.
        dep_last = m5.iloc[i + window - 1]
        dep_o = float(dep_last["open"])
        dep_h = float(dep_last["high"])
        dep_l = float(dep_last["low"])
        dep_c = float(dep_last["close"])

        if dep_h - dep_l < params.k_dep_atr * atr_here:
            continue
        if body_pct(dep_o, dep_h, dep_l, dep_c) < params.min_body_pct:
            continue

        # Breakout direction check
        if params.breakout_mode == "close":
            broke_up = dep_c > base_high
            broke_dn = dep_c < base_low
        else:  # "wick"
            broke_up = dep_h > base_high
            broke_dn = dep_l < base_low

        if not (broke_up or broke_dn):
            continue

        # NOTE: if both sides are pierced (possible in "wick" mode) the
        # upward breakout takes precedence.
        ztype = "demand" if broke_up else "supply"

        dep_end_t = idx[i + window - 1]

        # Zone boundary
        if params.zone_boundary == "wick_wick":
            zone_low = base_low
            zone_high = base_high
            distal = zone_low if ztype == "demand" else zone_high
            proximal = zone_high if ztype == "demand" else zone_low
        else:  # "proximal_distal": proximal from base bodies, distal from base wicks
            base_open = base_slice["open"].astype(float)
            base_close = base_slice["close"].astype(float)
            if ztype == "demand":
                distal = base_low
                proximal = float(np.maximum(base_open, base_close).max())  # top of base bodies
                zone_low, zone_high = distal, proximal
            else:
                distal = base_high
                proximal = float(np.minimum(base_open, base_close).min())  # bottom of base bodies
                zone_low, zone_high = proximal, distal

        # Sanity: skip degenerate zones with no height.
        if not (zone_low < zone_high):
            continue

        out.append({
            "zone_id": len(out),
            "type": ztype,
            "created_time": dep_end_t,  # zone formed after departure close
            "base_start": idx[i],
            "base_end": idx[i + params.base_len - 1],
            "departure_end": dep_end_t,
            "zone_low": float(zone_low),
            "zone_high": float(zone_high),
            "distal": float(distal),
            "proximal": float(proximal),
            "base_len": params.base_len,
            "dep_len": params.dep_len,
            "k_base_atr": params.k_base_atr,
            "k_dep_atr": params.k_dep_atr,
            "min_body_pct": params.min_body_pct,
            "breakout_mode": params.breakout_mode,
            "zone_boundary": params.zone_boundary,
        })

    # Passing the column list keeps the schema stable even with zero zones.
    return pd.DataFrame(out, columns=columns)

# Baseline zone-detection configuration (every field spelled out explicitly).
params = ZoneParams(
    base_len=2,
    k_base_atr=1.0,
    dep_len=1,
    k_dep_atr=1.2,
    min_body_pct=0.6,
    breakout_mode="close",
    zone_boundary="proximal_distal",
    fresh_only=True,
    max_zone_age_m5=48,
    max_tests=0,
)

# Detect zones on the M5 frame (needs the "atr" column) and preview the result.
zones = detect_zones_m5(df_m5, params)
zones.head(), zones.shape


## 6) Entry price mode (limit)
Kita pilih salah satu:
- proximal / mid / distal

Default: proximal (lebih dekat, fill lebih sering).

In [None]:
def compute_limit_price(zone_row: pd.Series, entry_price: str) -> float:
    """Return the limit-order price for a zone row.

    ``entry_price`` selects the level: "proximal" or "distal" read the
    matching field of ``zone_row``; "mid" is the average of ``zone_low``
    and ``zone_high``. Any other value raises ValueError.
    """
    if entry_price in ("proximal", "distal"):
        return float(zone_row[entry_price])
    if entry_price != "mid":
        raise ValueError("entry_price must be proximal/mid/distal")
    lower, upper = zone_row["zone_low"], zone_row["zone_high"]
    return float((lower + upper) / 2.0)

# Entry level used for the baseline run; one of "proximal", "mid", "distal".
ENTRY_PRICE_MODE = "proximal"  # grid later


## 7) Simulasi trade (M1) — limit fill + SL/TP + time-exit
Aturan deterministik:
- Order aktif mulai **M1 setelah created_time + activate_delay_m1-1 menit**
- Fill jika `low <= limit <= high`
- Setelah filled, pantau M1 berikutnya sampai exit
- Intrabar ambiguity: jika candle menyentuh SL dan TP pada bar yang sama -> **SL_first**

Parameter mining utama:
- tp_R in [1.0, 1.5, 2.0, 3.0]
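- sl_buffer_points in [0, 20, 50] (sesuaikan digit; nilai ini ditambahkan/dikurangkan langsung ke harga distal, jadi satuannya adalah unit harga, bukan point broker)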
- max_hold_m1 optional (None/30/60/120)


In [None]:
@dataclass
class SimParams:
    """Parameters for the M1 limit-order and trade simulation."""
    # Zone level used as the limit price: "proximal", "mid" or "distal".
    entry_price: str = "proximal"
    # The order becomes active on the activate_delay_m1-th M1 bar strictly after the zone's created_time.
    activate_delay_m1: int = 1
    # Added to (supply) / subtracted from (demand) the distal price as-is, i.e. in raw price units.
    # NOTE(review): the name says "points" but no point-size conversion is applied — confirm the intended unit.
    sl_buffer_points: float = 0.0  # points (instrument dependent)
    # Take-profit distance expressed as a multiple of the entry-to-SL risk.
    tp_R: float = 2.0
    # Bars scanned after the entry bar before a forced exit; None means no forced exit.
    max_hold_m1: Optional[int] = 60
    # Not read by simulate_trades_m1; the time exit is always taken at the next bar's open.
    time_exit_mode: str = "next_open"  # only supported in v0.1
    # Not read by simulate_trades_m1; SL always wins when SL and TP are hit in the same bar.
    fill_priority: str = "SL_first"    # only supported in v0.1

def _first_exit_hit(side, sl, tp, lows, highs, entry_pos, last_pos):
    """Return (bar_pos, reason, price) for the first SL/TP hit in [entry_pos, last_pos], else None.

    SL takes priority whenever SL and TP are both inside one bar. On the
    fill bar itself only SL is honoured: that bar's favourable extreme may
    have printed before the limit order was filled, so crediting TP there
    would be intrabar look-ahead.
    """
    is_long = side == "long"
    for k in range(entry_pos, last_pos + 1):
        lo, hi = lows[k], highs[k]
        if is_long:
            hit_sl, hit_tp = lo <= sl, hi >= tp
        else:
            hit_sl, hit_tp = hi >= sl, lo <= tp
        if hit_sl:
            return k, "SL", sl
        if hit_tp and k > entry_pos:
            return k, "TP", tp
    return None


def simulate_trades_m1(
    m1: pd.DataFrame,
    zones: pd.DataFrame,
    zone_params: ZoneParams,
    sim: SimParams,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Simulate one limit order per zone on M1 bars and return (orders_df, trades_df).

    Per zone:
    - The order becomes active on the ``sim.activate_delay_m1``-th M1 bar
      strictly after ``created_time`` and expires
      ``5 * zone_params.max_zone_age_m5`` minutes after ``created_time``.
    - It fills on the first active bar with ``low <= limit <= high``; the
      entry price is the limit price.
    - SL sits at the distal price moved outward by ``sim.sl_buffer_points``
      (applied directly in price units); TP is ``sim.tp_R`` times the risk.
      Zones with non-positive risk are skipped entirely.
    - When ``zone_params.max_tests > 0`` the order is marked "disabled" as
      soon as the number of M1 bars overlapping the zone exceeds it.
    - After the fill, bars are scanned from the fill bar up to
      ``max_hold_m1`` bars later. SL beats TP inside one bar, and TP is not
      credited on the fill bar (see ``_first_exit_hit``).
    - Without an SL/TP hit the trade exits with reason "TIME" at the open of
      the next bar (or the last close if the data ends), or with reason
      "OPEN" at the last close when ``max_hold_m1`` is None.

    Each zone is simulated independently; positions may overlap in time.

    Raises ValueError when ``activate_delay_m1 < 1`` or ``max_hold_m1 < 0``.
    Returns two empty DataFrames when ``zones`` is empty.
    """
    if zones.empty:
        return pd.DataFrame(), pd.DataFrame()
    if sim.activate_delay_m1 < 1:
        # A delay below 1 would activate the order on or before the bar that created the zone.
        raise ValueError("activate_delay_m1 must be >= 1")
    if sim.max_hold_m1 is not None and sim.max_hold_m1 < 0:
        raise ValueError("max_hold_m1 must be None or >= 0")

    m1 = m1.sort_index()
    m1_index = m1.index
    n_bars = len(m1_index)

    # Plain arrays keep the per-bar loops cheap compared with row-wise .iloc access.
    opens = m1["open"].to_numpy(dtype=float)
    highs = m1["high"].to_numpy(dtype=float)
    lows = m1["low"].to_numpy(dtype=float)
    closes = m1["close"].to_numpy(dtype=float)

    # Since M5 is right-labeled, zone age in M5 candles ~ max_zone_age_m5 * 5 minutes.
    max_age = pd.Timedelta(minutes=5 * zone_params.max_zone_age_m5)

    orders = []
    trades = []

    # For test counting: per-zone touch count in M1 bars.
    tests = {int(zid): 0 for zid in zones["zone_id"].values}
    disabled = set()

    for _, zr in zones.iterrows():
        zid = int(zr["zone_id"])
        if zid in disabled:
            continue

        created = pd.Timestamp(zr["created_time"])

        # First M1 bar strictly after created_time, then apply the delay.
        first_pos = int(m1_index.searchsorted(created, side="right"))
        pos_act = first_pos + (sim.activate_delay_m1 - 1)
        if pos_act >= n_bars:
            continue
        activate_time = m1_index[pos_act]
        expiry_time = created + max_age

        ztype = zr["type"]
        zone_low = float(zr["zone_low"])
        zone_high = float(zr["zone_high"])
        limit_price = compute_limit_price(zr, sim.entry_price)

        # SL sits outside the distal edge.
        if ztype == "demand":
            side = "long"
            sl = float(zr["distal"]) - sim.sl_buffer_points
            risk = limit_price - sl
            tp = limit_price + sim.tp_R * risk
        else:
            side = "short"
            sl = float(zr["distal"]) + sim.sl_buffer_points
            risk = sl - limit_price
            tp = limit_price - sim.tp_R * risk

        # invalid risk (e.g. distal entry with no buffer)
        if risk <= 0:
            continue

        # Order scan window: activation bar through the bar at expiry_time (inclusive).
        start_pos = pos_act
        end_pos = min(int(m1_index.searchsorted(expiry_time, side="right")), n_bars)

        status = "expired"
        fill_time = None
        fill_pos = None

        # Fill scanning (also counts zone touches for the test lifecycle).
        for j in range(start_pos, end_pos):
            lo, hi = lows[j], highs[j]

            overlap = not (hi < zone_low or lo > zone_high)
            if overlap:
                tests[zid] += 1
                # max_tests == 0 applies no limit here; > 0 disables once exceeded.
                if zone_params.max_tests > 0 and tests[zid] > zone_params.max_tests:
                    disabled.add(zid)
                    status = "disabled"
                    break

            if lo <= limit_price <= hi:
                status = "filled"
                fill_time = m1_index[j]
                fill_pos = j
                break

        orders.append({
            "zone_id": zid,
            "type": ztype,
            "side": side,
            "created_time": created,
            "activate_time": activate_time,
            "expiry_time": expiry_time,
            "limit_price": limit_price,
            "sl": sl,
            "tp": tp,
            "status": status,
            "fill_time": fill_time,
        })

        if status != "filled":
            continue

        # After fill: manage the position from the fill bar forward.
        entry_pos = fill_pos
        entry_price = limit_price

        last_pos = n_bars - 1
        if sim.max_hold_m1 is not None:
            last_pos = min(last_pos, entry_pos + sim.max_hold_m1)

        hit = _first_exit_hit(side, sl, tp, lows, highs, entry_pos, last_pos)
        if hit is not None:
            exit_pos, exit_reason, exit_price = hit
            exit_time = m1_index[exit_pos]
        elif sim.max_hold_m1 is None:
            # no forced exit: mark open at the last available close
            exit_reason = "OPEN"
            exit_time = m1_index[last_pos]
            exit_price = float(closes[last_pos])
        elif last_pos + 1 < n_bars:
            # time exit at the open of the bar after the hold window
            exit_reason = "TIME"
            exit_time = m1_index[last_pos + 1]
            exit_price = float(opens[last_pos + 1])
        else:
            # data ends with the hold window: fall back to the last close
            exit_reason = "TIME"
            exit_time = m1_index[last_pos]
            exit_price = float(closes[last_pos])

        # R multiple
        if side == "long":
            r = (exit_price - entry_price) / risk
        else:
            r = (entry_price - exit_price) / risk

        trades.append({
            "zone_id": zid,
            "side": side,
            "entry_time": fill_time,
            "entry_price": entry_price,
            "sl": sl,
            "tp": tp,
            "exit_time": exit_time,
            "exit_price": exit_price,
            "exit_reason": exit_reason,
            "risk": risk,
            "R": float(r),
            "tp_R": sim.tp_R,
            "sl_buffer_points": sim.sl_buffer_points,
            "entry_price_mode": sim.entry_price,
            "activate_delay_m1": sim.activate_delay_m1,
            "max_hold_m1": sim.max_hold_m1,
        })

    return pd.DataFrame(orders), pd.DataFrame(trades)

# Baseline simulation configuration.
sim_params = SimParams(
    entry_price=ENTRY_PRICE_MODE,
    activate_delay_m1=1,
    sl_buffer_points=0.0,
    tp_R=2.0,
    max_hold_m1=60,
)

# Run the baseline simulation and preview orders, trades and their counts.
orders_df, trades_df = simulate_trades_m1(df_m1, zones, params, sim_params)
orders_df.head(), trades_df.head(), (len(orders_df), len(trades_df))


## 8) Metrics cepat (baseline)
Kita hitung:
- jumlah trade
- winrate (R>0)
- profit factor (sum pos R / abs(sum neg R))
- avg R, median R


In [None]:
def compute_metrics(trades: pd.DataFrame) -> Dict[str, float]:
    """Summarise a trades frame (columns ``R`` and ``exit_reason``) into headline metrics.

    Returns a dict with the same keys whether or not there are trades:
    ``n``, ``winrate`` (share of R > 0), ``profit_factor`` (sum of positive R
    over absolute sum of negative R), ``avg_R``, ``median_R``, ``sum_R`` and
    the share of trades per exit reason (``tp_hit_rate``, ``sl_hit_rate``,
    ``time_exit_rate``).

    ``profit_factor`` is ``inf`` when there are wins but no losses and NaN
    when there are neither (0 / 0 is undefined). For an empty frame every
    ratio is NaN, ``n`` is 0 and ``sum_R`` is 0.0.
    """
    if trades.empty:
        return {
            "n": 0, "winrate": np.nan, "profit_factor": np.nan,
            "avg_R": np.nan, "median_R": np.nan, "sum_R": 0.0,
            "tp_hit_rate": np.nan, "sl_hit_rate": np.nan, "time_exit_rate": np.nan,
        }

    r = trades["R"].astype(float)
    gross_win = float(r[r > 0].sum())
    gross_loss = float(-r[r < 0].sum())

    if gross_loss > 0:
        pf = gross_win / gross_loss
    elif gross_win > 0:
        pf = np.inf
    else:
        # Only break-even trades: the ratio is undefined, not infinite.
        pf = np.nan

    reasons = trades["exit_reason"]
    return {
        "n": int(len(trades)),
        "winrate": float((r > 0).mean()),
        "profit_factor": float(pf),
        "avg_R": float(r.mean()),
        "median_R": float(r.median()),
        "sum_R": float(r.sum()),
        "tp_hit_rate": float((reasons == "TP").mean()),
        "sl_hit_rate": float((reasons == "SL").mean()),
        "time_exit_rate": float((reasons == "TIME").mean()),
    }

# Headline metrics for the baseline trades; the bare name displays the dict in the notebook.
metrics = compute_metrics(trades_df)
metrics


## 9) Data Mining Grid Runner (v0.1)
Kita jalankan grid kecil dulu agar cepat, lalu bisa diperbesar.

> Catatan: Grid besar bisa berat. Mulai dari kecil, pastikan pipeline benar, baru scale up.


In [None]:
from itertools import product

def run_grid(
    m1: pd.DataFrame,
    m5: pd.DataFrame,
    zone_grid: List[ZoneParams],
    sim_grid: List[SimParams],
    max_zones_cap: Optional[int] = None,
) -> pd.DataFrame:
    """Run every (zone params, sim params) combination and tabulate the metrics.

    Zones are detected once per zone configuration and, when
    ``max_zones_cap`` is given, truncated to the first ``max_zones_cap``
    rows. Each result row holds a running ``scenario_id``, all fields of
    both parameter objects and the output of ``compute_metrics``.
    """
    def _fields_of(cfg):
        # Dataclass fields in declaration order, as a plain dict.
        return {name: getattr(cfg, name) for name in cfg.__dataclass_fields__}

    records = []
    for zone_cfg in zone_grid:
        found = detect_zones_m5(m5, zone_cfg)
        if max_zones_cap is not None and len(found) > max_zones_cap:
            found = found.iloc[:max_zones_cap].copy()

        zone_cols = _fields_of(zone_cfg)
        for sim_cfg in sim_grid:
            _, fills = simulate_trades_m1(m1, found, zone_cfg, sim_cfg)
            # One record is appended per scenario, so the list length is the next id.
            record = {"scenario_id": len(records)}
            record.update(zone_cols)
            record.update(_fields_of(sim_cfg))
            record.update(compute_metrics(fills))
            records.append(record)

    return pd.DataFrame(records)

# --- Small grid example (adjust as needed) ---
# Two zone configurations that differ only in base_len (2 vs 3).
zone_grid = [
    ZoneParams(base_len=2, k_base_atr=1.0, dep_len=1, k_dep_atr=1.2, min_body_pct=0.6, breakout_mode="close", zone_boundary="proximal_distal", fresh_only=True, max_zone_age_m5=48, max_tests=0),
    ZoneParams(base_len=3, k_base_atr=1.0, dep_len=1, k_dep_atr=1.2, min_body_pct=0.6, breakout_mode="close", zone_boundary="proximal_distal", fresh_only=True, max_zone_age_m5=48, max_tests=0),
]

# Three simulation configurations varying the entry level and tp_R.
sim_grid = [
    SimParams(entry_price="proximal", activate_delay_m1=1, sl_buffer_points=0.0, tp_R=1.5, max_hold_m1=60),
    SimParams(entry_price="proximal", activate_delay_m1=1, sl_buffer_points=0.0, tp_R=2.0, max_hold_m1=60),
    SimParams(entry_price="mid",      activate_delay_m1=1, sl_buffer_points=0.0, tp_R=2.0, max_hold_m1=60),
]

# Run all 2 x 3 scenarios and show the top rows ranked by profit factor, then total R.
results = run_grid(df_m1, df_m5, zone_grid, sim_grid, max_zones_cap=None)
results.sort_values(["profit_factor","sum_R"], ascending=False).head(20)


## 10) Export artifacts
- `zones_baseline.csv` (baseline)
- `orders_baseline.csv` (baseline)
- `trades_baseline.csv` (baseline)
- `grid_results.csv` (data mining summary)

In [None]:
# Output directory for the exported artifacts; created if it does not exist.
OUT_DIR = "out_snd_v0_1"
import os
os.makedirs(OUT_DIR, exist_ok=True)

# Write the baseline zones/orders/trades and the grid summary without the index column.
zones.to_csv(f"{OUT_DIR}/zones_baseline.csv", index=False)
orders_df.to_csv(f"{OUT_DIR}/orders_baseline.csv", index=False)
trades_df.to_csv(f"{OUT_DIR}/trades_baseline.csv", index=False)
results.to_csv(f"{OUT_DIR}/grid_results.csv", index=False)

print("Saved to:", OUT_DIR)


## 11) Next audit checklist (manual)
Setelah kamu jalankan notebook ini, kirimkan:
1) `results.head(10)` dan `results.describe()`
2) contoh 5 trade pertama dari `trades_df`
3) distribusi `exit_reason`

Audit yang kita cari:
- terlalu banyak fill karena zone terlalu lebar?
- winrate vs profit_factor masuk akal?
- banyak TIME exit (berarti TP terlalu jauh / max_hold terlalu kecil)?
