# Breakout + Liquidity Sweep (Audit Fix) — Causal Signals + Walk-Forward Backtest
Notebook ini memperbaiki **look-ahead bias** pada pivot (rolling `center=True`) menjadi **pivot confirmed** (delay `w`).
Backtest **bar-by-bar**: signal terbentuk di close bar `t`, entry di **open bar t+1**. Intrabar konflik TP/SL diselesaikan konservatif (**SL-first**).
Walk-forward: optimisasi parameter hanya di train window, lalu dievaluasi di test window.


In [None]:

import numpy as np
import pandas as pd

# ========= CONFIG =========
# Input data
CSV_PATH = "EURUSD_Candlestick_1_Hour_BID_04.05.2003-15.04.2023.csv"   # replace with the path to your own data
TIME_COL = None  # set to the timestamp column name if the CSV has one, e.g. "Local time" / "Time"
TZ = "UTC"       # metadata only; the data is assumed to be clean already

# Signal params (structure)
BACKCANDLES = 40         # lookback in bars, measured from the current bar, for pivots that are still relevant
PIVOT_W = 5              # pivot confirmation delay (w): a pivot at bar t-w is confirmed at bar t
EMA_LEN = 150
ATR_LEN = 14

# Walk-forward window sizes, in bars
TRAIN_BARS = 6 * 30 * 24  # roughly 6 months of H1 bars (estimate); adjust to your timeframe
TEST_BARS = 1 * 30 * 24   # roughly 1 month
STEP_BARS = TEST_BARS     # roll forward by one test window per fold

# Optimization grid (train only): 0.5, 1.0, ..., 5.0
ATR_MULT_GRID = np.linspace(0.5, 5.0, 10)
TP_SL_GRID = np.linspace(0.5, 5.0, 10)

# Execution
SPREAD = 0.0             # set to a non-zero value to simulate a fixed spread
SL_FIRST_IF_BOTH = True  # conservative: if TP and SL are both hit in one bar, assume SL

# Display settings for wide frames and arrays
pd.set_option("display.width", 200)
pd.set_option("display.max_columns", 50)
np.set_printoptions(suppress=True, linewidth=200)


In [None]:

import pandas_ta as ta

df = pd.read_csv(CSV_PATH)

# Basic cleanup: drop zero-volume bars when a Volume column exists
if "Volume" in df.columns:
    df = df[df["Volume"] != 0].copy()

df = df.reset_index(drop=True)

# Optional time parsing: unparseable timestamps are coerced to NaT and those rows
# are dropped, then the frame is sorted chronologically.
if TIME_COL and TIME_COL in df.columns:
    df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")
    df = df.dropna(subset=[TIME_COL]).copy()
    df = df.sort_values(TIME_COL).reset_index(drop=True)

# Ensure standard OHLC column names.
# Only an all-lowercase spelling is mapped automatically; add other spellings here.
colmap = {}
for c in ["Open","High","Low","Close"]:
    if c not in df.columns and c.lower() in df.columns:
        colmap[c.lower()] = c
if colmap:
    df = df.rename(columns=colmap)

# Explicit check instead of `assert`, so it still runs when Python is started with -O.
missing_ohlc = [name for name in ["Open","High","Low","Close"] if name not in df.columns]
if missing_ohlc:
    raise ValueError(
        f"CSV is missing required OHLC columns {missing_ohlc}; found columns: {list(df.columns)}"
    )

# Indicators (computed by pandas_ta from the Close / High / Low columns)
df["EMA"] = ta.ema(df["Close"], length=EMA_LEN)
df["ATR"] = ta.atr(df["High"], df["Low"], df["Close"], length=ATR_LEN)

# Trend filter: the candle body (min/max of Open and Close) must sit entirely
# above (or entirely below) the EMA on every one of the last `win` bars.
# Comparisons against a NaN EMA are False, so bars without an EMA value never qualify.
backcandles = 15
win = backcandles + 1
above = (np.minimum(df["Open"], df["Close"]) > df["EMA"]).astype(int)
below = (np.maximum(df["Open"], df["Close"]) < df["EMA"]).astype(int)
upt = (above.rolling(win, min_periods=win).sum() == win).to_numpy()
dnt = (below.rolling(win, min_periods=win).sum() == win).to_numpy()

# EMASignal encoding: 2 = uptrend, 1 = downtrend, 0 = neither.
# Code 3 (both at once) is kept for parity with the encoding, but a body cannot be
# above and below the EMA on the same bar, so that branch should never fire.
emasig = np.zeros(len(df), dtype=np.int8)
emasig[ upt  & ~dnt] = 2
emasig[ dnt  & ~upt] = 1
emasig[ upt  &  dnt] = 3
df["EMASignal"] = emasig

df.head()


In [None]:

def pivots_confirmed(df: pd.DataFrame, w: int) -> pd.DataFrame:
    """
    Flag pivot highs/lows using only bars that are already known at confirmation time.

    The row at position t describes the candle at position t-w: that candle is a
    pivot high when its High is >= every High in the trailing window [t-2w .. t],
    and a pivot low when its Low is <= every Low in the same window. Rows that do
    not yet have a full window of 2*w + 1 bars are never flagged.

    Parameters:
      df : frame with "High" and "Low" columns (not modified; a copy is returned)
      w  : confirmation delay in bars

    Returns a copy of df with these columns added (values at row t):
      PH_conf, PL_conf   : bool, pivot high / low confirmed at this row
      PH_price, PL_price : High / Low of the pivot candle (t-w), NaN when not confirmed
      PH_idx, PL_idx     : positional index of the pivot candle (t-w), NaN when not confirmed
    """
    highs = df["High"].astype(float)
    lows = df["Low"].astype(float)

    # A trailing window of 2*w + 1 bars ending at t is centred on bar t-w.
    window = 2 * w + 1
    window_high = highs.rolling(window, min_periods=window).max()
    window_low = lows.rolling(window, min_periods=window).min()

    # Candidate pivot candle (t-w), aligned to the row where it gets confirmed (t).
    centre_high = highs.shift(w)
    centre_low = lows.shift(w)

    is_ph = (window_high.notna() & (centre_high >= window_high)).to_numpy(dtype=bool)
    is_pl = (window_low.notna() & (centre_low <= window_low)).to_numpy(dtype=bool)

    # Positional index of the candidate candle for every row.
    centre_pos = np.arange(len(df)) - w

    out = df.copy()
    out["PH_conf"] = is_ph
    out["PL_conf"] = is_pl
    out["PH_price"] = np.where(is_ph, centre_high, np.nan)
    out["PL_price"] = np.where(is_pl, centre_low, np.nan)
    out["PH_idx"] = np.where(is_ph, centre_pos, np.nan)
    out["PL_idx"] = np.where(is_pl, centre_pos, np.nan)
    return out

# Add the confirmed-pivot columns using the configured confirmation delay.
df = pivots_confirmed(df, w=PIVOT_W)

# Quick sanity check: show the pivot columns for the last 20 rows.
df.tail(20)[["PH_conf","PL_conf","PH_idx","PL_idx","PH_price","PL_price"]]


In [None]:

from collections import deque

def _last_two_after(pivots, after_idx):
    """Return the last two (idx, price) pivots with idx > after_idx, or None if fewer than two exist."""
    later = [p for p in pivots if p[0] > after_idx]
    if len(later) < 2:
        return None
    return later[-2], later[-1]


def compute_breakout_signals(df: pd.DataFrame, backcandles: int) -> pd.DataFrame:
    """
    Scan bars in order and mark breakout signals from confirmed pivots.

    Requires the columns "Close", "PH_conf", "PH_idx", "PH_price", "PL_conf",
    "PL_idx" and "PL_price". At each bar t only pivots confirmed at or before t,
    and whose pivot index is >= t - backcandles, are considered.

    Long (2):  take the most recently confirmed pivot high; among the pivot lows
               located after it, the last one must be lower than the one before
               it; and Close must cross from <= the pivot-high price on bar t-1
               to > that price on bar t.
    Short (1): the mirror image, using the most recent pivot low, two rising
               pivot highs after it, and a Close cross below the pivot-low price.
               It is evaluated only when no long signal was set on the bar.

    Returns a copy of df with these columns added:
      breakout_signal_raw : 2=long, 1=short, 0=none
      pivot_ref_idx       : index of the pivot candle used as key level
                            (pivot high for long, pivot low for short), NaN if no signal
      key_level           : price of that pivot, NaN if no signal
    The signal is generated on the close of bar t and is intended for entry at
    the open of bar t+1.
    """
    n = len(df)
    sig = np.zeros(n, dtype=np.int8)
    ref = np.full(n, np.nan)
    lvl = np.full(n, np.nan)

    # Pull every column out once; per-row .iloc lookups inside the loop are
    # far slower than plain array indexing.
    closes = df["Close"].to_numpy(dtype=float)
    ph_conf = df["PH_conf"].to_numpy(dtype=bool)
    pl_conf = df["PL_conf"].to_numpy(dtype=bool)
    ph_idx_col = df["PH_idx"].to_numpy(dtype=float)
    pl_idx_col = df["PL_idx"].to_numpy(dtype=float)
    ph_price_col = df["PH_price"].to_numpy(dtype=float)
    pl_price_col = df["PL_price"].to_numpy(dtype=float)

    # Confirmed pivots still inside the lookback window, stored as (pivot_idx, price)
    ph = deque()
    pl = deque()

    for t in range(n):
        # ingest pivots confirmed at time t
        if ph_conf[t]:
            ph.append((int(ph_idx_col[t]), float(ph_price_col[t])))
        if pl_conf[t]:
            pl.append((int(pl_idx_col[t]), float(pl_price_col[t])))

        # drop pivots older than `backcandles` bars relative to the current bar
        min_idx = t - backcandles
        while ph and ph[0][0] < min_idx:
            ph.popleft()
        while pl and pl[0][0] < min_idx:
            pl.popleft()

        if t == 0:
            continue  # no previous close to detect a cross against

        close_prev = closes[t - 1]
        close_now = closes[t]

        # ---------- Bullish ----------
        if ph:
            ph_idx, ph_price = ph[-1]
            lows_after = _last_two_after(pl, ph_idx)
            if lows_after is not None:
                (_, pl1_price), (_, pl2_price) = lows_after
                # lower low after the pivot high, then first close above it
                if pl2_price < pl1_price and close_prev <= ph_price < close_now:
                    sig[t] = 2
                    ref[t] = ph_idx
                    lvl[t] = ph_price

        # ---------- Bearish ----------
        if sig[t] == 0 and pl:
            pl_idx, pl_price = pl[-1]
            highs_after = _last_two_after(ph, pl_idx)
            if highs_after is not None:
                (_, ph1_price), (_, ph2_price) = highs_after
                # higher high after the pivot low, then first close below it
                if ph2_price > ph1_price and close_now < pl_price <= close_prev:
                    sig[t] = 1
                    ref[t] = pl_idx
                    lvl[t] = pl_price

    out = df.copy()
    out["breakout_signal_raw"] = sig
    out["pivot_ref_idx"] = ref
    out["key_level"] = lvl
    return out

df = compute_breakout_signals(df, BACKCANDLES)

# EMA direction filter: keep a raw long only when EMASignal == 2 and a raw short
# only when EMASignal == 1; everything else becomes 0.
long_ok = (df["breakout_signal_raw"] == 2) & (df["EMASignal"] == 2)
short_ok = (df["breakout_signal_raw"] == 1) & (df["EMASignal"] == 1)
df["breakout_signal"] = 0
df.loc[long_ok, "breakout_signal"] = 2
df.loc[short_ok, "breakout_signal"] = 1

# Preview the first 20 filtered signals.
df.loc[
    df["breakout_signal"] != 0,
    ["breakout_signal","pivot_ref_idx","key_level","Close","EMA","EMASignal"],
].head(20)


In [None]:

def run_backtest_bar_by_bar(df: pd.DataFrame, atr_multiplier: float, tp_sl_ratio: float,
                            spread=None, sl_first_if_both=None) -> pd.DataFrame:
    """
    Single-position, bar-by-bar backtest.

    Rules:
    - A signal (2=long, 1=short in "breakout_signal") is read on bar t.
    - Entry is at the Open of bar t+1, shifted by half the spread against the trade.
    - Stop distance is ATR[t] * atr_multiplier; the target is tp_sl_ratio times that
      distance. Signals with a non-finite or non-positive ATR/distance are skipped.
    - From the entry bar onwards, each bar's High/Low is checked against SL and TP.
    - If TP and SL are both touched in the same bar, SL wins when sl_first_if_both
      is true, otherwise TP wins.
    - Exits are filled at the SL/TP level itself.
    - While a position is open, and on the bar where it exits, signals are ignored.
    - A position that is still open when the data ends is not reported.

    Parameters:
      df               : frame with "Open", "High", "Low", "Close", "ATR" and
                         "breakout_signal" columns; bars are addressed by position
      atr_multiplier   : stop distance in ATR units
      tp_sl_ratio      : target distance as a multiple of the stop distance
      spread           : fixed spread; defaults to the module-level SPREAD
      sl_first_if_both : tie-break flag; defaults to the module-level SL_FIRST_IF_BOTH

    Returns one row per closed trade with columns side, signal_bar, entry_bar,
    exit_bar, entry, exit, sl, tp, dist, R, exit_reason. The columns are present
    even when there are no trades.
    """
    if spread is None:
        spread = SPREAD
    if sl_first_if_both is None:
        sl_first_if_both = SL_FIRST_IF_BOTH

    trade_columns = ["side", "signal_bar", "entry_bar", "exit_bar", "entry", "exit",
                     "sl", "tp", "dist", "R", "exit_reason"]

    n = len(df)
    o = df["Open"].to_numpy(dtype=float)
    h = df["High"].to_numpy(dtype=float)
    l = df["Low"].to_numpy(dtype=float)
    atr = df["ATR"].to_numpy(dtype=float)
    sig = df["breakout_signal"].to_numpy(dtype=int)

    trades = []
    pos = 0          # 0 flat, 1 long, -1 short
    entry = sl = tp = dist = np.nan
    entry_t = -1

    # Every bar is visited: bar 0 may carry a signal when df is a slice of a longer
    # series, and the final bar must still be checked for an SL/TP exit.
    for t in range(n):
        if pos == 0:
            if sig[t] != 2 and sig[t] != 1:
                continue
            if t + 1 >= n:
                continue  # no next bar to enter on
            if not np.isfinite(atr[t]) or atr[t] <= 0:
                continue
            stop_dist = atr[t] * atr_multiplier
            if stop_dist <= 0:
                continue

            direction = 1 if sig[t] == 2 else -1
            entry_t = t + 1
            dist = stop_dist
            # longs pay half the spread above the open, shorts half below it
            entry = o[entry_t] + direction * spread / 2.0
            sl = entry - direction * dist
            tp = entry + direction * dist * tp_sl_ratio
            pos = direction
            continue  # management starts on the entry bar, i.e. the next iteration

        # ----- manage the open position on bar t -----
        if pos == 1:
            hit_sl = l[t] <= sl
            hit_tp = h[t] >= tp
        else:
            hit_sl = h[t] >= sl
            hit_tp = l[t] <= tp

        if hit_sl and hit_tp:
            # intrabar order is unknown; resolve with the configured tie-break
            exit_price = sl if sl_first_if_both else tp
            exit_reason = "SL&TP" if sl_first_if_both else "TP&SL"
        elif hit_sl:
            exit_price = sl
            exit_reason = "SL"
        elif hit_tp:
            exit_price = tp
            exit_reason = "TP"
        else:
            continue

        trades.append({
            "side": "LONG" if pos == 1 else "SHORT",
            "signal_bar": entry_t - 1,
            "entry_bar": entry_t,
            "exit_bar": t,
            "entry": entry,
            "exit": exit_price,
            "sl": sl,
            "tp": tp,
            "dist": dist,
            # profit in units of the initial stop distance, signed by direction
            "R": pos * (exit_price - entry) / dist,
            "exit_reason": exit_reason,
        })
        pos = 0

    return pd.DataFrame(trades, columns=trade_columns)

# quick single run
# Quick single run with fixed parameters (ATR multiplier 1.5, TP:SL ratio 1.0).
tr = run_backtest_bar_by_bar(df, 1.5, 1.0)
total_r = tr["R"].sum() if len(tr) else 0
tr.head(), len(tr), total_r


In [None]:

def score_trades(trades: pd.DataFrame) -> float:
    """Return the optimization objective: the sum of the "R" column, or -1e9 when there are no trades."""
    has_trades = trades is not None and len(trades) > 0
    return float(trades["R"].sum()) if has_trades else -1e9

def walk_forward(df: pd.DataFrame,
                 train_bars: int,
                 test_bars: int,
                 step_bars: int,
                 atr_grid,
                 tp_grid) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Rolling walk-forward evaluation.

    For each fold (windows are positional, advanced by step_bars):
      - pivots and signals are recomputed on the prefix df[:test_end], so no bar
        beyond the fold's test window is visible when signals are built
      - every (atr_multiplier, tp_sl_ratio) pair from the grids is backtested on
        the train window ONLY; the pair with the highest score_trades value wins
        (the first pair wins ties)
      - the winning pair is backtested on the test window

    The train and test windows are backtested as independent slices: a position
    cannot carry over from train into test, and the bar numbers in the returned
    trades (signal_bar, entry_bar, exit_bar) are relative to the start of the
    slice. Add the fold's test_start to get positions in df.

    Parameters:
      df         : frame with OHLC, "ATR" and "EMASignal" columns
      train_bars : length of the optimization window, in bars
      test_bars  : length of the evaluation window, in bars
      step_bars  : how far the windows advance between folds; must be > 0
      atr_grid   : candidate ATR multipliers (any iterable, must be non-empty)
      tp_grid    : candidate TP:SL ratios (any iterable, must be non-empty)

    Returns (trades_all, folds_df): all test-window trades tagged with fold
    metadata, and one summary row per fold. Both are empty when df is shorter
    than train_bars + test_bars.

    Raises:
      ValueError : if step_bars is not positive or a grid is empty.
    """
    if step_bars <= 0:
        # a non-positive step would never advance the window and loop forever
        raise ValueError(f"step_bars must be a positive number of bars, got {step_bars}")

    # Materialize the grids once so one-shot iterables can be reused for every fold.
    atr_values = [float(am) for am in atr_grid]
    tp_values = [float(rr) for rr in tp_grid]
    if not atr_values or not tp_values:
        raise ValueError("atr_grid and tp_grid must each contain at least one value")

    all_test_trades = []
    folds = []

    start = 0
    fold_id = 0
    n = len(df)

    while start + train_bars + test_bars <= n:
        train_start = start
        train_end   = start + train_bars
        test_start  = train_end
        test_end    = train_end + test_bars

        # recompute pivots+signals on the prefix up to test_end
        dfx = df.iloc[:test_end].copy()
        dfx = pivots_confirmed(dfx, PIVOT_W)
        dfx = compute_breakout_signals(dfx, BACKCANDLES)
        # EMA direction filter: long only when EMASignal==2, short only when EMASignal==1
        dfx["breakout_signal"] = 0
        dfx.loc[(dfx["breakout_signal_raw"]==2) & (dfx["EMASignal"]==2), "breakout_signal"] = 2
        dfx.loc[(dfx["breakout_signal_raw"]==1) & (dfx["EMASignal"]==1), "breakout_signal"] = 1

        # The slices do not depend on the grid cell, so build them once per fold.
        train_df = dfx.iloc[train_start:train_end].reset_index(drop=True)
        test_df = dfx.iloc[test_start:test_end].reset_index(drop=True)

        # optimize on the train slice only
        best = None
        best_score = -1e18
        for am in atr_values:
            for rr in tp_values:
                sc = score_trades(run_backtest_bar_by_bar(train_df, am, rr))
                if sc > best_score:
                    best_score = sc
                    best = (am, rr)

        best_am, best_rr = best

        # Evaluate the chosen parameters on the test slice alone, so every trade
        # is opened and closed strictly inside the test window.
        tst = run_backtest_bar_by_bar(test_df, best_am, best_rr)
        if len(tst):
            tst["fold_id"] = fold_id
            tst["train_start"] = train_start
            tst["train_end"] = train_end
            tst["test_start"] = test_start
            tst["test_end"] = test_end
            tst["atr_multiplier"] = best_am
            tst["tp_sl_ratio"] = best_rr
            all_test_trades.append(tst)

        folds.append({
            "fold_id": fold_id,
            "train_start": train_start, "train_end": train_end,
            "test_start": test_start, "test_end": test_end,
            "best_atr_multiplier": best_am,
            "best_tp_sl_ratio": best_rr,
            "train_score_totalR": best_score,
            "test_totalR": float(tst["R"].sum()) if len(tst) else 0.0,
            "test_n": int(len(tst))
        })

        fold_id += 1
        start += step_bars

    trades_all = pd.concat(all_test_trades, ignore_index=True) if all_test_trades else pd.DataFrame()
    folds_df = pd.DataFrame(folds)
    return trades_all, folds_df

# Run the walk-forward with the configured window sizes and parameter grids.
wf_trades, wf_folds = walk_forward(
    df, TRAIN_BARS, TEST_BARS, STEP_BARS, ATR_MULT_GRID, TP_SL_GRID
)

# Preview: first folds, first test trades, and the total number of test trades.
wf_folds.head(), wf_trades.head(), len(wf_trades)


In [None]:

def summarize(trades: pd.DataFrame) -> pd.Series:
    """
    Summarize a trade list by its "R" column.

    Returns a Series with:
      n       : number of trades
      sumR    : total R
      avgR    : mean R per trade
      winrate : fraction of trades with R > 0
    All four are 0 when trades is None or empty.
    """
    if trades is None or len(trades) == 0:
        return pd.Series({"n":0,"sumR":0,"avgR":0,"winrate":0})

    r = trades["R"]
    stats = {
        "n": len(trades),
        "sumR": float(r.sum()),
        "avgR": float(r.mean()),
        "winrate": float((r > 0).mean()),
    }
    return pd.Series(stats)

# Fold-level results: count and the last 10 folds.
print("WF folds:", len(wf_folds))
display(wf_folds.tail(10))

# Aggregate statistics over every test-window trade.
print("\nOverall test trades summary:")
display(summarize(wf_trades))

if len(wf_trades):
    # Equity curve in R: running total of per-trade R, in trade order.
    eq = wf_trades["R"].cumsum()
    import matplotlib.pyplot as plt
    plt.figure(figsize=(12,4))
    plt.plot(eq.to_numpy())
    plt.xlabel("Trade #")
    plt.ylabel("Cumulative R")
    plt.title("Walk-forward cumulative R (test only)")
    plt.show()
