In [5]:
import pandas as pd

df_regime = pd.read_csv("df_regime.csv")

df_strategy = df_regime.copy()
df_strategy = df_strategy.sort_index()
df_strategy.reset_index(drop=True, inplace=True)


In [7]:
df_strategy.head()


Unnamed: 0,timestamp,open,high,low,close,volume,ema_5,ema_15,spot_return,futures_return,...,avg_iv,iv_spread,pcr_oi,pcr_volume,regime,market_regime,regime_change,regime_id,market_regime_smooth,regime_group
0,2025-08-25,226.48,229.3,226.23,227.16,30983133,227.56,227.685,-0.002634,0.0,...,0.2,0.0,0.573228,0.473839,1,1,True,1,1.0,1
1,2025-08-26,226.87,229.49,224.69,229.31,54575107,228.143333,227.888125,0.009465,0.0,...,0.2,0.0,0.573228,0.473839,0,-1,True,2,1.0,2
2,2025-08-27,228.61,230.9,228.26,230.49,31259513,228.925556,228.213359,0.005146,0.0,...,0.2,0.0,0.573228,0.473839,1,1,True,3,-1.0,3
3,2025-08-28,230.82,233.41,229.335,232.56,38074700,230.137037,228.756689,0.008981,0.0,...,0.2,0.0,0.573228,0.473839,0,-1,True,4,1.0,4
4,2025-08-29,232.51,233.38,231.37,232.14,39418437,230.804691,229.179603,-0.001806,0.0,...,0.2,0.0,0.573228,0.473839,1,1,True,5,-1.0,5


In [9]:
# -------- ENSURE TIMESTAMP IS DATETIME INDEX --------
df_strategy = df_strategy.copy()

df_strategy['timestamp'] = pd.to_datetime(df_strategy['timestamp'])
df_strategy = df_strategy.sort_values('timestamp')
df_strategy = df_strategy.set_index('timestamp')

# -------- VALIDATE EMA COLUMNS --------
if 'ema_5' not in df_strategy.columns or 'ema_15' not in df_strategy.columns:
    raise ValueError("EMA columns missing. Do not proceed.")

# -------- FINAL CLEAN --------
required_cols = ['open', 'high', 'low', 'close', 'ema_5', 'ema_15', 'regime']
df_strategy = df_strategy.dropna(subset=required_cols)

print("✅ Final dataframe ready for Task 4")
print("Shape:", df_strategy.shape)
print(df_strategy[['ema_5', 'ema_15', 'regime']].head())


✅ Final dataframe ready for Task 4
Shape: (99, 22)
                 ema_5      ema_15  regime
timestamp                                 
2025-08-25  227.560000  227.685000       1
2025-08-26  228.143333  227.888125       0
2025-08-27  228.925556  228.213359       1
2025-08-28  230.137037  228.756689       0
2025-08-29  230.804691  229.179603       1


In [11]:
# -------- EMA CROSSOVER LOGIC --------
df_strategy = df_strategy.copy()

df_strategy['ema_diff'] = df_strategy['ema_5'] - df_strategy['ema_15']
df_strategy['ema_diff_prev'] = df_strategy['ema_diff'].shift(1)

# Crossovers
df_strategy['cross_up'] = (
    (df_strategy['ema_diff'] > 0) &
    (df_strategy['ema_diff_prev'] <= 0)
)

df_strategy['cross_down'] = (
    (df_strategy['ema_diff'] < 0) &
    (df_strategy['ema_diff_prev'] >= 0)
)

# Cleanup helper column
df_strategy.drop(columns=['ema_diff_prev'], inplace=True)

print("✅ EMA crossover signals created")
print(df_strategy[['ema_5', 'ema_15', 'cross_up', 'cross_down']].head(10))


✅ EMA crossover signals created
                 ema_5      ema_15  cross_up  cross_down
timestamp                                               
2025-08-25  227.560000  227.685000     False       False
2025-08-26  228.143333  227.888125      True       False
2025-08-27  228.925556  228.213359     False       False
2025-08-28  230.137037  228.756689     False       False
2025-08-29  230.804691  229.179603     False       False
2025-09-02  230.443128  229.247153     False       False
2025-09-03  233.118752  230.400009     False       False
2025-09-04  235.339168  231.572508     False       False
2025-09-05  236.789445  232.587194     False       False
2025-09-08  237.152963  233.248795     False       False


In [19]:
# -------- REGIME-CONFIRMED STRATEGY SIGNALS --------
df_strategy = df_strategy.copy()

df_strategy['signal'] = 0

# LONG: crossover at t, regime confirmed at t+1
df_strategy.loc[
    (df_strategy['cross_up']) &
    (df_strategy['regime'].shift(-1) == 1),
    'signal'
] = 1

# SHORT: crossover at t, regime confirmed at t+1
df_strategy.loc[
    (df_strategy['cross_down']) &
    (df_strategy['regime'].shift(-1) == -1),
    'signal'
] = -1

df_strategy['signal'] = df_strategy['signal'].fillna(0)

print("✅ Regime-confirmed signals created")
print(df_strategy['signal'].value_counts())


✅ Regime-confirmed signals created
signal
0    95
1     4
Name: count, dtype: int64


In [21]:
# -------- POSITION EXECUTION (NEXT CANDLE OPEN) --------
df_strategy = df_strategy.copy()

df_strategy['position'] = 0

current_position = 0

for i in range(len(df_strategy) - 1):
    signal = df_strategy.iloc[i]['signal']

    # ENTRY LOGIC (next candle)
    if current_position == 0:
        if signal == 1:
            current_position = 1
        elif signal == -1:
            current_position = -1

    # EXIT LOGIC
    elif current_position == 1 and df_strategy.iloc[i]['cross_down']:
        current_position = 0

    elif current_position == -1 and df_strategy.iloc[i]['cross_up']:
        current_position = 0

    df_strategy.iloc[i + 1, df_strategy.columns.get_loc('position')] = current_position

print("✅ Positions executed at next candle open")
print(df_strategy[['signal', 'position']].value_counts())


✅ Positions executed at next candle open
signal  position
0       1           70
        0           25
1       0            4
Name: count, dtype: int64


In [23]:
# -------- SANITY CHECK --------
total_trades = (df_strategy['position'].diff().abs() == 1).sum()

print("Total position changes (entries + exits):", total_trades)

if total_trades == 0:
    raise RuntimeError("No trades generated. Metrics will be zero/NaN.")


Total position changes (entries + exits): 8


## Task 4.2

## Train–Test Split

In [25]:
# -------- STRATEGY RETURNS --------
df_bt = df_strategy.copy()

# Open-to-open returns
df_bt['open_return'] = df_bt['open'].pct_change().fillna(0)

# Position applied from previous candle (execution safety)
df_bt['position_lag'] = df_bt['position'].shift(1).fillna(0)

# Strategy returns (long & short aware)
df_bt['strategy_return'] = df_bt['position_lag'] * df_bt['open_return']

print("✅ Strategy returns computed")
print(df_bt[['position', 'position_lag', 'open_return', 'strategy_return']].head(10))


✅ Strategy returns computed
            position  position_lag  open_return  strategy_return
timestamp                                                       
2025-08-25         0           0.0     0.000000         0.000000
2025-08-26         0           0.0     0.001722         0.000000
2025-08-27         1           0.0     0.007670         0.000000
2025-08-28         1           1.0     0.009667         0.009667
2025-08-29         1           1.0     0.007322         0.007322
2025-09-02         1           1.0    -0.014021        -0.014021
2025-09-03         1           1.0     0.034722         0.034722
2025-09-04         1           1.0     0.005227         0.005227
2025-09-05         1           1.0     0.006479         0.006479
2025-09-08         1           1.0    -0.002896        -0.002896


In [27]:
# -------- TRAIN / TEST SPLIT --------
split_idx = int(0.7 * len(df_bt))

train_df = df_bt.iloc[:split_idx].copy()
test_df  = df_bt.iloc[split_idx:].copy()

print("Train period:", train_df.index.min(), "→", train_df.index.max())
print("Test period :", test_df.index.min(), "→", test_df.index.max())
print("Train rows:", len(train_df), "| Test rows:", len(test_df))


Train period: 2025-08-25 00:00:00 → 2025-12-01 00:00:00
Test period : 2025-12-02 00:00:00 → 2026-01-14 00:00:00
Train rows: 69 | Test rows: 30


In [31]:
import numpy as np

EPS = 1e-9

def total_return(returns):
    return (1 + returns).prod() - 1

def sharpe_ratio(returns):
    std = returns.std()
    if std < EPS:
        return 0.0
    return np.sqrt(252) * returns.mean() / std

def sortino_ratio(returns):
    downside = returns[returns < 0]
    downside_std = downside.std()
    if downside_std < EPS:
        return 0.0
    return np.sqrt(252) * returns.mean() / downside_std

def max_drawdown(returns):
    cum = (1 + returns).cumprod()
    peak = cum.cummax()
    drawdown = (cum - peak) / peak
    return drawdown.min()

def calmar_ratio(returns):
    mdd = abs(max_drawdown(returns))
    if mdd < EPS:
        return 0.0
    return total_return(returns) / mdd

def win_rate(returns):
    trades = returns[returns != 0]
    if len(trades) == 0:
        return 0.0
    return (trades > 0).mean()

def profit_factor(returns):
    profits = returns[returns > 0].sum()
    losses = abs(returns[returns < 0].sum())
    if losses < EPS:
        return 0.0
    return profits / losses

def trade_stats(df):
    pos_changes = df['position'].diff().fillna(0)
    entries = pos_changes.abs() == 1
    total_trades = entries.sum() // 2

    durations = []
    entry_idx = None

    for i, change in enumerate(pos_changes):
        if change != 0 and entry_idx is None:
            entry_idx = i
        elif change != 0 and entry_idx is not None:
            durations.append(i - entry_idx)
            entry_idx = None

    avg_duration = np.mean(durations) if durations else 0.0
    return int(total_trades), avg_duration


## Total Return

In [33]:
print("TOTAL RETURN")
print("Train:", total_return(train_df['strategy_return']))
print("Test :", total_return(test_df['strategy_return']))


TOTAL RETURN
Train: 0.08730425080473947
Test : -0.018668393223265523


## Sharpe Ratio

In [35]:
print("SHARPE RATIO")
print("Train:", sharpe_ratio(train_df['strategy_return']))
print("Test :", sharpe_ratio(test_df['strategy_return']))


SHARPE RATIO
Train: 1.7406737428713703
Test : -1.3221645682363417


## Sortino Ratio

In [37]:
print("SORTINO RATIO")
print("Train:", sortino_ratio(train_df['strategy_return']))
print("Test :", sortino_ratio(test_df['strategy_return']))



SORTINO RATIO
Train: 2.874205216948388
Test : -1.0943526250756348


## Max Drawdown

In [39]:
print("MAX DRAWDOWN")
print("Train:", max_drawdown(train_df['strategy_return']))
print("Test :", max_drawdown(test_df['strategy_return']))


MAX DRAWDOWN
Train: -0.05466780557928317
Test : -0.046750524109014736


## Calmar Ratio

In [41]:
print("CALMAR RATIO")
print("Train:", calmar_ratio(train_df['strategy_return']))
print("Test :", calmar_ratio(test_df['strategy_return']))


CALMAR RATIO
Train: 1.5969957066984257
Test : -0.3993194424886835


## Trade Detection

In [123]:
test_df["trade_change"] = test_df["position"].diff().abs()

total_trades = int(test_df["trade_change"].sum())
total_trades


0

## Win Rate 

In [44]:
print("WIN RATE")
print("Train:", win_rate(train_df['strategy_return']))
print("Test :", win_rate(test_df['strategy_return']))


WIN RATE
Train: 0.6101694915254238
Test : 0.45454545454545453


## Total Trade and Average Trade Duration

In [51]:
train_trades, train_avg_duration = trade_stats(train_df)
test_trades, test_avg_duration = trade_stats(test_df)

print("TRADE STATISTICS")
print("Train → Total Trades:", train_trades,
      "| Avg Trade Duration:", train_avg_duration)

print("Test  → Total Trades:", test_trades,
      "| Avg Trade Duration:", test_avg_duration)


TRADE STATISTICS
Train → Total Trades: 3 | Avg Trade Duration: 18.333333333333332
Test  → Total Trades: 0 | Avg Trade Duration: 0.0


## PROFIT FACTOR

In [64]:
print("PROFIT FACTOR")
print("Train:", profit_factor(train_df['strategy_return']))
print("Test :", profit_factor(test_df['strategy_return']))


PROFIT FACTOR
Train: 1.364961802197169
Test : 0.7006252174481647


In [66]:
# Save backtest dataframe for ML tasks
df_bt.to_csv("df_backtest.csv")
print("df_backtest.csv saved successfully")


df_backtest.csv saved successfully
