In [3]:
# ---- STEP 1: Install dependencies ----
!pip install yfinance pandas numpy matplotlib tqdm pandas_ta --quiet


In [4]:
# ---- STEP 2: Import libraries ----
import yfinance as yf
import pandas as pd
import numpy as np
import pandas_ta as ta
from tqdm import tqdm
import matplotlib.pyplot as plt
import datetime


ModuleNotFoundError: No module named 'yfinance'

In [None]:
# ---- STEP 3: Parameters ----
# Tunable strategy settings; change the values here to test variations.

# Backtest window (ISO date strings passed to the downloader)
START_DATE = "2010-01-01"
END_DATE = "2025-10-15"

# Signal and data filters
VOL_LOOKBACK = 6          # rolling window (months) for the average-volume filter
EMA_PERIOD = 12           # span (months) of the monthly-close EMA
MIN_MONTHS_HISTORY = 36   # tickers with fewer monthly bars are skipped

# Exit rules
TRAIL_STOP_PCT = 0.20     # trailing stop distance: 20% below the highest high
MAX_HOLD_MONTHS = 12      # a trade is force-closed after this many months
USE_ATR = False           # True -> trail by ATR instead of a fixed percentage
ATR_MULTIPLIER = 3.0      # stop distance in ATRs when USE_ATR is True

# Trading frictions
TXN_COST = 0.0005         # 0.05% brokerage, charged per side
SLIPPAGE = 0.001          # 0.1% adverse price move on entry and on exit


In [None]:
# ---- STEP 4: NSE tickers ----
# ⚠️ Sample universe only: swap in your full NSE-500 list here.
# Write each symbol WITHOUT the ".NS" suffix (Yahoo's marker for NSE India);
# the suffix is appended when the data is downloaded.
tickers = [
    "RELIANCE",
    "TCS",
    "INFY",
    "HDFCBANK",
    "ICICIBANK",
    "LT",
    "SBIN",
    "BHARTIARTL",
    "ASIANPAINT",
    "BAJFINANCE",
]

print(f"Loaded {len(tickers)} tickers. You can expand this list for full NSE500.")


In [None]:
# ---- STEP 5: Helper functions ----
def download_monthly_data(ticker):
    """Download daily NSE data for one ticker and build monthly OHLCV bars.

    Parameters
    ----------
    ticker : str
        Symbol without the ".NS" suffix; the suffix is appended here.

    Returns
    -------
    pandas.DataFrame or None
        Month-end indexed frame with columns ``open``, ``high``, ``low``,
        ``close``, ``volume``, ``ema12`` (EMA of the monthly close with span
        ``EMA_PERIOD``) and ``atr`` (rolling mean of the monthly true range
        over ``EMA_PERIOD`` bars). ``None`` is returned when the download is
        empty, when fewer than ``MIN_MONTHS_HISTORY`` monthly bars exist, or
        when any exception is raised (the error is printed, not re-raised, so
        one bad ticker does not abort the whole run).
    """
    try:
        df = yf.download(f"{ticker}.NS", start=START_DATE, end=END_DATE, progress=False)
        if df is None or df.empty:
            return None

        # Newer yfinance releases return MultiIndex columns (field, ticker)
        # even for a single symbol; df['Open'] would then be a DataFrame and
        # the monthly frame below would be built incorrectly. Flatten to the
        # level that carries the price-field names.
        if isinstance(df.columns, pd.MultiIndex):
            for level in range(df.columns.nlevels):
                field_names = df.columns.get_level_values(level)
                if 'Close' in field_names:
                    df = df.copy()
                    df.columns = field_names
                    break

        df.index = pd.to_datetime(df.index)

        # Resample with an offset object instead of the 'M' string alias: the
        # alias is deprecated in recent pandas, while the MonthEnd offset
        # yields the same month-end bins on old and new versions.
        by_month = df.resample(pd.offsets.MonthEnd())
        m = pd.DataFrame({
            'open': by_month['Open'].first(),
            'high': by_month['High'].max(),
            'low': by_month['Low'].min(),
            'close': by_month['Close'].last(),
            'volume': by_month['Volume'].sum()
        }).dropna()  # months with no trading have NaN prices and are dropped

        if len(m) < MIN_MONTHS_HISTORY:
            return None

        m['ema12'] = m['close'].ewm(span=EMA_PERIOD, adjust=False).mean()

        # True range: the largest of the bar's range and the two gaps from the
        # previous close.
        prev_close = m['close'].shift(1)
        tr = pd.concat([
            m['high'] - m['low'],
            (m['high'] - prev_close).abs(),
            (m['low'] - prev_close).abs()
        ], axis=1).max(axis=1)
        # NOTE: the ATR window reuses EMA_PERIOD; the first EMA_PERIOD - 1
        # values are NaN.
        m['atr'] = tr.rolling(EMA_PERIOD).mean()
        return m
    except Exception as e:
        # Deliberate best-effort: report and skip this ticker.
        print(f"Error downloading {ticker}: {e}")
        return None

def backtest_ticker(monthly, ticker, *, vol_lookback=None, trail_stop_pct=None,
                    max_hold_months=None, txn_cost=None, slippage=None,
                    use_atr=None, atr_multiplier=None, allow_overlap=False):
    """Find monthly breakout entries and simulate trailing-stop exits.

    Entry (at the signal bar's close, worsened by slippage) requires all of:
    close above the highest close of all earlier bars, volume above its
    rolling ``vol_lookback`` mean, and close above ``ema12``.

    Exit, checked bar by bar after entry, in this order:
      1. the bar opens at or below the stop carried over from the previous
         bar -> filled at the open (a gap cannot be filled at the stop);
      2. the bar's low touches the stop recomputed from the updated highest
         high -> filled at the stop;
      3. the holding period reaches ``max_hold_months`` -> filled at the close;
      4. data runs out -> marked at the last close (``"data_end"``).

    Parameters
    ----------
    monthly : pandas.DataFrame
        Monthly bars with columns ``close``, ``high``, ``low``, ``volume`` and
        ``ema12``; ``atr`` is read when ATR stops are enabled; ``open`` is
        used for the gap check when present. The frame is not modified.
    ticker : str
        Label stored on every trade record.
    vol_lookback, trail_stop_pct, max_hold_months, txn_cost, slippage,
    use_atr, atr_multiplier : optional
        Per-call overrides. ``None`` (the default) falls back to the
        corresponding module-level constant (``VOL_LOOKBACK``, ...).
    allow_overlap : bool, default False
        When False, a new trade can only start after the previous trade on
        this ticker has exited, so a run of consecutive new highs is one
        position rather than many stacked ones. Pass True to record every
        signal as its own trade.

    Returns
    -------
    list of dict
        One record per trade with keys ``ticker``, ``entry_date``,
        ``exit_date``, ``entry_price``, ``exit_price``, ``net_return``,
        ``gross_return`` and ``exit_reason`` (``"stop"``, ``"max_hold"`` or
        ``"data_end"``).
    """
    # Resolve overrides lazily so module constants are only read when needed.
    vol_lookback = VOL_LOOKBACK if vol_lookback is None else vol_lookback
    trail_stop_pct = TRAIL_STOP_PCT if trail_stop_pct is None else trail_stop_pct
    max_hold_months = MAX_HOLD_MONTHS if max_hold_months is None else max_hold_months
    txn_cost = TXN_COST if txn_cost is None else txn_cost
    slippage = SLIPPAGE if slippage is None else slippage
    use_atr = USE_ATR if use_atr is None else use_atr
    atr_multiplier = ATR_MULTIPLIER if atr_multiplier is None else atr_multiplier

    m = monthly.copy()
    # Highest close of all *earlier* bars (shifted so the current bar is excluded).
    m['prior_ath'] = m['close'].expanding().max().shift(1)
    # Rolling mean that includes the current bar; NaN until the window fills,
    # which makes the volume comparison below False for those bars.
    m['vol_avg'] = m['volume'].rolling(vol_lookback).mean()

    has_open = 'open' in m.columns
    last_idx = len(m) - 1
    trades = []
    next_free = 0  # first bar index at which a new position may be opened

    # The final bar is never an entry: there is no later bar on which the
    # trade could be managed, so it would only record a zero-length trade
    # that loses slippage and costs.
    for i in range(last_idx):
        if not allow_overlap and i < next_free:
            continue
        r = m.iloc[i]
        if pd.isna(r['prior_ath']):
            continue

        # --- Entry conditions ---
        is_breakout = (
            (r['close'] > r['prior_ath'])
            and (r['volume'] > r['vol_avg'])
            and (r['close'] > r['ema12'])
        )
        if not is_breakout:
            continue

        entry_price = r['close'] * (1 + slippage)
        entry_date = m.index[i]
        highest = entry_price
        stop = entry_price * (1 - trail_stop_pct)

        # Defaults describe a position still open when the data ends.
        exit_idx = last_idx
        exit_price = m['close'].iloc[-1] * (1 - slippage)
        reason = "data_end"

        for j in range(i + 1, len(m)):
            rr = m.iloc[j]

            # Gap through the stop set on the previous bar: fill at the open.
            if has_open and rr['open'] <= stop:
                exit_idx = j
                exit_price = rr['open'] * (1 - slippage)
                reason = "stop"
                break

            # Update trailing stop from the new highest high.
            if rr['high'] > highest:
                highest = rr['high']
            if use_atr and pd.notna(rr['atr']):
                stop = highest - (atr_multiplier * rr['atr'])
            else:
                # Percentage stop; also the fallback while ATR is still NaN,
                # otherwise a NaN stop would never trigger.
                stop = highest * (1 - trail_stop_pct)

            # Exit check
            if rr['low'] <= stop:
                exit_idx = j
                exit_price = stop * (1 - slippage)
                reason = "stop"
                break
            if (j - i) >= max_hold_months:
                exit_idx = j
                exit_price = rr['close'] * (1 - slippage)
                reason = "max_hold"
                break

        next_free = exit_idx + 1
        gross_ret = (exit_price - entry_price) / entry_price
        net_ret = gross_ret - 2 * txn_cost  # brokerage on entry and on exit
        trades.append({
            'ticker': ticker,
            'entry_date': entry_date,
            'exit_date': m.index[exit_idx],
            'entry_price': entry_price,
            'exit_price': exit_price,
            'net_return': net_ret,
            'gross_return': gross_ret,
            'exit_reason': reason
        })
    return trades

In [None]:
# ---- STEP 6: Run backtest ----
# Download each ticker, backtest it, and pool every trade into one table.
all_trades = []
for t in tqdm(tickers, desc="Processing tickers"):
    data = download_monthly_data(t)
    if data is not None:
        # Only tickers that produced usable monthly data are backtested.
        trades = backtest_ticker(data, t)
        all_trades += trades

results = pd.DataFrame(all_trades)
print(f"\nTotal trades: {len(results)}")


In [None]:
# ---- STEP 7: Summary statistics ----
if not results.empty:
    # Put trades in chronological order of exit so that the compounded
    # "sequential" equity curve follows the order in which P&L was realised,
    # rather than the order in which tickers happened to be processed.
    results = (
        results
        .sort_values(['exit_date', 'entry_date'], kind='stable')
        .reset_index(drop=True)
    )

    winners = results[results['net_return'] > 0]
    losers = results[results['net_return'] <= 0]
    win_rate = len(winners) / len(results)
    avg_win = winners['net_return'].mean() if not winners.empty else 0
    avg_loss = losers['net_return'].mean() if not losers.empty else 0

    # Profit factor is undefined (NaN) when there is no gross loss, which
    # covers both "no losers" and "all losers are exactly zero".
    gross_profit = winners['net_return'].sum()
    gross_loss = abs(losers['net_return'].sum())
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else np.nan

    # Annualise over the configured backtest window (capped at today when
    # END_DATE lies in the future) instead of a hard-coded start year.
    window_end = min(pd.Timestamp(END_DATE), pd.Timestamp.today())
    backtest_years = (window_end - pd.Timestamp(START_DATE)).days / 365.25
    if backtest_years <= 0:
        backtest_years = 1  # degenerate window: avoid dividing by zero
    # Rough figure only: it compounds every trade as if taken one after
    # another with full capital.
    cagr_approx = (1 + results['net_return']).prod() ** (1 / backtest_years) - 1

    print("\n===== Strategy Summary =====")
    print(f"Total Trades: {len(results)}")
    print(f"Win Rate: {win_rate*100:.2f}%")
    print(f"Avg Winner: {avg_win*100:.2f}%")
    print(f"Avg Loser: {avg_loss*100:.2f}%")
    print(f"Profit Factor: {profit_factor:.2f}")
    print(f"Approx CAGR: {cagr_approx*100:.2f}%")
    print("=============================")

    # ---- Equity curve (assuming serial trades) ----
    results['equity'] = (1 + results['net_return']).cumprod()
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(results['equity'], label="Equity Curve")
    ax.set_title("Simulated Cumulative Return (sequential trades)")
    ax.set_xlabel("Trade #")
    ax.set_ylabel("Equity (x initial capital)")
    ax.legend()
    ax.grid(True)
    plt.show()

    # Save results (rows are in exit-date order and include the equity column)
    results.to_csv("nse_monthly_breakout_trades.csv", index=False)
    print("\nDetailed trade list saved as 'nse_monthly_breakout_trades.csv'.")
else:
    print("No trades found – try relaxing filters or checking tickers.")