## Imports

In [215]:
import pandas as pd
import yfinance as yf
import numpy as np
import time
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import math
import seaborn as sns
import itertools
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

## Functions

In [102]:
def download_data(symbols, period, interval, batch_size=100, delay=0.1):
    """
    This function is used to download data for general purposes in a specified format.

    Parameter:
        symbols (list): List of symbols to download data for.
        period (str): Period to download data for. Number + timeframe(d for day, h for hour, m for minute etc.).
        interval (str): Candlesticks timeframe. Same format as period.
        batch_size (int): Size of the batch. Not to overload scanner.
        delay (int): Seconds to wait after each batch. Not to overload scanner.

    Returns:
        dict: Dictionary in format {symbol: symbol_dataframe}.
        Dataframe contains columns: Datetime, Symbol, Open, High, Low, Close, Volume.
    """
    data = {}

    for i in range(0, len(symbols), batch_size):
        batch = symbols[i:i + batch_size]
        print(f"Processing batch {i // batch_size + 1} of {(len(symbols) - 1) // batch_size + 1}")

        # try:
        df = yf.download(batch, period=period, interval=interval, group_by="ticker", progress=False, threads=True, ignore_tz=True, auto_adjust=False)

        if df.empty:
            continue

        df = pd.concat([df.reset_index()], axis=1)
        df = df.reset_index(drop=True)

        # Rename columns if multi-ticker
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = [
                f"{symbol}_{field}" if field else "Datetime"
                for symbol, field in df.columns
            ]
        else:
            df.columns = [
                f"{batch[0]}_{col}" if col != "Datetime" else "Datetime"
                for col in df.columns
            ]

        # Separate data per ticker
        for symbol in batch:
            selected_columns = [col for col in df.columns if col.startswith(f"{symbol}_")]
            if not selected_columns:
                continue
            symbol_df = df[["Datetime"] + selected_columns].copy()
            symbol_df.columns = [col.split("_", 1)[1] if "_" in col else col for col in symbol_df.columns]
            symbol_df["Symbol"] = symbol
            data[symbol] = symbol_df

        # except Exception as e:
        #     print(f"Error in batch {i // batch_size + 1}: {e}")
        #     continue

        time.sleep(delay)

    return data

In [5]:
def apply_to_dict(stock_dict, function, **kwargs):
    new_stock_dict = {}
    for sym, df in stock_dict.items():
        df = df.copy()
        df = function(df, **kwargs)
        new_stock_dict[sym] = df
    return new_stock_dict

In [4]:
def calculate_ema(df, days, price_col="Close", ema_col=None):
    """
    Calculate Exponential Moving Average (EMA) for a given price column.
    
    Parameters:
        df (pd.DataFrame): Input dataframe with price data.
        days (int): The number of days (period) for EMA.
        price_col (str): The column on which EMA is calculated (default "Close").
        ema_col (str): Optional column name for storing EMA.
    
    Returns:
        pd.DataFrame: Dataframe with a new column containing EMA values.
    """
    if ema_col is None:
        ema_col = f"EMA_{days}"
    
    df[ema_col] = df[price_col].ewm(span=days, adjust=False).mean()
    return df

In [169]:
def add_atr(df, window):
    df = df.copy()
    df["H-L"] = df["High"] - df["Low"]
    df["H-PC"] = abs(df["High"] - df["Close"].shift(1))
    df["L-PC"] = abs(df["Low"] - df["Close"].shift(1))
    df["TR"] = df[["H-L", "H-PC", "L-PC"]].max(axis=1)
    df["ATR_14"] = df["TR"].rolling(window=window).mean()

    return df.drop(columns=["H-L", "H-PC", "L-PC", "TR"])

In [11]:
def add_indicators(df, ema_periods=[8, 20, 34, 50, 200], atr_window=14):
    df = df.copy()

    # EMAs
    for period in ema_periods:
        df[f"EMA_{period}"] = df["Close"].ewm(span=period, adjust=False).mean()
    
    # ATR
    df = add_atr(df, atr_window)

    return df

In [78]:
def generate_trades(df):
    trades = []
    
    for i in range(1, len(df) - 2):  # leave space for entry day
        prev = df.iloc[i - 1]
        curr = df.iloc[i]
        nxt = df.iloc[i + 1]

        # Detect bullish crossover (8 crosses above 20)
        if prev["EMA_8"] < prev["EMA_20"] and curr["EMA_8"] > curr["EMA_20"]:
            entry_day = nxt  # entry is the next candle
            crossover_day = curr

            # Entry only if new high is made
            if entry_day["High"] > crossover_day["High"]:
                entry_price = crossover_day["High"]
                stop_loss = prev["Low"]   # <-- use the day before crossover
                risk = entry_price - stop_loss
                pt1 = entry_price + risk
                pt2 = entry_price + 2 * risk

                # Trade outcome flags
                pt1_hit, pt2_hit, stop_loss_hit = False, False, False
                exit_date, exit_price = None, None

                # Track forward until exit
                for j in range(i + 1, len(df)):
                    day = df.iloc[j]

                    # Check PT1
                    if not pt1_hit and day["High"] >= pt1:
                        pt1_hit = True

                    # Check PT2 (exit immediately if hit)
                    if day["High"] >= pt2:
                        pt2_hit = True
                        exit_date, exit_price = day["Datetime"], pt2
                        break

                    # Check Stop Loss
                    if day["Low"] <= stop_loss:
                        stop_loss_hit = True
                        exit_date, exit_price = day["Datetime"], stop_loss
                        break

                    # Check EMA cross back (exit at close)
                    if day["EMA_8"] < day["EMA_20"]:
                        exit_date, exit_price = day["Datetime"], day["Close"]
                        break

                # Volume metrics
                crossover_vol = crossover_day["Volume"]
                candle_color = "Bullish" if crossover_day["Close"] > crossover_day["Open"] else "Bearish"

                trades.append({
                    "Symbol": df['Symbol'].values[0],
                    "Entry Date": entry_day["Datetime"],
                    "Entry Price": entry_price,
                    "Stop Loss": stop_loss,
                    "PT1": pt1,
                    "PT2": pt2,
                    "PT1_Hit": pt1_hit,
                    "PT2_Hit": pt2_hit,
                    "Stop_Loss_Hit": stop_loss_hit,
                    "Exit Date": exit_date,
                    "Exit Price": exit_price,
                    "EMA_34": entry_day["EMA_34"],
                    "EMA_50": entry_day["EMA_50"],
                    "EMA_200": entry_day["EMA_200"],
                    "ATR_14": entry_day["ATR_14"],
                    "Crossover_Volume": crossover_vol,
                    "Crossover_Color": candle_color
                })
    return pd.DataFrame(trades)

In [148]:
def analyse_entries(
    df,
    sl_method="crossover_low",   # prev_low, crossover_low, atr
    sl_offset=0.0,          # absolute offset or ATR multiplier
):
    trades = []

    for i in range(1, len(df) - 2):  # leave space for entry
        prev = df.iloc[i - 1]
        curr = df.iloc[i]
        nxt = df.iloc[i + 1]
        entry_day = df.iloc[i + 2]   # entry = day after crossover confirmation

        # --- Entry condition: EMA8 crosses above EMA20 ---
        if prev["EMA_8"] < prev["EMA_20"] and curr["EMA_8"] > curr["EMA_20"]:
            crossover_day = curr

            # --- Entry price logic ---
            # Take the HIGH of the crossover candle OR the OPEN of the next candle (whichever is higher)
            entry_price = max(nxt["High"], entry_day['Open'])

            # --- Stop Loss logic ---
            if sl_method == "prev_low":
                stop_loss = prev["Low"] - sl_offset
            elif sl_method == "crossover_low":
                stop_loss = crossover_day["Low"] - sl_offset
            elif sl_method == "atr":
                stop_loss = entry_price - sl_offset * entry_day["ATR_14"]
            else:
                raise ValueError("Invalid sl_method")

            risk = entry_price - stop_loss
            if risk <= 0:
                continue  # skip invalid setups

            # --- Track until EMA cross back ---
            max_price = entry_price
            max_price_date = entry_day["Datetime"]
            stop_loss_date = None
            ema_cross_back_date = None
            exit_date, exit_price = None, None

            for j in range(i + 1, len(df)):
                day = df.iloc[j]

                # Track max price
                if day["High"] > max_price:
                    max_price = day["High"]
                    max_price_date = day["Datetime"]

                # Check stop loss
                if stop_loss_date is None and day["Low"] <= stop_loss:
                    stop_loss_date = day["Datetime"]

                # Exit when EMAs cross back
                if day["EMA_8"] < day["EMA_20"]:
                    ema_cross_back_date = day["Datetime"]
                    exit_date, crossover_price = day["Datetime"], day["Close"]
                    break

            if exit_date is None:
                continue  # no valid exit

            # --- Metrics ---
            max_gain_pct = (max_price / entry_price - 1) * 100
            atr_pct = df.loc[i, "ATR_14"] / entry_price * 100
            risk_pct = (1 - stop_loss / entry_price) * 100

            crossover_vol = crossover_day["Volume"]
            crossover_color = "Bullish" if crossover_day["Close"] > crossover_day["Open"] else "Bearish"

            crossover_price_change = crossover_price / entry_price - 1

            trades.append({
                "Symbol": entry_day["Symbol"],
                "Entry Date": entry_day["Datetime"],
                "Max Price Date": max_price_date,
                "Stop Loss Date": stop_loss_date,
                "EMA Cross Back Date": ema_cross_back_date,
                "Entry Price": entry_price,
                "Max Price": max_price,
                "Stop Loss": stop_loss,
                "Crossover Price": crossover_price,
                "Max%Gain": max_gain_pct,
                "Risk%": risk_pct,
                "Crossover Change%": crossover_price_change,
                "EMA_8": entry_day["EMA_8"],
                "EMA_20": entry_day["EMA_20"],
                "EMA_34": entry_day["EMA_34"],
                "EMA_50": entry_day["EMA_50"],
                "EMA_200": entry_day["EMA_200"],
                "ATR_14": entry_day["ATR_14"],
                "ATR%": atr_pct,
                "Above_EMA_34": entry_price > entry_day["EMA_34"],
                "Above_EMA_50": entry_price > entry_day["EMA_50"],
                "Above_EMA_200": entry_price > entry_day["EMA_200"],
                "Crossover_Volume": crossover_vol,
                "Crossover_Color": crossover_color,
            })

    return pd.DataFrame(trades)

## Explore Trades

In [13]:
symbols_data = pd.read_csv('data/all_symbols_results.csv', index_col=0).dropna().reset_index(drop=True)
symbols_list = list(symbols_data['Symbol'])

In [None]:
stock_data = download_data(symbols=symbols_list, period='500d', interval='1d')

In [170]:
stock_data = apply_to_dict(stock_data, add_indicators)

In [105]:
all_trades = {}
for sym, df in stock_data.items():
    trades_df = generate_trades(df)
    all_trades[sym] = trades_df

In [106]:
concat_entries = pd.concat([df for df in all_trades.values()])

concat_entries["Above_EMA_34"] = concat_entries["Entry Price"] > concat_entries["EMA_34"]
concat_entries["Above_EMA_50"] = concat_entries["Entry Price"] > concat_entries["EMA_50"]
concat_entries["Above_EMA_200"] = concat_entries["Entry Price"] > concat_entries["EMA_200"]

concat_entries["ATR%"] = concat_entries["ATR_14"] / concat_entries["Entry Price"]

  concat_entries = pd.concat([df for df in all_trades.values()])


In [107]:
# Additional thresholds 
min_crossover_vol = 2_000_000 # minimum volume at crossover
min_price = 50 # minimum entry price 

# Filter trades combining trend, volatility, price, volume, and candle color 
filtered_combined = concat_entries[ 
    (concat_entries["Above_EMA_34"]) & # trend filter 
    (concat_entries["ATR%"] < concat_entries["ATR%"].median()) & # low volatility 
    (concat_entries["Entry Price"] >= min_price) & # price filter 
    (concat_entries["Crossover_Volume"] >= min_crossover_vol) 
] 

print("Filtered trades:", len(filtered_combined))
print(filtered_combined[["PT1_Hit", "PT2_Hit", "Stop_Loss_Hit"]].mean())

Filtered trades: 1459
PT1_Hit          0.544894
PT2_Hit          0.356408
Stop_Loss_Hit    0.444825
dtype: float64


#### Best filters are:
1. Above 34 EMA
2. Lower volatility: ATR less than 0.035494516184041786
3. Entry Price higher than 50
4. Volume on crossover candle 50

In [255]:
selected_stocks = filtered_combined['Symbol'].unique()[:50]
selected_stock_data = {key: value for key, value in stock_data.items() if key in selected_stocks}

In [256]:
trades_analysis = {}
for sym, df in selected_stock_data.items():
    trades_df = analyse_entries(df)
    trades_analysis[sym] = trades_df

In [257]:
trades_analysis_concat = pd.concat([df for df in trades_analysis.values()])
trades_analysis_concat.head()

Unnamed: 0,Symbol,Entry Date,Max Price Date,Stop Loss Date,EMA Cross Back Date,Entry Price,Max Price,Stop Loss,Crossover Price,Max%Gain,...,EMA_34,EMA_50,EMA_200,ATR_14,ATR%,Above_EMA_34,Above_EMA_50,Above_EMA_200,Crossover_Volume,Crossover_Color
0,TSM,2023-09-14,2023-09-14,2023-09-15,2023-09-19,92.779999,92.93,90.419998,88.169998,0.161674,...,89.870453,89.800312,89.68158,,,True,True,True,8242600,Bullish
1,TSM,2023-10-12,2023-10-19,2023-10-17,2023-10-27,92.510002,94.760002,89.139999,85.989998,2.432169,...,88.668191,88.796211,89.332914,1.752143,1.921028,True,True,True,7182900,Bullish
2,TSM,2023-11-07,2024-03-08,NaT,2024-04-18,92.610001,158.399994,90.839996,132.270004,71.039837,...,89.443098,89.336216,89.420481,2.447858,2.873032,True,True,True,9226700,Bullish
3,TSM,2024-05-08,2024-07-11,NaT,2024-07-19,142.289993,193.470001,139.809998,165.770004,35.968803,...,137.733685,135.644524,115.632887,4.327858,3.451202,True,True,True,8601600,Bullish
4,TSM,2024-08-16,2024-08-19,2024-08-22,2024-09-03,175.440002,175.449997,167.190002,160.490005,0.005697,...,166.679978,165.379442,141.057911,8.234998,4.766382,True,True,True,11510300,Bearish


## Grid Search of Trading Parameters

In [258]:
def simulate_trades(df, stop_rule, offset, tp_coeffs, be_level):
    trades = []

    # Precompute volume quartile once
    vol_quartile_series = pd.qcut(df["Volume"], 4, labels=[1, 2, 3, 4])

    for i in range(1, len(df) - 2):
        # EMA crossover long signal
        if df.loc[i, "EMA_8"] > df.loc[i, "EMA_20"] and df.loc[i-1, "EMA_8"] <= df.loc[i-1, "EMA_20"]:
            crossover_idx = i
            next_idx = i + 1

            # Only enter if next candle makes new high
            if df.loc[next_idx, "High"] <= df.loc[crossover_idx, "High"]:
                continue

            entry_idx = next_idx
            entry_price = df.loc[entry_idx, "Open"]
            entry_close = df.loc[entry_idx, "Close"]
            atr = df.loc[crossover_idx, "ATR_14"]
            atr_pct = atr / entry_close * 100

            # Stop loss
            if stop_rule == "crossover":
                stop_loss = df.loc[crossover_idx, "Low"] - offset * atr
            else:
                stop_loss = df.loc[crossover_idx - 1, "Low"] - offset * atr

            risk = entry_price - stop_loss
            if risk <= 0:
                continue

            # EMA positions
            above_ema_34 = entry_close > df.loc[entry_idx, "EMA_34"]
            above_ema_50 = entry_close > df.loc[entry_idx, "EMA_50"]
            above_ema_200 = entry_close > df.loc[entry_idx, "EMA_200"]

            # Crossover color
            crossover_color = "Green" if entry_close > df.loc[crossover_idx, "Open"] else "Red"

            # Prepare TP prices and percentages
            tps = [(entry_price + coeff * risk, pct) for coeff, pct in tp_coeffs]
            remaining_position = 1.0
            partial_exit_done = False

            for j in range(entry_idx, len(df)):
                low, high, close = df.loc[j, "Low"], df.loc[j, "High"], df.loc[j, "Close"]

                # Move stop to breakeven after first partial exit
                if partial_exit_done and be_level is not None:
                    stop_loss = entry_price

                # Check TPs
                for idx, (tp_price, tp_pct) in enumerate(tps):
                    if remaining_position > 0 and high >= tp_price:
                        # Partial exit
                        position_pct = min(tp_pct, remaining_position)
                        remaining_position -= position_pct
                        partial_exit_done = True

                        trades.append({
                            'Datetime': df.loc[entry_idx, "Datetime"],
                            'Open': df.loc[entry_idx, "Open"],
                            'High': df.loc[entry_idx, "High"],
                            'Low': df.loc[entry_idx, "Low"],
                            'Close': entry_close,
                            'Adj Close': df.loc[entry_idx, "Adj Close"] if "Adj Close" in df.columns else entry_close,
                            'Volume': df.loc[entry_idx, "Volume"],
                            'Symbol': df.loc[entry_idx, "Symbol"] if "Symbol" in df.columns else None,
                            'EMA_8': df.loc[entry_idx, "EMA_8"],
                            'EMA_20': df.loc[entry_idx, "EMA_20"],
                            'EMA_34': df.loc[entry_idx, "EMA_34"],
                            'EMA_50': df.loc[entry_idx, "EMA_50"],
                            'EMA_200': df.loc[entry_idx, "EMA_200"],
                            'ATR_14': atr,
                            'ATR%': atr_pct,
                            'Above_EMA_34': above_ema_34,
                            'Above_EMA_50': above_ema_50,
                            'Above_EMA_200': above_ema_200,
                            'Volume_Quartile': int(vol_quartile_series.iloc[entry_idx]),
                            'Crossover_Color': crossover_color,
                            'EntryPrice': entry_price,
                            'StopLoss': stop_loss,
                            'Risk': risk,
                            'ExitDate': df.loc[j, "Datetime"],
                            'ExitPrice': tp_price,
                            'ExitReason': f'TP_{idx+1}',
                            'Return%': (tp_price - entry_price) / entry_price * 100,
                            'R_Multiple': (tp_price - entry_price) / risk,
                            'stop_rule': stop_rule,
                            'offset': offset,
                            'tp_set': tuple([c for c, _ in tp_coeffs]),
                            'breakeven': be_level
                        })

                # Check stop
                if remaining_position > 0 and low <= stop_loss:
                    trades.append({
                        'Datetime': df.loc[entry_idx, "Datetime"],
                        'Open': df.loc[entry_idx, "Open"],
                        'High': df.loc[entry_idx, "High"],
                        'Low': df.loc[entry_idx, "Low"],
                        'Close': entry_close,
                        'Adj Close': df.loc[entry_idx, "Adj Close"] if "Adj Close" in df.columns else entry_close,
                        'Volume': df.loc[entry_idx, "Volume"],
                        'Symbol': df.loc[entry_idx, "Symbol"] if "Symbol" in df.columns else None,
                        'EMA_8': df.loc[entry_idx, "EMA_8"],
                        'EMA_20': df.loc[entry_idx, "EMA_20"],
                        'EMA_34': df.loc[entry_idx, "EMA_34"],
                        'EMA_50': df.loc[entry_idx, "EMA_50"],
                        'EMA_200': df.loc[entry_idx, "EMA_200"],
                        'ATR_14': atr,
                        'ATR%': atr_pct,
                        'Above_EMA_34': above_ema_34,
                        'Above_EMA_50': above_ema_50,
                        'Above_EMA_200': above_ema_200,
                        'Volume_Quartile': int(vol_quartile_series.iloc[entry_idx]),
                        'Crossover_Color': crossover_color,
                        'EntryPrice': entry_price,
                        'StopLoss': stop_loss,
                        'Risk': risk,
                        'ExitDate': df.loc[j, "Datetime"],
                        'ExitPrice': stop_loss,
                        'ExitReason': 'Stop',
                        'Return%': (stop_loss - entry_price) / entry_price * 100,
                        'R_Multiple': (stop_loss - entry_price) / risk,
                        'stop_rule': stop_rule,
                        'offset': offset,
                        'tp_set': tuple([c for c, _ in tp_coeffs]),
                        'breakeven': be_level
                    })
                    remaining_position = 0
                    break

                # EMA cross back exit
                if remaining_position > 0 and df.loc[j, "EMA_8"] < df.loc[j, "EMA_20"]:
                    trades.append({
                        'Datetime': df.loc[entry_idx, "Datetime"],
                        'Open': df.loc[entry_idx, "Open"],
                        'High': df.loc[entry_idx, "High"],
                        'Low': df.loc[entry_idx, "Low"],
                        'Close': entry_close,
                        'Adj Close': df.loc[entry_idx, "Adj Close"] if "Adj Close" in df.columns else entry_close,
                        'Volume': df.loc[entry_idx, "Volume"],
                        'Symbol': df.loc[entry_idx, "Symbol"] if "Symbol" in df.columns else None,
                        'EMA_8': df.loc[entry_idx, "EMA_8"],
                        'EMA_20': df.loc[entry_idx, "EMA_20"],
                        'EMA_34': df.loc[entry_idx, "EMA_34"],
                        'EMA_50': df.loc[entry_idx, "EMA_50"],
                        'EMA_200': df.loc[entry_idx, "EMA_200"],
                        'ATR_14': atr,
                        'ATR%': atr_pct,
                        'Above_EMA_34': above_ema_34,
                        'Above_EMA_50': above_ema_50,
                        'Above_EMA_200': above_ema_200,
                        'Volume_Quartile': int(vol_quartile_series.iloc[entry_idx]),
                        'Crossover_Color': crossover_color,
                        'EntryPrice': entry_price,
                        'StopLoss': stop_loss,
                        'Risk': risk,
                        'ExitDate': df.loc[j, "Datetime"],
                        'ExitPrice': close,
                        'ExitReason': 'EMA Cross Back',
                        'Return%': (close - entry_price) / entry_price * 100,
                        'R_Multiple': (close - entry_price) / risk,
                        'stop_rule': stop_rule,
                        'offset': offset,
                        'tp_set': tuple([c for c, _ in tp_coeffs]),
                        'breakeven': be_level
                    })
                    remaining_position = 0
                    break

            # Final exit if still open
            if remaining_position > 0:
                trades.append({
                    'Datetime': df.loc[entry_idx, "Datetime"],
                    'Open': df.loc[entry_idx, "Open"],
                    'High': df.loc[entry_idx, "High"],
                    'Low': df.loc[entry_idx, "Low"],
                    'Close': entry_close,
                    'Adj Close': df.loc[entry_idx, "Adj Close"] if "Adj Close" in df.columns else entry_close,
                    'Volume': df.loc[entry_idx, "Volume"],
                    'Symbol': df.loc[entry_idx, "Symbol"] if "Symbol" in df.columns else None,
                    'EMA_8': df.loc[entry_idx, "EMA_8"],
                    'EMA_20': df.loc[entry_idx, "EMA_20"],
                    'EMA_34': df.loc[entry_idx, "EMA_34"],
                    'EMA_50': df.loc[entry_idx, "EMA_50"],
                    'EMA_200': df.loc[entry_idx, "EMA_200"],
                    'ATR_14': atr,
                    'ATR%': atr_pct,
                    'Above_EMA_34': above_ema_34,
                    'Above_EMA_50': above_ema_50,
                    'Above_EMA_200': above_ema_200,
                    'Volume_Quartile': int(vol_quartile_series.iloc[entry_idx]),
                    'Crossover_Color': crossover_color,
                    'EntryPrice': entry_price,
                    'StopLoss': stop_loss,
                    'Risk': risk,
                    'ExitDate': df.loc[len(df)-1, "Datetime"],
                    'ExitPrice': close,
                    'ExitReason': 'Open',
                    'Return%': (close - entry_price) / entry_price * 100,
                    'R_Multiple': (close - entry_price) / risk,
                    'stop_rule': stop_rule,
                    'offset': offset,
                    'tp_set': tuple([c for c, _ in tp_coeffs]),
                    'breakeven': be_level
                })

    return pd.DataFrame(trades)

In [267]:
# def grid_search(df, stop_rules, offsets, tp_sets, be_levels):
#     all_trades = []
#     for stop_rule, offset, tp_set, be in itertools.product(stop_rules, offsets, tp_sets, be_levels):
#         trades = simulate_trades(df, stop_rule, offset, tp_set, be)
#         if not trades.empty and not trades.dropna(axis=1, how="all").empty:
#             all_trades.append(trades)

#     if all_trades:
#         return pd.concat(all_trades, ignore_index=True)
#     else:
#         return pd.DataFrame()
    
def grid_search(df, stop_rules, offsets, tp_sets, be_levels):
    all_trades = []

    # Iterate through all parameter combinations
    for stop_rule, offset, tp_set, be_level in itertools.product(stop_rules, offsets, tp_sets, be_levels):
        # Convert TP set to include position percentages if length > 1
        if len(tp_set) == 1:
            tp_coeffs = [(tp_set[0], 1.0)]  # full position
        else:
            pct = 1.0 / len(tp_set)
            tp_coeffs = [(tp, pct) for tp in tp_set]

        # Run simulation
        trades_df = simulate_trades(df, stop_rule, offset, tp_coeffs, be_level)

        all_trades.append(trades_df)

    return pd.concat(all_trades, ignore_index=True)


In [283]:
# Define parameter grid
stop_rules = ["crossover", "previous"]
offsets = [0.0, 0.5, 1.0]  # ATR multipliers
tp_sets = [[1.0, 2.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0, 5.0]]
be_levels = [None, 1.0]  # breakeven at 1R or disabled

# Run grid search across all selected stocks
all_results = []

for symbol, df in selected_stock_data.items():
    print(f"Running grid search for {symbol}...")
    trades = grid_search(df, stop_rules, offsets, tp_sets, be_levels)
    all_results.append(trades)


Running grid search for TSM...
Running grid search for WMT...
Running grid search for JPM...
Running grid search for LLY...
Running grid search for V...
Running grid search for ORCL...
Running grid search for MA...
Running grid search for XOM...
Running grid search for PG...
Running grid search for JNJ...
Running grid search for HD...
Running grid search for NVO...
Running grid search for ABBV...
Running grid search for KO...
Running grid search for UNH...
Running grid search for PM...
Running grid search for BABA...
Running grid search for GE...
Running grid search for IBM...
Running grid search for CRM...
Running grid search for CVX...
Running grid search for WFC...
Running grid search for ABT...
Running grid search for NVS...
Running grid search for MCD...
Running grid search for DIS...
Running grid search for MS...
Running grid search for AXP...
Running grid search for SHEL...
Running grid search for MRK...
Running grid search for ACN...
Running grid search for RTX...
Running grid 

In [284]:
trade_results = pd.concat(all_results, ignore_index=True)

In [285]:
summary = trade_results.groupby(
    ["stop_rule", "offset", "tp_set", "breakeven"]
).agg(EV=("R_Multiple", "mean"),
       Winrate=("Return%", lambda x: (x > 0).mean()),
       Trades=("Return%", "count")).reset_index()

In [286]:
summary.sort_values('EV', ascending=False)

Unnamed: 0,stop_rule,offset,tp_set,breakeven,EV,Winrate,Trades
3,crossover,0.0,"(1.0, 2.0, 3.0, 4.0, 5.0)",1.0,1.055131,0.793328,1229
15,previous,0.0,"(1.0, 2.0, 3.0, 4.0, 5.0)",1.0,0.976121,0.813135,1279
7,crossover,0.5,"(1.0, 2.0, 3.0, 4.0, 5.0)",1.0,0.916937,0.82448,1299
2,crossover,0.0,"(1.0, 2.0, 3.0, 4.0)",1.0,0.891039,0.758368,956
19,previous,0.5,"(1.0, 2.0, 3.0, 4.0, 5.0)",1.0,0.863032,0.820919,1262
14,previous,0.0,"(1.0, 2.0, 3.0, 4.0)",1.0,0.846176,0.775051,978
11,crossover,1.0,"(1.0, 2.0, 3.0, 4.0, 5.0)",1.0,0.846109,0.812398,1226
1,crossover,0.0,"(1.0, 2.0, 3.0)",1.0,0.829484,0.748954,956
13,previous,0.0,"(1.0, 2.0, 3.0)",1.0,0.805037,0.771984,978
6,crossover,0.5,"(1.0, 2.0, 3.0, 4.0)",1.0,0.794525,0.782875,981


In [279]:
min_crossover_vol = 2_000_000 # minimum volume at crossover
min_price = 50 # minimum entry price 

# Filter trades combining trend, volatility, price, volume, and candle color 
trade_results_filtered = trade_results[ 
    (trade_results["Above_EMA_34"]) & # trend filter 
    (trade_results["ATR%"] < trade_results["ATR%"].median()) & # low volatility 
    (trade_results["EntryPrice"] >= min_price) & # price filter 
    (trade_results["Volume"] >= min_crossover_vol) 
]

In [280]:
summary_filtered = trade_results_filtered.groupby(
    ["stop_rule", "offset", "tp_set", "breakeven"]
).agg(EV=("R_Multiple", "mean"),
       Winrate=("Return%", lambda x: (x >= 0).mean()),
       Trades=("Return%", "count")).reset_index()

In [281]:
summary_filtered.sort_values('EV', ascending=False)

Unnamed: 0,stop_rule,offset,tp_set,breakeven,EV,Winrate,Trades
2,crossover,0.0,"(1.0, 2.0, 3.0)",1.0,0.854664,0.84456,386
14,previous,0.0,"(1.0, 2.0, 3.0)",1.0,0.826421,0.863636,396
6,crossover,0.5,"(1.0, 2.0, 3.0)",1.0,0.796549,0.86445,391
18,previous,0.5,"(1.0, 2.0, 3.0)",1.0,0.793336,0.851562,384
10,crossover,1.0,"(1.0, 2.0, 3.0)",1.0,0.768657,0.854167,384
22,previous,1.0,"(1.0, 2.0, 3.0)",1.0,0.687765,0.805085,354
1,crossover,0.0,"(1.0, 2.0)",1.0,0.645149,0.78481,237
13,previous,0.0,"(1.0, 2.0)",1.0,0.628842,0.790984,244
9,crossover,1.0,"(1.0, 2.0)",1.0,0.574533,0.763713,237
17,previous,0.5,"(1.0, 2.0)",1.0,0.570513,0.758475,236


### All Trades Metrics

In [162]:
def analyse_strategy(trades_df: pd.DataFrame) -> dict:
    """
    Analyse performance of crossover strategy from trades dataframe.

    Parameters
    ----------
    trades_df : pd.DataFrame
        Must contain columns:
        ['Symbol', 'Entry Date', 'Max Price Date', 'Stop Loss Date',
         'EMA Cross Back Date', 'Entry Price', 'Max Price', 'Stop Loss',
         'Crossover Price', 'Max%Gain', 'Risk%', 'Crossover Change%', 'EMA_8',
         'EMA_20', 'EMA_34', 'EMA_50', 'EMA_200', 'ATR_14', 'ATR%',
         'Above_EMA_34', 'Above_EMA_50', 'Above_EMA_200', 'Crossover_Volume',
         'Crossover_Color', 'ATR_Bin', 'Volume_Quartile']

    Returns
    -------
    results : dict
        Dictionary containing descriptive stats, grouped performance, and 
        filters for stock selection.
    """
    median_atr = trades_df["ATR%"].median()
    trades_df["ATR_Bin"] = np.where(trades_df["ATR%"] <= median_atr, "Low ATR", "High ATR")

    trades_df["Volume_Quartile"] = pd.qcut(trades_df["Crossover_Volume"], 4, labels=["Q1-Low", "Q2", "Q3", "Q4-High"])
    trades_df.groupby("Volume_Quartile")["Max%Gain"].agg(["mean", "median", "count"])

    results = {}

    # --- General descriptive stats ---
    desc_cols = ["Max%Gain", "Risk%", "Crossover Change%", "ATR%", "Crossover_Volume"]
    results["descriptive"] = trades_df[desc_cols].describe().T

    # --- EMA filters ---
    results["ema_filters"] = {
        "Above_EMA_34": trades_df.groupby("Above_EMA_34")["Max%Gain"].agg(["mean", "median", "count"]),
        "Above_EMA_50": trades_df.groupby("Above_EMA_50")["Max%Gain"].agg(["mean", "median", "count"]),
        "Above_EMA_200": trades_df.groupby("Above_EMA_200")["Max%Gain"].agg(["mean", "median", "count"]),
    }

    # --- ATR bins ---
    results["atr_bins"] = trades_df.groupby("ATR_Bin")["Max%Gain"].agg(["mean", "median", "count"])

    # --- Volume quartiles ---
    results["volume_quartiles"] = trades_df.groupby("Volume_Quartile")["Max%Gain"].agg(["mean", "median", "count"])

    # --- Symbol-level summary (for stock selection) ---
    symbol_perf = trades_df.groupby("Symbol")["Max%Gain"].agg(
        mean="mean", median="median", count="count"
    ).sort_values(by="mean", ascending=False)
    results["symbols"] = symbol_perf

    # --- Crossover color effect ---
    results["crossover_color"] = trades_df.groupby("Crossover_Color")["Max%Gain"].agg(["mean", "median", "count"])

    return results


In [164]:
analyse_strategy(trades_analysis_concat)

  trades_df.groupby("Volume_Quartile")["Max%Gain"].agg(["mean", "median", "count"])
  results["volume_quartiles"] = trades_df.groupby("Volume_Quartile")["Max%Gain"].agg(["mean", "median", "count"])


{'descriptive':                     count          mean           std         min  \
 Max%Gain           4785.0  7.939652e+00  1.365139e+01    0.000000   
 Risk%              4785.0  3.286689e+00  2.319626e+00    0.015205   
 Crossover Change%  4785.0 -2.286507e-03  1.081528e-01   -0.340917   
 ATR%               4674.0  2.521519e+00  9.292390e-01    0.327271   
 Crossover_Volume   4785.0  4.505919e+06  8.377225e+06  400.000000   
 
                             25%           50%           75%           max  
 Max%Gain           0.000000e+00  2.942086e+00  1.023357e+01  2.668175e+02  
 Risk%              1.766663e+00  2.766320e+00  4.210398e+00  3.044335e+01  
 Crossover Change% -5.365011e-02 -2.988986e-02  1.187774e-02  1.267610e+00  
 ATR%               1.862929e+00  2.315447e+00  2.964524e+00  1.000099e+01  
 Crossover_Volume   1.404000e+06  2.423400e+06  4.550200e+06  3.186799e+08  ,
 'ema_filters': {'Above_EMA_34':                   mean    median  count
  Above_EMA_34             

In [165]:
analyse_strategy(trades_analysis_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trades_df["ATR_Bin"] = np.where(trades_df["ATR%"] <= median_atr, "Low ATR", "High ATR")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trades_df["Volume_Quartile"] = pd.qcut(trades_df["Crossover_Volume"], 4, labels=["Q1-Low", "Q2", "Q3", "Q4-High"])
  trades_df.groupby("Volume_Quartile")["Max%Gain"].agg(["mean", "median", "count"])
  results["volume_quartiles"] = trades_df.groupby("Volume_Quartile")["Max%Gain"].agg(["mean", "median", "count"])


{'descriptive':                     count          mean           std           min  \
 Max%Gain           1196.0  6.016831e+00  8.446975e+00  0.000000e+00   
 Risk%              1196.0  2.873125e+00  2.048991e+00  1.520544e-02   
 Crossover Change%  1196.0 -5.777076e-03  7.665106e-02 -2.245652e-01   
 ATR%               1196.0  1.845242e+00  2.962897e-01  3.272713e-01   
 Crossover_Volume   1196.0  5.862691e+06  6.895892e+06  2.004200e+06   
 
                             25%           50%           75%           max  
 Max%Gain           1.389292e-02  2.653354e+00  8.744887e+00  6.384811e+01  
 Risk%              1.645459e+00  2.454113e+00  3.634923e+00  3.044335e+01  
 Crossover Change% -4.678724e-02 -2.712865e-02  1.100464e-02  5.135851e-01  
 ATR%               1.639105e+00  1.866737e+00  2.081843e+00  2.314494e+00  
 Crossover_Volume   2.664875e+06  3.703550e+06  6.214825e+06  9.421490e+07  ,
 'ema_filters': {'Above_EMA_34':                   mean    median  count
  Above_EMA_34 