<a href="https://colab.research.google.com/github/jhenningsen/Equity_Analysis/blob/main/LangStudio/SMA_Model_Backtest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance
!pip install pandas_ta
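# Optional: uncomment the next line to force-reinstall pinned pandas/numpy versions.
# !pip install pandas==2.2.2 numpy==1.26.0 --force-reinstall
# If you run the pinned reinstall above, restart the Colab runtime afterwards (Runtime -> Restart runtime) to apply the changes.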

Collecting pandas==2.2.2
  Downloading pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting numpy==1.26.0
  Downloading numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.5/58.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-dateutil>=2.8.2 (from pandas==2.2.2)
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas==2.2.2)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas==2.2.2)
  Downloading tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.2.2)
  Downloading six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [10]:
import pandas as pd
import yfinance as yf
import pandas_ta as ta
import numpy as np

def run_backtest(symbol, sma_long=200, sma_short=5):
    """Backtest the SMA / Bollinger "value zone" entry signal for one ticker.

    Downloads five years of daily, auto-adjusted prices with yfinance, flags
    every day on which all three entry conditions hold, and scores each signal
    by the close-to-close return over the following 5 rows (trading days).

    Entry conditions, all evaluated on the daily close:
      * Uptrend    - close is above the ``sma_long``-period SMA.
      * Value zone - close is at or above the lower Bollinger band (20, 2) and
                     at or below the midpoint between the lower and middle bands.
      * Momentum   - close is above the ``sma_short``-period SMA.

    Args:
        symbol: Ticker passed to ``yf.download``.
        sma_long: Window of the trend SMA; also the minimum number of rows
            required before the symbol is evaluated at all.
        sma_short: Window of the momentum SMA.

    Returns:
        ``None`` if the download is empty or shorter than ``sma_long``, if the
        Bollinger bands could not be computed, or if any exception was raised
        (the error is printed). Otherwise a dict with the keys ``symbol``,
        ``trades``, ``avg_return`` and ``win_rate``.

        Only signals whose 5-day forward window has completed are counted.
        Signals in the last 5 rows have no forward return yet; counting them
        would inflate ``trades`` and score them as losses in ``win_rate``.
        When no completed trade exists the dict reports ``trades == 0``,
        ``avg_return == 0`` and ``win_rate`` as NaN.
    """
    try:
        # 1. Download Data
        # We use 'auto_adjust=True' to ensure 'Close' is the 'Adj Close'
        df = yf.download(symbol, period="5y", interval="1d", progress=False, auto_adjust=True)

        # 2. Safety Check: not enough history to compute the long SMA
        if df.empty or len(df) < sma_long:
            return None

        # Keep only the first column level if the download is MultiIndex-ed
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)

        # 3. Calculate Indicators
        # We use .copy() to avoid SettingWithCopy warnings in Pandas
        df = df.copy()
        df['SMA_L'] = ta.sma(df['Close'], length=sma_long)
        df['SMA_S'] = ta.sma(df['Close'], length=sma_short)

        # Calculate Bollinger Bands
        bb = ta.bbands(df['Close'], length=20, std=2)

        # 4. Handle NoneType/Calculation Failures
        if bb is None or bb.empty:
            return None

        # Dynamically find BB columns to avoid naming errors
        bbl_col = [c for c in bb.columns if c.startswith('BBL')][0]
        bbm_col = [c for c in bb.columns if c.startswith('BBM')][0]

        df['Low_B'] = bb[bbl_col]
        df['Mid_B'] = bb[bbm_col]
        df['Value_Zone_Limit'] = (df['Mid_B'] + df['Low_B']) / 2

        # 5. Apply Strategy Logic (Vectorized)
        condition_1 = df['Close'] > df['SMA_L']  # Uptrend
        condition_2 = (df['Close'] >= df['Low_B']) & (df['Close'] <= df['Value_Zone_Limit'])  # Value Zone
        condition_3 = df['Close'] > df['SMA_S']  # Momentum

        # Create Signal Column (1 for Buy, 0 for Ignore)
        df['Signal'] = np.where(condition_1 & condition_2 & condition_3, 1, 0)

        # 6. Performance Calculation
        # Return from the signal day's close to the close 5 rows later
        df['Next_5D_Return'] = df['Close'].pct_change(5).shift(-5)

        # Keep signal rows only, and drop those whose forward window is still
        # open (NaN) so every metric below shares the same denominator.
        completed = df.loc[df['Signal'] == 1, 'Next_5D_Return'].dropna()

        if completed.empty:
            # Same keys as the regular result so downstream frames always
            # have a 'win_rate' column.
            return {"symbol": symbol, "trades": 0, "avg_return": 0, "win_rate": float("nan")}

        return {
            "symbol": symbol,
            "trades": len(completed),
            "avg_return": completed.mean(),
            "win_rate": (completed > 0).mean()
        }

    except Exception as e:
        # Best-effort batch run: report the failure and let the caller skip it
        print(f"Error processing {symbol}: {e}")
        return None

# --- Run on your list ---
symbols_to_test = [
    "TSLA", "SPY", "QQQ", "NVDA", "META", "MSTR", "COIN", "GLD", "AMD", "SLV",
    "PLTR", "MSFT", "ORCL", "IWM", "AAPL", "AVGO", "AMZN", "UNH", "NFLX", "MU",
    "GOOGL", "TSM", "LULU", "CRWV", "GOOG", "IBIT", "JPM", "HOOD", "GDX", "ADBE",
    "NOW", "APP", "GS", "WOLF", "BABA", "IREN", "COST", "INTC", "LLY", "CRCL",
    "CVNA", "SNDK", "OKLO", "SMH", "BA", "BMNR", "ASTS", "NBIS", "SOFI", "BE"
]
results = []

for s in symbols_to_test:
    # run_backtest returns None when a symbol could not be evaluated
    res = run_backtest(s)
    if res:
        results.append(res)

# Convert results list to a DataFrame.
# Reindexing guarantees that every metric column exists even when no symbol
# was evaluated or when zero-trade results do not carry a 'win_rate' key.
performance_df = pd.DataFrame(results).reindex(
    columns=['symbol', 'trades', 'avg_return', 'win_rate']
)

# View as a Summary Table
print(performance_df)

if not performance_df.empty:
    # Symbols with zero trades have no win rate and must not be reported as
    # having produced signals, so aggregate over traded symbols only.
    traded_df = performance_df[performance_df['trades'] > 0]

    # 2. Calculate Portfolio-Wide Metrics
    # We weight the average return by the number of trades per ticker
    # for a more accurate 'Expected Value' per trade.
    total_trades = traded_df['trades'].sum()

    if total_trades > 0:
        overall_avg_return = (traded_df['avg_return'] * traded_df['trades']).sum() / total_trades
        overall_win_rate = (traded_df['win_rate'] * traded_df['trades']).sum() / total_trades

        print("\n--- PORTFOLIO AGGREGATE RESULTS ---")
        print(f"Total Symbols Tested: {len(symbols_to_test)}")
        print(f"Symbols with Signals: {len(traded_df)}")
        print(f"Total Trades Executed: {total_trades}")
        print(f"Portfolio Win Rate:    {overall_win_rate:.2%}")
        print(f"Avg Return Per Trade:  {overall_avg_return:.2%}")
        print("------------------------------------")

        # Display the top 10 best performing tickers.
        # Kept inside the guard: ranking is meaningless (and previously raised
        # KeyError on a missing 'win_rate' column) when nothing traded.
        print("\nTop 10 Performers (by Win Rate):")
        print(traded_df.sort_values(by='win_rate', ascending=False).head(10))
    else:
        print("No trades were triggered for the given parameters.")


   symbol  trades  avg_return  win_rate
0    TSLA       8    0.030992  0.500000
1     SPY       7   -0.004492  0.571429
2     QQQ       8    0.008489  0.625000
3    NVDA      10    0.058430  0.900000
4    META       8    0.002968  0.375000
5    MSTR      14    0.014647  0.428571
6    COIN       9    0.013286  0.666667
7     GLD       7   -0.012875  0.285714
8     AMD      12    0.002654  0.666667
9     SLV      10    0.005187  0.600000
10   PLTR      11    0.020021  0.636364
11   MSFT       8   -0.005575  0.375000
12   ORCL      12    0.015178  0.500000
13    IWM       6    0.014699  0.833333
14   AAPL      16   -0.027740  0.062500
15   AVGO      12    0.036824  0.916667
16   AMZN       9   -0.042543  0.222222
17    UNH      10    0.005413  0.500000
18   NFLX       9    0.019781  0.444444
19     MU      11    0.000072  0.454545
20  GOOGL      14   -0.005655  0.428571
21    TSM      14   -0.007622  0.500000
22   LULU       6   -0.015980  0.333333
23   CRWV       0    0.000000       NaN


In [13]:
import pandas as pd
import yfinance as yf
import pandas_ta as ta

def get_detailed_trade_log(symbol, sma_long=200, sma_short=5):
    """Return one row per entry signal for ``symbol`` with its 5-day outcome.

    Uses the same entry rule on the daily close as the aggregate backtest:
    close above the ``sma_long`` SMA, close between the lower Bollinger band
    (20, 2) and the midpoint of the lower and middle bands, and close above
    the ``sma_short`` SMA.

    Args:
        symbol: Ticker passed to ``yf.download``.
        sma_long: Window of the trend SMA; also the minimum number of rows
            required before the symbol is evaluated.
        sma_short: Window of the momentum SMA.

    Returns:
        A DataFrame with the former index as the first column followed by
        ``Close``, ``SMA_L``, ``Value_Zone_Limit``, ``Next_5D_Return`` and
        ``Symbol``. ``Next_5D_Return`` is NaN for signals with fewer than 5
        later rows. An empty DataFrame is returned when the download is empty
        or too short, when the Bollinger bands could not be computed, or when
        an exception was raised (the error is printed).
    """
    try:
        df = yf.download(symbol, period="5y", interval="1d", progress=False, auto_adjust=True)
        if df.empty or len(df) < sma_long:
            return pd.DataFrame()

        # Keep only the first column level if the download is MultiIndex-ed
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)

        df = df.copy()
        # The flattened columns keep the level name (e.g. "Price"), which would
        # otherwise show up as a stray header in the displayed trade log.
        df.columns.name = None

        # Indicators
        df['SMA_L'] = ta.sma(df['Close'], length=sma_long)
        df['SMA_S'] = ta.sma(df['Close'], length=sma_short)
        bb = ta.bbands(df['Close'], length=20, std=2)
        # Same guard as run_backtest: treat an empty result like a failed one
        # instead of reporting it as an opaque indexing error below.
        if bb is None or bb.empty:
            return pd.DataFrame()

        bbl_col = [c for c in bb.columns if c.startswith('BBL')][0]
        bbm_col = [c for c in bb.columns if c.startswith('BBM')][0]

        df['Low_B'] = bb[bbl_col]
        df['Mid_B'] = bb[bbm_col]
        df['Value_Zone_Limit'] = (df['Mid_B'] + df['Low_B']) / 2

        # Strategy Logic
        cond_uptrend = df['Close'] > df['SMA_L']
        cond_value_zone = (df['Close'] >= df['Low_B']) & (df['Close'] <= df['Value_Zone_Limit'])
        cond_momentum = df['Close'] > df['SMA_S']

        df['Signal'] = (cond_uptrend & cond_value_zone & cond_momentum).astype(int)

        # Return from the signal day's close to the close 5 rows later
        df['Next_5D_Return'] = df['Close'].pct_change(5).shift(-5)

        # Filter for Signal Dates Only
        log_columns = ['Close', 'SMA_L', 'Value_Zone_Limit', 'Next_5D_Return']
        trade_log = df.loc[df['Signal'] == 1, log_columns].copy()
        trade_log['Symbol'] = symbol
        trade_log = trade_log.reset_index()  # Moves Date from Index to a Column

        return trade_log

    except Exception as e:
        # Best-effort: report the failure and hand back an empty log
        print(f"Error on {symbol}: {e}")
        return pd.DataFrame()

# --- Execution ---
all_trades_list = []

test_symbol = ["NVDA"]

for s in test_symbol:
    # Use the detailed function we just created
    log = get_detailed_trade_log(s)

    # Type Safety Check: Only append if it's a DataFrame and not empty
    if isinstance(log, pd.DataFrame) and not log.empty:
        all_trades_list.append(log)
    else:
        # This helps you see which tickers didn't produce signals
        print(f"No signals or data for {s}")

# Check if we have anything to concatenate
if all_trades_list:
    master_trade_log = pd.concat(all_trades_list, ignore_index=True)

    # Format for readability on a display-only copy, so master_trade_log keeps
    # its numeric returns and stays usable for further analysis.
    display_log = master_trade_log.copy()

    # Note: We check if the column exists before formatting to avoid errors
    if 'Next_5D_Return' in display_log.columns:
        display_log['Next_5D_Return'] = display_log['Next_5D_Return'].apply(
            lambda x: f"{x:.2%}" if pd.notnull(x) and isinstance(x, (int, float)) else "N/A"
        )

    # View the results
    print(f"\nSuccessfully generated log with {len(master_trade_log)} trades.")
    display(display_log.head(20)) # display() looks better in Colab than print()
else:
    print("The trade log is empty. No signals were found across the portfolio.")


Successfully generated log with 10 trades.


Price,Date,Close,SMA_L,Value_Zone_Limit,Next_5D_Return,Symbol
0,2022-01-28,22.795841,22.171628,23.383199,6.48%,NVDA
1,2023-09-25,42.194782,31.603045,42.856069,6.06%,NVDA
2,2023-09-26,41.883984,31.731945,42.513341,3.83%,NVDA
3,2024-04-23,82.380966,57.242867,82.70161,4.83%,NVDA
4,2024-08-08,104.925087,81.702166,104.93103,17.04%,NVDA
5,2024-09-09,106.424454,89.351944,109.673997,9.69%,NVDA
6,2024-09-10,108.053741,89.642654,109.573762,6.94%,NVDA
7,2025-09-09,170.74086,139.477669,171.258526,2.42%,NVDA
8,2025-11-24,182.539841,152.961049,183.651785,-0.60%,NVDA
9,2025-11-26,180.249954,153.419864,181.283025,1.74%,NVDA
