# Test Strategies against S&P 500 Baseline


In [1]:
import sys
sys.executable

'/home/hilton/Coding/Dashboard/backtest-env/bin/python'

# Section 0 - Config and Imports

In [2]:
import numpy as np
import pandas as pd
import datetime
import yfinance as yf
import time

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from scipy import stats
from typing import Dict, Callable, Optional

import pandas as pd
from pathlib import Path
from datetime import date
import requests


import warnings
warnings.filterwarnings("ignore")


# -------------------------
# Global constants
# -------------------------
TRADING_DAYS = 252
RISK_FREE_RATE = 0.0

PLOT_STYLE = {
    "figure.figsize": (14, 6),
    "axes.grid": True,
    "grid.alpha": 0.3,
    "axes.spines.top": False,
    "axes.spines.right": False
}

plt.rcParams.update(PLOT_STYLE)



In [17]:
def downloadTickerUniverse():    
    BASE_DIR = Path.home() / "data" / "universes"
    BASE_DIR.mkdir(parents=True, exist_ok=True)

    SEC_URL = "https://www.sec.gov/files/company_tickers.json"
    SEC_PATH = BASE_DIR / "company_tickers.json"

    headers = {
        "User-Agent": "researcher reynoldsb200@gmail.com"
    }

    if not SEC_PATH.exists():
        r = requests.get(SEC_URL, headers=headers, timeout=(5, 30))
        r.raise_for_status()
        SEC_PATH.write_bytes(r.content)

    sec_data = pd.read_json(SEC_PATH)

    sec_data = sec_data.T
    sec_data["ticker"] = sec_data["ticker"].str.upper()

    sec_data.to_csv(BASE_DIR / "sec_ticker_cik_raw.csv", index=False)

    universe = (
        sec_data["ticker"]
        .dropna()
        .str.strip()
        .unique()
    )

    universe = pd.DataFrame({"ticker": universe})

    universe_path = BASE_DIR / "us_equity_universe_master.csv"
    universe.to_csv(universe_path, index=False)

    #print(f"Universe size: {len(universe)}")
    #print(universe.head())

    return universe


In [4]:
def extractTicker(tickerNames):
    errors = {}
    data = {}
    for ticker in tickerNames:
        time.sleep(0.02)
        df = yf.download(ticker, period="25y", progress=False)

        if df.empty or df.filter(like="Close").isna().all().all():
            errors[ticker] = "invalid ticker or no usable data"
            continue
        
        startDate = df.index.min()
        RequiredstartDate = pd.Timestamp.today() - pd.DateOffset(years=2)

        if startDate > RequiredstartDate:
            errors[ticker] = "insufficient history"
            continue

        data[ticker] = {
            "prices": df
        }

    return data, errors


In [5]:
def extractFundementals(tickerNames):
    f_errors = {}
    fundementals = []
    
    asof = pd.Timestamp.today()

    for ticker in tickerNames:
        try:
            info = yf.Ticker(ticker).info
        except Exception as e:
            f_errors[ticker] = f"fetch info failed: {e}"
            continue
    
        PE = info.get("trailingPE", np.nan)
        Market_Cap = info.get("marketCap", np.nan)
        BookValue_PerShare = info.get("bookValue", np.nan)
        Intangible = info.get("intangibleAssets", np.nan)
        Shares_Outstanding = info.get("sharesOutstanding", np.nan)

        if(pd.notna(BookValue_PerShare)
        and pd.notna(Shares_Outstanding)
        and pd.notna(Market_Cap)):
            Book = float(BookValue_PerShare) * float(Shares_Outstanding)
            BooktoMarket = Book / float(Market_Cap)
        else:
            Book, BooktoMarket = np.nan, np.nan

        if (pd.notna(Book)
            and pd.notna(Intangible)):
            tangibleBook = Book - float(Intangible)
            tangibleBooktoMarket = tangibleBook/float(Market_Cap)
        else: 
            tangibleBook, tangibleBooktoMarket = np.nan, np.nan

        fundementals.append({
            "ticker": ticker,
            "PE": PE,
            "Market_Cap": Market_Cap,
            "BookValue_PerShare": BookValue_PerShare,
            "Intangible": Intangible,
            "Shares_Outstanding": Shares_Outstanding,
            "Book": Book,
            "BooktoMarket": BooktoMarket,
            "Tangible Book": tangibleBook,
            "TangibleBooktoMarket": tangibleBooktoMarket
        })

    fundementals_df = pd.DataFrame(fundementals).set_index("ticker")
    return fundementals_df, f_errors    

    

In [None]:
def filterUniverseByMarketCap(
    universe,
    mcap_max=None,
    price_min=None,
    volume_min=None,
    sleep=0.02
):
    import yfinance as yf
    import time
    import pandas as pd

    rows = []

    for t in universe:
        try:
            info = yf.Ticker(t).fast_info
            mc = info.get("market_cap")

            if mc is None:
                continue
            if mcap_max is not None and mc >= mcap_max:
                continue
            if price_min is not None and info.get("last_price", 0) < price_min:
                continue
            if volume_min is not None and info.get("last_volume", 0) < volume_min:
                continue

            rows.append({"ticker": t, "Market_Cap": mc})
            time.sleep(sleep)

        except Exception:
            continue
        
    if not rows:
        return pd.DataFrame(columns=["Market_Cap"])
    
    return pd.DataFrame(rows).set_index("ticker")




In [7]:
def dynamicScreen(tickers, pe_max=None, tbm_min=None, mcap_max=None, pe_or_tbm=False):
    #downloadTickerUniverse()
    

    fundementals, f_errors = extractFundementals(tickers)
    f=fundementals.copy()

    core = pd.Series(True, index=f.index)

    if pe_max is not None and "PE" in f.columns:
        pe_cond = f["PE"].notna() & (f["PE"] < pe_max)
    else:
        pe_cond = pd.Series(True, index=f.index)
    
    if tbm_min is not None and "TangibleBooktoMarket" in f.columns: 
        tbm_cond = (
        f["TangibleBooktoMarket"].notna() &
        (f["TangibleBooktoMarket"] >= tbm_min)
        )
    else: 
        tbm_cond = pd.Series(True, index=f.index)

    if pe_or_tbm: 
        core &= (pe_cond | tbm_cond)
    else:
        core &= (pe_cond & tbm_cond)
    
    if mcap_max is not None and "Market_Cap" in f.columns:
        core &= f["Market_Cap"] < mcap_max
    
    selected = f.loc[core]

    return selected.index.tolist(), selected    

In [8]:
tickerNames = ["^GSPC", "KINS"]

data, errors = extractTicker(tickerNames)
fundementals, f_errors = extractFundementals(tickerNames)

In [9]:
data

{'^GSPC': {'prices': Price             Close         High          Low         Open      Volume
  Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC
  Date                                                                      
  2001-01-22  1342.900024  1353.619995  1333.839966  1342.540039  1164000000
  2001-01-23  1360.400024  1362.900024  1339.630005  1342.900024  1232600000
  2001-01-24  1364.300049  1369.750000  1357.280029  1360.400024  1309000000
  2001-01-25  1357.510010  1367.349976  1354.630005  1364.300049  1258000000
  2001-01-26  1354.949951  1357.510010  1342.750000  1357.510010  1098000000
  ...                 ...          ...          ...          ...         ...
  2026-01-12  6977.270020  6986.330078  6934.069824  6944.120117  5019040000
  2026-01-13  6963.740234  6985.830078  6938.770020  6977.410156  5091730000
  2026-01-14  6926.600098  6941.299805  6885.740234  6937.410156  5530830000
  2026-01-15  6944.470215  6979.339844  6937.930176  6969

In [10]:
fundementals.head()

Unnamed: 0_level_0,PE,Market_Cap,BookValue_PerShare,Intangible,Shares_Outstanding,Book,BooktoMarket,Tangible Book,TangibleBooktoMarket
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
^GSPC,,,,,,,,,
KINS,7.004445,222963472.0,7.609,,14147428.0,107647800.0,0.482805,,


# Section 1 - Base Feature Engineering

### Daily Return

$$
r_t = \frac{C_t}{C_{t-1}} - 1
$$


In [11]:
df = data["^GSPC"]["prices"]
df["Daily Return"] = df["Close"].pct_change()
df = df.dropna()

### Equity Curve

$$
E_t = \prod_{i=1}^{t} \left(1 + r_i\right)
$$
$$
E_0 = 1
$$



In [12]:
df["Equity Curve"] = (1 + df["Daily Return"]).cumprod()

### Rolling Peak Equity -> Drawdown Series -> Max Drawdown

$$
M_t = \max_{s \le t} E_s
$$

$$
\text{DD}_t = \frac{E_t}{M_t} - 1
$$
$$
\text{Max Drawdown} = \min_t \left( \text{DD}_t \right)
$$


In [13]:
df["rolling_max"] = df["Equity Curve"].cummax()

df["Drawdown Series"] = df["Equity Curve"]/df["rolling_max"] - 1

max_drawdown = df["Drawdown Series"].min()

df[["rolling_max", "Drawdown Series"]]

Price,rolling_max,Drawdown Series
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2001-01-23,1.013031,0.000000
2001-01-24,1.015936,0.000000
2001-01-25,1.015936,-0.004977
2001-01-26,1.015936,-0.006853
2001-01-29,1.015936,-0.000095
...,...,...
2026-01-12,5.195673,0.000000
2026-01-13,5.195673,-0.001939
2026-01-14,5.195673,-0.007262
2026-01-15,5.195673,-0.004701


### Drawdown Duration

$$
D_t =
\begin{cases}
0, & \text{if } E_t = \max_{s \le t} E_s \\
D_{t-1} + 1, & \text{if } E_t < \max_{s \le t} E_s
\end{cases}
$$


In [14]:
dd = df["Drawdown Series"].to_numpy()
duration = np.zeros(len(df), dtype=int)

for i in range(1, len(df)):
    duration[i] = duration[i-1] + 1 if dd[i] < 0 else 0

df["Drawdown Duration"] = duration
max_drawdown_duration = duration.max()
max_drawdown_duration


np.int64(1439)

### Moving Average

$$
\text{MA}_{N,t} = \frac{1}{N} \sum_{i=0}^{N-1} C_{t-i}
$$


In [15]:
df["MA_200"] = df["Close"].rolling(200).mean()

# Section 2 - Basic Strategy Implementation & Comparison

### Strategy 1 - Naive Price Theoretical Investing 

#### Conditions:

#### P/E ratio less than 7 and/or Tangible Book / Market > 1

#### Market Cap under 1 billion 


In [36]:
def load_curated_microcap_tickers(csv_path = "microcaptickers.csv"):

    df = pd.read_csv(csv_path)
    return df["ticker"].tolist()

In [37]:

screened_tickers, screened_df = dynamicScreen(
    tickers=load_curated_microcap_tickers(),
    pe_max=7,
    tbm_min=1,
    pe_or_tbm=True
)
price_data, price_errors = extractTicker(screened_tickers)
len(screened_tickers)
len(price_data)
len(price_errors)

EmptyDataError: No columns to parse from file