In [9]:
import pickle as pkl
import pandas as pd
import yfinance as yf
import time

# Wikipedia page that lists the current S&P 500 members.
sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# read_html yields one DataFrame per HTML table found on the page; the
# constituents list is the first one.
table = pd.read_html(sp500_url)
sp500_df = table[0]

# Yahoo writes share-class tickers with a dash (BRK-B, BF-B) where the
# Wikipedia table uses a dot (BRK.B, BF.B), so normalise while building the list.
tickers = [symbol.replace(".", "-") for symbol in sp500_df["Symbol"].tolist()]


def fetch_history(
    tickers,
    start="2020-01-01",
    end=None,
    interval="1d",
    chunk_size=50,
    pause=1.5,
):
    """Download price history from Yahoo Finance in batches of tickers.

    Parameters
    ----------
    tickers : sequence of str
        Symbols to download. The sequence is sliced into consecutive
        batches of at most ``chunk_size`` entries.
    start, end, interval
        Forwarded unchanged to ``yf.download``.
    chunk_size : int, default 50
        Maximum number of tickers per ``yf.download`` call
        (the original author's note: "<= 100 is safer").
    pause : float, default 1.5
        Seconds to sleep between two consecutive batches, to avoid being
        throttled. No sleep happens after the final batch.

    Returns
    -------
    list
        One ``yf.download`` result per batch, in request order. An empty
        ``tickers`` sequence yields an empty list.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    all_data = []
    total = len(tickers)

    for i in range(0, total, chunk_size):
        batch = tickers[i : i + chunk_size]
        print(f"Fetching {batch[0]}...{batch[-1]}")

        df = yf.download(
            tickers=batch,
            start=start,
            end=end,
            interval=interval,
            group_by="ticker",
            auto_adjust=False,
            threads=True,
        )
        all_data.append(df)

        # Only pause when another request follows; sleeping after the last
        # batch would just delay the return.
        if pause > 0 and i + chunk_size < total:
            time.sleep(pause)

    return all_data


# Presumably a cached result of the download call below; re-enable it to
# refresh the data from Yahoo instead of reading the local file:
# history_batches = fetch_history(tickers, start="2020-01-01")
#
# NOTE: pickle.load can execute arbitrary code while deserialising — only load
# a file you produced yourself.
# The context manager closes the file handle as soon as loading finishes.
with open("./sp500.pkl", "rb") as cache_file:
    history_batches = pkl.load(cache_file)


In [10]:
# Join the per-batch frames side by side into one wide frame.
history = pd.concat(history_batches, axis=1)

# Cut each field out of the second column level, giving one wide frame per
# field. The fields are taken in the order Close, Open, High, Low, Volume.
close_df, open_df, high_df, low_df, vol_df = (
    history.xs(field, axis=1, level=1)
    for field in ("Close", "Open", "High", "Low", "Volume")
)

In [11]:
# Keep the row index of `history` as a plain array; it is re-attached later as
# the "date" column because the polars frame is built from `close_df` alone.
dates = history.index.values

In [12]:
# Convert the wide close-price frame to polars for the backtest pipeline
import polars as pl
from backtest_lib.market.polars_impl import Axis

close_pl = pl.from_pandas(close_df)
# Column names of the polars frame — presumably one per ticker; verify against
# the layout of `close_df`.
securities = close_pl.columns
# NOTE(review): Axis.from_names is a project helper not visible here; it is
# given the list of column names.
axis = Axis.from_names(securities)

In [13]:
# Flip the frame so every original column becomes a row, then relabel the
# generated "column_<n>" headers as "period_<n>" in the same chain.
pastview = close_pl.transpose().pipe(
    lambda frame: frame.rename(
        {name: name.replace("column_", "period_") for name in frame.columns}
    )
)

# Width of the demo window and its offset from the first period.
window_size = 4
stagger = 0

# Show `window_size` consecutive period columns, starting at `stagger`.
pastview.select(pastview.columns[stagger : stagger + window_size])

period_0,period_1,period_2,period_3
f64,f64,f64,f64
87.639999,87.239998,87.550003,90.199997
49.099998,48.599998,48.389999,48.25
12.795,12.553125,12.67875,12.803125
187.830002,184.949997,187.119995,187.5
68.433998,68.075996,69.890503,69.755501
…,…,…,…
46.119999,46.490002,46.389999,45.82
137.509995,137.020004,137.169998,135.160004
259.140015,256.049988,258.01001,256.470001
144.85437,144.475723,143.640778,143.514557


In [14]:
# Same idea as the manual transpose/select, now through the library's PastView API
from backtest_lib.market.polars_impl import PolarsPastView
from backtest_lib.market import PastView

# Attach the dates (taken from the pandas index of `history`) as a "date"
# column before handing the frame to the library.
close_prices_df = close_pl.with_columns(pl.Series("date", dates))
# NOTE(review): the name says "cost prices" but the frame holds close prices.
past_cost_prices = PolarsPastView.from_data_frame(close_prices_df)
# NOTE(review): the recorded output of this check is False, i.e. the object is
# not recognised as a PastView at runtime — confirm whether that is intended.
print(isinstance(past_cost_prices, PastView))
# Look up AAPL in whatever latest() returns; the recorded output is a single float.
print(past_cost_prices.latest()["AAPL"])

False
231.58999633789062


In [18]:
# Print the `by_period` table of a 5-period window.
# NOTE(review): the recorded output starts at the earliest dates
# (2020-01-02 ... 2020-01-08) — confirm that window() is meant to be anchored
# there rather than at the most recent period.
# NOTE(review): this cell's execution count (18) is out of sequence with its
# neighbours (14, 16); re-run with Restart & Run All before sharing.
print(past_cost_prices.window(5).by_period)

shape: (503, 5)
┌───────────────────┬───────────────────┬───────────────────┬───────────────────┬──────────────────┐
│ 2020-01-02 00:00: ┆ 2020-01-03 00:00: ┆ 2020-01-06 00:00: ┆ 2020-01-07 00:00: ┆ 2020-01-08 00:00 │
│ 00.000000000      ┆ 00.000000000      ┆ 00.000000000      ┆ 00.000000000      ┆ :00.000000000    │
│ ---               ┆ ---               ┆ ---               ┆ ---               ┆ ---              │
│ f64               ┆ f64               ┆ f64               ┆ f64               ┆ f64              │
╞═══════════════════╪═══════════════════╪═══════════════════╪═══════════════════╪══════════════════╡
│ 87.639999         ┆ 87.239998         ┆ 87.550003         ┆ 90.199997         ┆ 91.400002        │
│ 49.099998         ┆ 48.599998         ┆ 48.389999         ┆ 48.25             ┆ 47.830002        │
│ 12.795            ┆ 12.553125         ┆ 12.67875          ┆ 12.803125         ┆ 12.93625         │
│ 187.830002        ┆ 184.949997        ┆ 187.119995        ┆ 187.5        

In [16]:
# Print a 5-period window requested with stagger=2. In the recorded output the
# first column is 2020-01-06, two periods after the first date shown for the
# unstaggered window.
print(past_cost_prices.window(5, stagger=2))

PolarsPastView(by_period=shape: (503, 5)
┌───────────────────┬───────────────────┬───────────────────┬───────────────────┬──────────────────┐
│ 2020-01-06 00:00: ┆ 2020-01-07 00:00: ┆ 2020-01-08 00:00: ┆ 2020-01-09 00:00: ┆ 2020-01-10 00:00 │
│ 00.000000000      ┆ 00.000000000      ┆ 00.000000000      ┆ 00.000000000      ┆ :00.000000000    │
│ ---               ┆ ---               ┆ ---               ┆ ---               ┆ ---              │
│ f64               ┆ f64               ┆ f64               ┆ f64               ┆ f64              │
╞═══════════════════╪═══════════════════╪═══════════════════╪═══════════════════╪══════════════════╡
│ 87.550003         ┆ 90.199997         ┆ 91.400002         ┆ 93.190002         ┆ 93.529999        │
│ 48.389999         ┆ 48.25             ┆ 47.830002         ┆ 48.970001         ┆ 48.169998        │
│ 12.67875          ┆ 12.803125         ┆ 12.93625          ┆ 12.925            ┆ 12.96625         │
│ 187.119995        ┆ 187.5             ┆ 189.9499