In [1]:
import pandas as pd
import yfinance as yf
import time

# Pull the current S&P 500 membership table from Wikipedia.
sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
table = pd.read_html(sp500_url)
sp500_df = table[0]  # the constituents list is the first table on the page

# Yahoo Finance writes share classes with a dash (BRK-B, BF-B) where the
# Wikipedia table uses a dot (BRK.B, BF.B), so normalise while building the list.
tickers = [symbol.replace(".", "-") for symbol in sp500_df["Symbol"].tolist()]


def fetch_history(
    tickers,
    start="2020-01-01",
    end=None,
    interval="1d",
    chunk_size=50,
    pause=1.5,
):
    """Download price history for ``tickers`` from Yahoo Finance in batches.

    The ticker list is split into consecutive slices of at most ``chunk_size``
    symbols and each slice is requested with a single ``yf.download`` call
    (``group_by="ticker"``, ``auto_adjust=False``, ``threads=True``).

    Parameters
    ----------
    tickers : sequence of str
        Symbols to download. An empty sequence performs no requests.
    start, end, interval
        Forwarded unchanged to ``yf.download``.
    chunk_size : int, default 50
        Maximum number of symbols per request (<= 100 is safer).
    pause : float, default 1.5
        Seconds to wait between two consecutive requests to avoid being
        throttled. No wait happens after the final request.

    Returns
    -------
    list
        One entry per batch, in request order, holding whatever
        ``yf.download`` returned for that batch.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    # A negative step would make range() yield nothing and silently return [].
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    all_data = []
    total = len(tickers)

    for i in range(0, total, chunk_size):
        batch = tickers[i : i + chunk_size]
        print(f"Fetching {batch[0]}...{batch[-1]}")

        df = yf.download(
            tickers=batch,
            start=start,
            end=end,
            interval=interval,
            group_by="ticker",
            auto_adjust=False,
            threads=True,
        )
        all_data.append(df)

        # Throttle only when another request will follow; sleeping after the
        # last batch just delays the caller.
        if pause > 0 and i + chunk_size < total:
            time.sleep(pause)

    return all_data


# Download daily history for every ticker from 2020-01-01 onward; the result
# is a list with one entry per request batch.
history_batches = fetch_history(tickers, start="2020-01-01")


Fetching MMM...ADP


[*********************100%***********************]  50 of 50 completed
[***                    6%                       ]  3 of 50 completed

Fetching AZO...CVX


[*********************100%***********************]  50 of 50 completed
[                       0%                       ]

Fetching CMG...DLTR


[*********************100%***********************]  50 of 50 completed
[                       0%                       ]

Fetching D...F


[*********************100%***********************]  50 of 50 completed
[****                   8%                       ]  4 of 50 completed

Fetching FTNT...INTC


[*********************100%***********************]  50 of 50 completed
[                       0%                       ]

Fetching ICE...MAR


[*********************100%***********************]  50 of 50 completed
[                       0%                       ]

Fetching MMC...ORLY


[*********************100%***********************]  50 of 50 completed
[**                     4%                       ]  2 of 50 completed

Fetching OXY...ROK


[*********************100%***********************]  50 of 50 completed
[**                     4%                       ]  2 of 50 completed

Fetching ROL...TDG


[*********************100%***********************]  50 of 50 completed
[                       0%                       ]

Fetching TRV...YUM


[*********************100%***********************]  50 of 50 completed
[*********************100%***********************]  3 of 3 completed

Fetching ZBRA...ZTS





In [2]:
# Stitch the per-batch frames together side by side (one column group per ticker).
history = pd.concat(history_batches, axis=1)

# Slice each price field out of the second column level, giving one wide
# frame per field with a column per ticker.
close_df, open_df, high_df, low_df, vol_df = (
    history.xs(field, axis=1, level=1)
    for field in ("Close", "Open", "High", "Low", "Volume")
)


In [3]:
# Keep the merged frame's row index as a raw array; a later cell attaches it
# to the polars frame as a "date" column.
dates = history.index.values

In [4]:
# Convert to polars for your pipeline
import polars as pl
from backtest_lib.market.polars_impl import Axis
# Convert the wide close-price frame to polars. The pandas index is not passed
# along here, so the resulting columns are the ticker symbols only.
close_pl = pl.from_pandas(close_df)
# Column names double as the list of securities.
securities = close_pl.columns
# Build the library's Axis from those names (semantics defined in backtest_lib).
axis = Axis.from_names(securities)

In [5]:

# Size and offset of the slice of periods to preview.
window_size = 4
stagger = 0

# Flip the frame so each row is a security and each column is a period, then
# swap the auto-generated "column_N" headers for "period_N".
pastview = close_pl.transpose()
pastview = pastview.rename(
    {name: name.replace("column_", "period_") for name in pastview.columns}
)

# Display `window_size` consecutive periods starting at offset `stagger`.
pastview.select(pastview.columns[stagger : stagger + window_size])

period_0,period_1,period_2,period_3
f64,f64,f64,f64
283.679993,280.440002,285.880005,283.059998
10.63,10.49,10.42,10.33
47.66,47.310001,47.580002,47.630001
87.639999,87.239998,87.550003,90.199997
96.309998,94.099998,92.349998,91.190002
…,…,…,…
204.279999,204.330002,204.580002,204.160004
344.709991,340.660004,337.48999,337.839996
134.139999,134.160004,133.130005,133.580002
144.85437,144.475723,143.640778,143.514557


In [9]:
# now with the lib
from backtest_lib.market.polars_impl import PolarsPastView
from backtest_lib.market import PastView
from backtest_lib.universe import UniverseMapping

# Attach the dates as a "date" column, then wrap the frame in the library's
# PolarsPastView.
close_prices_df = close_pl.with_columns(pl.Series("date", dates))
past_cost_prices = PolarsPastView.from_data_frame(close_prices_df)
# Sanity check: the polars-backed view should satisfy the PastView interface.
print(isinstance(past_cost_prices, PastView))
# NOTE(review): compare the printed AAPL value against the first and last rows
# of close_df — confirm latest() returns the most recent period, not the first.
print(past_cost_prices.latest()["AAPL"])

True
75.0875015258789


In [10]:
# Show a 5-period window of the past view using the default stagger.
print(past_cost_prices.window(5))

PolarsPastView(_inner_df=shape: (503, 5)
┌────────────┬────────────┬────────────┬────────────┬────────────┐
│ period_0   ┆ period_1   ┆ period_2   ┆ period_3   ┆ period_4   │
│ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        │
│ f64        ┆ f64        ┆ f64        ┆ f64        ┆ f64        │
╞════════════╪════════════╪════════════╪════════════╪════════════╡
│ 283.679993 ┆ 280.440002 ┆ 285.880005 ┆ 283.059998 ┆ 286.0      │
│ 10.63      ┆ 10.49      ┆ 10.42      ┆ 10.33      ┆ 10.37      │
│ 47.66      ┆ 47.310001  ┆ 47.580002  ┆ 47.630001  ┆ 48.650002  │
│ 87.639999  ┆ 87.239998  ┆ 87.550003  ┆ 90.199997  ┆ 91.400002  │
│ 96.309998  ┆ 94.099998  ┆ 92.349998  ┆ 91.190002  ┆ 93.089996  │
│ …          ┆ …          ┆ …          ┆ …          ┆ …          │
│ 204.279999 ┆ 204.330002 ┆ 204.580002 ┆ 204.160004 ┆ 204.389999 │
│ 344.709991 ┆ 340.660004 ┆ 337.48999  ┆ 337.839996 ┆ 341.410004 │
│ 134.139999 ┆ 134.160004 ┆ 133.130005 ┆ 133.580002 ┆ 133.289993 │
│ 144.85437  ┆ 144.47

In [11]:
# Same window length, offset by two periods via stagger=2.
print(past_cost_prices.window(5, stagger=2))

PolarsPastView(_inner_df=shape: (503, 5)
┌────────────┬────────────┬────────────┬────────────┬────────────┐
│ period_2   ┆ period_3   ┆ period_4   ┆ period_5   ┆ period_6   │
│ ---        ┆ ---        ┆ ---        ┆ ---        ┆ ---        │
│ f64        ┆ f64        ┆ f64        ┆ f64        ┆ f64        │
╞════════════╪════════════╪════════════╪════════════╪════════════╡
│ 285.880005 ┆ 283.059998 ┆ 286.0      ┆ 296.540009 ┆ 293.329987 │
│ 10.42      ┆ 10.33      ┆ 10.37      ┆ 10.4       ┆ 10.45      │
│ 47.580002  ┆ 47.630001  ┆ 48.650002  ┆ 49.810001  ┆ 49.869999  │
│ 87.550003  ┆ 90.199997  ┆ 91.400002  ┆ 93.190002  ┆ 93.529999  │
│ 92.349998  ┆ 91.190002  ┆ 93.089996  ┆ 91.730003  ┆ 89.510002  │
│ …          ┆ …          ┆ …          ┆ …          ┆ …          │
│ 204.580002 ┆ 204.160004 ┆ 204.389999 ┆ 205.0      ┆ 205.0      │
│ 337.48999  ┆ 337.839996 ┆ 341.410004 ┆ 343.170013 ┆ 341.309998 │
│ 133.130005 ┆ 133.580002 ┆ 133.289993 ┆ 135.059998 ┆ 135.570007 │
│ 143.640778 ┆ 143.51