In [None]:
import pickle as pkl
import pandas as pd
import yfinance as yf
import time

# Pull the current S&P 500 membership from Wikipedia; the constituents list
# is the first table on the page.
sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
table = pd.read_html(sp500_url)
sp500_df = table[0]

# Yahoo writes share classes with a dash where the listing uses a dot
# (BRK.B -> BRK-B, BF.B -> BF-B), so normalise every symbol.
tickers = [symbol.replace(".", "-") for symbol in sp500_df["Symbol"]]


def fetch_history(
    tickers,
    start="2020-01-01",
    end=None,
    interval="1d",
    chunk_size=50,
    pause=1.5,
):
    """Download price history with ``yf.download``, one batch of tickers at a time.

    Args:
        tickers: Sequence of ticker symbols; it is sliced into batches.
        start: Passed through unchanged as ``start`` to ``yf.download``.
        end: Passed through unchanged as ``end`` to ``yf.download``.
        interval: Passed through unchanged as ``interval`` to ``yf.download``.
        chunk_size: Maximum number of tickers per ``yf.download`` call
            (<= 100 is safer).
        pause: Seconds to wait between consecutive batches to avoid being
            throttled. Values <= 0 disable the wait.

    Returns:
        A list holding the ``yf.download`` result of each batch, in the same
        order as ``tickers``. An empty ``tickers`` gives an empty list.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    all_data = []

    for i in range(0, len(tickers), chunk_size):
        # Wait only *between* requests: nothing follows the last batch, so
        # sleeping after it would just delay the caller.
        if i > 0 and pause > 0:
            time.sleep(pause)

        batch = tickers[i : i + chunk_size]
        print(f"Fetching {batch[0]}...{batch[-1]}")

        df = yf.download(
            tickers=batch,
            start=start,
            end=end,
            interval=interval,
            group_by="ticker",
            auto_adjust=False,
            threads=True,
        )
        all_data.append(df)

    return all_data


# Download the per-batch frames and cache them on disk. The file is opened in
# a `with` block so the handle is closed (and the data flushed) right away.
history_batches = fetch_history(tickers, start="2022-01-01")
with open("./sp500.pkl", "wb") as fh:
    pkl.dump(history_batches, fh)
# To reuse the cached download instead of fetching again:
# with open("./sp500.pkl", "rb") as fh:
#     history_batches = pkl.load(fh)

# Each batch holds a different slice of the tickers, so join them column-wise.
history = pd.concat(history_batches, axis=1)

# The download uses group_by="ticker", so the price field is selected on
# column level 1; each xs() call yields one frame for a single field.
close_df = history.xs("Close", axis=1, level=1)

open_df = history.xs("Open", axis=1, level=1)
high_df = history.xs("High", axis=1, level=1)
low_df = history.xs("Low", axis=1, level=1)
vol_df = history.xs("Volume", axis=1, level=1)

# Cache the close prices separately; a later cell reloads this file.
with open("./sp500_close.pkl", "wb") as fh:
    pkl.dump(close_df, fh)

In [10]:
# Reload the cached close-price frame from "./sp500_close.pkl". The `with`
# block closes the file handle as soon as unpickling is done.
# NOTE: pickle executes code on load -- only open files this notebook wrote.
with open("./sp500_close.pkl", "rb") as fh:
    close_df = pkl.load(fh)
# Keep the index values on their own so they can be re-attached later.
dates = close_df.index.values

In [11]:
# Convert to polars for your pipeline
import polars as pl
from backtest_lib.market.polars_impl import Axis

# Convert the pandas close-price frame into a polars DataFrame.
# NOTE(review): pl.from_pandas presumably does not carry the pandas index over
# by default (the dates are re-attached as a "date" column later) -- confirm.
close_pl = pl.from_pandas(close_df)
# Column names of the converted frame; presumably one per ticker -- confirm.
securities = close_pl.columns
# Build the library's Axis from those column names.
axis = Axis.from_names(securities)

In [None]:
# Window parameters: how many period columns to show, and how far the window
# is shifted from the first period column.
window_size = 4
stagger = 0

# Transpose the close prices, then swap the "column_" prefix of the resulting
# column names for "period_" in a single chained step.
pastview = close_pl.transpose().pipe(
    lambda frame: frame.rename(
        {name: name.replace("column_", "period_") for name in frame.columns}
    )
)

# Cell output: the period columns in [stagger, stagger + window_size).
pastview.select(pastview.columns[stagger : stagger + window_size])

In [None]:
# now with the lib
from backtest_lib.market.polars_impl import PolarsPastView
from backtest_lib.market import PastView

# Attach the values saved from the pandas index as a "date" column.
close_prices_df = close_pl.with_columns(pl.Series("date", dates))
# Wrap the frame in the library's polars-backed past view.
past_cost_prices = PolarsPastView.from_data_frame(close_prices_df)
# Check that the result is an instance of the imported PastView.
print(isinstance(past_cost_prices, PastView))
# Index by_period with -1 (presumably the most recent period -- confirm) and
# look up the AAPL entry.
print(past_cost_prices.by_period[-1]["AAPL"])

In [None]:
# Print the AAPL entry of the by-security view (presumably its values across
# all periods -- confirm against the library).
print(past_cost_prices.by_security["AAPL"])

In [None]:
from backtest_lib.market import BySecurity, ByPeriod

# Check that the two accessors of the past view are instances of the imported
# BySecurity and ByPeriod classes; prints the two booleans on one line.
print(
    isinstance(past_cost_prices.by_security, BySecurity),
    isinstance(past_cost_prices.by_period, ByPeriod),
)

In [None]:
# Cell output: the AAPL by-security entry converted with as_series()
# (presumably to a polars Series -- confirm against the library).
past_cost_prices.by_security["AAPL"].as_series()