In [10]:
import pickle as pkl
from io import StringIO

import pandas as pd
import requests

# Download the Wikipedia "List of S&P 500 companies" page and parse every HTML
# table on it. A browser-like User-Agent header is sent with the request.
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36"
}
# An explicit timeout keeps a stalled connection from hanging the kernel forever.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()  # raise if 403 or other error
tables = pd.read_html(StringIO(response.text))

# NOTE(review): relies on the page layout -- the first table is taken as the
# current constituents and the second as the historical changes; confirm if
# the page structure changes.
current = tables[0]
changes = tables[1]

# Cache both tables on disk. The context managers guarantee the files are
# flushed and closed even if pickling raises.
with open("sp500_const.pkl", "wb") as const_file:
    pkl.dump(current, const_file)
with open("sp500_changes.pkl", "wb") as changes_file:
    pkl.dump(changes, changes_file)

# To reuse the cached tables instead of re-downloading, uncomment:
# changes = pkl.load(open("sp500_changes.pkl", "rb"))
# current = pkl.load(open("sp500_const.pkl", "rb"))

In [12]:
def _extract_changes(changes: pd.DataFrame, side: str) -> pd.DataFrame:
    """Return the (Effective Date, Ticker) pairs for one side of the changes table.

    Args:
        changes: Changes table with two-level column headers, where the
            top level contains "Effective Date" and ``side``.
        side: Top-level column group to extract, e.g. "Added" or "Removed".

    Returns:
        A new frame with columns "Effective Date" (parsed with
        ``pd.to_datetime``) and "Ticker", with rows containing any missing
        value dropped. The input frame is not modified.
    """
    return (
        changes[["Effective Date", side]]
        # Drop the top header level so columns are addressable by plain name.
        .droplevel(0, axis=1)
        .loc[:, ["Effective Date", "Ticker"]]
        # Replace the column outright: assigning through `.loc[:, col] = ...`
        # writes in place on pandas >= 2 and leaves the dtype as `object`.
        .assign(
            **{
                "Effective Date": lambda frame: pd.to_datetime(
                    frame["Effective Date"]
                )
            }
        )
        .dropna()
    )


added = _extract_changes(changes, "Added")
removed = _extract_changes(changes, "Removed")

In [13]:
# Read one ticker per line from the constituents file. The context manager
# closes the handle, and blank lines are skipped so that an empty string never
# ends up as a ticker.
with open("sp500_constituents.txt", "r") as constituents_file:
    current_tickers = [
        ticker
        for ticker in (line.rstrip("\n") for line in constituents_file)
        if ticker
    ]

# Every ticker that appears as an addition, a removal, or in the constituents file.
all_historical_tickers = (
    set(added["Ticker"]) | set(removed["Ticker"]) | set(current_tickers)
)

In [14]:
# Daily calendar spanning the earliest to the latest "Added" effective date.
dates = pd.date_range(
    added["Effective Date"].min(), added["Effective Date"].max(), freq="D"
).to_list()

# One boolean column per ticker, initialised to True for every date.
# Tickers are sorted because iterating the set directly yields a column order
# that can change between kernel restarts (string hash randomisation).
all_true = [True] * len(dates)  # loop-invariant; pandas copies it per column
tradeable_df = pd.DataFrame(
    {
        "date": dates,
        **{ticker: all_true for ticker in sorted(all_historical_tickers)},
    }
)

In [15]:
# for ticker in all_historical_tickers:
#     date_first_added_df = current.query("Symbol == @ticker")["Date added"]
#     date_first_added = date_first_added_df.iat[0] if not date_first_added_df.empty else None
#     dates_added: pd.Series = added.query("Ticker == @ticker")["Effective Date"]
#     dates_removed: pd.Series = removed.query("Ticker == @ticker")["Effective Date"]

#     poi_added = [(date, "a") for date in dates_added]
#     poi_rem = [(date, "r") for date in dates_removed]

#     poi = sorted((poi_added + poi_rem), key=lambda tup: tup[0])
#     first_addition = True

#     if date_first_added is not None:
#         tradeable_df.loc[tradeable_df["date"] < date_first_added, ticker] = False

#     for date, action in poi:
#         if action == "a":
#             if first_addition:
#                 tradeable_df.loc[tradeable_df["date"] < date, ticker] = False
#                 first_addition = False
#             tradeable_df.loc[tradeable_df["date"] >= date, ticker] = True
#         if action == "r":
#             tradeable_df.loc[tradeable_df["date"] >= date, ticker] = False

# tradeable_df

In [16]:
# from backtest_lib.market.polars_impl import PolarsPastView
# tradeable_past_view = PolarsPastView.from_dataframe(tradeable_df)

In [17]:
import datetime as dt

from backtest_lib.examples.get_sp500_historical import get_sp500_market_view

# Build the market view with backtest_lib's example helper, passing 2020-01-01.
# NOTE(review): presumably this date is the start of the loaded history --
# confirm against get_sp500_market_view.
market = get_sp500_market_view(dt.date(2020, 1, 1))

In [None]:
# Display the result of `between` on the close prices for the given date strings.
# NOTE(review): presumably this selects the 2023-01-01 .. 2023-01-03 window;
# whether the bounds are inclusive depends on backtest_lib -- TODO confirm.
market.prices.close.between("2023-01-01", "2023-01-03")