In [2]:
import pandas as pd
from tqdm import tqdm
import json

df = pd.read_csv("S&P 500 Historical Components & Changes(03-10-2025).csv")
df['date'] = pd.to_datetime(df['date'])
df.head()

Unnamed: 0,date,tickers
0,1996-01-02,"AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD..."
1,1996-01-03,"AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD..."
2,1996-01-04,"AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD..."
3,1996-01-10,"AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD..."
4,1996-01-11,"AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD..."


In [3]:
# Get Unique Tickers
unique_tickers = set()
transformed_data = []
for i, row in df.iterrows():
    tickers = row['tickers'].split(',')
    unique_tickers = unique_tickers.union(set(tickers))

In [4]:
# Convert Point in Time to Ticker Start/End Records
ticker_start_end_records = []
for ticker in tqdm(unique_tickers):
    ticker_record = None
    for i, row in df.iterrows():
        period_tickers = set(row['tickers'].split(','))
        if ticker in period_tickers:
            if ticker_record is None:
                ticker_record = {"ticker": ticker, "start_date": row["date"]}
                continue
        elif ticker_record is not None:
            ticker_record.update({"end_date": row["date"]})
            ticker_start_end_records.append(ticker_record)
            ticker_record = None

    if ticker_record is not None:
        ticker_start_end_records.append(ticker_record)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1175/1175 [08:05<00:00,  2.42it/s]


In [5]:
# Convert back to dataframe
ticker_record_df = pd.DataFrame(ticker_start_end_records)
ticker_record_df.head()

Unnamed: 0,ticker,start_date,end_date
0,TMO,1997-01-02,NaT
1,RX,1996-11-04,2010-02-26
2,BBBY,1999-10-01,2017-07-26
3,ZBRA,2019-12-23,NaT
4,CTL,1999-03-25,2020-09-18


In [6]:
# Look at top tickers entering and leaving S&P 500
(
    ticker_record_df.groupby("ticker")["start_date"]
    .count().sort_values(ascending=False)
    .head(15)
)

ticker
COV     3
KMI     2
TT      2
TEL     2
TER     2
BR      2
NE      2
GAS     2
TMUS    2
GGP     2
MXIM    2
H       2
FMC     2
HCA     2
MIR     2
Name: start_date, dtype: int64

In [7]:
# Record to CSV
(
    ticker_record_df.sort_values(["ticker", "start_date"])
    .to_csv("sp500_ticker_start_end.csv", index=False)
)

In [8]:
ticker_record_df.ticker.nunique()

1175

In [9]:
len(ticker_record_df)

1224

In [10]:
# Record list of tickers to JSON (Optional)
# with open("sp_500_full.json", "w") as f:
#     json.dump(ticker_record_df.ticker.str.replace(".", " ").to_list(), f)