# S&P 500 by date

Get snapshot of S&P 500 components at a given date

In [16]:
from datetime import datetime
import os
import shutil
import pandas as pd

pd.options.mode.chained_assignment = None  # default='warn'
pd.set_option('display.max_rows', 600)

# -*- encoding: utf-8 -*-
%matplotlib inline

In [17]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [18]:
# Date to use for snapshot of S&P 500 components.
snap_shot = '2008-01-01'

In [19]:
def get_table(filename):

    if os.path.isfile(filename):
        df = pd.read_csv(filename, index_col='date')
        return df

In [20]:
filename = 'S&P 500 Historical Components & Changes(04-25-2023).csv'
df = get_table(filename)
df.tail()

Unnamed: 0_level_0,tickers
date,Unnamed: 1_level_1
2022-12-19,"A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,ADI,..."
2022-12-22,"A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,ADI,..."
2023-01-04,"A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,ADI,..."
2023-03-15,"A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,ADI,..."
2023-03-20,"A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,ADI,..."


In [21]:
# Convert ticker column from csv to list, then sort.
df['tickers'] = df['tickers'].apply(lambda x: sorted(x.split(',')))
df.tail()

Unnamed: 0_level_0,tickers
date,Unnamed: 1_level_1
2022-12-19,"[A, AAL, AAP, AAPL, ABBV, ABC, ABT, ACGL, ACN,..."
2022-12-22,"[A, AAL, AAP, AAPL, ABBV, ABC, ABT, ACGL, ACN,..."
2023-01-04,"[A, AAL, AAP, AAPL, ABBV, ABC, ABT, ACGL, ACN,..."
2023-03-15,"[A, AAL, AAP, AAPL, ABBV, ABC, ABT, ACGL, ACN,..."
2023-03-20,"[A, AAL, AAP, AAPL, ABBV, ABC, ABT, ACGL, ACN,..."


In [22]:
# Number of symbols in the first row.
l = list(df['tickers'].head(1))[0]
len(l)

487

In [23]:
# Get the synbols on snap_shot date by filtering df by rows before or on the snap_shot date,
# then picking the last row.
df2 = df[df.index <= snap_shot]
last_row = df2.tail(1)
last_row

Unnamed: 0_level_0,tickers
date,Unnamed: 1_level_1
2007-12-31,"[A, AABA, AAPL, ABC, ABI, ABKFQ, ABT, ACAS, AC..."


In [24]:
past = last_row['tickers'][0]
print('*'*40, f'S&P 500 on {snap_shot}', '*'*40)
print(past)

**************************************** S&P 500 on 2008-01-01 ****************************************
['A', 'AABA', 'AAPL', 'ABC', 'ABI', 'ABKFQ', 'ABT', 'ACAS', 'ACS', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AKAM', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AN', 'ANDV', 'ANF', 'ANTM', 'AON', 'APA', 'APC', 'APD', 'APOL', 'ARNC', 'ASH', 'ATI', 'AVB', 'AVP', 'AVY', 'AW', 'AXP', 'AYE', 'AZO', 'BA', 'BAC', 'BAX', 'BBBY', 'BBT', 'BBY', 'BC', 'BCR', 'BDK', 'BDX', 'BEAM', 'BEN', 'BF.B', 'BHGE', 'BIG', 'BIIB', 'BJS', 'BK', 'BLL', 'BMC', 'BMS', 'BMY', 'BNI', 'BRCM', 'BRL', 'BSC', 'BSX', 'BTUUQ', 'BUD', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAT', 'CB', 'CBE', 'CBH', 'CBRE', 'CBS', 'CCE', 'CCL', 'CCTYQ', 'CCU', 'CEG', 'CELG', 'CFC', 'CHK', 'CHRW', 'CI', 'CIEN', 'CINF', 'CITGQ', 'CL', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COL', 'COP', 'COST', 'COV', 'CPB', 'CPWR', 'CSCO', 'CSX', 'CTAS', 'CTL', '

In [26]:
df2 = pd.DataFrame(past, columns = ['Ticker'])
df2

Unnamed: 0,Ticker
0,A
1,AABA
2,AAPL
3,ABC
4,ABI
5,ABKFQ
6,ABT
7,ACAS
8,ACS
9,ADBE


In [27]:
df2.to_csv(r'C:\Users\yuxua\Desktop\Companies 2008.csv', index = False)

In [10]:
# Get current S&P500 list.
filename = 'sp500.csv'
current = pd.read_csv(filename)
current = sorted(list(current['Symbol']))
#print(current)

In [11]:
# Show what's been added and removed since snap_shot date.

added = list(set(current) - set(past))
print('*'*40, f'ADDED since {snap_shot}', '*'*40)
print(added)
print()

removed = list(set(past) - set(current))
print('*'*40, f'REMOVED since {snap_shot}', '*'*40)
print(removed)

**************************************** ADDED since 2008-01-01 ****************************************
['STX', 'TSCO', 'SJM', 'MTD', 'BR', 'CDW', 'VTR', 'DISH', 'LRCX', 'CZR', 'SNPS', 'CTRA', 'DAL', 'CRM', 'XRAY', 'ACGL', 'FDS', 'VRTX', 'CFG', 'EPAM', 'PM', 'ELV', 'REGN', 'XYL', 'BRO', 'SEDG', 'ISRG', 'KEYS', 'TDY', 'PODD', 'CPRT', 'TSLA', 'HRL', 'IRM', 'DPZ', 'UDR', 'FTNT', 'PAYC', 'AWK', 'ALLE', 'ES', 'FRC', 'EVRG', 'GNRC', 'IVZ', 'TRMB', 'MOH', 'FLT', 'NRG', 'OKE', 'ULTA', 'APTV', 'RCL', 'VTRS', 'CCI', 'CE', 'DXCM', 'WAB', 'RSG', 'IDXX', 'CNC', 'CBOE', 'ANET', 'ON', 'V', 'ALGN', 'BLK', 'MKTX', 'SYF', 'CSGP', 'BG', 'POOL', 'FOX', 'STLD', 'MPC', 'HLT', 'HSIC', 'STE', 'AVGO', 'LW', 'TDG', 'CRL', 'AOS', 'ZTS', 'BRK.B', 'MLM', 'HII', 'CDNS', 'FSLR', 'MOS', 'IT', 'LKQ', 'SBAC', 'NDAQ', 'SWKS', 'RTX', 'JKHY', 'GRMN', 'EQIX', 'NVR', 'O', 'IEX', 'AAP', 'BKR', 'JBHT', 'REG', 'PSX', 'CARR', 'WYNN', 'KMX', 'FICO', 'CTVA', 'URI', 'MA', 'GL', 'DG', 'MTCH', 'LNT', 'ARE', 'COO', 'ANSS', 'NXPI', '