In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt




In [3]:
start = '2015-01-01'
overall = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
symbols = overall.Symbol.to_list()
print(symbols)

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'APO', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BAX', 'BDX', 'BRK.B', 'BBY', 'TECH', 'BIIB', 'BLK', 'BX', 'BK', 'BA', 'BKNG', 'BWA', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CRWD', 'CCI', 'CSX', 'CMI', 'CVS', 'DHR', 'DRI', 'DV

In [4]:
removed = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[1][['Date', 'Removed']]
removed = removed.set_index(removed.Date.Date)
removed.index = pd.to_datetime(removed.index)
removed = removed[removed.index >= start]
removed = removed.Removed.dropna()
removed

Unnamed: 0_level_0,Ticker,Security
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-12-23,QRVO,Qorvo
2024-12-23,AMTM,Amentum
2024-12-23,CTLT,Catalent
2024-11-26,MRO,Marathon Oil
2024-10-01,BBWI,"Bath & Body Works, Inc."
...,...,...
2015-03-23,AVP,Avon Products
2015-03-18,CFN,Carefusion
2015-03-12,PETM,PetSmart
2015-01-27,SWY,Safeway


In [5]:
symbols.extend(removed.Ticker.to_list())

def save_to_csv(dataframe, file_path="data.csv"):
    dataframe.to_csv(file_path, index=True)

def load_from_csv(file_path="data.csv"):
    try:
        return pd.read_csv(file_path, index_col=0, parse_dates=True)
    except pd.errors.EmptyDataError:
        # Handle any unexpected empty data errors
        return pd.DataFrame()

def fetch_missing_data(symbols, file_path="data.csv"):
    existing_data = load_from_csv(file_path)
    existing_symbols = existing_data.columns if not existing_data.empty else []
    missing_symbols = [symbol for symbol in symbols if symbol not in existing_symbols]

    if missing_symbols:
        print(f"Fetching data for: {missing_symbols}")
        new_data = yf.download(missing_symbols, start=start)['Close']

        # Drop columns where all rows are NaN
        valid_data = new_data.dropna(axis=1, how="all")

        if valid_data.empty:
            print("No valid data fetched. Not updating the file.")
            return existing_data

        print(f"Fetched valid data for: {valid_data.columns.tolist()}")
        updated_data = pd.concat([existing_data, valid_data], axis=1)
        save_to_csv(updated_data, file_path)
        return updated_data

    print("No missing data to fetch.")
    return existing_data


df = fetch_missing_data(symbols)

df


Fetching data for: ['BRK.B', 'BF.B', 'PXD', 'ATVI', 'DISH', 'FRC', 'SIVB', 'ABMD', 'FBHS', 'TWTR', 'NLSN', 'CTXS', 'DRE', 'CERN', 'PBCT', 'XLNX', 'GPS', 'KSU', 'MXIM', 'ALXN', 'HFC', 'FLIR', 'VAR', 'CXO', 'TIF', 'NBL', 'ETFC', 'ADS', 'AGN', 'RTN', 'ARNC', 'XEC', 'WCG', 'VIAB', 'CELG', 'TSS', 'APC', 'RHT', 'LLL', 'DWDP', 'XL', 'GGP', 'DPS', 'MON', 'WYN', 'CHK', 'BCR', 'LVLT', 'SPLS', 'WFM', 'BBBY', 'MNK', 'RAI', 'YHOO', 'MJN', 'SWN', 'FTR', 'LLTC', 'ENDP', 'STJ', 'LM', 'DO', 'TYC', 'CPGX', 'TE', 'CVC', 'BXLT', 'ARG', 'TWC', 'SNDK', 'CAM', 'ESV', 'GMCR', 'BRCM', 'CMCSK', 'SIAL', 'HCBK', 'JOY', 'HSP', 'DTV', 'FDO', 'KRFT', 'QEP', 'LO', 'WIN', 'AGN', 'DNR', 'AVP', 'CFN', 'SWY']


[*********************100%***********************]  89 of 89 completed

89 Failed downloads:
['SIAL', 'ARG', 'TYC', 'KRFT', 'BCR', 'BXLT', 'SNDK', 'DPS', 'CFN', 'BRCM', 'GGP', 'CMCSK', 'MJN', 'RAI', 'CPGX', 'HSP', 'TWC', 'LLTC', 'FDO', 'SWY', 'BF.B', 'STJ', 'GMCR', 'WYN', 'JOY', 'CAM', 'SPLS', 'LVLT', 'CVC', 'WFM', 'HCBK', 'TE', 'LO']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2015-01-01 -> 2025-01-03)')
['ARNC', 'WIN', 'LLL', 'XLNX', 'VAR', 'ETFC', 'TIF', 'TWTR', 'DWDP', 'ADS', 'DRE', 'HFC', 'BBBY', 'FLIR', 'SIVB', 'LM', 'AGN', 'ENDP', 'MNK', 'RHT', 'FTR', 'ESV', 'CHK', 'FBHS', 'CTXS', 'SWN', 'VIAB', 'XL', 'CERN', 'PXD', 'YHOO', 'DTV', 'DNR', 'RTN', 'CELG', 'DISH', 'TSS', 'PBCT', 'CXO', 'APC', 'MON', 'MXIM', 'NLSN', 'WCG', 'ATVI', 'GPS', 'ABMD', 'FRC', 'BRK.B', 'DO', 'QEP', 'AVP', 'ALXN', 'NBL', 'KSU', 'XEC']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


No valid data fetched. Not updating the file.


Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,ABBV,ACE,ACGL,ACN,ADBE,...,UHS,VFC,VMC,VNO,VNT,WHR,WMT,YUM,ZBH,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,37.353008,36.027794,51.079910,142.627319,24.347178,43.563183,,19.496668,75.312714,72.339996,...,106.474159,51.941383,61.653030,57.969090,,137.164902,23.441494,43.035698,100.930573,40.278938
2015-01-05,36.653107,33.940552,51.051491,140.747284,23.661278,42.743366,,19.379999,74.041130,71.980003,...,104.296097,51.504784,59.950169,58.114803,,134.336502,23.373266,42.161304,104.695595,40.037128
2015-01-06,36.082127,34.190105,50.255581,140.648361,23.663502,42.531796,,19.423332,73.507050,70.529999,...,104.334488,51.201977,59.812099,58.585861,,133.227844,23.553377,41.643806,103.808128,39.646530
2015-01-07,36.561008,35.074921,50.227154,143.670746,23.995312,44.250778,,19.536667,75.049919,71.110001,...,107.452782,51.856880,60.944283,59.824219,,136.361877,24.178303,43.023808,106.389862,40.464928
2015-01-08,37.656910,36.073174,50.843040,144.930084,24.917269,44.713585,,19.876667,76.194374,72.919998,...,108.556236,53.025829,62.849655,60.028164,,139.879654,24.688616,43.773285,107.519379,41.088036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,135.848907,38.610001,17.350000,43.730000,258.200012,180.000000,,92.669998,361.630005,447.940002,...,181.220001,21.879999,264.309998,42.419998,37.110001,116.550003,92.680000,135.000000,107.059036,164.699997
2024-12-26,135.579407,38.349998,17.350000,44.240002,259.019989,179.199997,,92.930000,360.429993,450.160004,...,181.130005,22.120001,264.549988,42.650002,37.349998,116.029999,92.790001,136.000000,107.009148,165.520004
2024-12-27,135.289932,37.680000,17.350000,44.060001,255.589996,178.009995,,92.339996,356.179993,446.480011,...,180.460007,21.770000,262.359985,41.790001,36.610001,115.169998,91.660004,135.229996,106.630005,164.600006
2024-12-30,134.171997,37.150002,17.620001,46.049999,252.199997,176.199997,,91.889999,352.489990,445.799988,...,178.369995,21.410000,258.160004,41.380001,36.220001,113.839996,90.570000,133.520004,105.400002,162.240005


In [6]:
df.index = pd.to_datetime(df.index)

print(removed[removed.Ticker == 'SBNY'])

print(df.shape[1])
df = df.dropna(axis=1, how='all')
print(df.shape[1])


def pricefilter_remove(ticker):
    df[ticker] = df[ticker][df[ticker].index <= removed[removed.Ticker == ticker].index[0]]

def pricefilter_add(ticker):
    # print(overall[overall.Symbol == ticker])
    # print(overall[overall.Symbol == ticker].values[0])
    date_added = overall.loc[overall.Symbol == ticker, 'Date added'].iloc[0]
    df[ticker] = df[ticker][df[ticker].index >= date_added]

for ticker_rem in removed.Ticker:
    if ticker_rem in df.columns:
        pricefilter_remove(ticker_rem)

for ticker_add in overall.Symbol:
    if ticker_add in df.columns:
        pricefilter_add(ticker_add)

df[['LUMN']].head(20)
df

           Ticker        Security
Date                             
2023-03-15   SBNY  Signature Bank
619
619


Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,ABBV,ACE,ACGL,ACN,ADBE,...,UHS,VFC,VMC,VNO,VNT,WHR,WMT,YUM,ZBH,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,37.353008,36.027794,51.079910,142.627319,24.347178,43.563183,,,75.312714,72.339996,...,106.474159,51.941383,61.653030,57.969090,,137.164902,23.441494,43.035698,100.930573,40.278938
2015-01-05,36.653107,33.940552,51.051491,140.747284,23.661278,42.743366,,,74.041130,71.980003,...,104.296097,51.504784,59.950169,58.114803,,134.336502,23.373266,42.161304,104.695595,40.037128
2015-01-06,36.082127,34.190105,50.255581,140.648361,23.663502,42.531796,,,73.507050,70.529999,...,104.334488,51.201977,59.812099,58.585861,,133.227844,23.553377,41.643806,103.808128,39.646530
2015-01-07,36.561008,35.074921,50.227154,143.670746,23.995312,44.250778,,,75.049919,71.110001,...,107.452782,51.856880,60.944283,59.824219,,136.361877,24.178303,43.023808,106.389862,40.464928
2015-01-08,37.656910,36.073174,50.843040,144.930084,24.917269,44.713585,,,76.194374,72.919998,...,108.556236,53.025829,62.849655,60.028164,,139.879654,24.688616,43.773285,107.519379,41.088036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,135.848907,,,,258.200012,180.000000,,92.669998,361.630005,447.940002,...,181.220001,,264.309998,,,,92.680000,135.000000,107.059036,164.699997
2024-12-26,135.579407,,,,259.019989,179.199997,,92.930000,360.429993,450.160004,...,181.130005,,264.549988,,,,92.790001,136.000000,107.009148,165.520004
2024-12-27,135.289932,,,,255.589996,178.009995,,92.339996,356.179993,446.480011,...,180.460007,,262.359985,,,,91.660004,135.229996,106.630005,164.600006
2024-12-30,134.171997,,,,252.199997,176.199997,,91.889999,352.489990,445.799988,...,178.369995,,258.160004,,,,90.570000,133.520004,105.400002,162.240005


In [7]:

monthly_returns = df.pct_change().resample('M').agg(lambda x : (x + 1).prod() - 1)
monthly_returns

twelve_month_returns = monthly_returns.rolling(12).agg(lambda x: (x+1).prod() - 1)
twelve_month_returns.dropna(inplace=True)
twelve_month_returns

  monthly_returns = df.pct_change().resample('M').agg(lambda x : (x + 1).prod() - 1)
  monthly_returns = df.pct_change().resample('M').agg(lambda x : (x + 1).prod() - 1)


Unnamed: 0_level_0,A,AA,AAL,AAP,AAPL,ABBV,ACE,ACGL,ACN,ADBE,...,UHS,VFC,VMC,VNO,VNT,WHR,WMT,YUM,ZBH,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-12-31,0.041792,-0.372034,-0.207681,-0.049328,-0.020823,-0.071393,0.0,0.000000,0.201626,0.298590,...,0.080221,-0.139808,0.424073,-0.051475,0.0,-0.224067,-0.266537,0.032109,-0.081229,0.114511
2016-01-31,0.007435,-0.529367,-0.198756,-0.042258,-0.155014,-0.057631,0.0,0.000000,0.283052,0.270925,...,0.102119,-0.080229,0.256344,-0.177809,0.0,-0.311649,-0.197397,0.024486,-0.107165,0.015155
2016-02-29,-0.105719,-0.388437,-0.136089,-0.040434,-0.233361,-0.064933,0.0,0.000000,0.137632,0.076486,...,-0.023192,-0.134323,0.193327,-0.194410,0.0,-0.250876,-0.187601,-0.085783,-0.189166,-0.102356
2016-03-31,-0.033086,-0.248964,-0.215775,0.072751,-0.107849,0.010790,0.0,0.000000,0.258271,0.268596,...,0.062924,-0.122333,0.258874,-0.134546,0.0,-0.087630,-0.142953,0.063896,-0.084690,-0.035055
2016-04-30,0.000082,-0.156940,-0.276434,0.093243,-0.237094,-0.021444,0.0,0.000000,0.243836,0.238759,...,0.146690,-0.111531,0.265129,-0.050486,0.0,0.013756,-0.118196,-0.052617,0.063251,0.067118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.188982,0.000000,-0.279022,0.000000,0.225165,0.387416,0.0,0.471376,0.073212,0.026942,...,0.773333,-0.277276,0.131933,0.000000,0.0,-0.224196,0.444054,0.063450,-0.022685,-0.027285
2024-09-30,0.337418,0.000000,-0.142857,0.000000,0.367866,0.376022,0.0,0.403588,0.169581,0.015454,...,0.829853,-0.204428,0.248903,0.000000,0.0,-0.187872,0.535173,0.140339,-0.030022,0.134153
2024-10-31,0.269165,0.000000,-0.015247,0.000000,0.329660,0.496830,0.0,0.137056,0.179316,-0.101455,...,0.630419,-0.045637,0.404545,0.000000,0.0,0.038461,0.524807,0.106692,0.032587,0.150004
2024-11-30,0.086898,0.000000,-0.116653,0.000000,0.255555,0.331674,0.0,0.203489,0.105227,-0.155611,...,0.495804,-0.159727,0.358787,0.000000,0.0,-0.018103,0.806449,0.123231,-0.028152,0.001787


In [8]:
from pandas.tseries.offsets import MonthEnd 

curr_ = twelve_month_returns.iloc[0]
winners = curr_.nlargest(10)
winners

NFLX     1.294549
AMZN     1.190749
AYI      0.676443
NVDA     0.664514
HRL      0.559016
VRSN     0.527540
SBUX     0.493214
STZ      0.471928
GOOGL    0.469191
AVGO     0.467665
Name: 2015-12-31 00:00:00, dtype: float64

In [9]:
win_ret = monthly_returns.loc[winners.name + MonthEnd(1), winners.index]
win_ret
win_ret.mean()

-0.0704995317101424

In [10]:
def momentum(all_mtl_ret, lookback):
    all_mtl_ret_lb = all_mtl_ret.rolling(lookback).agg(lambda x: (x+1).prod() - 1)
    all_mtl_ret_lb.dropna(inplace=True)

    rets = []

    for row in range(len(all_mtl_ret_lb) - 1):
        curr = all_mtl_ret_lb.iloc[row]
        win = curr.nlargest(10)
        win_ret = all_mtl_ret.loc[win.name + MonthEnd(1), win.index]
        rets.append(win_ret.mean())

    return (pd.Series(rets) + 1).prod() - 1

In [12]:
for lookback in range(1,13):
    print(momentum(monthly_returns, lookback))

1.8751048553792469
1.216590359113562
2.6128559731244083
2.9374751208862193
2.9646664764896595
3.4761838167501082
4.440341589105945
7.743352419015352
6.578842718458594
8.061537754409343
6.160499061306789
5.0496868564925785
