# 1. Download SP500 Price Data

## 1.1 Import all necessary libraries

In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pyplt
import statsmodels.api as sm
import pandas_datareader.data as web
import datetime as dt
import yfinance as yf
import pandas_ta
import warnings
from statsmodels.regression.rolling import RollingOLS

In [36]:
warnings.filterwarnings('ignore')

## 1.2 Download the SP500 Constituent Data

In [37]:
sp500 = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")
print(type(sp500[0]))


<class 'pandas.core.frame.DataFrame'>


## 1.3 Clean the Data

- Isolate the ticker symbols
- Store the isoalted ticker symbols into a list

In [38]:
tickers_list = sp500[0]["Symbol"].tolist()

## 1.4 Take the start and end date to determine a range

- For this example, I will use 8 years

In [43]:
end_date = dt.datetime.today()
start_date = pd.to_datetime(end_date)-pd.DateOffset(365*8)

## 1.5.1 Per ticker, download the necessary information from the start date to the end date

In [45]:
df = yf.download(tickers=tickers_list, 
                 start = start_date, 
                 end = end_date)
df

[*********************100%%**********************]  503 of 503 completed

2 Failed downloads:
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2016-04-28 17:10:43.763668 -> 2024-04-26 17:10:43.763668)')


Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,ADP,ADSK,AEE,AEP,AES,AFL,AIG,AIZ,AJG,AKAM,ALB,ALGN,ALL,ALLE,AMAT,AMCR,AMD,AME,AMGN,AMP,AMT,AMZN,ANET,ANSS,AON,AOS,APA,APD,APH,...,UNH,UNP,UPS,URI,USB,V,VICI,VLO,VLTO,VMC,VRSK,VRSN,VRTX,VTR,VTRS,VZ,WAB,WAT,WBA,WBD,WDC,WEC,WELL,WFC,WM,WMB,WMT,WRB,WRK,WST,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2
2016-04-28,38.642582,34.468357,21.672606,43.371487,,34.886658,23.556667,100.819069,93.639999,49.254292,32.000378,75.087936,61.389999,37.847172,47.581516,8.552246,28.508385,45.403248,71.679771,38.349407,52.410000,60.120426,73.699997,54.215496,59.702118,18.926081,7.835553,3.610000,45.573177,126.767906,81.886650,86.861275,30.100000,16.440001,90.559998,95.771217,34.079006,46.930992,113.243187,26.097137,...,3332800,3863100,4210000,1285300,6641400,8322600,,4581200,,1003600,581600,962000,5463300,1890500,8658300,13055600,798400,513600,2876600,2246900,7236600,1948000,1423600,15506300,5062800,8025900,14330700,3345975,3670328,298600,910700,4322400,2969800,3163200,11276200,1135100,1965483,2558108,575100,1888600
2016-04-29,38.398598,33.241798,21.423491,43.229752,,33.574764,23.496668,99.575699,94.220001,47.943333,31.896564,74.531746,59.820000,38.093189,47.913513,8.583014,28.491863,45.395115,71.662834,39.739079,50.990002,59.885101,72.190002,54.298973,59.894295,18.616856,7.835553,3.550000,45.563705,124.983597,80.025772,87.486908,32.979500,16.655001,90.769997,97.590836,34.030521,46.058918,111.222900,25.828812,...,4161100,5221100,2516700,1397200,10576000,8092700,,6701300,,1006400,799600,1592000,2888000,3009900,7688100,13684800,1031200,548600,5421200,4294500,19380400,1908100,1717600,17887000,2691000,8848200,36297000,1346400,3154891,507500,889600,3953000,3214700,3865700,17885900,1795300,3140461,2028379,548800,3342300
2016-05-02,39.139927,33.088028,21.400637,43.541573,,33.531597,23.730000,100.880783,94.900002,48.530712,32.152119,75.366051,61.369999,38.204304,48.562428,8.690684,28.557968,46.021313,73.086433,40.481377,50.939999,61.460052,75.099998,54.557720,60.855156,18.471342,7.835553,3.740000,45.914257,125.528381,81.627960,88.012444,34.192501,16.767500,92.239998,98.983383,34.616657,45.923447,111.207657,26.027742,...,2887700,3492700,2905300,1460800,8866500,7636800,,5419000,,975200,501600,1401600,2165400,1528000,5999500,11103800,1162500,513700,3250300,3774800,8391800,2011300,1787300,14780300,3067300,9313900,22829100,1681425,2123905,333200,734700,4380000,4269300,2990600,10273700,1727700,2524109,1681681,392400,3705400
2016-05-03,38.698875,33.232185,21.752594,43.775433,,33.272655,23.600000,99.910797,93.660004,47.866718,31.225729,74.843575,60.340000,38.529675,48.668060,8.559941,28.491863,45.492710,73.069489,40.041180,49.910000,60.944141,74.900002,54.599461,60.113918,18.280352,7.835553,3.600000,45.222614,123.444008,79.750404,86.886337,33.566002,16.067499,91.300003,98.194244,34.004078,44.136982,109.126366,25.787180,...,3632300,4164300,2171900,2069900,6581800,7666300,,8338500,,2746700,953400,723300,2063400,1837400,11911900,12900700,836900,478600,5294000,3540400,8993500,2610500,2810100,16246400,2558700,9217300,17767800,1729350,2594613,250500,1304800,6345800,3669100,2500800,10137400,1773500,2089560,1185324,453600,4474600
2016-05-04,38.023243,31.915590,21.526340,43.555744,,32.884270,23.523333,100.131256,92.940002,47.526215,30.922260,74.489616,58.730000,38.672535,49.339607,8.583014,28.268784,44.606277,72.916969,40.619476,49.540001,60.274311,74.800003,54.490944,59.903454,18.052986,8.376921,3.600000,44.493065,121.730698,79.091164,88.012444,33.544998,15.637500,89.959999,98.379951,33.779327,42.951626,108.905258,25.620626,...,2909300,3532900,2524800,2175200,8835600,8854000,,8052600,,2293700,1006700,1032000,1907400,3914100,5547600,9033800,477900,421100,3480700,6455500,10122300,2269600,4959500,22402500,2219700,9709200,18088500,2009475,2567976,255100,1598200,5208900,3308200,3372100,10601100,1426400,3604081,1356510,451100,5430400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-19,132.729996,14.110000,165.000000,166.410004,155.009995,107.279999,92.930000,316.880005,465.019989,183.360001,62.599998,243.309998,216.500000,73.879997,84.199997,16.400000,83.230003,74.230003,173.830002,236.570007,101.500000,112.150002,298.980011,172.960007,125.209999,189.770004,8.970000,146.639999,177.699997,268.929993,417.010010,171.300003,174.630005,246.089996,322.109985,310.190002,86.070000,32.360001,231.639999,110.169998,...,6616000,2971900,5118500,572700,12061400,7826200,5453200.0,2437500,5196800.0,613800,714300,609300,1342000,1690500,4679900,24693500,1220600,466300,10863100,26344200,7029800,2105700,1901600,34330900,1354000,6745900,14160200,1951500,1802400,444700,301400,3955300,1165300,4477700,21566500,2243400,1673700,1301600,351500,6162800
2024-04-22,133.910004,14.530000,165.839996,167.889999,156.610001,107.070000,93.849998,317.420013,466.890015,186.309998,62.500000,246.309998,217.130005,74.379997,84.900002,16.700001,83.519997,74.959999,174.600006,237.520004,100.879997,112.290001,300.820007,175.410004,125.419998,189.460007,9.030000,148.639999,177.860001,271.910004,421.380005,172.300003,177.229996,245.470001,324.809998,311.019989,86.900002,32.470001,234.360001,111.860001,...,4697700,2200000,4578500,767100,8630700,6463700,9231000.0,2130000,1995500.0,591400,733100,564600,1393800,1434300,5099300,51243200,1057700,358300,7640600,16314400,6832700,1648400,1940500,21753400,1420600,5977300,14910000,1510700,1550600,510700,313700,2670700,1695800,3996100,16340700,1061900,1570500,918300,305100,4545000
2024-04-23,139.199997,14.230000,166.899994,169.539993,160.949997,107.589996,93.779999,316.829987,472.899994,189.929993,61.790001,246.839996,217.929993,74.019997,85.559998,17.129999,83.750000,74.930000,176.309998,237.869995,101.760002,114.209999,311.880005,175.279999,126.970001,193.240005,8.980000,152.270004,179.850006,273.540009,407.690002,174.179993,179.539993,251.179993,325.970001,311.859985,87.919998,32.060001,233.710007,114.260002,...,3646700,1980800,6371100,598200,7079900,6438000,8838600.0,1969300,2554700.0,764900,1068500,726700,1070300,2605800,4675300,25972800,1090200,561600,7297000,25151100,7710600,1979300,2379000,21731200,1596500,5142600,17963900,4029900,2112200,427100,367900,2619100,1231800,4432100,13929800,945600,2140500,742800,245800,4315300
2024-04-24,137.490005,13.920000,169.020004,167.800003,162.839996,106.889999,93.190002,313.540009,477.119995,196.500000,61.560001,246.610001,215.000000,74.709999,86.370003,17.370001,84.279999,74.970001,177.050003,236.809998,102.190002,115.269997,313.779999,172.270004,126.540001,196.059998,9.060000,151.740005,178.220001,273.010010,412.859985,173.350006,176.589996,254.770004,328.570007,308.829987,87.000000,32.049999,234.679993,116.309998,...,3724400,2951100,5284700,641000,6550300,8807200,5648500.0,2350800,3370000.0,568000,1008800,686600,806500,1490800,7595300,15946600,3772600,531000,10269700,20324200,7400400,2034400,2692300,19731600,1875900,5667700,18529100,2864000,1846500,757300,480500,3019100,1256700,4614400,12101200,1053000,1909500,978700,322600,3640300


# 1.5.2 Make Data More Readable

In [51]:
df.stack() 

Unnamed: 0_level_0,Price,Adj Close,Close,High,Low,Open,Volume
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-04-28,A,38.642582,41.180000,41.779999,41.060001,41.250000,1549800.0
2016-04-28,AAL,34.468357,35.970001,37.090000,35.889999,37.040001,9836800.0
2016-04-28,AAPL,21.672606,23.707500,24.469999,23.562500,24.402500,328970800.0
2016-04-28,ABBV,43.371487,61.200001,62.099998,57.529999,58.560001,16235400.0
2016-04-28,ABT,34.886658,40.419998,41.689999,39.990002,40.080002,54527700.0
...,...,...,...,...,...,...,...
2024-04-25,XYL,130.610001,130.610001,131.199997,128.100006,129.619995,963600.0
2024-04-25,YUM,141.559998,141.559998,142.169998,140.389999,141.979996,1692800.0
2024-04-25,ZBH,119.750000,119.750000,121.349998,118.769997,120.709999,1078800.0
2024-04-25,ZBRA,292.529999,292.529999,293.290009,271.630005,274.359985,674700.0


# 2. Calculate Different Technical Indicators per Stock

- RSI
- Bollinger Bands
- MACD
- ATR
- Garman-Klass Volatility
- Dollar Volume

# 3. Aggregate on Monthly Level and Filter per Month the Most Liquid Stocks

# 4. Calculate Monthly Returns for Different Time-Horizons

# 5. Download Fama-French Factors; Calculate Rolling Factor Betas Per Stock

# 6.  Per Month, Create a K-means Clustering Model to group similar assets based on their Features

# 7. Per Month, Select Assets Based on the Cluster and Build a Portfolio Based on That

# 8. Visualize the Portfolio Returns and Compare That with the SP500 