# Unsupervised Learning Trading Model

### Downloading and Loading Packages

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import datetime as dt
import yfinance as yf
import pandas_ta
import sklearn
import warnings

In [9]:
warnings.filterwarnings('ignore')

Use the read_html method to retrieve data from Wikipedia. 

**Note:** this data is prone to survivorship bias. To avoid this use data of all companies, including those that were removed from the SP 500.

In [14]:
sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
sp500['Symbol'] = sp500['Symbol'].str.replace('.','-')

I set Sep. 4, 2024 as the end date and then set the start date to 8 years ago approximately. Now we have a matrix with the adjusted close for all 503 of the stocks on the SP 500

In [33]:
symbols_list = sp500['Symbol'].unique().tolist()
end_date = '2024-09-4'
start_date = pd.to_datetime(end_date)-pd.DateOffset(365*8)

df = yf.download(tickers = symbols_list,
                start = start_date,
                end = end_date).stack()

[*********************100%%**********************]  503 of 503 completed


1 Failed download:
['ZTS']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2016-09-06 00:00:00 -> 2024-09-4)')





In [34]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-09-06,A,44.091305,46.950001,47.110001,46.720001,46.900002,1460900.0
2016-09-06,AAL,35.625740,36.959999,36.970001,36.360001,36.450001,5501700.0
2016-09-06,AAPL,24.835474,26.924999,27.075001,26.877501,26.975000,107521600.0
2016-09-06,ABBV,45.769611,64.610001,64.900002,64.050003,64.699997,6286300.0
2016-09-06,ABT,36.419403,42.160000,42.320000,41.799999,42.099998,7079100.0
...,...,...,...,...,...,...,...
2024-09-03,XOM,115.470001,115.470001,116.089996,114.040001,115.839996,15721100.0
2024-09-03,XYL,130.710007,130.710007,136.000000,130.309998,134.130005,2334700.0
2024-09-03,YUM,133.479996,133.479996,135.649994,132.630005,134.600006,1866500.0
2024-09-03,ZBH,116.169998,116.169998,116.709999,115.000000,115.330002,1428700.0


In [38]:
df.index.names = ['date','ticker']
df.columns = df.columns.str.lower()

df

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-09-06,A,44.091305,46.950001,47.110001,46.720001,46.900002,1460900.0
2016-09-06,AAL,35.625740,36.959999,36.970001,36.360001,36.450001,5501700.0
2016-09-06,AAPL,24.835474,26.924999,27.075001,26.877501,26.975000,107521600.0
2016-09-06,ABBV,45.769611,64.610001,64.900002,64.050003,64.699997,6286300.0
2016-09-06,ABT,36.419403,42.160000,42.320000,41.799999,42.099998,7079100.0
...,...,...,...,...,...,...,...
2024-09-03,XOM,115.470001,115.470001,116.089996,114.040001,115.839996,15721100.0
2024-09-03,XYL,130.710007,130.710007,136.000000,130.309998,134.130005,2334700.0
2024-09-03,YUM,133.479996,133.479996,135.649994,132.630005,134.600006,1866500.0
2024-09-03,ZBH,116.169998,116.169998,116.709999,115.000000,115.330002,1428700.0


### Calculate Features and Technical Indicators

1. Garman-Klass Volatility: 

$$ \text{Garman-Klass Volatility} = \frac{(\text{ln}(\text{High})-\text{ln(Low)})^2}{2}-(2\text{ln(2)}-1)*(\text{ln(Adj. Close)}-\text{ln(Open)})^2 $$

In [39]:
df['garman_klass_vol'] = ((np.log(df['high'])-np.log(df['low']))**2)/2 - (2*np.log(2)-1)*(np.log(df['adj close'])-np.log(df['open']))**2

In [40]:
df['rsi'] = df.groupby(level = 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,close,high,low,open,volume,garman_klass_vol
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-09-06,A,44.091305,46.950001,47.110001,46.720001,46.900002,1460900.0,-0.001439
2016-09-06,AAL,35.625740,36.959999,36.970001,36.360001,36.450001,5501700.0,-0.000064
2016-09-06,AAPL,24.835474,26.924999,27.075001,26.877501,26.975000,107521600.0,-0.002611
2016-09-06,ABBV,45.769611,64.610001,64.900002,64.050003,64.699997,6286300.0,-0.046196
2016-09-06,ABT,36.419403,42.160000,42.320000,41.799999,42.099998,7079100.0,-0.008039
...,...,...,...,...,...,...,...,...
2024-09-03,XOM,115.470001,115.470001,116.089996,114.040001,115.839996,15721100.0,0.000155
2024-09-03,XYL,130.710007,130.710007,136.000000,130.309998,134.130005,2334700.0,0.000656
2024-09-03,YUM,133.479996,133.479996,135.649994,132.630005,134.600006,1866500.0,0.000226
2024-09-03,ZBH,116.169998,116.169998,116.709999,115.000000,115.330002,1428700.0,0.000089
