### Get the Required Data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import matplotlib.cm as cm
import statsmodels.formula.api as sm
import statsmodels.tsa.stattools as ts
from sklearn.cluster import KMeans, DBSCAN 
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn import preprocessing
from pykalman import KalmanFilter
from statsmodels.tsa.stattools import coint 
from scipy import stats
import yfinance as yf
from statsmodels.regression.linear_model import OLS

plt.style.use('seaborn')
%matplotlib inline

import itertools
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  plt.style.use('seaborn')


In [2]:
# Target series: Russell 2000 (^RUT) and S&P 400 (^SP400) equity indices.
# Each resulting frame keeps only the 'Close' column, renamed to the short
# index label so the frames can later be concatenated side by side.
russell_df, sp400_df = (
    yf.download(symbol, start='2021-02-10', end='2023-01-18', progress=False)[['Close']]
    .rename(columns={'Close': label})
    for symbol, label in (('^RUT', 'RUT'), ('^SP400', 'SP400'))
)


In [3]:
## Features 1 --- stocks inside the Russell 2000
# One single-column frame per ticker: the 'Close' series, renamed to the ticker.
# The names on the left line up positionally with the tickers in the tuple below.
(ovv_df, ar_df, chk_df, swn_df, rrc_df, bj_df, car_df,
 pdce_df, lscc_df, wsc_df, pfgc_df, ttek_df, hr_df) = (
    yf.download(ticker, start='2021-02-10', end='2023-01-18', progress=False)[['Close']]
    .rename(columns={'Close': ticker})
    for ticker in ('OVV', 'AR', 'CHK', 'SWN', 'RRC', 'BJ', 'CAR',
                   'PDCE', 'LSCC', 'WSC', 'PFGC', 'TTEK', 'HR')
)


In [4]:
## Features 2 --- ETFs tracking the Russell 2000
# One single-column frame per ETF: the 'Close' series, renamed to the ticker.
# The names on the left line up positionally with the tickers in the tuple below.
(iwn_df, vtwo_df, urty_df, vtwv_df, tna_df,
 tza_df, rwm_df, srty_df, twm_df) = (
    yf.download(ticker, start='2021-02-10', end='2023-01-18', progress=False)[['Close']]
    .rename(columns={'Close': ticker})
    for ticker in ('IWN', 'VTWO', 'URTY', 'VTWV', 'TNA',
                   'TZA', 'RWM', 'SRTY', 'TWM')
)


In [5]:
## Features 3 --- stocks inside the S&P 400
# One single-column frame per ticker: the 'Close' series, renamed to the ticker.
# Each ticker is paired with the start date it is requested from.
# NOTE(review): these start dates ('1900-01-01' / '2020-02-10') differ from the
# '2021-02-10' used for the other downloads in this notebook -- confirm whether
# the longer history is actually needed.
(aeo_df, dpz_df, fnf_df, fl_df, gt_df, ibkr_df, mat_df,
 nyt_df, sedg_df, stld_df, trip_df, wh_df, yelp_df) = (
    yf.download(ticker, start=first_day, end='2023-01-18', progress=False)[['Close']]
    .rename(columns={'Close': ticker})
    for ticker, first_day in (
        ('AEO', '1900-01-01'),
        ('DPZ', '1900-01-01'),
        ('FNF', '1900-01-01'),
        ('FL', '1900-01-01'),
        ('GT', '1900-01-01'),
        ('IBKR', '1900-01-01'),
        ('MAT', '1900-01-01'),
        ('NYT', '1900-01-01'),
        ('SEDG', '2020-02-10'),
        ('STLD', '1900-01-01'),
        ('TRIP', '1900-01-01'),
        ('WH', '2020-02-10'),
        ('YELP', '2020-02-10'),
    )
)

In [6]:
## Features 4 --- ETFs tracking the S&P 400
# One single-column frame per ETF: the 'Close' series, renamed to the ticker.
# NOTE(review): start is '1900-01-01' here, unlike the '2021-02-10' used for
# the target indices -- confirm whether the longer history is actually needed.
xmmo_df, xmhq_df, ijh_df, mdy_df, ivoo_df, spmd_df = (
    yf.download(ticker, start='1900-01-01', end='2023-01-18', progress=False)[['Close']]
    .rename(columns={'Close': ticker})
    for ticker in ('XMMO', 'XMHQ', 'IJH', 'MDY', 'IVOO', 'SPMD')
)


In [7]:
## Features 5 --- market index
# Fetch SPY first, then keep only its 'Close' column under the name 'SPY'.
_spy_history = yf.download('SPY', start='2021-02-10', end='2023-01-18', progress=False)
spy_df = _spy_history[['Close']].rename(columns={'Close': 'SPY'})

In [8]:
# Build the combined dataset: every single-column price frame placed side by
# side and aligned on the shared index.
_price_frames = [
    # target indices
    russell_df, sp400_df,
    # stocks inside the Russell 2000
    ovv_df, ar_df, chk_df, swn_df, rrc_df, bj_df, car_df,
    pdce_df, lscc_df, wsc_df, pfgc_df, ttek_df, hr_df,
    # ETFs tracking the Russell 2000
    iwn_df, vtwo_df, urty_df, vtwv_df, tna_df, tza_df, rwm_df, srty_df, twm_df,
    # stocks inside the S&P 400
    aeo_df, dpz_df, fnf_df, fl_df, gt_df, ibkr_df, mat_df,
    nyt_df, sedg_df, stld_df, trip_df, wh_df, yelp_df,
    # ETFs tracking the S&P 400
    xmmo_df, xmhq_df, ijh_df, mdy_df, ivoo_df, spmd_df,
    # market index
    spy_df,
]

# Column-wise concat, then drop every row in which at least one column is missing.
data = pd.concat(_price_frames, axis=1).dropna()
data

Unnamed: 0_level_0,RUT,SP400,OVV,AR,CHK,SWN,RRC,BJ,CAR,PDCE,...,TRIP,WH,YELP,XMMO,XMHQ,IJH,MDY,IVOO,SPMD,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-10,2282.439941,2519.239990,20.090000,8.400000,44.990002,4.38,10.190000,43.689999,43.270000,28.090000,...,36.590000,62.299999,36.790001,86.129997,73.769997,251.520004,459.359985,170.050003,44.189999,390.079987
2021-02-11,2285.320068,2535.250000,20.549999,8.110000,42.799999,4.17,9.570000,42.860001,42.750000,27.820000,...,36.980000,60.919998,37.910000,87.500000,74.370003,253.089996,462.190002,171.119995,44.459999,390.709991
2021-02-12,2289.360107,2544.550049,21.690001,9.000000,42.509998,4.34,10.160000,43.000000,44.709999,28.600000,...,37.380001,60.200001,36.970001,88.029999,74.639999,253.850006,463.579987,171.740005,44.630001,392.640015
2021-02-16,2272.889893,2539.229980,22.700001,9.490000,43.549999,4.51,10.940000,43.459999,45.700001,29.250000,...,37.290001,60.700001,36.160000,87.589996,74.620003,253.389999,462.790009,171.350006,44.560001,392.299988
2021-02-17,2256.110107,2525.610107,22.930000,9.780000,44.259998,4.60,11.470000,43.810001,42.820000,29.190001,...,38.049999,59.849998,35.880001,87.330002,74.269997,252.130005,460.279999,170.520004,44.330002,392.390015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-10,1822.650024,2514.520020,49.000000,29.000000,88.370003,5.68,24.420000,67.050003,171.669998,62.889999,...,20.580000,70.389999,28.469999,76.239998,71.489998,250.440002,458.440002,169.479996,44.020000,390.579987
2023-01-11,1844.050049,2545.360107,49.020000,28.760000,89.570000,5.74,24.629999,69.709999,182.000000,63.400002,...,20.629999,71.839996,28.680000,76.820000,72.320000,253.500000,464.119995,171.550003,44.570000,395.519989
2023-01-12,1876.060059,2568.370117,50.869999,29.950001,91.489998,6.00,25.610001,69.349998,189.339996,66.150002,...,21.030001,72.540001,28.860001,77.690002,72.849998,255.880005,468.359985,173.000000,44.970001,396.959991
2023-01-13,1887.030029,2580.909912,50.279999,29.740000,91.209999,5.97,25.180000,69.050003,190.899994,66.610001,...,21.350000,73.629997,29.049999,77.980003,73.529999,257.170013,470.649994,173.940002,45.189999,398.500000


In [9]:
# Persist the combined table as 'data.csv' (path is relative to the current
# working directory; the index is written as well, which is the pandas default).
data.to_csv(path_or_buf='data.csv')

# Show the column labels of the table that was just written.
data.columns