In [39]:
import yfinance as yf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the Cboe listed-symbols CSV, keeping only the "Name" column (the ticker symbols).
tickers = pd.read_csv(
    "https://www.cboe.com/us/equities/market_statistics/listed_symbols/csv",
    usecols=["Name"],
)

In [3]:
# Placeholder so allTickerDF exists before the download cell fills it in.
allTickerDF = pd.DataFrame()

In [4]:
# Download the full price history of every listed symbol. The per-ticker frames
# are collected in a list and concatenated once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on every iteration.
tickerFrames = []
for tickerName in tickers["Name"]:
    currentTickerDF = yf.Ticker(tickerName).history(period="max")
    # Tag each row with its symbol so rows stay attributable after stacking.
    currentTickerDF['Name'] = tickerName
    tickerFrames.append(currentTickerDF)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
# Rebuilding from the list (rather than appending to the previous allTickerDF)
# also keeps this cell safe to re-run without duplicating rows.
allTickerDF = pd.concat(tickerFrames) if tickerFrames else pd.DataFrame()

In [5]:
# Order the rows by their index; rebinding the name yields the same frame as sorting in place.
allTickerDF = allTickerDF.sort_index()

In [6]:
# Peek at the last rows of the combined, index-sorted frame.
allTickerDF.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-09-13,59.970001,60.012199,59.450001,60.012199,2526.0,0.0,0.0,WUGI
2021-09-13,28.48,29.0,28.67,28.75,236704.0,0.0,0.0,IGE
2021-09-13,36.93,36.93,36.73,36.73,604.0,0.0,0.0,FJUN
2021-09-13,29.530001,29.530001,29.51,29.52,1994.0,0.0,0.0,XDQQ
2021-09-13,21.189301,21.189301,21.150101,21.150101,259.0,0.0,0.0,YMAR


In [7]:
# Reshape from long (one row per date and ticker) to wide: one row per date,
# with two-level columns keyed by (field, ticker name).
allTickerDF = (
    allTickerDF
    .reset_index()
    .pivot(
        index="Date",
        columns='Name',
        values=[
            "Open",
            "High",
            "Low",
            "Close",
            "Volume",
            "Dividends",
            "Stock Splits",
        ],
    )
)

In [8]:
# Display the pivoted frame: one row per date, columns keyed by (field, ticker name).
allTickerDF

Unnamed: 0_level_0,Open,Open,Open,Open,Open,Open,Open,Open,Open,Open,...,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits
Name,ACIO,ACSI,ACWV,ADFI,ADME,AESR,AFIF,AGT,ALFA,ALTS,...,XSHD,XSHQ,XTAP,XTJL,XVV,YDEC,YJUN,YMAR,YPS,ZECP
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2000-05-26,,,,,,,,,,,...,,,,,,,,,,
2000-05-30,,,,,,,,,,,...,,,,,,,,,,
2000-05-31,,,,,,,,,,,...,,,,,,,,,,
2000-06-01,,,,,,,,,,,...,,,,,,,,,,
2000-06-02,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-07,31.100000,51.404999,107.970001,9.730,41.500000,14.360,9.69,31.170000,89.489998,39.290001,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-08,30.923000,51.284000,107.040001,9.740,41.292999,14.280,9.70,31.500000,88.110001,39.119999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-09,31.090000,51.306999,107.699997,9.775,41.389999,14.190,9.71,31.110001,88.519997,39.150002,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-10,31.066999,51.119999,107.260002,9.770,41.369999,14.214,9.70,31.100000,87.669998,39.060001,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We are trying to predict each ticker's closing price one day ahead, using the preceding days of data as input.

In [33]:
def create_train_test(data, startDate, numberOfDays=3):
    """Split a date-indexed table into a feature window and the next row's closing prices.

    Starting at ``startDate``, the first ``numberOfDays`` rows form the training
    window and the row immediately after it supplies the targets. Any column
    with a missing value anywhere in those ``numberOfDays + 1`` rows is dropped
    before the split.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose index supports label slicing with ``startDate`` and whose
        columns are two-level, with ``"Close"`` among the first-level labels.
    startDate : label
        First index label of the window (passed to ``data.loc[startDate:]``).
    numberOfDays : int, default 3
        Number of rows in the training window.

    Returns
    -------
    train : pandas.DataFrame
        The first ``numberOfDays`` rows of the gap-free window.
    test : list
        ``"Close"`` values from the row right after the training window, in the
        order of the columns that survived the missing-value filter.

    Raises
    ------
    ValueError
        If ``numberOfDays`` is negative.
    IndexError
        If fewer than ``numberOfDays + 1`` rows exist on or after ``startDate``.
    """
    if numberOfDays < 0:
        raise ValueError(f"numberOfDays must be non-negative, got {numberOfDays}")

    # One extra row beyond the training window holds the prediction target.
    window = data.loc[startDate:].iloc[:numberOfDays + 1]
    if len(window) <= numberOfDays:
        # Same exception type the positional lookup below would raise, but with
        # a message that names the actual problem.
        raise IndexError(
            f"need {numberOfDays + 1} rows on or after {startDate!r}, "
            f"but only {len(window)} are available"
        )

    # Drop each column that has a gap anywhere in the window (target row included).
    window = window.dropna(axis=1)
    train = window.iloc[:numberOfDays]
    test = window.iloc[numberOfDays].loc["Close"].to_list()
    return train, test

In [35]:
# Build the feature window starting 2021-01-04 (default window length) and its targets.
train, test = create_train_test(
    data=allTickerDF,
    startDate="2021-01-04",
)

In [36]:
# Feature window returned by create_train_test (the first numberOfDays rows).
train

Unnamed: 0_level_0,Open,Open,Open,Open,Open,Open,Open,Open,Open,Open,...,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits
Name,ACIO,ACSI,ACWV,ADME,AESR,AFIF,AGT,ALFA,ALTS,AMER,...,WLDR,WUGI,XJH,XJR,XMPT,XSHD,XSHQ,XVV,YDEC,YPS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-01-04,27.716454,45.0,96.628425,35.929054,11.82,9.70065,26.346062,76.419998,36.241614,25.879416,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-01-05,27.430388,42.639999,95.637568,35.879124,11.78,9.680834,25.64723,76.389999,36.320552,26.087561,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-01-06,27.549995,43.080002,95.409677,35.809227,11.74,9.631289,25.747063,74.93,36.57709,26.335356,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
# Target values: the "Close" entries from the row right after the training window.
test

[27.85898780822754,
 43.79399871826172,
 96.46989440917969,
 36.208656311035156,
 12.01099967956543,
 9.67092514038086,
 25.744068145751953,
 79.44999694824219,
 36.75864791870117,
 27.1282901763916,
 103.37999725341797,
 84.11000061035156,
 12.285094261169434,
 28.10700035095215,
 25.57900047302246,
 25.95199966430664,
 29.770000457763672,
 29.96500015258789,
 55.146827697753906,
 55.413368225097656,
 53.46980285644531,
 56.070919036865234,
 56.400001525878906,
 76.1429214477539,
 51.839237213134766,
 69.42161560058594,
 30.733999252319336,
 27.290000915527344,
 32.96500015258789,
 30.200000762939453,
 31.27199935913086,
 28.040315628051758,
 29.829999923706055,
 29.799999237060547,
 30.034000396728516,
 30.280000686645508,
 30.94812774658203,
 29.420000076293945,
 33.37300109863281,
 21.65999984741211,
 52.57899856567383,
 34.098419189453125,
 96.79000091552734,
 26.305999755859375,
 19.059438705444336,
 51.79339599609375,
 30.602386474609375,
 45.824398040771484,
 34.82173156738281,

In [42]:
# Fit a min-max scaler on the training window and scale it in a single step;
# the fitted scaler is kept so the same scaling can be reused later.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train)

In [43]:
# Scaled training window; the scaler returns a NumPy array, so row/column labels are gone.
train_scaled

array([[1.        , 1.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.18698829, ..., 0.        , 0.        ,
        0.        ],
       [0.41810997, 0.18644166, 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [44]:
# Show train once more. NOTE(review): presumably to confirm scaling left the original frame unchanged — confirm intent.
train

Unnamed: 0_level_0,Open,Open,Open,Open,Open,Open,Open,Open,Open,Open,...,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits,Stock Splits
Name,ACIO,ACSI,ACWV,ADME,AESR,AFIF,AGT,ALFA,ALTS,AMER,...,WLDR,WUGI,XJH,XJR,XMPT,XSHD,XSHQ,XVV,YDEC,YPS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-01-04,27.716454,45.0,96.628425,35.929054,11.82,9.70065,26.346062,76.419998,36.241614,25.879416,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-01-05,27.430388,42.639999,95.637568,35.879124,11.78,9.680834,25.64723,76.389999,36.320552,26.087561,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-01-06,27.549995,43.080002,95.409677,35.809227,11.74,9.631289,25.747063,74.93,36.57709,26.335356,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
