In [2]:
#!pip install yfinance

In [3]:
import yfinance as yf
import pandas as pd
import numpy as np

In [4]:
googl = yf.Ticker('GOOGL')
googl_df = googl.history(period='max')
googl_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0
2004-08-20,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0
2004-08-23,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0
2004-08-24,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0
2004-08-25,52.532532,54.054054,51.991993,53.053055,9188602,0,0.0


In [5]:
def rolling_aves(stock_df):
    
    stock_df['Mt'] = (stock_df['High'] + stock_df['Low'] + stock_df['Close'])/3
    stock_df['Price Change'] = (stock_df['Close'] - stock_df['Close'].shift(1, fill_value=0)) / stock_df['Close'].shift(1, fill_value=1)

    # Generate columns for 5 day means using pandas rolling  
    stock_df['5 Day Open Mean'] = stock_df['Open'].rolling(5, min_periods=1).mean()
    stock_df['5 Day High Mean'] = stock_df['High'].rolling(5,min_periods=1).mean()
    stock_df['5 Day Low Mean'] = stock_df['Low'].rolling(5, min_periods=1).mean()
    stock_df['5 Day Close Mean'] = stock_df['Close'].rolling(5, min_periods=1).mean()
    stock_df['5 Day Volume Mean'] = stock_df['Volume'].rolling(5,min_periods=1).mean()
    
    # Produce columns for 5 day var using rolling 
    stock_df['5 Day Open Var'] = stock_df['Open'].rolling(5, min_periods=1).var(ddof=0)
     # could also try "...var(ddof=0).interpolate(limit_direction ='backward')" if you dont want to pad with zeroes
    stock_df['5 Day High Var'] = stock_df['High'].rolling(5, min_periods=1).var(ddof=0)
    stock_df['5 Day Low Var'] = stock_df['Low'].rolling(5, min_periods=1).var(ddof=0)
    stock_df['5 Day Close Var'] = stock_df['Close'].rolling(5, min_periods=1).var(ddof=0)
    stock_df['5 Day Volume Var'] = stock_df['Volume'].rolling(5, min_periods=1).var(ddof=0)
    
    stock_df['5 Day High'] = stock_df['High'].rolling(5, min_periods=1).max()
    stock_df['5 Day Low'] = stock_df['Low'].rolling(5, min_periods=1).min()
    
    
    # 5 Day SMt & Dt --->>> This is used for advanced analytics
    stock_df['5 Day SMt'] = stock_df['Mt'].rolling(5, min_periods=1).mean()
    stock_df['5 Day Dt'] = stock_df['Mt'].rolling(5, min_periods=1).std(ddof=0)
    
    
    # Create 10 day means
    stock_df['10 Day Open Mean'] = stock_df['Open'].rolling(10, min_periods=1).mean()
    stock_df['10 Day High Mean'] = stock_df['High'].rolling(10, min_periods=1).mean()
    stock_df['10 Day Low Mean'] = stock_df['Low'].rolling(10, min_periods=1).mean()
    stock_df['10 Day Close Mean'] = stock_df['Close'].rolling(10, min_periods=1).mean()
    stock_df['10 Day Volume Mean'] = stock_df['Volume'].rolling(10, min_periods=1).mean()
    
    
    # produce 10 day var columns
    stock_df['10 Day Open Var'] = stock_df['Open'].rolling(10, min_periods=1).var(ddof=0)
    stock_df['10 Day High Var'] = stock_df['High'].rolling(10, min_periods=1).var(ddof=0)
    stock_df['10 Day Low Var'] = stock_df['Low'].rolling(10, min_periods=1).var(ddof=0)
    stock_df['10 Day Close Var'] = stock_df['Close'].rolling(10, min_periods=1).var(ddof=0)
    stock_df['10 Day Volume Var'] = stock_df['Volume'].rolling(10, min_periods=1).var(ddof=0)
    
    
    stock_df['10 Day High'] = stock_df['High'].rolling(10, min_periods=1).max()
    stock_df['10 Day Low'] = stock_df['Low'].rolling(10, min_periods=1).min()
    
    # 10 Day SMt & Dt --->>> This is used for advanced analytics
    stock_df['10 Day SMt'] = stock_df['Mt'].rolling(10, min_periods=1).mean()
    stock_df['10 Day Dt'] = stock_df['Mt'].rolling(10, min_periods=1).std(ddof=0)
    
    # Produce 20 day mean columns
    stock_df['20 Day Open Mean'] = stock_df['Open'].rolling(20, min_periods=1).mean()
    stock_df['20 Day High Mean'] = stock_df['High'].rolling(20, min_periods=1).mean()
    stock_df['20 Day Low Mean'] = stock_df['Low'].rolling(20, min_periods=1).mean()
    stock_df['20 Day Close Mean'] = stock_df['Close'].rolling(20, min_periods=1).mean()
    stock_df['20 Day Volume Mean'] = stock_df['Volume'].rolling(20, min_periods=1).mean()
    
    
    # Produce columns for 20 day var
    stock_df['20 Day Open Var'] = stock_df['Open'].rolling(20, min_periods=1).var(ddof=0)
    stock_df['20 Day High Var'] = stock_df['High'].rolling(20, min_periods=1).var(ddof=0)
    stock_df['20 Day Low Var'] = stock_df['Low'].rolling(20, min_periods=1).var(ddof=0)
    stock_df['20 Day Close Var'] = stock_df['Close'].rolling(20, min_periods=1).var(ddof=0)
    stock_df['20 Day Volume Var'] = stock_df['Volume'].rolling(20, min_periods=1).var(ddof=0)
    
    stock_df['20 Day High'] = stock_df['High'].rolling(20, min_periods=1).max()
    stock_df['20 Day Low'] = stock_df['Low'].rolling(20, min_periods=1).min()
    
    # 10 Day SMt & Dt --->>> This is used for advanced analytics
    stock_df['20 Day SMt'] = stock_df['Mt'].rolling(20, min_periods=1).mean()
    stock_df['20 Day Dt'] = stock_df['Mt'].rolling(20, min_periods=1).std(ddof=0)
    
    ###############################################################
    ###############  Advanced Analytics ###########################
    ###############################################################
    
    # Weighted Moving Averages
    stock_df['5 Day Weighted Close Ave'] = np.mean([stock_df['Close'].rolling(1, min_periods=1).mean(),\
                                                    stock_df['Close'].rolling(2, min_periods=1).mean(),\
                                                    stock_df['Close'].rolling(3, min_periods=1).mean(),\
                                                    stock_df['Close'].rolling(4, min_periods=1).mean(),\
                                                    stock_df['Close'].rolling(5, min_periods=1).mean()])
    
    stock_df['10 Day Weighted Close Ave'] = np.mean([stock_df['Close'].rolling(1, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(2, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(3, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(4, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(5, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(6, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(7, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(8, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(9, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(10, min_periods=1).mean()])
    
    stock_df['20 Day Weighted Close Ave'] = np.mean([stock_df['Close'].rolling(1, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(2, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(3, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(4, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(5, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(6, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(7, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(8, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(9, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(10, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(11, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(12, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(13, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(14, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(15, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(16, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(17, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(18, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(19, min_periods=1).mean(),\
                                                     stock_df['Close'].rolling(20, min_periods=1).mean()])
    
                                                     
    
    # Momentum
    stock_df['5 Day Momentum'] = stock_df['Close'] - stock_df['Close'].shift(5, fill_value=0)
    stock_df['10 Day Momentum'] = stock_df['Close'] - stock_df['Close'].shift(10, fill_value=0)
    stock_df['20 Day Momentum'] = stock_df['Close'] - stock_df['Close'].shift(20, fill_value=0)
    
    # Stochasitc K%
    stock_df['5 Day Stochastic K'] = 100*(stock_df['Close'] - stock_df['5 Day Low'])\
    / (stock_df['5 Day High'] - stock_df['5 Day Low'])
    stock_df['10 Day Stochastic K'] = 100*(stock_df['Close'] - stock_df['10 Day Low'])\
    / (stock_df['10 Day High'] - stock_df['10 Day Low'])
    stock_df['20 Day Stochastic K'] = 100*(stock_df['Close'] - stock_df['20 Day Low'])\
    / (stock_df['20 Day High'] - stock_df['20 Day Low'])
    
    # Stochastic D%
    stock_df['5 Day Stochastic K'] = stock_df['5 Day Stochastic K'].rolling(5, min_periods=1).mean()
    stock_df['10 Day Stochastic K'] = stock_df['10 Day Stochastic K'].rolling(5, min_periods=1).mean()
    stock_df['20 Day Stochastic K'] = stock_df['20 Day Stochastic K'].rolling(5, min_periods=1).mean()
    
    
    # Relative Strength Index (RSI)
    stock_df['5 Day RSI'] = 100 - 100/(1 + stock_df['Price Change'].rolling(5, min_periods=1).mean())
    stock_df['10 Day RSI'] = 100 - 100/(1 + stock_df['Price Change'].rolling(10, min_periods=1).mean())
    stock_df['20 Day RSI'] = 100 - 100/(1 + stock_df['Price Change'].rolling(20, min_periods=1).mean())
    
    # Signal
    
    # Larry Williams
    stock_df['Larry Williams R 5 Day'] = 100*(stock_df['5 Day High'] - stock_df['Close'])\
    / (stock_df['5 Day High'] - stock_df['5 Day Low'])
    stock_df['Larry Williams R 10 Day'] = 100*(stock_df['10 Day High'] - stock_df['Close'])\
    / (stock_df['10 Day High'] - stock_df['10 Day Low'])
    stock_df['Larry Williams R 20 Day'] = 100*(stock_df['20 Day High'] - stock_df['Close'])\
    / (stock_df['20 Day High'] - stock_df['20 Day Low'])
    
    # Accumulation/Distribution Oscillator
    stock_df['AD Oscillator'] = (stock_df['High'] - stock_df['Close']) / (stock_df['High'] - stock_df['Low'])
    
    # CCI (Commodity Channel Index)
    stock_df['5 Day CCI'] = (stock_df['Mt']-stock_df['5 Day SMt']) / (0.015*stock_df['5 Day Dt'])
    stock_df['10 Day CCI'] = (stock_df['Mt']-stock_df['10 Day SMt']) / (0.015*stock_df['10 Day Dt'])
    stock_df['20 Day CCI'] = (stock_df['Mt']-stock_df['20 Day SMt']) / (0.015*stock_df['20 Day Dt'])
    
    stock_df['5 Day CCI'].fillna(0, inplace=True)
    stock_df['10 Day CCI'].fillna(0, inplace=True)
    stock_df['20 Day CCI'].fillna(0, inplace=True)
    
    return stock_df

In [6]:
googl_df = rolling_aves(googl_df)

In [7]:
def date_time_prep(stock_df):
  # Makes columns for day/month/year from datetime index
  stock_df['Day'] = stock_df.index.day
  stock_df['Month'] = stock_df.index.month
  stock_df['Year'] = stock_df.index.year

  # Calculates the number of days since IPO
  stock_df['Days From IPO'] = (stock_df.index - stock_df.index[0]).days

  return stock_df

In [8]:
googl_df = date_time_prep(googl_df)
googl_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Mt,Price Change,5 Day Open Mean,...,Larry Williams R 10 Day,Larry Williams R 20 Day,AD Oscillator,5 Day CCI,10 Day CCI,20 Day CCI,Day,Month,Year,Days From IPO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0,50.110109,50.220219,50.050049,...,45.925932,45.925932,0.459259,0.000000,0.000000,0.000000,19,8,2004,0
2004-08-20,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0,53.034702,0.079430,50.302803,...,5.868930,5.868930,0.089744,66.666667,66.666667,66.666667,20,8,2004,1
2004-08-23,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0,55.377043,0.010064,52.012012,...,23.287703,23.287703,0.920994,78.481214,78.481214,78.481214,23,8,2004,4
2004-08-24,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0,53.393394,-0.041408,52.927928,...,49.143835,49.143835,0.838107,14.691954,14.691954,14.691954,24,8,2004,5
2004-08-25,52.532532,54.054054,51.991993,53.053055,9188602,0,0.0,53.033034,0.010775,52.848849,...,42.694049,42.694049,0.485436,1.718508,1.718508,1.718508,25,8,2004,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08,2084.060059,2113.739990,2005.189941,2007.500000,1842700,0,0.0,2042.143311,-0.042712,2055.373999,...,86.017312,88.851572,0.978719,-30.866396,-24.047454,-68.143428,8,3,2021,6045
2021-03-09,2049.750000,2063.919922,2035.599976,2040.359985,1693300,0,0.0,2046.626628,0.016369,2052.355981,...,61.282788,67.639253,0.831920,1.778410,-9.430004,-51.837991,9,3,2021,6046
2021-03-10,2061.399902,2061.399902,2019.329956,2036.189941,1353100,0,0.0,2038.973267,-0.002044,2054.245972,...,64.765371,70.331168,0.599239,-43.874503,-29.326273,-64.182892,10,3,2021,6047
2021-03-11,2058.219971,2111.270020,2056.449951,2100.540039,1384200,0,0.0,2089.420003,0.031603,2062.915967,...,11.023845,28.790833,0.195731,99.771468,121.683216,55.063973,11,3,2021,6048


In [9]:
def future_close_setup(stock_df, days=1):
  #This function adds a second closing column and moves it up the number of rows
  # needed to predict that many days ahead
  stock_df['Close in ' + str(days) + ' Days'] = stock_df['Close']
  stock_df['Close in ' + str(days) + ' Days'] = stock_df['Close in ' + str(days) + ' Days'].shift(-days)
  return stock_df

In [10]:
googl_df = future_close_setup(googl_df, 5)
googl_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Mt,Price Change,5 Day Open Mean,...,Larry Williams R 20 Day,AD Oscillator,5 Day CCI,10 Day CCI,20 Day CCI,Day,Month,Year,Days From IPO,Close in 5 Days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0,50.110109,50.220219,50.050049,...,45.925932,0.459259,0.000000,0.000000,0.000000,19,8,2004,0,54.009010
2004-08-20,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0,53.034702,0.079430,50.302803,...,5.868930,0.089744,66.666667,66.666667,66.666667,20,8,2004,1,53.128128
2004-08-23,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0,55.377043,0.010064,52.012012,...,23.287703,0.920994,78.481214,78.481214,78.481214,23,8,2004,4,51.056057
2004-08-24,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0,53.393394,-0.041408,52.927928,...,49.143835,0.838107,14.691954,14.691954,14.691954,24,8,2004,5,51.236237
2004-08-25,52.532532,54.054054,51.991993,53.053055,9188602,0,0.0,53.033034,0.010775,52.848849,...,42.694049,0.485436,1.718508,1.718508,1.718508,25,8,2004,6,50.175175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08,2084.060059,2113.739990,2005.189941,2007.500000,1842700,0,0.0,2042.143311,-0.042712,2055.373999,...,88.851572,0.978719,-30.866396,-24.047454,-68.143428,8,3,2021,6045,
2021-03-09,2049.750000,2063.919922,2035.599976,2040.359985,1693300,0,0.0,2046.626628,0.016369,2052.355981,...,67.639253,0.831920,1.778410,-9.430004,-51.837991,9,3,2021,6046,
2021-03-10,2061.399902,2061.399902,2019.329956,2036.189941,1353100,0,0.0,2038.973267,-0.002044,2054.245972,...,70.331168,0.599239,-43.874503,-29.326273,-64.182892,10,3,2021,6047,
2021-03-11,2058.219971,2111.270020,2056.449951,2100.540039,1384200,0,0.0,2089.420003,0.031603,2062.915967,...,28.790833,0.195731,99.771468,121.683216,55.063973,11,3,2021,6048,


In [20]:
def lstm_prep(stock_df, lookback=5):
    base_df =  stock_df.copy()
    col_names = stock_df.columns
    for i in range(1, lookback+1):
        curr_cols = []
        for col in col_names:
            curr_cols.append('- ' + str(i) + ' Days ' + col)
        curr_df = base_df.shift(i, fill_value=0)
        curr_df.columns = curr_cols
        stock_df = pd.concat([stock_df, curr_df], axis=1)
    
    return stock_df
    
lstm_prep(googl_df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Mt,Price Change,5 Day Open Mean,...,- 5 Days Larry Williams R 20 Day,- 5 Days AD Oscillator,- 5 Days 5 Day CCI,- 5 Days 10 Day CCI,- 5 Days 20 Day CCI,- 5 Days Day,- 5 Days Month,- 5 Days Year,- 5 Days Days From IPO,- 5 Days Close in 5 Days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0,50.110109,50.220219,50.050049,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,0.000000
2004-08-20,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0,53.034702,0.079430,50.302803,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,0.000000
2004-08-23,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0,55.377043,0.010064,52.012012,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,0.000000
2004-08-24,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0,53.393394,-0.041408,52.927928,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,0.000000
2004-08-25,52.532532,54.054054,51.991993,53.053055,9188602,0,0.0,53.033034,0.010775,52.848849,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08,2084.060059,2113.739990,2005.189941,2007.500000,1842700,0,0.0,2042.143311,-0.042712,2055.373999,...,25.113957,0.157141,62.791782,-25.162432,3.604253,1,3,2021,6038,2007.500000
2021-03-09,2049.750000,2063.919922,2035.599976,2040.359985,1693300,0,0.0,2046.626628,0.016369,2052.355981,...,33.781441,0.794850,73.954489,10.469599,9.178106,2,3,2021,6039,2040.359985
2021-03-10,2061.399902,2061.399902,2019.329956,2036.189941,1353100,0,0.0,2038.973267,-0.002044,2054.245972,...,86.327503,0.787657,-61.382043,-81.422032,-114.844079,3,3,2021,6040,2036.189941
2021-03-11,2058.219971,2111.270020,2056.449951,2100.540039,1384200,0,0.0,2089.420003,0.031603,2062.915967,...,71.790008,0.587912,-24.648233,-44.631864,-81.094932,4,3,2021,6041,2100.540039


In [13]:
googl_df.shift(1, fill_value=0)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Mt,Price Change,5 Day Open Mean,...,Larry Williams R 20 Day,AD Oscillator,5 Day CCI,10 Day CCI,20 Day CCI,Day,Month,Year,Days From IPO,Close in 5 Days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,0.000000,0.000000,0.000000,0.000000,0,0,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,0.000000
2004-08-20,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0,50.110109,50.220219,50.050049,...,45.925932,0.459259,0.000000,0.000000,0.000000,19,8,2004,0,54.009010
2004-08-23,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0,53.034702,0.079430,50.302803,...,5.868930,0.089744,66.666667,66.666667,66.666667,20,8,2004,1,53.128128
2004-08-24,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0,55.377043,0.010064,52.012012,...,23.287703,0.920994,78.481214,78.481214,78.481214,23,8,2004,4,51.056057
2004-08-25,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0,53.393394,-0.041408,52.927928,...,49.143835,0.838107,14.691954,14.691954,14.691954,24,8,2004,5,51.236237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08,2061.149902,2106.199951,2027.780029,2097.070068,2649400,0,0.0,2077.016683,0.031043,2048.161987,...,31.030825,0.116423,76.560859,98.697245,13.488929,5,3,2021,6042,2050.000000
2021-03-09,2084.060059,2113.739990,2005.189941,2007.500000,1842700,0,0.0,2042.143311,-0.042712,2055.373999,...,88.851572,0.978719,-30.866396,-24.047454,-68.143428,8,3,2021,6045,
2021-03-10,2049.750000,2063.919922,2035.599976,2040.359985,1693300,0,0.0,2046.626628,0.016369,2052.355981,...,67.639253,0.831920,1.778410,-9.430004,-51.837991,9,3,2021,6046,
2021-03-11,2061.399902,2061.399902,2019.329956,2036.189941,1353100,0,0.0,2038.973267,-0.002044,2054.245972,...,70.331168,0.599239,-43.874503,-29.326273,-64.182892,10,3,2021,6047,


In [17]:
googl_df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits',
       'Mt', 'Price Change', '5 Day Open Mean', '5 Day High Mean',
       '5 Day Low Mean', '5 Day Close Mean', '5 Day Volume Mean',
       '5 Day Open Var', '5 Day High Var', '5 Day Low Var', '5 Day Close Var',
       '5 Day Volume Var', '5 Day High', '5 Day Low', '5 Day SMt', '5 Day Dt',
       '10 Day Open Mean', '10 Day High Mean', '10 Day Low Mean',
       '10 Day Close Mean', '10 Day Volume Mean', '10 Day Open Var',
       '10 Day High Var', '10 Day Low Var', '10 Day Close Var',
       '10 Day Volume Var', '10 Day High', '10 Day Low', '10 Day SMt',
       '10 Day Dt', '20 Day Open Mean', '20 Day High Mean', '20 Day Low Mean',
       '20 Day Close Mean', '20 Day Volume Mean', '20 Day Open Var',
       '20 Day High Var', '20 Day Low Var', '20 Day Close Var',
       '20 Day Volume Var', '20 Day High', '20 Day Low', '20 Day SMt',
       '20 Day Dt', '5 Day Weighted Close Ave', '10 Day Weighted Close Ave',
 

In [None]:
def end_to_end_lstm_prep(stock_objects, lookback=5, pred_len=5):
    stock_dfs = {}
    for stock in stock_objects:
        stock_dfs[stock] = stock_objects[stock].history(period='max')
        stock_dfs[stock] = rolling_aves(stock_dfs[stock])
        stock_dfs[stock] = lstm_prep(stock_dfs[stock], lookback=lookback)
        stock_dfs[stock] = future_close_setup(stock_dfs[stock], pred_len)
    
    combine_df = combiner(stock_dfs)
    
    return combine_df, stock_dfs

In [23]:
def future_percent_change_setup(stock_df, days=1):
  #This function adds a second closing column and moves it up the number of rows
  # needed to predict that many days ahead
  stock_df['Percentage Change in ' + str(days) + ' Days'] = stock_df['Close']
  stock_df['Percentage Change in ' + str(days) + ' Days'] = stock_df['Percentage Change in ' + str(days) + ' Days'].shift(-days)
  stock_df['Percentage Change in ' + str(days) + ' Days'] =(stock_df['Percentage Change in ' + str(days) + ' Days']- stock_df['Close']) / stock_df['Close']
  return stock_df

In [37]:
def classifier_setup(stock_df, days=1):
    stock_df = future_percent_change_setup(stock_df, days=days)
    stock_df['Increase Flag'] = 0
    stock_df.loc[stock_df[stock_df.columns[-2]] > 0, 'Increase Flag'] = 1
    stock_df.drop(stock_df.columns[-2], inplace=True, axis=1)
    return stock_df
classifier_setup(googl_df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Mt,Price Change,5 Day Open Mean,...,AD Oscillator,5 Day CCI,10 Day CCI,20 Day CCI,Day,Month,Year,Days From IPO,Close in 5 Days,Increase Flag
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0,50.110109,50.220219,50.050049,...,0.459259,0.000000,0.000000,0.000000,19,8,2004,0,54.009010,1
2004-08-20,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0,53.034702,0.079430,50.302803,...,0.089744,66.666667,66.666667,66.666667,20,8,2004,1,53.128128,1
2004-08-23,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0,55.377043,0.010064,52.012012,...,0.920994,78.481214,78.481214,78.481214,23,8,2004,4,51.056057,0
2004-08-24,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0,53.393394,-0.041408,52.927928,...,0.838107,14.691954,14.691954,14.691954,24,8,2004,5,51.236237,1
2004-08-25,52.532532,54.054054,51.991993,53.053055,9188602,0,0.0,53.033034,0.010775,52.848849,...,0.485436,1.718508,1.718508,1.718508,25,8,2004,6,50.175175,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08,2084.060059,2113.739990,2005.189941,2007.500000,1842700,0,0.0,2042.143311,-0.042712,2055.373999,...,0.978719,-30.866396,-24.047454,-68.143428,8,3,2021,6045,,1
2021-03-09,2049.750000,2063.919922,2035.599976,2040.359985,1693300,0,0.0,2046.626628,0.016369,2052.355981,...,0.831920,1.778410,-9.430004,-51.837991,9,3,2021,6046,,0
2021-03-10,2061.399902,2061.399902,2019.329956,2036.189941,1353100,0,0.0,2038.973267,-0.002044,2054.245972,...,0.599239,-43.874503,-29.326273,-64.182892,10,3,2021,6047,,1
2021-03-11,2058.219971,2111.270020,2056.449951,2100.540039,1384200,0,0.0,2089.420003,0.031603,2062.915967,...,0.195731,99.771468,121.683216,55.063973,11,3,2021,6048,,0


In [31]:
googl_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Mt,Price Change,5 Day Open Mean,...,5 Day CCI,10 Day CCI,20 Day CCI,Day,Month,Year,Days From IPO,Close in 5 Days,Percentage Change in 1 Days,Increase Flag
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,50.050049,52.082081,48.028027,50.220219,44659096,0,0.0,50.110109,50.220219,50.050049,...,0.000000,0.000000,0.000000,19,8,2004,0,54.009010,0.079430,0
2004-08-20,50.555557,54.594597,50.300301,54.209209,22834343,0,0.0,53.034702,0.079430,50.302803,...,66.666667,66.666667,66.666667,20,8,2004,1,53.128128,0.010064,0
2004-08-23,55.430431,56.796799,54.579578,54.754753,18256126,0,0.0,55.377043,0.010064,52.012012,...,78.481214,78.481214,78.481214,23,8,2004,4,51.056057,-0.041408,0
2004-08-24,55.675674,55.855858,51.836838,52.487488,15247337,0,0.0,53.393394,-0.041408,52.927928,...,14.691954,14.691954,14.691954,24,8,2004,5,51.236237,0.010775,0
2004-08-25,52.532532,54.054054,51.991993,53.053055,9188602,0,0.0,53.033034,0.010775,52.848849,...,1.718508,1.718508,1.718508,25,8,2004,6,50.175175,0.018019,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-08,2084.060059,2113.739990,2005.189941,2007.500000,1842700,0,0.0,2042.143311,-0.042712,2055.373999,...,-30.866396,-24.047454,-68.143428,8,3,2021,6045,,0.016369,0
2021-03-09,2049.750000,2063.919922,2035.599976,2040.359985,1693300,0,0.0,2046.626628,0.016369,2052.355981,...,1.778410,-9.430004,-51.837991,9,3,2021,6046,,-0.002044,0
2021-03-10,2061.399902,2061.399902,2019.329956,2036.189941,1353100,0,0.0,2038.973267,-0.002044,2054.245972,...,-43.874503,-29.326273,-64.182892,10,3,2021,6047,,0.031603,0
2021-03-11,2058.219971,2111.270020,2056.449951,2100.540039,1384200,0,0.0,2089.420003,0.031603,2062.915967,...,99.771468,121.683216,55.063973,11,3,2021,6048,,-0.024060,0
