# Outline

1. Import Packages

2. Define Universe of Stocks and Proxies

3. Downloading, Combining, and Cleaning the Data
    - 3.1 Data Downloader
    - 3.2 Data Combiner
    - 3.3 Data Cleaner
    - 3.4 Feature Engineering First Pass

4. Feature Engineering

5. Data Preparation
    - 5.1 Data Stationarity Check
    - 5.2 Train Test Split and Feature Scaling
    - 5.3 Split Data into Days
    - 5.4 Split Daily Data into Trading Windows
    
6. Data Pipelining

7. Model Construction

8. Fine Tuning the Models

9. Running the Models for all ETFs

10. Results Summary/Visualization


# 1.0 Import Packages

In [1]:
import pandas as pd
import numpy as np
import statistics
import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import math

from datetime import datetime, timedelta
from alpha_vantage.timeseries import TimeSeries
from time import sleep
from tqdm import tqdm
from pymongo import MongoClient,InsertOne
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from keras.models import load_model
from tensorflow.keras import models, layers, backend, regularizers
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, classification_report

from statsmodels.tsa.stattools import adfuller

import matplotlib.pyplot as plt
%matplotlib inline


######### Alphavantage key
#Chris_for_prime@outlook.com
# SECURITY NOTE(review): this credential is committed in plain text.  Set the
# ALPHAVANTAGE_API_KEY environment variable to override it; the literal is kept
# only as a fallback so existing runs keep working.  It should be rotated and
# removed from the notebook.
import os

key = os.environ.get('ALPHAVANTAGE_API_KEY', 'Z8JAUR4XUY8O1CV2')

# 2.0 Define Universe of Stocks and Proxies

For this project I wanted to analyze Exchange Traded Funds (ETFs) and use features that serve as proxies for economic indicators.

List of ETFs: https://etfdb.com/compare/volume/
   - High volume now and 2 years ago.  My definition of high volume is > 100,000 shares per day.  Most are much higher than that.

Economic Indicators:
   - Economic Growth
   - Inflation
   - Unemployment
   - Business Confidence
   - Housing
    
Proxies for Each Economic Indicator:
   - SPDR S&P 500 ETF Trust (SPY)
   - iShares 20+ Year Treasury Bond ETF (TLT)
   - Barclays iPath Series B S&P 500 VIX Short-Term Futures ETN (VXX)
   - Consumer Discretionary Select Sector SPDR Fund (XLY)
   - Vanguard Real Estate Index Fund (VNQ)

Market Holidays
   - Load in list of market holidays.  The stock market is closed on these days (either partially or completely) and so the data will not be included in training the models.


In [2]:
# Both symbol lists live in the same workbook, on different sheets
etf_workbook = 'Input Files/List of ETFs.xlsx'

# Universe of ETFs to model
CoList = pd.read_excel(etf_workbook, sheet_name='Low_Missing')['Symbol']

# ETFs used as proxies for the economic indicators
proxies = pd.read_excel(etf_workbook, sheet_name='Proxies')['Symbol']

# Market holidays, stored as 'YYYY-MM-DD' strings so they can be compared with the short_date column
holiday_dates = pd.read_excel("Input Files/Stock Market Holidays.xlsx")['Date']
holiday_list = [day.strftime('%Y-%m-%d') for day in holiday_dates]

################## Set Interval #######################
# Allowed values: 1min, 5min, 15min, 30min, 60min
minute_interval = '1min'

# Display the proxy symbols as the cell output
proxies

0    SPY
1    TLT
2    VXX
3    XLY
4    VNQ
Name: Symbol, dtype: object

# 3.0 Downloading, Combining, and Cleaning the Data

## 3.1 Data Downloader

Downloads data from AlphaVantage using API Calls.
Data is downloaded in CSV files, with each month being one file.
Files are named individually and stored in folders named for each ETF

In [3]:
def data_downloader(minute_interval, co):
    """
    Downloads any missing monthly intraday slices for one symbol from AlphaVantage.
    Each (year, month) slice is saved as its own CSV under
    'AlphaVantage Data/<minute_interval>/1. Raw Data/<co>/'.  Slices whose file
    already exists are skipped, so the function can be re-run to resume.
    Input:
    minute_interval : Bar size passed to the API and used in folder/file names (e.g. '1min')
    co              : Ticker symbol to download
    Output:
    Returns None.  Writes one CSV per downloaded slice and prints progress.
    Stops at the first response that is too short to be real data.
    """
    #################### Set Time Periods to DL ##########################
    years = [1, 2]
    months = range(1, 13)

    ################# Header ###################################
    # `key` is the notebook-level API key
    ts = TimeSeries(key, output_format='csv')

    #################### Check if Data Exists, if not, Download #######################
    destination = pathlib.Path.cwd() / 'AlphaVantage Data' / minute_interval / '1. Raw Data' / co
    destination.mkdir(parents=True, exist_ok=True)

    for year in years:
        for month in months:
            slice_name = 'year' + str(year) + 'month' + str(month)
            file = destination / (minute_interval + co + ' - ' + slice_name + '.csv')

            # Already downloaded on a previous run
            if file.is_file():
                continue

            print(co, year, month)
            data, _ = ts.get_intraday_extended(symbol=co, interval=minute_interval, slice=slice_name)

            # Materialise the rows first so an empty response is handled instead of
            # raising when the header row is looked up.
            rows = list(data)

            # Same threshold as before: a header plus more than two data rows counts
            # as real content; anything shorter is treated as the API error message.
            if len(rows) - 1 <= 2:
                print("API Limit Reached")
                return

            df = pd.DataFrame(rows)
            df.columns = df.iloc[0]
            df = df.drop(0).reset_index(drop=True)
            df.to_csv(file, index=False)

            # Pause between successful calls (presumably to respect the API rate limit)
            sleep(12)

    return

#### Download ETF and Proxy Data
# Fetch the raw monthly files for every symbol in CoList
for co in tqdm(CoList):
    data_downloader(minute_interval=minute_interval, co=co)

100%|██████████| 54/54 [00:00<00:00, 93.83it/s]


## 3.2 Data Combiner

Combines the monthly pricing data into one CSV.  The AlphaVantage API returns a CSV for each month of data.  For two years of data this is 24 separate CSVs.  This function combines them into one. 

In [4]:
def data_combiner(minute_interval, co):
    """
    Combines the monthly raw CSVs of one symbol into a single CSV.
    Reads every CSV under 'AlphaVantage Data/<minute_interval>/1. Raw Data/<co>/',
    stacks them, sorts by 'time', drops duplicate timestamps and writes the result to
    'AlphaVantage Data/<minute_interval>/2. Combined Raw Data/<minute_interval> <co> combined_raw.csv'.
    Input:
    minute_interval : Bar size used in the folder/file names (e.g. '1min')
    co              : Ticker symbol to combine
    Output:
    Returns None.  Does nothing if the combined file already exists.
    If there are no raw files, a header-only CSV is written.
    """
    base_dir = pathlib.Path.cwd() / 'AlphaVantage Data' / minute_interval
    combined_dir = base_dir / '2. Combined Raw Data'
    combined_file = combined_dir / (minute_interval + " " + co + ' combined_raw.csv')

    # Nothing to do when the combined file was produced by an earlier run
    if combined_file.is_file():
        return

    col_names = ['time','open','high','low','close','volume']

    # Sorted so the result does not depend on the order the filesystem lists files in
    raw_dir = base_dir / '1. Raw Data' / co
    raw_files = sorted(x for x in raw_dir.glob('**/*.csv') if x.is_file())

    # The loop variable must not reuse the name of the output path: doing so made the
    # final save overwrite the last raw file instead of writing the combined file.
    frames = [pd.read_csv(raw_file) for raw_file in raw_files]

    if frames:
        main_df = pd.concat(frames, ignore_index=True)
        # Expected columns first, any extra columns after them
        ordered = col_names + [c for c in main_df.columns if c not in col_names]
        main_df = main_df.reindex(columns=ordered)
    else:
        main_df = pd.DataFrame(columns=col_names)

    # Sort and de-duplicate once, after all files are stacked.  A stable sort makes
    # keep='first' retain the row from the earliest file read.
    main_df = main_df.sort_values("time", kind='mergesort', ignore_index=True)
    main_df = main_df.drop_duplicates(subset="time", keep='first', ignore_index=True)

    # Save File
    combined_dir.mkdir(parents=True, exist_ok=True)
    main_df.to_csv(combined_file, index=False)

    return

#### Combine ETF Data
for co in tqdm(CoList):
    #print("Combining",co)
    data_combiner(minute_interval, co)

100%|██████████| 54/54 [00:00<00:00, 717.87it/s]


## 3.3 Data Cleaner

- Trims the combined datasets to only include minutes that occur during market hours.  
- Adds columns for datetime and hour/minute.
- Makes sure every minute of the trading day is represented by a row; missing rows are filled by linear interpolation (a forward pass, then a backward pass for gaps at the start of the day).
- Keep log of missing data

In [5]:
def data_cleaner(minute_interval, co):
    """
    Cleans the combined raw CSV of one symbol and writes a cleaned CSV plus a missing-data log.
    For every non-holiday date it keeps only rows from 09:30:00 up to (not including)
    16:00:00, adds a NaN row for every minute in that range that has no row, fills the
    gaps by interpolation and appends the day to the cleaned output.
    Input:
    minute_interval : Bar size used in the folder/file names (e.g. '1min')
    co              : Ticker symbol to clean
    Output:
    Returns None.  Does nothing if the cleaned file already exists.  Otherwise writes
    'AlphaVantage Data/<minute_interval>/3. Cleaned Combined Data/<minute_interval> <co> cleaned_combined.csv'
    and 'Logs/Missing Values Logs/<minute_interval>/<minute_interval> <co> Missing Values Log.csv'.
    Relies on the notebook-level holiday_list of 'YYYY-MM-DD' strings.
    """
    
    # Path of the cleaned output; used only to decide whether there is work to do
    file_name = minute_interval + " " + co + ' cleaned_combined.csv'
    file = pathlib.Path.cwd() / 'AlphaVantage Data'/ minute_interval / '3. Cleaned Combined Data' / file_name 

    if not file.is_file():
    
        print("++++++++++++++++++++++++++++")
        print(f"Cleaning for: {co}")

        # From here on `file` points at the combined raw input
        file_name = minute_interval + " " + co + ' combined_raw.csv'
        file = pathlib.Path.cwd() / 'AlphaVantage Data'/ minute_interval / '2. Combined Raw Data' / file_name 

        df = pd.read_csv(file)

        ################################ Add columns for date data ##########################################################

        # Create datetime from given string date

        df['datetime'] = pd.to_datetime(df['time'])

        # Create short date
        df['short_date'] = df['datetime'].dt.strftime('%Y-%m-%d')
        # Create hour/minute feature
        df['hour_minute'] = df['datetime'].dt.strftime('%H:%M:%S')
        # Create Hour Feature
        df['hour'] = df['datetime'].dt.hour
        # Create minute feature
        df['minute'] = df['datetime'].dt.minute
        # Day of the week integer
        df['weekday'] = df['datetime'].dt.weekday 

        ############################### Data Cleaning ####################################################################

        # Per-day missing percentages and their dates, written to the log at the end
        missing_percents_list = []
        dates_list = []
        clean_combined = pd.DataFrame(columns=df.columns)

        print("Walking through the days")
        # Group the whole combined df by date; doc[0] is the date string, doc[1] that day's rows
        for doc in df.groupby(df['short_date']):

            # For each day, if it's not a holiday, clean the day, log the missing values, impute missing values, and combine back into the main DF    
            if doc[0] not in holiday_list:
                sample = doc[1].copy()

                # Filter out pre-market and after market (string comparison on 'HH:MM:SS')
                sample1 = sample.loc[(sample['hour_minute'] < "16:00:00") & (sample['hour_minute'] >= "09:30:00")]

                # Get List of times that should be in the period, starting at 09:30 on this date
                given_time = datetime.strptime(doc[0], '%Y-%m-%d')
                start_time = given_time + timedelta(hours=9)
                start_time = start_time + timedelta(minutes=30)

                required_times = []
                time = start_time
                required_times.append(time)

                # 389 one-minute steps after 09:30 give 390 timestamps, 09:30 through 15:59.
                # NOTE(review): the step is always one minute regardless of minute_interval.
                for x in range(389):
                    time = time + timedelta(minutes=1)
                    required_times.append(time)

                # Make List of times not in period that should be
                datetimes_to_add = []

                # NOTE(review): the list is rebuilt for every required time, so this check is
                # quadratic per day; building the lookup once before the loop would avoid that.
                for required_time in required_times:
                    if required_time not in list(sample1['datetime']):
                        datetimes_to_add.append(required_time)

                # Create variables for the row that should be there; price/volume are NaN placeholders
                for date in datetimes_to_add:
                    time = date.strftime('%Y-%m-%d %H:%M:%S')
                    op = np.nan
                    high = np.nan
                    low = np.nan
                    close = np.nan
                    volume = np.nan
                    d_time = date
                    short_date = date.strftime('%Y-%m-%d')
                    hour_min = date.strftime('%H:%M:%S')
                    hr = date.hour
                    minute = date.minute
                    weekday = date.weekday()

                    # Create new df (1 row) out of variables.
                    # The values are positional: this assumes the raw CSV has exactly the columns
                    # time, open, high, low, close, volume in that order - TODO confirm.
                    df_new_line = pd.DataFrame([[time,op,high,low,close,volume,d_time,short_date,hour_min,hr,minute,weekday]], columns=sample1.columns )

                    # add the row to the existing df
                    sample1 = pd.concat([sample1,df_new_line], ignore_index=True)

                # Sort the DF by time
                df_to_save = sample1.sort_values(by=['datetime'], ignore_index=True)

                # missing data stats: share of rows whose 'open' is NaN, i.e. the rows added above
                number_missing = sum(df_to_save.open.isna())
                number_total = df_to_save.shape[0]
                missing_pct = round(number_missing/number_total*100,2)
                missing_percents_list.append(missing_pct)
                dates_list.append(doc[0])

                # Interpolate to fill in missing data: a forward pass first, then a backward pass
                # for whatever the forward pass could not fill.
                # NOTE(review): method='linear' blends the values on both sides of a gap, so filled
                # minutes depend on later prices too - confirm that is acceptable for the models.
                df_to_save = df_to_save.interpolate(method='linear',limit_direction ='forward')
                df_to_save = df_to_save.interpolate(method='linear',limit_direction ='backward')

                # Add day df into new_combined df
                clean_combined = pd.concat([clean_combined,df_to_save],ignore_index=True)     

        # Sort the new df by time and delete duplicates
        clean_combined.sort_values("time", inplace = True, ignore_index=True) 
        clean_combined.drop_duplicates(subset ="time", keep = 'first', inplace = True, ignore_index=True)

        ################################ Save Cleaned Combined CSV ##########################################

        print('Saving File')

        # See if folder exists, if not, create it
        destination = pathlib.Path.cwd() / 'AlphaVantage Data'/ minute_interval / '3. Cleaned Combined Data'
        if not destination.exists():
            destination.mkdir(parents=True, exist_ok=True)

        # Set Filename (points `file` back at the cleaned output before saving)
        file_name = minute_interval + " " + co + ' cleaned_combined.csv'
        file = pathlib.Path.cwd() / 'AlphaVantage Data'/ minute_interval / '3. Cleaned Combined Data' / file_name 

        # Save File
        clean_combined.to_csv(file, index=False)

        ################################ Save log of missing percents ########################################
        print("Logging Missing Data")

        missing_data_df = pd.DataFrame()
        missing_data_df['Day'] = dates_list
        missing_data_df['%Missing'] = missing_percents_list

        # See if folder exists for missing data log, if not, create it
        destination = pathlib.Path.cwd() / 'Logs'/ 'Missing Values Logs' / minute_interval
        if not destination.exists():
            destination.mkdir(parents=True, exist_ok=True)

        file_name = minute_interval + " " + co + ' Missing Values Log.csv'
        file = pathlib.Path.cwd() / 'Logs' / 'Missing Values Logs' / minute_interval / file_name

        # Save File
        missing_data_df.to_csv(file, index=False)
    
    else:
        # Cleaned file already exists
        return

    return

# Clean the Data
# One cleaned CSV and one missing-values log per symbol
for co in tqdm(CoList):
    data_cleaner(minute_interval=minute_interval, co=co)

100%|██████████| 54/54 [00:00<00:00, 887.60it/s]


## 3.4 Feature Engineering First Pass

Calculates technical indicators for each ETF/Proxy and adds them to the combined CSV and saves a new CSV

#### Functions to Calculate Technical Indicators

- Bollinger Bands: https://www.investopedia.com/terms/b/bollingerbands.asp
- Relative Strength Index (RSI): https://www.investopedia.com/terms/r/rsi.asp
- Moving Average Convergence Divergence (MACD): https://www.investopedia.com/terms/m/macd.asp


Reference: https://github.com/kconstable/market_predictions/blob/main/market_data.ipynb

In [6]:
def calc_bollinger(df,feature,window=20*60,st=2):
    """
    Calculates bollinger bands for a price time-series and adds them to df in place.
    Input: 
    df     : A dataframe of time-series prices
    feature: The name of the column in df to calculate the bands for
    window : The size of the rolling window, counted in rows (bars), not days.
             Defaults to 20*60 rows
    st     : The number of standard deviations between the middle band and each
             outer band. 2 is standard 
    Output: 
    Returns the same df with the 'b-upper', 'b-middle' and 'b-lower' columns added.
    The first window-1 rows of each band are NaN because the window is not yet full.
    """

    # rolling mean and scaled rolling stdev over the same window
    rolling = df[feature].rolling(window)
    middle = rolling.mean()
    spread = rolling.std() * st

    # add the upper/lower and middle bollinger bands
    df['b-upper']  = middle + spread
    df['b-middle'] = middle
    df['b-lower']  = middle - spread

    # Returned so the function matches its documented contract; existing callers
    # that ignore the return value are unaffected.
    return df
    
def calc_rsi(df,feature='close',window=14*60):
    """
    Calculates the RSI for the input feature and adds it to df in place.
    Uses simple rolling means of the gains and losses.
    Input:
    df      : A dataframe with a time-series of prices
    feature : The name of the column in df to calculate the RSI for
    window  : The size of the rolling window, counted in rows (bars), not days.
              Defaults to 14*60 rows
    Output: 
    Returns the same df with the 'rsi' column added.
    Rows without a full window are NaN.  A window with no price change at all
    (zero gains and zero losses) also yields NaN, because the ratio is 0/0.
    """
    # RSI
    # row-to-row price change; the first row has no predecessor and is dropped
    diff = df[feature].diff().dropna()

    # separate positive and negative changes (losses as positive magnitudes)
    gains = diff.clip(lower=0)
    losses = diff.clip(upper=0).abs()

    # rolling mean gain / rolling mean loss
    ratio = gains.rolling(window).mean() / losses.rolling(window).mean()

    # calc the rsi and add to the df; assignment aligns on the index, so the dropped first row becomes NaN
    df['rsi'] = 100.0 - (100.0 / (1.0 + ratio))

    # Returned so the function matches its documented contract; existing callers
    # that ignore the return value are unaffected.
    return df

def calc_macd(df,feature='close',fast=12*60,slow=26*60,signal=9*60):
    """
    Calculates the MACD and signal for the input feature and adds them to df in place.
    Input:
    df      : A dataframe with a time-series of prices
    feature : The name of the column in df to calculate the MACD for
    fast    : Span, in rows, of the fast exponential moving average.  Defaults to 12*60
    slow    : Span, in rows, of the slow exponential moving average.  Defaults to 26*60
    signal  : Span, in rows, of the moving average of the MACD line.  Defaults to 9*60
    Output: 
    Returns the same df with the 'macd' and 'macd_signal' columns added
    """
    # fast and slow exponential moving averages of the price
    ema_fast = df[feature].ewm(span=fast,adjust=False).mean()
    ema_slow = df[feature].ewm(span=slow,adjust=False).mean()

    # MACD line and its smoothed signal line
    df['macd'] = ema_fast - ema_slow
    df['macd_signal'] = df['macd'].ewm(span=signal,adjust=False).mean()

    # Returned so the function matches its documented contract; existing callers
    # that ignore the return value are unaffected.
    return df

#### Adding Technical Indicators to ETF/Proxy Combined CSVs

Takes the pricing data and adds technical indicators as features, as well as seasonality features for time of day and year

In [7]:
def add_tech_indicators(minute_intervale, co):
    """
    Adds technical indicators and seasonality features to the cleaned CSV of one symbol.
    Reads '3. Cleaned Combined Data/<interval> <co> cleaned_combined.csv', adds the
    Bollinger, RSI and MACD columns plus daily/yearly sine and cosine features, drops
    the first 390 rows and writes the result to
    '4. Cleaned Combined Data With Features/<interval> <co> cleaned_combined_with_features.csv'.
    Input:
    minute_intervale : Bar size used in the folder/file names (e.g. '1min').  The
                       misspelled name is kept so existing callers keep working
    co               : Ticker symbol to process
    Output:
    Returns None.  Does nothing if the output file already exists.
    """
    # Use the argument rather than the notebook-level variable of the similar name
    minute_interval = minute_intervale

    base_dir = pathlib.Path.cwd() / 'AlphaVantage Data' / minute_interval
    features_dir = base_dir / '4. Cleaned Combined Data With Features'
    features_file = features_dir / (minute_interval + " " + co + ' cleaned_combined_with_features.csv')

    if features_file.is_file():
        return

    ######################################### Open CSV #######################################################
    # The input path has its own name: reusing the output variable for it made the
    # save below overwrite the cleaned input and never create the output file.
    cleaned_file = base_dir / '3. Cleaned Combined Data' / (minute_interval + " " + co + ' cleaned_combined.csv')
    df = pd.read_csv(cleaned_file)

    ##################################### Calculate/Add Technical Indicators #################################
    # NOTE(review): Bollinger and RSI use 20- and 14-row windows here, while calc_macd
    # runs with its own defaults - confirm the different scales are intended.
    calc_bollinger(df,'close',window=20,st=2)
    calc_rsi(df,feature='close',window=14)
    calc_macd(df,feature='close')

    ######################################### Seasonality Features ###########################################
    # Encode time of day and time of year as points on a circle
    date_time = pd.to_datetime(df['time'], format='%Y-%m-%d %H:%M:%S')
    timestamp_s = date_time.map(pd.Timestamp.timestamp)

    day = 24*60*60
    year = (365.2425)*day

    df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
    df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
    df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
    df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

    #################################### Trim First Day Off because of NaNs #################################
    # The rolling indicators are NaN until their windows fill, so the earliest rows are dropped.
    # 390 rows is one trading day only at the 1min interval - TODO confirm for other intervals.
    df = df[390:]

    ################################ Save CSV ##########################################
    features_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(features_file, index=False)

    return

# Calculate/add technical indicators to each ETF/Proxy
# One features CSV is produced per symbol in CoList
for co in tqdm(CoList):
    add_tech_indicators(minute_interval, co=co)

100%|██████████| 54/54 [00:00<00:00, 441.89it/s]


# 4.0 Proxy Feature Engineering

For each ETF CSV, add the proxy features including:
- Close Price
- All Technical Indicators

Then save dataframe as new CSV

In [57]:
# Folder and file-name suffix shared by every per-symbol features CSV.
# File names are built from minute_interval (instead of a hard-coded '1min') so they
# always match the folder they are read from.
features_dir = pathlib.Path.cwd() / 'AlphaVantage Data' / minute_interval / '4. Cleaned Combined Data With Features'
features_suffix = ' cleaned_combined_with_features.csv'

# Load in Proxy DFs
proxy_names = ['SPY', 'TLT', 'VXX', 'XLY', 'VNQ']
proxies = [pd.read_csv(features_dir / (minute_interval + ' ' + name + features_suffix)) for name in proxy_names]
SPY, TLT, VXX, XLY, VNQ = proxies

# Trims Proxies and Renames Columns.  Prepares for merging them with ETF Df
# Columns that would duplicate the ETF's own columns or are not used as proxy features
redundant_cols = ['time','open','high','low','volume','short_date',
                  'hour_minute','hour','minute','weekday',
                  'Day sin', 'Day cos', 'Year sin', 'Year cos']
trimmed_proxies = []

for name, proxy_df in zip(proxy_names, proxies):
    proxy = proxy_df.drop(redundant_cols, axis=1)
    # Prefix every remaining column with the proxy symbol, except the merge key
    proxy.columns = [name + col if col != 'datetime' else col for col in proxy.columns]
    trimmed_proxies.append(proxy)

# See if folder exists, if not, create it
destination = pathlib.Path.cwd() / 'AlphaVantage Data'/ minute_interval / '5. Cleaned Combined Data With Proxies'
destination.mkdir(parents=True, exist_ok=True)

for co in tqdm(CoList):
    # Set Filename
    file = destination / (minute_interval + " " + co + ' cleaned_combined_with_proxies.csv')

    # Check before doing any work: an existing output was never overwritten anyway,
    # so merging first only wasted time on re-runs.
    if file.is_file():
        continue

    # Load the ETF df
    etf = pd.read_csv(features_dir / (minute_interval + ' ' + co + features_suffix))

    # Combine Proxies: keep every ETF row and attach proxy values for the same minute
    for proxy in trimmed_proxies:
        etf = pd.merge(etf, proxy, how='left', on=['datetime'])

    # Interpolate Missing Values (minutes a proxy has no row for)
    etf = etf.interpolate(method='linear',limit_direction ='forward')
    etf = etf.interpolate(method='linear',limit_direction ='backward',limit=5)

    # Save File
    etf.to_csv(file, index=False)

100%|██████████| 54/54 [26:58<00:00, 29.98s/it]


# 5. Data Preparation

## 5.1 Data Stationarity Check

Deep learning performs better on stationary data.  But as the EDA has shown, pricing data for these ETFs are not usually stationary.  The stock price fluctuates over time and has clear trends over time.

References: 

https://analyzingalpha.com/check-time-series-stationarity-python#augmented-dickey-fuller-adf
https://github.com/kconstable/market_predictions/blob/main/LSTM_model.ipynb

In [50]:
def transform_stationary(df,features_to_transform,transform='log'):
    """
    Transform time-series columns in place using a log or boxcox transform.
    No stationarity test (e.g. augmented dickey-fuller) is run here; check the
    result separately if that is needed.
    Inputs:
    df: a dataframe of features
    features_to_transform: A list of column names to apply the transform to.
                           Names that are not columns of df are ignored
    transform: The transform to apply ('log' or 'boxcox')
    Output
    Returns None.  Applies the transform inplace in df.  For any other value of
    transform a message is printed and df is left unchanged.
    """
    # only the requested columns that actually exist, in df column order
    targets = [feature for feature in df.columns if feature in features_to_transform]

    # log transform
    if transform=='log':
        for feature in targets:
            df[feature] = df[feature].apply(np.log)

    # boxcox transform
    elif transform=='boxcox':
        # Imported here because the notebook never imports scipy.stats, which made
        # this branch fail with a NameError.
        from scipy import stats

        for feature in targets:
            # boxcox returns the transformed values and the fitted lambda
            bc,_ = stats.boxcox(df[feature])
            df[feature] = bc

    # reported once rather than once per column
    elif targets:
        print("Transformation not recognized")

This function takes the ETF pricing data and makes it stationary, and saves a new CSV.

In [282]:
def make_csv_stationary(co, features_to_transform):
    """
    Log-transforms the selected columns of one symbol's proxies CSV and saves a new CSV.
    Inputs:
    co: Ticker symbol whose '5. Cleaned Combined Data With Proxies' CSV is read
    features_to_transform: A list of column names passed on to transform_stationary
    Output
    Returns None.  Writes '<interval> <co> cleaned_combined_stationary.csv' into
    '6. Cleaned Combined Data Stationarity', with the untransformed close kept in a
    'close_prices' column.  Does nothing if that file already exists.
    Uses the notebook-level minute_interval.
    """
    data_root = pathlib.Path.cwd() / 'AlphaVantage Data' / minute_interval
    stationary_dir = data_root / '6. Cleaned Combined Data Stationarity'
    stationary_file = stationary_dir / (minute_interval + " " + co + ' cleaned_combined_stationary.csv')

    # Already produced by an earlier run
    if stationary_file.is_file():
        return

    # Read in File to edit
    source_name = minute_interval + " " + co + ' cleaned_combined_with_proxies.csv'
    df = pd.read_csv(data_root / '5. Cleaned Combined Data With Proxies' / source_name)

    # Keep the untransformed close before the columns are transformed in place
    raw_close = df['close'].copy()
    transform_stationary(df, features_to_transform, 'log')
    df['close_prices'] = raw_close

    # See if folder exists, if not, create it
    if not stationary_dir.exists():
        stationary_dir.mkdir(parents=True, exist_ok=True)

    df.to_csv(stationary_file, index=False)
    return

Run the Function for Each ETF

In [283]:
# Price-level columns of the ETF itself
own_price_features = ['open', 'high', 'low', 'close', 'b-upper', 'b-middle', 'b-lower']

# Close and Bollinger columns contributed by each proxy
proxy_price_features = [
    'SPYclose', 'SPYb-upper', 'SPYb-middle', 'SPYb-lower',
    'TLTclose', 'TLTb-upper', 'TLTb-middle', 'TLTb-lower',
    'VXXclose', 'VXXb-upper', 'VXXb-middle', 'VXXb-lower',
    'XLYclose', 'XLYb-upper', 'XLYb-middle', 'XLYb-lower',
    'VNQclose', 'VNQb-upper', 'VNQb-middle', 'VNQb-lower',
]

features_to_transform = own_price_features + proxy_price_features

# Write the transformed CSV for every symbol
for co in tqdm(CoList):
    make_csv_stationary(co=co, features_to_transform=features_to_transform)

100%|██████████| 54/54 [25:43<00:00, 28.58s/it]


Now, the data is downloaded, feature engineered, cleaned, and made stationary. The next notebook will prepare it for deep learning and then train the models.