In [59]:
# all imports
import pandas as pd
import numpy as np
import yfinance as yf 

import matplotlib.pyplot as plt

from ta.momentum import AwesomeOscillatorIndicator, KAMAIndicator, \
    PercentagePriceOscillator, ROCIndicator, RSIIndicator, StochRSIIndicator, \
    StochasticOscillator, TSIIndicator

from ta.volume import AccDistIndexIndicator, ChaikinMoneyFlowIndicator, \
    EaseOfMovementIndicator, ForceIndexIndicator, MFIIndicator, \
     NegativeVolumeIndexIndicator, OnBalanceVolumeIndicator, \
        VolumePriceTrendIndicator, VolumeWeightedAveragePrice

from ta.volatility import AverageTrueRange, UlcerIndex

from ta.trend import ADXIndicator, AroonIndicator, CCIIndicator, DPOIndicator, \
    EMAIndicator, SMAIndicator, WMAIndicator

import matplotlib.dates as mdates
import matplotlib as mpl
import matplotlib.gridspec as gridspec

from sklearn.model_selection import train_test_split
import datetime

from sklearn.preprocessing import StandardScaler, MinMaxScaler, OrdinalEncoder

In [60]:
# Download Data
print("Downloading SPY Price Data...")
spy_ohlc_df = yf.download('SPY', start='1993-02-01', end='2022-12-05')

# Fail loudly if nothing came back, instead of erroring later on missing columns.
if spy_ohlc_df.empty:
    raise RuntimeError("No SPY price data was downloaded.")

# The downloaded frame is indexed by date. Copy that date into a regular "Dates"
# column, truncated to the calendar day by a string round trip through '%Y/%m/%d'.
# This is done on the whole index at once rather than row by row in a Python loop;
# .to_numpy() makes the assignment positional.
spy_ohlc_df["Dates"] = pd.to_datetime(
    spy_ohlc_df.index.strftime('%Y/%m/%d'), format='%Y/%m/%d'
).to_numpy()

# Replace the date index with a plain 0..n-1 integer index (rebinding, not inplace).
spy_ohlc_df = spy_ohlc_df.reset_index(drop=True)

# Drop Adj Close - not of interest. errors="ignore" keeps the cell working when the
# download does not contain that column (NOTE(review): this appears to depend on the
# yfinance version / auto_adjust setting - confirm against the installed release).
spy_ohlc_df = spy_ohlc_df.drop(columns=["Adj Close"], errors="ignore")

print("SPY DF Last 5 Rows:")
print(spy_ohlc_df.tail())

# print columns and rows:
print("\n")
print(f"There are {spy_ohlc_df.shape[0]} Rows and {spy_ohlc_df.shape[1]} Cols for SPY")
print("\n")



Downloading SPY Price Data...
[*********************100%***********************]  1 of 1 completed
SPY DF Last 5 Rows:
            Open        High         Low       Close     Volume      Dates
7511  399.089996  400.809998  395.109985  395.910004   67881600 2022-11-28
7512  396.049988  397.299988  393.299988  395.230011   52310000 2022-11-29
7513  395.489990  407.679993  393.480011  407.679993  144566700 2022-11-30
7514  408.769989  410.000000  404.750000  407.380005   76398200 2022-12-01
7515  402.250000  407.859985  402.140015  406.910004   85273900 2022-12-02


There are 7516 Rows and 6 Cols for SPY




In [61]:
# Feature engineering: every statement below adds one column to spy_ohlc_df.
# NOTE(review): the window sizes are kept exactly as they were; they look tuned
# rather than library defaults - confirm their origin before changing them.

# Shorthand for the raw price/volume series fed to the indicators.
close = spy_ohlc_df["Close"]
high = spy_ohlc_df["High"]
low = spy_ohlc_df["Low"]
volume = spy_ohlc_df["Volume"]

# Momentum:

spy_ohlc_df['AwesomeIndicator'] = AwesomeOscillatorIndicator(
    high=high, low=low, window1=3, window2=2).awesome_oscillator()

spy_ohlc_df['KAMA'] = KAMAIndicator(close=close, window=2, pow1=2, pow2=2).kama()

spy_ohlc_df['PercentagePriceOscillator'] = PercentagePriceOscillator(
    close=close, window_slow=3, window_fast=2, window_sign=2).ppo()

spy_ohlc_df['ROC'] = ROCIndicator(close=close, window=200).roc()

spy_ohlc_df['RSI'] = RSIIndicator(close=close, window=200).rsi()

spy_ohlc_df['StochRSI'] = StochRSIIndicator(
    close=close, window=19, smooth1=19).stochrsi_k()

spy_ohlc_df['StochasticOscillator'] = StochasticOscillator(
    close=close, high=high, low=low, window=4).stoch()

spy_ohlc_df['TSI'] = TSIIndicator(close=close, window_slow=3, window_fast=4).tsi()

# Volume:

spy_ohlc_df['AccDistIndicator'] = AccDistIndexIndicator(
    close=close, high=high, low=low, volume=volume).acc_dist_index()

spy_ohlc_df['ChaikinMoneyFlow'] = ChaikinMoneyFlowIndicator(
    close=close, high=high, low=low, volume=volume,
    fillna=True).chaikin_money_flow()

spy_ohlc_df['EaseOfMovement'] = EaseOfMovementIndicator(
    high=high, low=low, volume=volume, fillna=True).ease_of_movement()

spy_ohlc_df['ForceIndex'] = ForceIndexIndicator(
    close=close, volume=volume, fillna=True).force_index()

# Fixed: the volume argument used to be the "Low" price series.
spy_ohlc_df['MFI'] = MFIIndicator(
    close=close, high=high, low=low, volume=volume, window=2).money_flow_index()

spy_ohlc_df['NegativeVolumeIndex'] = NegativeVolumeIndexIndicator(
    close=close, volume=volume, fillna=True).negative_volume_index()

spy_ohlc_df['OnBalanceVolume'] = OnBalanceVolumeIndicator(
    close=close, volume=volume, fillna=True).on_balance_volume()

spy_ohlc_df['VolumePriceTrend'] = VolumePriceTrendIndicator(
    close=close, volume=volume, fillna=True).volume_price_trend()

# Fixed: the volume argument used to be the "Low" price series.
spy_ohlc_df['VolumeWeightedAveragePrice'] = VolumeWeightedAveragePrice(
    close=close, high=high, low=low, volume=volume,
    window=1).volume_weighted_average_price()

# Volatility:

spy_ohlc_df['Average True Range'] = AverageTrueRange(
    close=close, high=high, low=low, window=102).average_true_range()

spy_ohlc_df['Ulcer Index'] = UlcerIndex(close=close, window=9).ulcer_index()

# Trend:

spy_ohlc_df['ADX'] = ADXIndicator(close=close, high=high, low=low, window=3).adx()

spy_ohlc_df['Aroon'] = AroonIndicator(close=close, window=3).aroon_indicator()

spy_ohlc_df['CCI'] = CCIIndicator(close=close, high=high, low=low, window=9).cci()

spy_ohlc_df['DPO'] = DPOIndicator(close=close, window=193).dpo()

spy_ohlc_df['EMA'] = EMAIndicator(close=close, window=2).ema_indicator()

spy_ohlc_df['SMA'] = SMAIndicator(close=close, window=2).sma_indicator()

spy_ohlc_df['WMA'] = WMAIndicator(close=close, window=2).wma()

# Misc:

# add month as a value between 1 and 12
spy_ohlc_df["Month"] = spy_ohlc_df["Dates"].dt.month

# add 'Prev Close Return 1 Day':
spy_ohlc_df['Prev Close Return 1 Day'] = close.pct_change(1)

# add 'Prev Close Return 2 Days':
spy_ohlc_df['Prev Close Return 2 Days'] = close.pct_change(2)

# add 'Prev Close Return 3 Days':
spy_ohlc_df['Prev Close Return 3 Days'] = close.pct_change(3)

# NOTE: rows causing value error will be dropped. Error due to indicator calculations.

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [68]:
def calc_target_vars(df, column="Close", period=1, fill_value=0):
    """
    This Function Calculates the Target Variables 'Percent Next Day' and
    'Price Next Day' for a column name and period.

    For every row i that has a row i + period after it, the targets are the value of
    ``column`` at i + period and the fractional change from row i to that value.
    The final ``period`` rows have no future value, so they are padded with
    ``fill_value`` to keep both lists as long as ``df``.

    :param df: a Pandas DataFrame with the specified column
    :param column: default 'Close', the column to run target variable calculations
    :param period: default 1, the number of rows in the future to calculate the target
    variable calculations (must be >= 1)
    :param fill_value: default 0, the placeholder stored for the trailing rows whose
    target cannot be calculated (pass np.nan to let a later dropna remove them)
    :return: next_day_percent_change_vals, next_day_prices, which are both lists of the
    target variable values, each with len(df) entries
    :raises ValueError: if period is smaller than 1
    """
    if period < 1:
        raise ValueError("period must be a positive integer")

    prices = df[column]
    total_rows = len(prices)
    # Number of rows that still have a row `period` steps ahead of them.
    rows_with_future = max(total_rows - period, 0)

    current_prices = prices.iloc[:rows_with_future].to_numpy()
    future_prices = prices.iloc[period:].to_numpy()

    # percent change:
    percent_changes = (future_prices - current_prices) / current_prices

    # Can't calculate the target vars for the final `period` values:
    padding = [fill_value] * (total_rows - rows_with_future)

    next_day_percent_change_vals = percent_changes.tolist() + padding
    next_day_prices = future_prices.tolist() + padding

    return next_day_percent_change_vals, next_day_prices

In [69]:
# Attach the target variables to the SPY frame.

# Both targets come from one call, using the function's default column and period:
percent_next_day, price_next_day = calc_target_vars(spy_ohlc_df)

# Store each target list under its own column name:
for target_name, target_values in (('Percent Next Day', percent_next_day),
                                   ('Price Next Day', price_next_day)):
    spy_ohlc_df[target_name] = target_values

In [70]:
# Report the frame's dimensions now that the extra columns have been added:
row_count, col_count = spy_ohlc_df.shape
print("After Adding Technical Indicators and other measures:")
print("\n")
print(f"There are {row_count} Rows and {col_count} Cols for SPY")
print("\n")

After Adding Technical Indicators and other measures:


There are 7316 Rows and 38 Cols for SPY




In [71]:
# Keep only the rows that have no missing value in any column, then show the result.
spy_ohlc_df = spy_ohlc_df.dropna(axis="index", how="any")
print(spy_ohlc_df)

# Write the cleaned frame to an Excel workbook in the working directory.
spy_ohlc_df.to_excel("S&P500 Data2.xlsx")

            Open        High         Low       Close     Volume      Dates  \
200    46.687500   46.687500   46.437500   46.562500     243300 1993-11-15   
201    46.656250   46.812500   46.468750   46.781250     492600 1993-11-16   
202    46.812500   46.812500   46.406250   46.531250      39600 1993-11-17   
203    46.468750   46.562500   46.281250   46.406250     240800 1993-11-18   
204    46.250000   46.375000   46.218750   46.312500     106000 1993-11-19   
...          ...         ...         ...         ...        ...        ...   
7511  399.089996  400.809998  395.109985  395.910004   67881600 2022-11-28   
7512  396.049988  397.299988  393.299988  395.230011   52310000 2022-11-29   
7513  395.489990  407.679993  393.480011  407.679993  144566700 2022-11-30   
7514  408.769989  410.000000  404.750000  407.380005   76398200 2022-12-01   
7515  402.250000  407.859985  402.140015  406.910004   85273900 2022-12-02   

      AwesomeIndicator        KAMA  PercentagePriceOscillator  