In [None]:
import numpy as np 
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import sklearn as sk

# Step 1: Ticker symbol to analyse
TICKER = "SPY"

# Step 2: Price behavior (bar interval and the history window requested for it)
INTERVAL = "1m"
# Hourly bars are requested with a 730-day window; any other interval asks for "max".
PERIOD = "730d" if INTERVAL == "1h" else "max"

# Step 3: RSI settings (rolling window length and the two threshold levels)
RSI_LENGTH = 14
RSI_OVERBROUGHT = 70
RSI_OVERSOLD = 30

# Step 4: Price difference
# Number of rows the next-period price change is pulled back by, so that the
# following row's change sits in the same row as the current bar.
SHIFT = 1

# Step 5: Data subsection (number of most recent rows to keep)
LOOKBACK = 10000

def get_data(ticker = TICKER, interval = INTERVAL, lookback = LOOKBACK):
    """Download price bars for *ticker* and return the most recent rows.

    Parameters:
        ticker: symbol passed to ``yf.download``.
        interval: bar interval passed to ``yf.download``.
        lookback: number of trailing rows to keep.

    Returns:
        An independent DataFrame (a copy, not a view of the download) with
        single-level columns and a 0..n-1 integer index, holding the last
        ``lookback`` rows.
    """

    # Step 1: Pick the history window.
    # PERIOD was derived from the module-level INTERVAL. When the caller asks
    # for a different interval, re-apply the same rule to *that* interval so
    # that e.g. get_data(interval="1h") does not request period="max".
    if interval == INTERVAL:
        period = PERIOD
    elif interval == "1h":
        period = "730d"
    else:
        period = "max"

    # Step 2: Download Data
    df = yf.download(ticker, interval = interval, period = period)

    # Step 3: Select only Level 0 of Multilevel Columns
    df.columns = df.columns.get_level_values(0)

    # Step 4: Simplifying df for Plots (replace the downloaded index with 0..n-1)
    df = df.reset_index(drop=True)

    # -lookback: the last `lookback` rows, ":" all columns.
    # .copy() detaches the result from the sliced frame: downstream functions
    # assign new columns into it, which on a slice can raise pandas'
    # SettingWithCopyWarning.
    return df.iloc[-lookback:, :].copy()

# Defining y (the dependent / target variable)
# --> We want a strategy whose signal forecasts, positively or negatively,
# the upcoming change in the Close price.

# Let's inspect how a technical indicator relates to how the price changes tomorrow.
# E.g. RSI: we expect a negative correlation between the value of the RSI
# and the Close price change on the following day, because the higher the RSI,
# the more overbought the asset is, so it should decrease soon.
def add_target(df, shift = SHIFT):
    """Add the price-change columns and the forward-looking ``Target`` column.

    The frame is modified in place and also returned.

    Parameters:
        df: DataFrame with a ``Close`` column.
        shift: how many rows ahead the target looks.

    Columns added:
        ``diff()``                 change in Close versus the previous row.
        ``diff().shift({shift})``  that one-row change, pulled back ``shift`` rows.
        ``Target``                 Close ``shift`` rows ahead minus the current Close.

    The last ``shift`` rows have no future value, so the shifted column and
    ``Target`` are NaN there.
    """
    close = df["Close"]

    # Step 1: This row's Close - the previous row's Close
    df["diff()"] = close.diff()

    # Step 2: shift(-shift) moves values backwards in time, so the change that
    # happens `shift` rows later lands in the current row.
    df[f"diff().shift({shift})"] = close.diff().shift(-shift)

    # Step 3: Momentum target = future Close - current Close.
    # The previous formula subtracted the Close *level* from a Close *change*
    # (yielding Close[t+1] - 2*Close[t] for shift=1), which is not a price change.
    df["Target"] = close.shift(-shift) - close

    return df

# Adding the Features:

def add_RSI(df, length = RSI_LENGTH, overbrought = RSI_OVERBROUGHT, oversold = RSI_OVERSOLD):
    """Add a simple-moving-average RSI to *df*, plot it, and drop incomplete rows.

    Parameters:
        df: DataFrame with a ``Close`` column; it is modified in place.
        length: rolling window size used to average gains and losses.
        overbrought: level drawn as a red horizontal line on the plot.
        oversold: level drawn as a green horizontal line on the plot.

    Returns:
        ``df.dropna()``: the frame with ``gain``, ``loss``, ``avg_gain``,
        ``avg_loss`` and ``RSI`` columns added, minus every row that has a NaN
        in any column (e.g. the first ``length - 1`` rows, whose rolling
        window is not yet full).
    """

    # Step 1: Define Delta (Price changes between the Rows)
    price_change = df["Close"].diff()

    # Step 2: Group Price Changes based on Positive/Negative Returns
    # where(cond, 0) keeps the value where cond holds and writes 0 elsewhere;
    # the NaN in the first row fails both conditions and therefore becomes 0.
    df["gain"] = price_change.where(price_change > 0, 0)
    df["loss"] = -price_change.where(price_change < 0, 0)

    # Step 3: Calculate the Rolling Average for Length Days
    df["avg_gain"] = df["gain"].rolling(window = length).mean()
    df["avg_loss"] = df["loss"].rolling(window = length).mean()

    # Step 4: Calculate the Relative Strength
    # rs = 3: avg_gain was 3x larger than the average loss --> Strong Upward Momentum
    # rs = 0.5: avg_gain was half the size of the average loss --> Strong Downward Momentum
    # A window without losses gives rs = inf (RSI = 100); a window with neither
    # gains nor losses gives 0/0 = NaN and is removed by dropna() below.
    rs = df["avg_gain"] / df["avg_loss"]

    # Step 5: Calculate the RSI w/ Standardization (Values between 0-100)
    df["RSI"] = 100 - (100/(1+rs))

    # Step 6: Plot the RSI
    plt.figure()
    plt.plot(df["RSI"])
    plt.title("RSI Values")  # title previously read "RFI Values" (typo)

    # Step 7: Plot horizontal lines for Overbrought/Oversold
    # Crossing Overbrought Line: triggers Short Signal
    # Crossing Oversold Line: triggers Long Signal
    plt.axhline(overbrought, color="red")
    plt.axhline(oversold, color="green")

    return df.dropna()

def main():
    """Run the pipeline: download prices, add the target columns, add the RSI."""
    prices = get_data()
    with_target = add_target(prices)
    with_rsi = add_RSI(with_target)
    return with_rsi

# Run the full pipeline; the bare `df` on the last line makes the notebook
# display the resulting frame as the cell output.
df = main()
df

  df = yf.download(ticker, interval = interval, period = PERIOD)
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,diff(),diff().shift(1)
0,680.500000,680.520020,679.900024,679.950012,1451696,,-0.145020
1,680.354980,680.510010,680.200012,680.510010,239244,-0.145020,-0.184998
2,680.169983,680.390015,680.140015,680.344971,180502,-0.184998,0.085693
3,680.255676,680.280029,680.130005,680.179993,126456,0.085693,-0.190674
4,680.065002,680.340027,679.919983,680.250000,170149,-0.190674,0.085022
...,...,...,...,...,...,...,...
2335,661.309998,661.679993,661.039978,661.289978,954101,0.020020,-0.309998
2336,661.000000,661.359985,660.940002,661.299988,510035,-0.309998,-0.150024
2337,660.849976,661.109985,660.770020,661.010010,1198968,-0.150024,-0.375000
2338,660.474976,661.034973,660.250000,660.840027,1648967,-0.375000,-0.334961


In [18]:
# Display the last 10 rows of the frame.
df.tail(10)

Price,Close,High,Low,Open,Volume,diff(),diff().shift(1)
2330,662.109985,662.195007,660.914978,660.919983,948814,1.195007,-0.159973
2331,661.950012,662.359985,661.75,662.119995,781559,-0.159973,-0.169983
2332,661.780029,662.210022,661.719971,661.960022,402700,-0.169983,-0.120056
2333,661.659973,661.890015,661.469971,661.789978,602486,-0.120056,-0.369995
2334,661.289978,661.909973,661.219971,661.669983,473179,-0.369995,0.02002
2335,661.309998,661.679993,661.039978,661.289978,954101,0.02002,-0.309998
2336,661.0,661.359985,660.940002,661.299988,510035,-0.309998,-0.150024
2337,660.849976,661.109985,660.77002,661.01001,1198968,-0.150024,-0.375
2338,660.474976,661.034973,660.25,660.840027,1648967,-0.375,-0.334961
2339,660.140015,660.75,660.0,660.47998,3597404,-0.334961,
