In [6]:
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [7]:
TICKER = 'SPY'
INTERVAL='1d'

# set period based on interval
if INTERVAL == '1h':
    PERIOD = '730d'
else:
    PERIOD = 'max'

SHIFT = 1

# what subsetion of that data are you interested in
LOOKBACK = 10000

STRATEGY = ['Open', 'High', 'Low', 'Close', 'Volume']

def get_data(ticker=TICKER, lookback=LOOKBACK, interval=INTERVAL):

    # get data at interval you want
    df = yf.download(ticker, interval=interval, auto_adjust=False, period=PERIOD)
    df.columns = df.columns.get_level_values(0)

    # reset the index to make plots prettier
    df = df.reset_index(drop=True)

    # only return the subset of data you are interested in
    return df.iloc[-lookback:, :]


# define the target variable (also called dependent variable, or y)
def add_target(df, shift=SHIFT):

    # what is the close price SHIFT days from now?
    df[f'Close + {shift}'] = df['Close'].shift(-shift)

    # what is the change in close price SHIFT days from now?
    df['Target'] = (df[f'Close + {shift}'] >  df['Close']) * 1

    return df

def generate_regression_output(df, features=STRATEGY, target='Target'):
    subset = df[features + [target]].dropna()

    X = subset[features]
    y = subset[target]

    X_with_const = sm.add_constant(X)
    model = sm.Logit(y, X_with_const).fit()

    # Console summary
    print(model.summary())

    return df

def main():
    df = get_data()
    df = add_target(df)
    df = generate_regression_output(df)

    return df

main()

[*********************100%***********************]  1 of 1 completed

Optimization terminated successfully.
         Current function value: 0.689838
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:                 Target   No. Observations:                 8163
Model:                          Logit   Df Residuals:                     8157
Method:                           MLE   Df Model:                            5
Date:                Fri, 04 Jul 2025   Pseudo R-squ.:                0.001020
Time:                        07:29:18   Log-Likelihood:                -5631.1
converged:                       True   LL-Null:                       -5636.9
Covariance Type:            nonrobust   LLR p-value:                   0.04237
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0769      0.043      1.774      0.076      -0.008       0.162
Open          -0.0256      0.




Price,Adj Close,Close,High,Low,Open,Volume,Close + 1,Target
0,24.380444,43.937500,43.968750,43.750000,43.968750,1003200,44.250000,1
1,24.553854,44.250000,44.250000,43.968750,43.968750,480500,44.343750,1
2,24.605873,44.343750,44.375000,44.125000,44.218750,201300,44.812500,1
3,24.865978,44.812500,44.843750,44.375000,44.406250,529400,45.000000,1
4,24.970007,45.000000,45.093750,44.468750,44.968750,531500,44.968750,0
...,...,...,...,...,...,...,...,...
8158,614.909973,614.909973,616.390015,610.830017,612.880005,86258400,617.849976,1
8159,617.849976,617.849976,619.219971,615.039978,617.380005,92502500,617.650024,0
8160,617.650024,617.650024,618.830017,615.520020,616.359985,70030100,620.450012,1
8161,620.450012,620.450012,620.489990,616.609985,617.239990,66510400,625.340027,1
