In [1]:
import numpy as np 
import pandas as pd
from pandas.plotting import lag_plot
import yfinance as yf
import matplotlib.pyplot as plt
import statsmodels.api as sm
from curl_cffi import requests # pip install curl_cffi
from sklearn.metrics import confusion_matrix # pip install scikit-learn

TICKER = "SPY"

INTERVAL = "1d"
if INTERVAL == "1h":
    PERIOD = "730d"
else: 
    PERIOD = "max"

SHIFT = 1 
LOOKBACK = 10000

STRATEGY = ["Close", "High", "Low", "Open","Volume"]

SESSION = requests.Session(impersonate="chrome")

def get_data(ticker = TICKER, session = SESSION, interval = INTERVAL, lookback = LOOKBACK):

    df = yf.download(ticker, session = session, interval = interval, period = PERIOD)
    df.columns = df.columns.get_level_values(0)
    df = df.reset_index(drop=True)

    return df.iloc[-lookback:, :]

# Add Features
# Add Target 
def add_target(df, shift = SHIFT):

    df[f"Close + {shift}"] = df["Close"].shift(-shift)
    df["Target"] = (df[f"Close + {shift}"] > df["Close"]) * 1

    return df

# --- (5) Generate Regression Output ---
def generate_regression_output(df, features=STRATEGY, target="Target"):

    subset = df[features + [target]].dropna()
    print(subset.columns)
    X = subset[features] 
    y = subset[target] 
    X_with_const = sm.add_constant(X)
    
    model = sm.Logit(y, X_with_const).fit() 
    print(model.summary())

    return df


# Validate Regression Output

def main():

    df = get_data()
    df = add_target(df)
    df = generate_regression_output(df)

    return df 

df = main()
df


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

Index(['Close', 'High', 'Low', 'Open', 'Volume', 'Target'], dtype='object', name='Price')
Optimization terminated successfully.
         Current function value: 0.689134
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:                 Target   No. Observations:                 8277
Model:                          Logit   Df Residuals:                     8271
Method:                           MLE   Df Model:                            5
Date:                Tue, 16 Dec 2025   Pseudo R-squ.:               0.0009346
Time:                        19:25:10   Log-Likelihood:                -5704.0
converged:                       True   LL-Null:                       -5709.3
Covariance Type:            nonrobust   LLR p-value:                   0.05829
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1




Price,Close,High,Low,Open,Volume,Close + 1,Target
0,24.313028,24.330321,24.209274,24.330321,1003200,24.485958,1
1,24.485958,24.485958,24.330327,24.330327,480500,24.537836,1
2,24.537836,24.555128,24.416790,24.468667,201300,24.797209,1
3,24.797209,24.814501,24.555116,24.572408,529400,24.900982,1
4,24.900982,24.952859,24.607012,24.883690,531500,24.883701,0
...,...,...,...,...,...,...,...
8272,683.039978,685.390015,682.590027,683.150024,58310100,687.570007,1
8273,687.570007,688.969971,681.309998,682.559998,85671300,689.169983,1
8274,689.169983,689.250000,682.169983,685.140015,86173700,681.760010,0
8275,681.760010,688.880005,679.169983,688.169983,113160300,680.729980,0
