In [24]:
import sys
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

In [25]:
def get_data(ticker: str, interval: str = '1d', lookback_days: int = 756) -> pd.DataFrame:
    """
    Download historical market data with yfinance and normalise its layout.

    Args:
        ticker (str): Stock or ETF symbol (e.g., 'SPY').
        interval (str): Bar size. Only '1d', '1h' and '1m' are accepted.
        lookback_days (int): Number of days to request. For the '1m'
            interval the request is capped at 7 days.

    Returns:
        pd.DataFrame: Downloaded data with rows containing any missing value
        dropped, flat lower-case column names, and an index named 'datetime'.

    Raises:
        ValueError: If `interval` is not one of the supported values.
    """
    max_minute_days = 7  # longest one-minute window this loader will request

    if interval in ('1d', '1h'):
        # NOTE(review): the data provider may also limit how far back '1h'
        # bars go -- confirm before requesting long hourly histories.
        period = f"{lookback_days}d"
    elif interval == '1m':
        # Cap (rather than overwrite) so shorter lookbacks are honoured.
        period = f"{min(lookback_days, max_minute_days)}d"
    else:
        raise ValueError("Unsupported interval for free data")

    # auto_adjust is pinned explicitly so the result does not depend on the
    # installed yfinance default; True matches the five-column frame (no
    # 'adj close') recorded in this notebook's output.
    df = yf.download(
        ticker,
        period=period,
        interval=interval,
        progress=False,
        auto_adjust=True,
    )

    # yfinance may return (field, ticker) tuple column labels; keep the field.
    df.columns = [col[0] if isinstance(col, tuple) else col for col in df.columns]
    df = df.dropna().rename(columns=str.lower)
    df.index.name = 'datetime'

    return df

In [26]:
# Load 756 daily bars for SPY into the working frame used by later cells.
df = get_data("SPY", interval="1d", lookback_days=756)

# Quick sanity checks: schema / null counts, first rows, summary statistics.
df.info()
print(df.head())
print(df.describe())

  df = yf.download(ticker, period=period, interval=interval, progress=False)


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 756 entries, 2022-08-18 to 2025-08-22
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   close   756 non-null    float64
 1   high    756 non-null    float64
 2   low     756 non-null    float64
 3   open    756 non-null    float64
 4   volume  756 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 35.4 KB
                 close        high         low        open    volume
datetime                                                            
2022-08-18  410.099396  410.789432  407.808752  409.112193  49023200
2022-08-19  404.588501  407.578775  403.706739  407.310417  68016900
2022-08-22  396.163910  399.882594  395.253397  399.710055  77695600
2022-08-23  395.205536  398.147900  394.649634  395.732657  49105200
2022-08-24  396.470673  397.850774  394.285470  394.975507  49177800
            close        high         low        open        volume
count  756.00000

In [29]:
# Feature engineering

# Absolute change from the previous row's close (close - previous close) --> float.
# The first row has no predecessor, so it is NaN.
df['change'] = df['close'].diff(periods=1)

# Percentage change from the previous row: (change / previous close) * 100 --> float.
df['percent_change'] = df['change'].div(df['close'].shift(periods=1)).mul(100)


In [30]:
# Confirm the two new feature columns exist and check their non-null counts.
df.info()
# Show the last five rows, including the engineered columns.
print(df.tail(5))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 756 entries, 2022-08-18 to 2025-08-22
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   close           756 non-null    float64
 1   high            756 non-null    float64
 2   low             756 non-null    float64
 3   open            756 non-null    float64
 4   volume          756 non-null    int64  
 5   change          755 non-null    float64
 6   percent_change  755 non-null    float64
dtypes: float64(6), int64(1)
memory usage: 47.2 KB
                 close        high         low        open    volume  \
datetime                                                               
2025-08-18  643.299988  644.000000  642.179993  642.859985  43804900   
2025-08-19  639.809998  644.109985  638.479980  643.119995  69750700   
2025-08-20  638.109985  639.659973  632.950012  639.400024  88890300   
2025-08-21  635.549988  637.969971  633.809998  636.280029  5

In [31]:
# These engineered features can serve as guidance for our trading algorithms.
# For example, under certain conditions (e.g. the percentage change falls below -2%, i.e. a drop of more than 2%), we might buy, and so on.