In [6]:
import yfinance as yf
import pandas as pd

In [7]:
def collect_single_stock_data(
    ticker: str,
    period: str = "5y",
    interval: str = "1wk",
    include_ticker: bool = False,
) -> pd.DataFrame:
    """
    Collects historical stock data for a given ticker, period, and interval.

    Args:
        ticker (str): Stock ticker symbol. Must be a non-empty string.
        period (str): The period for the data (e.g., '1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max').
        interval (str): The interval for the data (e.g., '1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', '1d', '5d', '1wk', '1mo', '3mo').
        include_ticker (bool): When True, a "Ticker" column holding ``ticker`` is
            inserted as the first column of the result. Defaults to False, in which
            case the frame is returned exactly as yfinance produced it.

    Returns:
        pandas.DataFrame: The frame returned by ``yf.Ticker(ticker).history(...)``,
        optionally with a leading "Ticker" column (see ``include_ticker``).

    Raises:
        ValueError: If ``ticker`` is not a non-empty string, or if the download
            returned an empty frame.
    """
    # Reject obviously unusable symbols before making a network request.
    if not isinstance(ticker, str) or not ticker.strip():
        raise ValueError(f"ticker must be a non-empty string, got {ticker!r}")

    stock_data = yf.Ticker(ticker).history(period=period, interval=interval)

    if stock_data.empty:
        raise ValueError(f"No data found for the given {ticker}")

    if include_ticker:
        # Work on a copy so the object handed back by yfinance is never mutated.
        stock_data = stock_data.copy()
        stock_data.insert(0, "Ticker", ticker)

    return stock_data

# Smoke check: fetch five days of daily NVDA bars and print the resulting frame.
print(collect_single_stock_data("NVDA", period="5d", interval="1d"))


                                 Open        High         Low       Close  \
Date                                                                        
2024-11-26 00:00:00-05:00  137.699997  139.300003  135.669998  136.919998   
2024-11-27 00:00:00-05:00  135.009995  137.220001  131.800003  135.339996   
2024-11-29 00:00:00-05:00  136.779999  139.350006  136.050003  138.250000   
2024-12-02 00:00:00-05:00  138.830002  140.449997  137.820007  138.630005   
2024-12-03 00:00:00-05:00  138.315002  140.539993  137.949997  139.838898   

                              Volume  Dividends  Stock Splits  
Date                                                           
2024-11-26 00:00:00-05:00  190287700        0.0           0.0  
2024-11-27 00:00:00-05:00  226370900        0.0           0.0  
2024-11-29 00:00:00-05:00  141863200        0.0           0.0  
2024-12-02 00:00:00-05:00  171682800        0.0           0.0  
2024-12-03 00:00:00-05:00  118559245        0.0           0.0  


In [8]:
def collect_multiple_stocks_data(
    tickers: list[str],
    period: str = "5y",
    interval: str = "1wk",
    add_ticker_column: bool = False,
) -> pd.DataFrame:
    """
    Collects historical data for several tickers and stacks it into one frame.

    Each ticker is downloaded with ``collect_single_stock_data`` and the
    per-ticker frames are concatenated row-wise in the order given.

    Args:
        tickers (list[str]): Ticker symbols to download. A single string is
            treated as a one-element list instead of being iterated
            character by character.
        period (str): Period forwarded to ``collect_single_stock_data``.
        interval (str): Interval forwarded to ``collect_single_stock_data``.
        add_ticker_column (bool): When True, each per-ticker frame gets a leading
            "Ticker" column before concatenation. Without it, rows coming from
            different tickers carry no marker of their origin, so enable this
            whenever more than one ticker is requested. Defaults to False to keep
            the original column layout.

    Returns:
        pandas.DataFrame: Row-wise concatenation of the per-ticker frames.

    Raises:
        ValueError: If ``tickers`` is empty, or if ``collect_single_stock_data``
            raises for one of the symbols.
    """
    # A bare string is iterable; without this guard "COST" would become C, O, S, T.
    if isinstance(tickers, str):
        tickers = [tickers]
    tickers = list(tickers)

    if not tickers:
        raise ValueError("tickers must contain at least one ticker symbol")

    frames = []
    for ticker in tickers:
        frame = collect_single_stock_data(ticker, period, interval)
        if add_ticker_column:
            # Copy first so the frame returned by the helper is left untouched.
            frame = frame.copy()
            frame.insert(0, "Ticker", ticker)
        frames.append(frame)

    return pd.concat(frames)

In [9]:
def save_dataframe_to_csv(dataframe: pd.DataFrame, filepath, index: bool = True) -> None:
    """
    Writes a DataFrame to CSV.

    Args:
        dataframe (pandas.DataFrame): Frame to write.
        filepath: Destination handed straight to ``DataFrame.to_csv`` (a path or
            a writable text buffer).
        index (bool): Whether to write the frame's index as the first column.
            Defaults to True so that an index carrying information (such as
            the ``Date`` index of price history) is not silently dropped.
            Pass ``index=False`` to omit it.
    """
    dataframe.to_csv(filepath, index=index)

# Symbols to download; edit this list to point the notebook at other targets.
# Example of a multi-symbol run: ["AAPL", "MSFT", "GOOGL", "AMZN"]
tickers = ["COST"]

# Five years of daily bars for every symbol listed above, stacked into one frame.
stock_data = collect_multiple_stocks_data(tickers=tickers, period="5y", interval="1d")

# Persist the download as a CSV (path is relative to the current working directory).
save_dataframe_to_csv(dataframe=stock_data, filepath="stock_data.csv")

In [10]:
# Quick overview of the downloaded frame: dimensions, column labels, first rows.
print("there are", len(stock_data.index), "rows and", len(stock_data.columns), "columns in the stock data")
print("the header values in the stock data are", stock_data.columns.to_numpy())
print(stock_data.iloc[:5])


there are 1259 rows and 7 columns in the stock data
the header values in the stock data are ['Open' 'High' 'Low' 'Close' 'Volume' 'Dividends' 'Stock Splits']
                                 Open        High         Low       Close  \
Date                                                                        
2019-12-03 00:00:00-05:00  271.233391  272.207561  269.000154  271.729675   
2019-12-04 00:00:00-05:00  272.069717  273.099027  271.251767  272.510834   
2019-12-05 00:00:00-05:00  273.071427  273.071427  268.816313  269.367737   
2019-12-06 00:00:00-05:00  271.095532  272.023734  270.433830  271.067963   
2019-12-09 00:00:00-05:00  271.113938  273.126617  270.700363  271.260986   

                            Volume  Dividends  Stock Splits  
Date                                                         
2019-12-03 00:00:00-05:00  1921400        0.0           0.0  
2019-12-04 00:00:00-05:00  1742000        0.0           0.0  
2019-12-05 00:00:00-05:00  1941100        0.0         