In [1]:
import yfinance as yf
import pandas as pd



In [2]:
def collect_single_stock_data(ticker: str, period: str = "5y", interval: str = "1wk") -> pd.DataFrame:
    """
    Collects historical stock data for a given ticker, period, and interval.

    The data is fetched with ``yf.Ticker(ticker).history(...)`` and tagged with
    the ticker symbol so frames for several tickers can be stacked later.

    Args:
        ticker (str): Stock ticker symbol.
        period (str): The period for the data (e.g., '1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max').
        interval (str): The interval for the data (e.g., '1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', '1d', '5d', '1wk', '1mo', '3mo').

    Returns:
        pandas.DataFrame: The history frame as returned by yfinance, with a
        "Ticker" column appended as the LAST column (every row holds ``ticker``).

    Raises:
        ValueError: If the history call returns an empty frame.
    """

    stock_data = yf.Ticker(ticker).history(period=period, interval=interval)

    # An empty frame means nothing came back for this ticker/period/interval;
    # fail loudly instead of returning a frame that only has a Ticker column.
    if stock_data.empty:
        raise ValueError(
            f"No data found for ticker {ticker!r} (period={period!r}, interval={interval!r})"
        )

    # Column assignment appends at the end, so "Ticker" becomes the last column.
    stock_data["Ticker"] = ticker

    return stock_data

# Smoke test: fetch the last five daily bars for NVDA and show them as text.
nvda_sample = collect_single_stock_data(ticker="NVDA", period="5d", interval="1d")
print(nvda_sample)


                                 Open        High         Low       Close  \
Date                                                                        
2024-11-18 00:00:00-05:00  139.500000  141.550003  137.149994  140.149994   
2024-11-19 00:00:00-05:00  141.320007  147.130005  140.990005  147.009995   
2024-11-20 00:00:00-05:00  147.410004  147.559998  142.729996  145.889999   
2024-11-21 00:00:00-05:00  149.350006  152.889999  140.699997  146.669998   
2024-11-22 00:00:00-05:00  145.929993  147.160004  141.100006  141.949997   

                              Volume  Dividends  Stock Splits Ticker  
Date                                                                  
2024-11-18 00:00:00-05:00  221866000        0.0           0.0   NVDA  
2024-11-19 00:00:00-05:00  227834900        0.0           0.0   NVDA  
2024-11-20 00:00:00-05:00  309871700        0.0           0.0   NVDA  
2024-11-21 00:00:00-05:00  400946600        0.0           0.0   NVDA  
2024-11-22 00:00:00-05:00  2357722

In [3]:
def collect_multiple_stocks_data(tickers: list[str], period: str = "5y", interval: str = "1wk") -> pd.DataFrame:
    """
    Collects historical data for several tickers and stacks it into one frame.

    Each ticker is fetched with ``collect_single_stock_data`` using the same
    ``period`` and ``interval``; the per-ticker frames are concatenated
    row-wise in the order the tickers were given.

    Args:
        tickers (list[str]): Ticker symbols to fetch. A single string is
            treated as one symbol rather than iterated character by character.
        period (str): Period forwarded to ``collect_single_stock_data``.
        interval (str): Interval forwarded to ``collect_single_stock_data``.

    Returns:
        pandas.DataFrame: Row-wise concatenation of the per-ticker frames.

    Raises:
        ValueError: If ``tickers`` is empty, or (propagated from
            ``collect_single_stock_data``) if any ticker yields no data.
    """

    # Guard against collect_multiple_stocks_data("AAPL") fetching "A", "A", "P", "L".
    if isinstance(tickers, str):
        tickers = [tickers]

    frames = [collect_single_stock_data(ticker, period, interval) for ticker in tickers]

    # pd.concat([]) raises an opaque "No objects to concatenate"; say what is wrong.
    if not frames:
        raise ValueError("tickers must contain at least one ticker symbol")

    return pd.concat(frames)

In [4]:
def save_dataframe_to_csv(dataframe, filepath, index: bool = False):
    """
    Writes a DataFrame to a CSV file.

    Args:
        dataframe (pandas.DataFrame): The frame to write.
        filepath: Destination passed straight to ``DataFrame.to_csv``.
        index (bool): Whether to write the frame's index as the first
            column(s). Defaults to False, which DROPS the index; pass True
            (or call ``reset_index()`` first) when the index carries data
            such as dates.
    """
    dataframe.to_csv(filepath, index=index)

# customize this to our targets
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN"]

stock_data = collect_multiple_stocks_data(tickers)

# The dates live in the frame's index ("Date"), and save_dataframe_to_csv
# writes without the index. Promote the index to a regular column for the
# export so the CSV keeps its dates; `stock_data` itself is left unchanged.
save_dataframe_to_csv(stock_data.reset_index(), "stock_data.csv")

In [14]:
# Quick overview of the combined frame: dimensions, column names, first rows.
n_rows, n_cols = stock_data.shape
print(f"there are {n_rows} rows and {n_cols} columns in the stock data")
column_names = stock_data.columns.values
print("the header values in the stock data are", column_names)
print(stock_data.head())


there are 1048 rows and 8 columns in the stock data
the header values in the stock data are ['Open' 'High' 'Low' 'Close' 'Volume' 'Dividends' 'Stock Splits' 'Ticker']
                                Open       High        Low      Close  \
Date                                                                    
2019-11-18 00:00:00-05:00  63.644112  63.787110  63.219963  63.447792   
2019-11-25 00:00:00-05:00  63.673195  64.955338  63.622299  64.773560   
2019-12-02 00:00:00-05:00  64.778389  65.682434  62.117164  65.612144   
2019-12-09 00:00:00-05:00  65.440048  66.724609  64.206383  66.688255   
2019-12-16 00:00:00-05:00  67.136658  68.506050  67.131813  67.728043   

                              Volume  Dividends  Stock Splits Ticker  
Date                                                                  
2019-11-18 00:00:00-05:00   65325200        0.0           0.0   AAPL  
2019-11-25 00:00:00-05:00  301081200        0.0           0.0   AAPL  
2019-12-02 00:00:00-05:00  456599200 