In [None]:
import yfinance as yf
import pandas as pd

In [2]:
def collect_single_stock_data(ticker: str, period: str = "5y", interval: str = "1wk"):
    """
    Fetch the price history of one ticker through yfinance and tag each row with it.

    Args:
        ticker (str): Stock ticker symbol, e.g. "NVDA".
        period (str): Look-back window forwarded to ``Ticker.history``
            (e.g., '1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max').
        interval (str): Bar size forwarded to ``Ticker.history``
            (e.g., '1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', '1d', '5d', '1wk', '1mo', '3mo').

    Returns:
        pandas.DataFrame: The frame returned by ``Ticker.history`` with a constant
        "Ticker" column appended as the LAST column (it is not moved to the front).

    Raises:
        ValueError: If yfinance returns an empty frame for the request.
    """
    history = yf.Ticker(ticker).history(period=period, interval=interval)

    if not history.empty:
        # Label every row so frames for different tickers can be stacked later
        # and still be told apart.
        history["Ticker"] = ticker
        return history

    # An empty frame is what the caller sees when nothing came back for this
    # ticker/period/interval; surface that as an error instead of returning it.
    raise ValueError(f"No data found for the given {ticker}")

# Quick sanity check: last five daily bars for NVDA.
print(collect_single_stock_data("NVDA", period="5d", interval="1d"))


                                 Open        High         Low       Close  \
Date                                                                        
2024-11-21 00:00:00-05:00  149.350006  152.889999  140.699997  146.669998   
2024-11-22 00:00:00-05:00  145.929993  147.160004  141.100006  141.949997   
2024-11-25 00:00:00-05:00  141.990005  142.050003  135.820007  136.020004   
2024-11-26 00:00:00-05:00  137.699997  139.300003  135.669998  136.919998   
2024-11-27 00:00:00-05:00  135.009995  135.419998  131.899994  135.339996   

                              Volume  Dividends  Stock Splits Ticker  
Date                                                                  
2024-11-21 00:00:00-05:00  400946600        0.0           0.0   NVDA  
2024-11-22 00:00:00-05:00  236406200        0.0           0.0   NVDA  
2024-11-25 00:00:00-05:00  344941900        0.0           0.0   NVDA  
2024-11-26 00:00:00-05:00  190287700        0.0           0.0   NVDA  
2024-11-27 00:00:00-05:00  2239338

In [3]:
def collect_multiple_stocks_data(tickers: list[str], period: str = "5y", interval: str = "1wk") -> pd.DataFrame:
    """
    Collect historical data for several tickers and stack them into one frame.

    Args:
        tickers (list[str]): Ticker symbols to download. A single string is
            also accepted and treated as a one-element list.
        period (str): Period forwarded to ``collect_single_stock_data``.
        interval (str): Interval forwarded to ``collect_single_stock_data``.

    Returns:
        pandas.DataFrame: The per-ticker frames concatenated row-wise, in the
        order the tickers were given. Each row keeps the "Ticker" column set by
        ``collect_single_stock_data``.

    Raises:
        ValueError: If ``tickers`` is empty, or (propagated from
            ``collect_single_stock_data``) if any ticker yields no data.
    """
    # A bare string would otherwise be iterated character by character
    # ("NVDA" -> "N", "V", "D", "A") and silently fetch the wrong symbols.
    if isinstance(tickers, str):
        tickers = [tickers]
    else:
        # Materialise once so generators work and emptiness can be checked.
        tickers = list(tickers)

    # pd.concat([]) fails with an opaque "No objects to concatenate"; raise the
    # same exception type with a message that points at the real cause.
    if not tickers:
        raise ValueError("tickers must contain at least one ticker symbol")

    frames = [collect_single_stock_data(ticker, period, interval) for ticker in tickers]

    return pd.concat(frames)

In [4]:
def save_dataframe_to_csv(dataframe, filepath, index=None):
    """
    Write a DataFrame to CSV without silently discarding a meaningful index.

    Args:
        dataframe (pandas.DataFrame): Frame to write.
        filepath: Destination path or writable buffer, forwarded to
            ``DataFrame.to_csv``.
        index (bool | None): Whether to write the index. ``None`` (default)
            writes the index unless it is a plain ``pd.RangeIndex``; pass
            ``True``/``False`` to force either behaviour.

    Returns:
        None
    """
    if index is None:
        # The stock history frames are indexed by Date; writing them with
        # index=False would drop every timestamp from the file. A default
        # RangeIndex carries no information, so it is still omitted.
        index = not isinstance(dataframe.index, pd.RangeIndex)
    dataframe.to_csv(filepath, index=index)

# Ticker symbols to download; edit this list to change the targets.
tickers = ["NVDA"]
# Multi-ticker example: tickers = ["AAPL", "MSFT", "GOOGL", "AMZN"]

# Download with the default period/interval and stack everything into one frame.
stock_data = collect_multiple_stocks_data(tickers=tickers)

# Persist the combined frame next to the notebook.
save_dataframe_to_csv(dataframe=stock_data, filepath="stock_data.csv")

In [5]:
# Quick overview of the collected frame: dimensions, column names, first rows.
print(f"there are {stock_data.shape[0]} rows and {stock_data.shape[1]} columns in the stock data")
print(f"the header values in the stock data are {stock_data.columns.values}")
print(stock_data.head())


there are 262 rows and 8 columns in the stock data
the header values in the stock data are ['Open' 'High' 'Low' 'Close' 'Volume' 'Dividends' 'Stock Splits' 'Ticker']
                               Open      High       Low     Close      Volume  \
Date                                                                            
2019-11-25 00:00:00-05:00  5.440290  5.474622  5.368389  5.392273   357340000   
2019-12-02 00:00:00-05:00  5.389279  5.402724  4.988681  5.282469  1527812000   
2019-12-09 00:00:00-05:00  5.259564  5.700994  5.250850  5.576756  1561116000   
2019-12-16 00:00:00-05:00  5.624311  5.985073  5.596177  5.959678  2034124000   
2019-12-23 00:00:00-05:00  5.988309  6.020427  5.871042  5.897434   931420000   

                           Dividends  Stock Splits Ticker  
Date                                                       
2019-11-25 00:00:00-05:00      0.004           0.0   NVDA  
2019-12-02 00:00:00-05:00      0.000           0.0   NVDA  
2019-12-09 00:00:00-05:00 