In [1]:
# import all libraries from requirements.txt
import yfinance as yf
import polars as pl
import json

from pydantic import BaseModel
from typing import Optional, List, Dict, Any
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter



In [2]:
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    pass


def create_custom_session() -> CachedLimiterSession:
    """
    Create a custom requests session with caching and rate limiting.

    Returns
    -------
    CachedLimiterSession
        A custom requests session configured with a rate limiter allowing a maximum
        of 2 requests per 5 seconds. The session uses an in-memory queue bucket
        (MemoryQueueBucket) for efficient request tracking and a SQLite cache
        (SQLiteCache) named "yfinance.cache" for response caching.

    Notes
    -----
    1. Rationale
        The function initializes a CachedLimiterSession with a rate limiter
        allowing controlled access to external resources, preventing abuse and
        ensuring a smooth flow of requests. The choice of MemoryQueueBucket
        enhances performance by using an in-memory queue for tracking requests.
        Additionally, the use of an SQLite cache named "yfinance.cache" enables
        response caching, reducing redundant requests and improving overall
        efficiency.

    2. Implementation Details
        - The rate limiter is configured with a maximum of 2 requests per 5 seconds.
        - The bucket class is set to MemoryQueueBucket for efficient request tracking.
        - The cache backend is configured with an SQLiteCache named "yfinance.cache".

    """
    session = CachedLimiterSession(
        limiter=Limiter(
            RequestRate(2, Duration.SECOND * 5)
        ),  # max 2 requests per 5 seconds
        bucket_class=MemoryQueueBucket,
        backend=SQLiteCache("yfinance.cache"),
    )
    return session

In [3]:
# Define a Pydantic model for the stock price data
class StockPriceData(BaseModel):
    date: str
    closing_price: float
    returns: Optional[float]
    holding_period_yield: Optional[float]
    holding_period_return: Optional[float]
    portfolio_of_1000: Optional[float]


# Define a Pydantic model for the stock data
class StockData(BaseModel):
    """
    Pydantic model for representing stock data.

    Notes
    -----
    1. Rationale
        This Pydantic model defines the structure for representing stock data,
        consisting of a stock ticker symbol and a list of associated stock price data.

    """
    ticker: str
    stock_price_data: List[StockPriceData]


def fetch_stock_info(ticker: str, session: Session) -> yf.Ticker:
    """
    Fetch stock information for the given ticker using a custom requests session.

    Parameters
    ----------
    ticker: str
        The stock ticker symbol.
    session: Session
        The custom requests session configured for fetching stock information.

    Returns
    -------
    yf.Ticker:
        The stock information.

    Notes
    -----
    1. Rationale
        This function utilizes the Yahoo Finance API (yf.Ticker) to fetch detailed
        information for the specified stock ticker using the provided custom requests session.
        The use of a custom session allows for enhanced control over requests and efficient
        handling of stock-related data.

    """
    return yf.Ticker(ticker, session=session)


def get_stock_price(stock_info: yf.Ticker, period: str) -> pl.DataFrame:
    """
    Get stock price data for the given stock information and period.

    Parameters
    ----------
    stock_info: yf.Ticker
        The stock information.
    period: str
        The period for which to retrieve the stock data (e.g., '1d', '1mo', '1y').

    Returns
    -------
    pl.DataFrame
        The stock price data.

    Notes
    -----
    1. Rationale
        This function retrieves historical stock price data for the specified stock information
        and period, utilizing the Yahoo Finance API. The returned data is structured as a pandas
        DataFrame for further analysis and visualization.

    """
    return stock_info.history(period=period)


def clean_stock_price(stock_price: pl.DataFrame) -> pl.DataFrame:
    """
    Clean the raw stock price data and enhance it with additional calculated metrics.

    Parameters
    ----------
    stock_price: pl.DataFrame
        The raw stock price data.

    Returns
    -------
    pl.DataFrame
        The cleaned stock price data with additional calculated metrics.

    Notes
    -----
    1. Rationale
        This function takes raw stock price data and performs data cleaning operations,
        including selecting relevant columns, calculating returns, and deriving additional
        metrics such as holding period yield, holding period return, and portfolio value based on
        a hypothetical initial investment of $1000.

    """
    stock_price_clean = stock_price.loc[:, ["Close"]]
    stock_price_clean["closing_price"] = stock_price_clean["Close"]
    stock_price_clean["date"] = stock_price_clean.index.strftime("%Y-%m-%d %H:%M:%S%z")
    stock_price_clean["returns"] = stock_price_clean["Close"].pct_change()
    stock_price_clean["holding_period_yield"] = (
        stock_price_clean["Close"] / stock_price_clean["Close"].shift(1) - 1
    )
    stock_price_clean["holding_period_return"] = stock_price_clean[
        "Close"
    ] / stock_price_clean["Close"].shift(1)
    stock_price_clean["portfolio_of_1000"] = (
        1000 * stock_price_clean["holding_period_return"].cumprod()
    )
    stock_price_clean.index = stock_price_clean.index.strftime("%Y-%m-%d %H:%M:%S%z")
    return stock_price_clean


def get_stock_data(ticker: str, period: str) -> StockData:
    """
    Get stock data for the given ticker and period.

    Parameters
    ----------
    ticker: str
        The stock ticker symbol.
    period: str
        The period for which to retrieve the stock data (e.g., '1d', '1mo', '1y').

    Returns
    -------
    StockData
        The stock data containing the specified stock ticker symbol and associated
        cleaned stock price data.

    Notes
    -----
    1. Rationale
        This function orchestrates the retrieval and processing of stock data for the
        specified stock ticker and period. It utilizes a custom requests session,
        fetches stock information, obtains historical stock price data, cleans the data,
        and structures it using the StockData Pydantic model.

    2. Implementation Details
        - A custom requests session is created using create_custom_session().
        - Stock information is fetched using fetch_stock_info().
        - Historical stock price data is obtained using get_stock_price().
        - The raw stock price data is cleaned and additional metrics are calculated using clean_stock_price().
        - The cleaned data is structured into a StockData Pydantic model.

    """
    session = create_custom_session()
    stock_info = fetch_stock_info(ticker, session)
    stock_price = get_stock_price(stock_info, period)
    stock_price_clean = clean_stock_price(stock_price)
    stock_price_data = [
        StockPriceData(**data) for data in stock_price_clean.to_dict("records")
    ]
    return StockData(ticker=ticker, stock_price_data=stock_price_data)

In [1]:
# Define a Pydantic model for the size data
class SizeData(BaseModel):
    """
    Pydantic model representing size-related data for a company.

    Attributes
    ----------
    total_revenue (Optional[int]):
        Total revenue of the company (if available).
    total_assets (Optional[int]):
        Total assets of the company (if available).
    number_of_employees (Optional[int]):
        Number of employees in the company (if available).

    Notes
    -----
    1. Rationale
        This Pydantic model structures size-related data for a company, capturing
        key metrics such as total revenue, total assets, and the number of employees.
        The use of optional fields accommodates cases where specific data points may be
        unavailable.

    """
    total_revenue: Optional[int]
    total_assets: Optional[int]
    number_of_employees: Optional[int]


# Define a Pydantic model for the financial report data
class FinancialReportData(BaseModel):
    """
    Pydantic model representing financial report data for a company.

    Attributes
    ----------
    year (Optional[int]):
        The year for which the financial report is available (if available).
    financial_report (Dict[str, Any]):
        Dictionary containing the financial report data.
    financial_ratio (Dict[str, Any]):
        Dictionary containing financial ratios (placeholder for actual calculations).

    Notes
    -----
    1. Rationale
        This Pydantic model structures financial report data for a company, including the
        reporting year, detailed financial report, and a placeholder for financial ratios.
        Optional fields are used to handle cases where specific data points may be unavailable.

    """
    year: Optional[int]
    financial_report: Dict[str, Any]
    financial_ratio: Dict[str, Any]


# Define a Pydantic model for the fundamentals data
class FundamentalsData(BaseModel):
    """
    Pydantic model representing fundamentals data for a company.

    Attributes
    ----------
    stock_code (str):
        The stock code or ticker symbol.
    industry_classification (str):
        The industry classification of the company.
    geography (str):
        The geographical location of the company.
    size (SizeData):
        Size-related data for the company.
    financial_report (FinancialReportData):
        Financial report data for the company.

    Notes
    -----
    1. Rationale
        This Pydantic model structures comprehensive fundamentals data for a company,
        incorporating stock code, industry classification, geographical location, size-related
        data, and financial report data. Nested models SizeData and FinancialReportData are
        used for a well-organized representation.

    """
    stock_code: str
    industry_classification: str
    geography: str
    size: SizeData
    financial_report: FinancialReportData


def get_raw_data(stock_info: yf.Ticker) -> Dict[str, Any]:
    """
    Get raw data for the given stock information.

    Parameters
    ----------
    stock_info (yf.Ticker):
        The stock information.

    Returns
    --------
    Dict[str, Any]:
        A dictionary containing industry classification, geography, and size-related data.

    Notes
    -----
    1. Rationale
        This function extracts raw data from the provided stock information, including
        industry classification, geography, and size-related metrics. The data is
        structured into a dictionary for further processing.

    2. Implementation Details:
        - Industry classification and geography are obtained from stock_info.info.
        - Size-related metrics (total revenue, total assets, number of employees) are
          extracted and placed in a SizeData Pydantic model.

    """
    return {
        "industry_classification": stock_info.info.get("industry", "N/A"),
        "geography": stock_info.info.get("country", "N/A"),
        "size": SizeData(
            total_revenue=stock_info.info.get("total_revenue"),
            total_assets=stock_info.info.get("total_assets"),
            number_of_employees=stock_info.info.get("number_of_employees"),
        ),
    }


def get_financial_report(stock_info: yf.Ticker) -> FinancialReportData:
    """
    Get the financial report for the given stock information.

    Parameters
    ----------
    stock_info (yf.Ticker):
        The stock information.

    Returns
    --------
    FinancialReportData:
        The financial report data.

    Notes
    -----
    1. Rationale
        This function retrieves the financial report for the provided stock information,
        including detailed financial data and a placeholder for financial ratios. The
        data is structured into a FinancialReportData Pydantic model.

    2. Implementation Details:
        - Financial report data is obtained from stock_info.financials.
        - The latest year's financial report is extracted and placed in a dictionary.
        - Placeholder financial ratios (e.g., liquidity ratio) are included.

    """
    financials = stock_info.financials
    if not financials.empty:
        latest_year = financials.columns[0].year
        financial_report = financials[latest_year].to_dict()
    else:
        latest_year = "N/A"
        financial_report = {}

    # Placeholder for actual ratio calculations
    liquidity_ratio = {}  # Replace with actual calculations

    return FinancialReportData(
        year=latest_year,
        financial_report=financial_report,
        financial_ratio={"liquidity_ratio": liquidity_ratio},
    )


def get_stock_fundamentals(ticker: str) -> FundamentalsData:
    """
    Get the fundamentals data for the given stock ticker.

    Parameters:
    ticker (str):
        The stock ticker symbol.

    Returns:
    FundamentalsData:
        The fundamentals data.

    Rationale:
        This function orchestrates the retrieval of fundamentals data for a given stock
        ticker. It utilizes a custom requests session, extracts raw data, obtains the
        financial report, and structures the data into a FundamentalsData Pydantic model.

    Implementation Details:
        - A custom requests session is created using create_custom_session().
        - Stock information is fetched using fetch_stock_info().
        - Raw data is obtained using get_raw_data().
        - Financial report data is obtained using get_financial_report().
        - Fundamentals data is structured into a FundamentalsData Pydantic model.

    """
    session = create_custom_session()
    stock_info = fetch_stock_info(ticker, session)
    raw_data = get_raw_data(stock_info)
    financial_report = get_financial_report(stock_info)

    return FundamentalsData(
        stock_code=ticker,
        industry_classification=raw_data["industry_classification"],
        geography=raw_data["geography"],
        size=raw_data["size"],
        financial_report=financial_report,
    )

NameError: name 'BaseModel' is not defined

In [4]:
class SizeData(BaseModel):
    """
    Pydantic model representing size-related data for a company.

    Attributes:
    total_revenue (Optional[int]):
        Total revenue of the company (if available).
    total_assets (Optional[int]):
        Total assets of the company (if available).
    number_of_employees (Optional[int]):
        Number of employees in the company (if available).

    Rationale:
        This Pydantic model structures size-related data for a company, capturing
        key metrics such as total revenue, total assets, and the number of employees.
        The use of optional fields accommodates cases where specific data points may be
        unavailable.

    """

def read_ticker_symbols(file_path, ticker_column):
    """
    Read ticker symbols from a CSV file.

    Parameters
    ----------
    file_path: str
        The path to the CSV file.
    ticker_column: str
        The header of the column containing the ticker symbols.

    Returns
    -------
    list
        A list of ticker symbols extracted from the specified column in the CSV file.

    Notes
    -----
    1. Rationale
    This function reads ticker symbols from a CSV file, providing flexibility in
    specifying the file path and the column header containing the ticker symbols.
    The function uses polars to efficiently handle large CSV files and extract
    the desired information.

    2. Implementation Details
    - The CSV file is read into a LazyFrame using pl.scan_csv().
    - The LazyFrame is materialized into a DataFrame using df.collect().
    - Ticker symbols are extracted from the specified column and returned as a list.

    """
    # Read the CSV file
    df = pl.scan_csv(file_path)

    # Materialize the LazyFrame to a DataFrame
    df = df.collect()

    # Extract the ticker symbols
    ticker_symbols = df[ticker_column].to_list()

    return ticker_symbols

In [6]:
if __name__ == "__main__":
    """
    Execute data retrieval and processing for a list of stock tickers.

    Rationale:
        This script serves as the main entry point for retrieving and processing stock
        data for a list of ticker symbols. It reads ticker symbols from a CSV file, gets
        stock data for each ticker, and logs the results as a prettified JSON.

    Implementation Details:
        - Ticker symbols are read from the specified CSV file using read_ticker_symbols().
        - For each ticker, stock data is obtained using get_stock_data() with a predefined period.
        - The retrieved data is appended to the stock_ticker_list.
        - The final stock_ticker_list is logged as a prettified JSON.

    """
    # Read the ticker symbols from the CSV file
    # path = 'https://raw.githubusercontent.com/datasets/s-and-p-500-companies/main/data/constituents.csv'
    path = "../data/test.csv"
    ticker_symbols = read_ticker_symbols(file_path=path, ticker_column="Symbol")
    print(ticker_symbols)

    stock_ticker_list = []

    for ticker in ticker_symbols:
        data = get_stock_data(ticker=ticker, period="1mo")
        # Append the data to the stock_ticker_list
        stock_ticker_list.append(data)

    # Log stock_ticker_list as a prettified JSON
    stock_ticker_list_json = json.dumps(
        [data.dict() for data in stock_ticker_list], indent=2
    )
    print(stock_ticker_list_json)

['MMM', 'AOS', 'ABT', 'ABBV']


  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')


[
  {
    "ticker": "MMM",
    "stock_price_data": [
      {
        "date": "2023-12-27 00:00:00-0500",
        "closing_price": 108.73999786376953,
        "returns": NaN,
        "holding_period_yield": NaN,
        "holding_period_return": NaN,
        "portfolio_of_1000": NaN
      },
      {
        "date": "2023-12-28 00:00:00-0500",
        "closing_price": 109.69000244140625,
        "returns": 0.008736477803015008,
        "holding_period_yield": 0.008736477803015008,
        "holding_period_return": 1.008736477803015,
        "portfolio_of_1000": 1008.736477803015
      },
      {
        "date": "2023-12-29 00:00:00-0500",
        "closing_price": 109.31999969482422,
        "returns": -0.0033731674568945325,
        "holding_period_yield": -0.0033731674568945325,
        "holding_period_return": 0.9966268325431055,
        "portfolio_of_1000": 1005.3338407435075
      },
      {
        "date": "2024-01-02 00:00:00-0500",
        "closing_price": 110.0,
        "returns": 

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
/var/folders/gq/v8lzr0jd1bl_3c9_b1_h5zww0000gn/T/ipykernel_2208/419013069.py:32: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
  [data.dict() for data in stock_ticker_list], indent=2
