In [19]:
# import all libraries from requirements.txt
import yfinance as yf
import polars as pl
import json

from pydantic import BaseModel
from typing import Optional, List
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter

In [20]:
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    pass


def create_custom_session() -> CachedLimiterSession:
    """
    This function creates a custom requests session with caching and rate limiting.

    Returns:
    CachedLimiterSession: A custom requests session.
    """
    session = CachedLimiterSession(
        limiter=Limiter(
            RequestRate(2, Duration.SECOND * 5)
        ),  # max 2 requests per 5 seconds
        bucket_class=MemoryQueueBucket,
        backend=SQLiteCache("yfinance.cache"),
    )
    return session

In [21]:
# Define a Pydantic model for the stock price data
class StockPriceData(BaseModel):
    date: str
    closing_price: float
    returns: Optional[float]
    holding_period_yield: Optional[float]
    holding_period_return: Optional[float]
    portfolio_of_1000: Optional[float]


# Define a Pydantic model for the stock data
class StockData(BaseModel):
    ticker: str
    stock_price_data: List[StockPriceData]


def fetch_stock_info(ticker: str, session: Session) -> yf.Ticker:
    return yf.Ticker(ticker, session=session)


def get_stock_price(stock_info: yf.Ticker, period: str) -> pl.DataFrame:
    return stock_info.history(period=period)


def clean_stock_price(stock_price: pl.DataFrame) -> pl.DataFrame:
    stock_price_clean = stock_price.loc[:, ["Close"]]
    stock_price_clean["closing_price"] = stock_price_clean["Close"]
    stock_price_clean["date"] = stock_price_clean.index.strftime("%Y-%m-%d %H:%M:%S%z")
    stock_price_clean["returns"] = stock_price_clean["Close"].pct_change()
    stock_price_clean["holding_period_yield"] = (
        stock_price_clean["Close"] / stock_price_clean["Close"].shift(1) - 1
    )
    stock_price_clean["holding_period_return"] = stock_price_clean[
        "Close"
    ] / stock_price_clean["Close"].shift(1)
    stock_price_clean["portfolio_of_1000"] = (
        1000 * stock_price_clean["holding_period_return"].cumprod()
    )
    stock_price_clean.index = stock_price_clean.index.strftime("%Y-%m-%d %H:%M:%S%z")
    return stock_price_clean


def get_stock_data(ticker: str, period: str) -> StockData:
    session = create_custom_session()
    stock_info = fetch_stock_info(ticker, session)
    stock_price = get_stock_price(stock_info, period)
    stock_price_clean = clean_stock_price(stock_price)
    stock_price_data = [
        StockPriceData(**data) for data in stock_price_clean.to_dict("records")
    ]
    return StockData(ticker=ticker, stock_price_data=stock_price_data)

In [37]:
# Define a Pydantic model for the size data
class SizeData(BaseModel):
    total_revenue: Optional[int]
    total_assets: Optional[int]
    number_of_employees: Optional[int]


# Define a Pydantic model for the financial report data
class FinancialReportData(BaseModel):
    year: Optional[int]
    financial_report: Dict[str, Any]
    financial_ratio: Dict[str, Any]


# Define a Pydantic model for the fundamentals data
class FundamentalsData(BaseModel):
    stock_code: str
    industry_classification: str
    geography: str
    size: SizeData
    financial_report: FinancialReportData


def get_raw_data(stock_info: yf.Ticker) -> Dict[str, Any]:
    return {
        "industry_classification": stock_info.info.get("industry", "N/A"),
        "geography": stock_info.info.get("country", "N/A"),
        "size": SizeData(
            total_revenue=stock_info.info.get("total_revenue"),
            total_assets=stock_info.info.get("total_assets"),
            number_of_employees=stock_info.info.get("number_of_employees"),
        ),
    }


def get_financial_report(stock_info: yf.Ticker) -> FinancialReportData:
    financials = stock_info.financials
    if not financials.empty:
        latest_year = financials.columns[0].year
        financial_report = financials[latest_year].to_dict()
    else:
        latest_year = "N/A"
        financial_report = {}

    # Placeholder for actual ratio calculations
    liquidity_ratio = {}  # Replace with actual calculations

    return FinancialReportData(
        year=latest_year,
        financial_report=financial_report,
        financial_ratio={"liquidity_ratio": liquidity_ratio},
    )


def get_stock_fundamentals(ticker: str) -> FundamentalsData:
    session = create_custom_session()
    stock_info = fetch_stock_info(ticker, session)
    raw_data = get_raw_data(stock_info)
    financial_report = get_financial_report(stock_info)

    return FundamentalsData(
        stock_code=ticker,
        industry_classification=raw_data["industry_classification"],
        geography=raw_data["geography"],
        size=raw_data["size"],
        financial_report=financial_report,
    )

SyntaxError: incomplete input (392434684.py, line 2)

In [22]:
def read_ticker_symbols(file_path, ticker_column):
    """
    This function reads the ticker symbols from a CSV file.

    Parameters:
    file_path (str): The path to the CSV file.
    ticker_column (str): The header of the column containing the ticker symbols.

    Returns:
    list: A list of ticker symbols.
    """
    # Read the CSV file
    df = pl.scan_csv(file_path)

    # Materialize the LazyFrame to a DataFrame
    df = df.collect()

    # Extract the ticker symbols
    ticker_symbols = df[ticker_column].to_list()

    return ticker_symbols

In [23]:
if __name__ == "__main__":
    # Read the ticker symbols from the CSV file
    # path = 'https://raw.githubusercontent.com/datasets/s-and-p-500-companies/main/data/constituents.csv'
    path = "data/test.csv"
    ticker_symbols = read_ticker_symbols(file_path=path, ticker_column="Symbol")
    print(ticker_symbols)

    stock_ticker_list = []

    for ticker in ticker_symbols:
        data = get_stock_data(ticker=ticker, period="1mo")
        # Append the data to the stock_ticker_list
        stock_ticker_list.append(data)

    # Log stock_ticker_list as a prettified JSON
    stock_ticker_list_json = json.dumps(
        [data.dict() for data in stock_ticker_list], indent=2
    )
    print(stock_ticker_list_json)

['MMM', 'AOS', 'ABT', 'ABBV']


  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')


[
  {
    "ticker": "MMM",
    "stock_price_data": [
      {
        "date": "2023-12-26 00:00:00-0500",
        "closing_price": 108.11000061035156,
        "returns": NaN,
        "holding_period_yield": NaN,
        "holding_period_return": NaN,
        "portfolio_of_1000": NaN
      },
      {
        "date": "2023-12-27 00:00:00-0500",
        "closing_price": 108.73999786376953,
        "returns": 0.005827372582196233,
        "holding_period_yield": 0.005827372582196233,
        "holding_period_return": 1.0058273725821962,
        "portfolio_of_1000": 1005.8273725821962
      },
      {
        "date": "2023-12-28 00:00:00-0500",
        "closing_price": 109.69000244140625,
        "returns": 0.008736477803015008,
        "holding_period_yield": 0.008736477803015008,
        "holding_period_return": 1.008736477803015,
        "portfolio_of_1000": 1014.6147610964256
      },
      {
        "date": "2023-12-29 00:00:00-0500",
        "closing_price": 109.31999969482422,
        "

/var/folders/gq/v8lzr0jd1bl_3c9_b1_h5zww0000gn/T/ipykernel_2156/2631876278.py:17: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
  [data.dict() for data in stock_ticker_list], indent=2
