In [42]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
from trade.helpers.helper import CustomCache
from trade.helpers.helper_types import SingletonMetaClass
from trade.optionlib.config.defaults import DIVIDEND_LOOKBACK_YEARS, DIVIDEND_FORECAST_METHOD
from trade.optionlib.config.types import DiscreteDivGrowthModel, DivType
from trade.optionlib.assets.dividend import (
    get_vectorized_dividend_scehdule,
    vector_convert_to_time_frac,
    vectorized_discrete_pv,
    get_vectorized_dividend_rate,
    get_vectorized_continuous_dividends,
    get_vectorized_dividend_rate,
    get_div_histories,
    _dual_project_dividends,
    ScheduleEntry,
    Schedule,
    SECONDS_IN_YEAR,
    SECONDS_IN_DAY,
)
import os
from trade import HOLIDAY_SET
from trade.helpers.helper import is_USholiday, is_busday, to_datetime
from EventDriven.riskmanager.market_data import MarketTimeseries, AtIndexResult, TimeseriesData
import pandas as pd
from typing import Optional, Dict, Union, List, Iterable
from typing import overload, Literal
from trade.helpers.helper import compare_dates, get_missing_dates
from trade.helpers.Logging import setup_logger
from datetime import datetime
from pandas.tseries.offsets import BDay
from trade.optionlib.config.defaults import OPTION_TIMESERIES_START_DATE
from trade.helpers.decorators import cProfiler
from trade.helpers.helper import print_top_cprofile_stats
from trade.optionlib.assets.forward import (
    vectorized_discrete_pv,
    vectorized_forward_continuous,
    vectorized_forward_discrete,
    get_vectorized_continuous_dividends,
)

from trade.optionlib.vol.implied_vol import (
    vector_vol_estimation,
    bsm_vol_est_brute_force,
    estimate_crr_implied_volatility,
    crr_binomial_pricing
)

from trade.optionlib.pricing.binomial import vector_crr_binomial_pricing

from trade.optionlib.utils.batch_operation import vector_batch_processor
from trade.assets.rates import get_risk_free_rate_helper, _fetch_rates
import time
# from dbase.DataAPI.ThetaData import (
#     retrieve_bulk_eod,
#     retrieve_eod_ohlc,
#     retrieve_quote,
#     list_contracts,
# )

from dbase.DataAPI.ThetaData.v2 import (
    retrieve_bulk_eod,
    retrieve_eod_ohlc,
    retrieve_quote,
    list_contracts,
    quote_to_eod_patch,
    list_dates,
)

from trade.optionlib.utils.format import (
    assert_equal_length, 
    convert_to_array
)


from dbase.DataAPI.ThetaData.utils import _handle_opttick_param
from dbase.utils import default_timestamp
# Module-level logger, configured with stream_log_level="INFO".
logger = setup_logger(__name__, stream_log_level="INFO")
# Root folder for the data-manager caches.
# NOTE(review): os.getenv returns None when GEN_CACHE_PATH is unset, and Path(None)
# raises TypeError -- confirm the variable is always exported before this cell runs.
DM_GEN_PATH = Path(os.getenv("GEN_CACHE_PATH")) / "dm_gen_cache"
# Shared MarketTimeseries instance; its `_end` is fixed to the moment this cell executes.
TS = MarketTimeseries(_end=datetime.now())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2026-01-18 19:43:31 [test] trade.__init__ INFO: Signal function for `_on_exit_sanitize` added to signal number 15.
2026-01-18 19:43:31 [test] trade.__init__ INFO: Exit handler `_on_exit_sanitize` registered for normal program exit.


In [43]:
## Cache Key construction
from dataclasses import dataclass
from datetime import datetime, date, time, timezone
from enum import Enum
from hashlib import sha1
from typing import Any, Dict, Mapping, Optional, Tuple


class Interval(str, Enum):
    """Sampling-interval component of a cache key (str-backed, so members compare equal to their value)."""
    INTRADAY = "intraday"  # historical intraday timestamp
    EOD = "eod"  # end-of-day daily snapshot
    NA = "na"  # not applicable

class SeriesId(str, Enum):
    """Series-kind component of a cache key (str-backed)."""
    HIST = "hist"
    # AT_TIME and SNAPSHOT keys must carry `date` and `time` extra parts
    # (enforced by the assertions in construct_cache_key).
    AT_TIME = "at_time"
    SNAPSHOT = "snapshot"

In [None]:
class ArtifactType(str, Enum):
    """Kind of data stored under a cache key; the member value is written into the key as `artifact_type:<value>`."""
    # Market / inputs
    SPOT = "spot"
    CHAIN = "chain"
    RATES = "rates"
    DIVS = "divs"
    FWD = "forward"
    OPTION_SPOT = "option_spot"
    DATES = "dates"

    # Volatility
    IV = "iv"
    TVAR = "tvar"

    # Greeks
    GREEKS = "greeks"
    DELTA = "delta"
    GAMMA = "gamma"
    VEGA = "vega"
    THETA = "theta"
    VOMMA = "vomma"
    VANNA = "vanna"
    RHO = "rho"


def _norm_str(x: str) -> str:
    return x.strip().upper()

def _safe_part(x: Optional[str]) -> str:
    return x if x not in (None, "", "None") else "-"

In [45]:
def _format_value(v: Any) -> str:
    """
    Keep it simple + deterministic.
    """
    if v is None:
        return "-"
    if isinstance(v, Enum):
        return str(v.value)
    if isinstance(v, str):
        return _norm_str(v)
    if isinstance(v, bool):
        return "1" if v else "0"
    if isinstance(v, (int,)):
        return str(v)
    if isinstance(v, float):
        # avoid 0.30000000000004 style keys
        return f"{v:.12g}"
    if isinstance(v, datetime):
        # stable, compact. (no tz handling by design here)
        return v.strftime("%Y%m%dT%H%M%S")
    if isinstance(v, date):
        return v.strftime("%Y%m%d")
    
    if isinstance(v, time):
        return v.strftime("%H%M%S")
    return str(v)


def construct_cache_key(
    symbol: str,
    interval: Optional[Interval],
    artifact_type: ArtifactType,
    series_id: SeriesId,
    **extra_parts: Any,
) -> str:
    """
    Build a deterministic, pipe-delimited cache key.

    The key always starts with the four fixed parts (symbol, interval, artifact_type,
    series_id) in that order, followed by every extra part sorted by name. Each part is
    rendered as ``name:value``.

    For AT_TIME and SNAPSHOT series the extras must include a ``date`` (datetime.date)
    and a ``time`` (datetime.time); this is enforced with assertions.
    """
    needs_timestamp = series_id in (SeriesId.AT_TIME, SeriesId.SNAPSHOT)
    if needs_timestamp:
        assert 'time' in extra_parts, "time must be provided for at_time or snapshot series_id"
        assert 'date' in extra_parts, "date must be provided for at_time or snapshot series_id"
        assert isinstance(extra_parts['time'], time), "time must be a time object"
        assert isinstance(extra_parts['date'], date), "date must be a date object"

    fixed_pairs = [
        ("symbol", _norm_str(symbol)),
        ("interval", _format_value(interval)),
        ("artifact_type", artifact_type.value),
        ("series_id", series_id.value),
    ]
    # Sorting the extra names makes the key independent of keyword order at the call site.
    extra_pairs = [(name, _format_value(extra_parts[name])) for name in sorted(extra_parts)]

    return "|".join(f"{name}:{text}" for name, text in fixed_pairs + extra_pairs)


# Example: a historical EOD implied-vol key for AAPL with two extra parts (date, model).
# `k` is reused by the parsing demo in the next cell.
k = construct_cache_key(
    "AAPL",
    Interval.EOD,
    ArtifactType.IV,
    SeriesId.HIST,
    date=date(2024, 1, 1),
    model="SABR",
)
print(k)


symbol:AAPL|interval:eod|artifact_type:iv|series_id:hist|date:20240101|model:SABR


In [46]:
def _parse_cache_key(key: str) -> Dict[str, str]:
    """Parses a pipe-delimited cache key into a dictionary of key-value pairs."""
    parts = key.split("|")
    result = {}
    for part in parts:
        k, v = part.split(":", 1)
        result[k] = v
    return result

# Round-trip check: parse the example key `k` built in the previous cell.
print(_parse_cache_key(k))

{'symbol': 'AAPL', 'interval': 'eod', 'artifact_type': 'iv', 'series_id': 'hist', 'date': '20240101', 'model': 'SABR'}


## Building DataManagers


In [47]:
# Display the cache root both as a POSIX string and as the Path object itself.
DM_GEN_PATH.as_posix(), DM_GEN_PATH

('/Users/chiemelienwanisobi/cloned_repos/QuantTools/.cache/dm_gen_cache',
 PosixPath('/Users/chiemelienwanisobi/cloned_repos/QuantTools/.cache/dm_gen_cache'))

In [48]:


from abc import ABC
from typing import Any, Callable, ClassVar, Dict, Optional, Type, TypeVar

# Assumes you already have these (from your cache_key module)
# from cache_key import construct_cache_key, Interval, ArtifactType, SeriesId

# Generic result type of the `compute_fn` passed to BaseDataManager.get_or_compute.
T = TypeVar("T")


# REMEMBER: Take out the commented-out parts
@dataclass(frozen=True, slots=True)
class CacheSpec:
    """
    Optional: a small, immutable config object you can pass around, so all managers
    initialize their caches in a consistent way.

    If you already have a cache registry/factory, you may not need this.

    Attributes:
        base_dir (Optional[Union[str, Path]]): Directory for cache storage. The default is the
            POSIX *string* form of DM_GEN_PATH; it is passed to CustomCache as `location`.
        default_expire_days (Optional[int]): Default expiration time in days. This is how many days till the entire cache entry expires.
            Passed to CustomCache as `expire_days`.
        default_expire_seconds (Optional[int]): Default expiration time in seconds. This is how many seconds till a single cache entry expires.
            Used by BaseDataManager.set when no explicit `expire` is given.
        cache_fname (Optional[str]): Foldername for the cache storage. Passed to CustomCache as `fname`.
        clear_on_exit (bool): If True, clears the cache on exit.
    """

    base_dir: Optional[Union[str, Path]] = DM_GEN_PATH.as_posix()
    default_expire_days: Optional[int] = 500
    default_expire_seconds: Optional[int] = None
    cache_fname: Optional[str] = None
    clear_on_exit: bool = False


class BaseDataManager(ABC):
    """
    Foundation class for all DataManagers.

    Goals:
    - Every inheritor gets a cache.
    - Every inheritor MUST define CACHE_NAME.
    - Provide consistent key creation (namespaced).
    - Provide thin get/set/get_or_compute wrappers.
    - Keep business logic out of the base.
    """

    # --- REQUIRED by inheritors ---
    CACHE_NAME: ClassVar[str] = ""

    # --- Optional defaults for common patterns ---
    DEFAULT_INTERVAL: ClassVar[Optional["Interval"]] = None
    DEFAULT_SERIES_ID: ClassVar["SeriesId"]  # annotation only: subclasses must assign a value

    # Maps CACHE_NAME -> the most recently defined subclass that uses it.
    # Uniqueness is NOT enforced: a later class with the same name replaces the entry.
    _CACHE_NAME_REGISTRY: ClassVar[Dict[str, Type["BaseDataManager"]]] = {}

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """
        Enforce that every subclass defines CACHE_NAME and DEFAULT_SERIES_ID, then
        record the subclass in the cache-name registry.

        Raises
        ------
        TypeError
            If CACHE_NAME is missing, not a string, or blank, or if DEFAULT_SERIES_ID
            is not defined.
        """
        super().__init_subclass__(**kwargs)

        # Skip enforcement for the abstract base itself
        if cls is BaseDataManager:
            return

        cache_name = getattr(cls, "CACHE_NAME", None)

        if not isinstance(cache_name, str) or not cache_name.strip():
            raise TypeError(f"{cls.__name__} must define a non-empty class variable CACHE_NAME: str")

        # Validate everything before registering, so a rejected class never lands in the registry.
        if not hasattr(cls, "DEFAULT_SERIES_ID"):
            raise TypeError(f"{cls.__name__} must define DEFAULT_SERIES_ID (e.g., SeriesId.HIST).")

        cls._CACHE_NAME_REGISTRY[cache_name.strip()] = cls

    def __init__(
        self,
        *,
        cache_spec: Optional[CacheSpec] = None,
        enable_namespacing: bool = False,
    ) -> None:
        """
        Create the backing CustomCache and purge entries that have already expired.

        Parameters
        ----------
        cache_spec:
            Optional shared configuration (base_dir, TTL defaults, etc.). When omitted,
            a default CacheSpec whose cache_fname is this class's CACHE_NAME is used.
        enable_namespacing:
            If True, keys are prefixed with CACHE_NAME to avoid collisions.
        """
        self.cache_spec = cache_spec or CacheSpec(cache_fname=self.CACHE_NAME)
        self.cache = CustomCache(location=self.cache_spec.base_dir, 
                                 fname=self.cache_spec.cache_fname, 
                                 expire_days=self.cache_spec.default_expire_days,
                                 clear_on_exit=self.cache_spec.clear_on_exit)
        self.enable_namespacing = enable_namespacing
        # expire() is expected to return the number of removed entries.
        out = self.cache.expire()
        if out > 0:
            logger.info(f"{self.CACHE_NAME} has expired {out} entries")

    # Key construction
    def make_key(
        self,
        *,
        symbol: str,
        interval: Optional[Interval] = None,
        artifact_type: ArtifactType,
        series_id: Optional[SeriesId] = None,
        **extra_parts: Any,
    ) -> str:
        """
        Namespaced key builder that wraps your construct_cache_key.

        `interval` and `series_id` fall back to the class-level DEFAULT_INTERVAL and
        DEFAULT_SERIES_ID when not given. When namespacing is enabled the result is
        prefixed with ``CACHE_NAME|``.

        You decided:
        - no caching SNAPSHOT series_id (but you might still request it)
        - time is explicit if you do AT_TIME
        """
        interval = interval if interval is not None else self.DEFAULT_INTERVAL
        series_id = series_id if series_id is not None else self.DEFAULT_SERIES_ID

        raw = construct_cache_key(
            symbol=symbol,
            interval=interval,
            artifact_type=artifact_type,
            series_id=series_id,
            **extra_parts,
        )

        if not self.enable_namespacing:
            return raw

        return f"{self.CACHE_NAME}|{raw}"

    # Cache IO
    def get(self, key: str, default: Any = None) -> Any:
        """Return the cached value for `key`, or `default` when it is absent."""
        return self.cache.get(key, default=default)

    def set(self, key: str, value: Any, *, expire: Optional[int] = None) -> None:
        """Store `value` under `key`; `expire=None` falls back to cache_spec.default_expire_seconds."""
        if expire is None:
            expire = self.cache_spec.default_expire_seconds
        self.cache.set(key, value, expire=expire)

    def delete(self, key: str) -> None:
        """Remove `key` from the cache."""
        self.cache.delete(key)

    def contains(self, key: str) -> bool:
        """Return True when `key` is present in the cache."""
        return key in self.cache
    
    def cache_it(self, key: str, value: Any, *, expire: Optional[int] = None) -> None:
        """Hook for manager-specific caching (e.g. merge-then-store); subclasses must override."""
        raise NotImplementedError(f"{self.__class__.__name__}.cache_it() not implemented.")

    def get_or_compute(
        self,
        key: str,
        compute_fn: Callable[[], T],
        *,
        expire: Optional[int] = None,
        force: bool = False,
    ) -> T:
        """
        Read-through caching helper.

        force=True bypasses cache read, recomputes and overwrites cache.

        A cached value of None is indistinguishable from a miss, so it is recomputed.
        """
        if not force:
            hit = self.cache.get(key, default=None)
            if hit is not None:
                return hit  # type: ignore[return-value]

        value = compute_fn()
        self.set(key, value, expire=expire)
        return value

    # Offload hook (cron calls this)
    def offload(self, *args: Any, **kwargs: Any) -> None:
        """
        Optional standard hook.

        You can override in subclasses or implement a shared offloader that
        knows how to iterate keys / export values. Keeping it as a stub here
        avoids forcing a storage design too early.
        """
        raise NotImplementedError(f"{self.__class__.__name__}.offload() not implemented.")



In [49]:
# Accepted date inputs: a datetime or a string (DateRangePacket converts both with to_datetime).
DATE_HINT = Union[datetime, str]

@dataclass(slots=True)
class DateRangePacket:
    """
    Simple container for start/end date ranges with both datetime and string formats.

    After construction, ``start_date``/``end_date`` (and ``maturity_date`` when given)
    hold the result of ``to_datetime``. The ``*_str`` fields keep any caller-supplied
    text and otherwise default to the ``%Y-%m-%d`` rendering of the matching date.

    Note: when ``maturity_date`` is None, ``maturity_str`` is always reset to None,
    even if the caller passed a value for it.
    """
    start_date: DATE_HINT
    end_date: DATE_HINT
    start_str: Optional[str] = None
    end_str: Optional[str] = None
    maturity_date: Optional[DATE_HINT] = None  # optional: defaults to None
    maturity_str: Optional[str] = None

    def __post_init__(self):
        """Normalise the date fields and fill in any missing string forms."""
        self.start_date = to_datetime(self.start_date)
        self.end_date = to_datetime(self.end_date)
        self.start_str = self.start_str or self.start_date.strftime("%Y-%m-%d")
        self.end_str = self.end_str or self.end_date.strftime("%Y-%m-%d")

        if self.maturity_date is None:
            # No maturity: the string form is cleared regardless of what was passed in.
            self.maturity_str = None
            return

        self.maturity_date = to_datetime(self.maturity_date)
        self.maturity_str = self.maturity_str or self.maturity_date.strftime("%Y-%m-%d")

In [50]:
from bisect import bisect_left, bisect_right
from datetime import date
from typing import List


def slice_schedule(full_schedule: List, val_date: date, mat_date: date) -> List:
    """
    Return entries in full_schedule with entry.date in [val_date, mat_date].
    Assumes full_schedule is sorted by entry.date ascending and each entry has .date (datetime.date).
    """
    dates = [e.date for e in full_schedule]  # small list; ok to rebuild (or precompute once)
    i0 = bisect_left(dates, val_date)
    i1 = bisect_right(dates, mat_date)
    return full_schedule[i0:i1]


In [51]:
from trade.helpers.helper import ny_now
def _should_save_today(max_date: date) -> bool:
    """
    Decide whether data up to ``max_date`` may be saved including today's row.

    Returns True only when ``max_date`` is on or after the current New York date AND
    the New York clock has reached 16:00. Both the date and the hour come from a single
    ``ny_now()`` call, so the check does not depend on the machine's local timezone and
    cannot straddle midnight between two clock reads.
    """
    # assumes ny_now() returns a datetime-like object (has .date() and .hour) -- TODO confirm
    now_ny = ny_now()
    return max_date >= now_ny.date() and now_ny.hour >= 16

In [52]:
def is_available_on_date(date: date) -> bool:
    """
    Tell whether ``date`` is a business day that is not a US holiday.

    The input is converted with ``to_datetime`` and checked in ``YYYY-MM-DD`` text form.
    """
    day_text = to_datetime(date).strftime("%Y-%m-%d")
    return is_busday(day_text) and not is_USholiday(day_text)

In [53]:

def _data_structure_cache_it(self: BaseDataManager, 
                             key: str, 
                             value: Union[pd.Series, pd.DataFrame],
                             *, 
                             expire: Optional[int] = None):
    """Merges and caches rate timeseries, excluding today's partial data."""
    value = value.copy()
    if not isinstance(value, (pd.Series, pd.DataFrame)):
        raise TypeError(f"Expected pd.Series or pd.DataFrame for caching, got {type(value)}")
    
    if not isinstance(value.index, pd.DatetimeIndex):
        raise TypeError("Expected DatetimeIndex for caching timeseries data.")
    
    if not isinstance(self, BaseDataManager):
        raise TypeError(f"{self.__class__.__name__} must be a subclass of BaseDataManager.")
    
    
    ## Since it is a timeseries, we will append to existing if exists
    existing = self.get(key, default=None)
    if existing is not None:
        # Merge existing and new values. We're expecting pd.Series
        merged = pd.concat([existing, value])
        value = merged[~merged.index.duplicated(keep="last")]

    if value.empty:
        logger.info(f"Not caching empty timeseries for key: {key}")
        return

    if not _should_save_today(max_date=value.index.max().date()):
        logger.info(f"Cutting off today's data for key: {key} to avoid saving partial day data.")
        value = value[value.index < pd.to_datetime(date.today())]

    value.sort_index(inplace=True)

    self.set(key, value, expire=expire)


def _simple_list_cache_it(
        self: BaseDataManager,
        key: str,
        value: List[Any],
        *,
        expire: Optional[int] = None
):
    """Cache a list of simple values. Will append and keep unique. Also sort"""

    if not isinstance(value, list):
        raise TypeError(f"Expected list. Recieved {type(value)}")
    
    existing: List = self.get(key, default = [])
    existing.extend(value)
    existing = sorted(list(set(existing)))
    self.set(key, value, expire=expire)

In [54]:
def _data_structure_sanitize(df: Union[pd.Series, pd.DataFrame],
                             start: Union[datetime, str],
                             end: Union[datetime, str],) -> Union[pd.Series, pd.DataFrame]:
    """Sanitizes the data structure by removing duplicates and sorting the index."""
    print(f"Sanitizing data from {start} to {end}...")
    if not isinstance(df, (pd.Series, pd.DataFrame)):
        raise TypeError(f"Expected pd.Series or pd.DataFrame for sanitization, got {type(df)}")
    
    # Ensure DatetimeIndex. If not, attempt conversion
    if not isinstance(df.index, pd.DatetimeIndex):
        try: 
            df.index = to_datetime(df.index, format="%Y-%m-%d")
        except Exception as e:
            raise TypeError("Expected DatetimeIndex for sanitization of timeseries data.") from e
        
    
    # Remove duplicates, keeping the last occurrence
    df = df[~df.index.duplicated(keep="last")]
    
    # Sort the index
    df = df.sort_index()

    # if dataframe, lower case columns
    if isinstance(df, pd.DataFrame):
        df.columns = df.columns.str.lower()

    # Filter by start and end dates
    df = df[(df.index >= pd.to_datetime(start)) & (df.index <= pd.to_datetime(end))]

    # Re-sort after filtering
    df = df.sort_index()

    # Index name=datetime
    df.index.name = "datetime"
    
    return df

In [55]:
def _check_cache_for_timeseries_data_structure(
    self: BaseDataManager,
    key: str,
    start_dt: DATE_HINT,
    end_dt: DATE_HINT,
) -> Tuple[Optional[Union[pd.Series, pd.DataFrame]], bool, DATE_HINT, DATE_HINT]:
    """
    Checks cache for existing timeseries data structure and identifies missing dates.

    Return args order:
    - cached_data: None on a full miss (nothing cached, or the cached object is not a
      Series/DataFrame); the sanitized slice for [start_dt, end_dt] on a full hit; the
      raw cached object (unsanitized) on a partial hit.
    - is_partial: True only when data is cached but some requested dates are missing.
      False for both a full hit and a full miss.
    - missing_start_date: The earliest missing date on a partial hit, else start_dt
    - missing_end_date: The latest missing date on a partial hit, else end_dt

    Raises
    ------
    TypeError
        If ``self`` is not a BaseDataManager.
    """
    # Validate the manager before using it.
    if not isinstance(self, BaseDataManager):
        raise TypeError(f"{self.__class__.__name__} must be a subclass of BaseDataManager.")

    cached_data = self.get(key, default=None)

    # Nothing cached, or something of an unexpected type: treat as a full miss.
    if not isinstance(cached_data, (pd.Series, pd.DataFrame)):
        return None, False, start_dt, end_dt
    
    missing = get_missing_dates(cached_data, _start=start_dt, _end=end_dt)
    if not missing:
        logger.info(f"Cache hit for timeseries data structure key: {key}")
        cached_data = _data_structure_sanitize(
            cached_data,
            start=start_dt,
            end=end_dt,
        )
        return cached_data, False, start_dt, end_dt
    logger.info(
        f"Cache partially covers requested date range for timeseries data structure. "
        f"Key: {key}. Fetching missing dates: {missing}"
    )
    return cached_data, True, min(missing), max(missing)


### Dividends DataManager

In [56]:
class OptionSpotEndpointSource(Enum):
    """
    Thetadata creates a native EOD report every day by 6pm ET.
    This enum allows choosing between using that EOD report or the intraday quote end point.
    This is essential because during market hours, the EOD report is not yet available.

    Selected through OptionDataConfig.option_spot_endpoint_source (default: EOD).
    """

    EOD = "eod"  # use the native end-of-day report
    QUOTE = "quote"  # use the intraday quote endpoint

class OptionPricingModel(Enum):
    """Enumeration of option pricing models; selected through OptionDataConfig.option_model (default: BSM)."""

    BSM = "Black-Scholes"
    BINOMIAL = "Binomial"


class VolatilityModel(Enum):
    """Enumeration of volatility models; selected through OptionDataConfig.volatility_model (default: MARKET)."""

    MARKET = "market"
    MODEL_DYNAMIC = "model_dynamic"


@dataclass
class OptionDataConfig(metaclass=SingletonMetaClass):
    """
    Configuration for OptionDataManager.

    Every attribute assignment (including those made by the generated ``__init__``) is
    validated with ``assert_valid``. An assignment that fails validation is rolled back,
    so the configuration object never stays in an invalid state.
    """

    option_spot_endpoint_source: OptionSpotEndpointSource = OptionSpotEndpointSource.EOD
    default_lookback_years: int = DIVIDEND_LOOKBACK_YEARS
    default_forecast_method: DiscreteDivGrowthModel = DiscreteDivGrowthModel.CONSTANT
    dividend_type: DivType = DivType.DISCRETE
    include_special_dividends: bool = False
    option_model: OptionPricingModel = OptionPricingModel.BSM
    volatility_model: VolatilityModel = VolatilityModel.MARKET

    def assert_valid(self) -> None:
        """
        Validates all configuration values against business rules.

        Raises
        ------
        AssertionError
            If any value is out of range or of the wrong type.
        """
        assert self.default_lookback_years > 0, "Lookback years must be positive."
        assert self.default_lookback_years <= 5, "Lookback years seems too large. Max 5."
        assert isinstance(
            self.default_forecast_method, DiscreteDivGrowthModel
        ), "Invalid forecast method. Expected DiscreteDivGrowthModel Enum."
        assert isinstance(self.dividend_type, DivType), "Invalid dividend type. Expected DivType Enum."
        assert isinstance(self.include_special_dividends, bool), "include_special_dividends must be a boolean."
        assert isinstance(self.option_spot_endpoint_source, OptionSpotEndpointSource), (
            "Invalid option_spot_endpoint_source. Expected OptionSpotEndpointSource Enum."
        )
        assert isinstance(self.option_model, OptionPricingModel), "Invalid option_model. Expected OptionPricingModel Enum."
        assert isinstance(self.volatility_model, VolatilityModel), "Invalid volatility_model. Expected VolatilityModel Enum."

    def __post_init__(self) -> None:
        """Validates configuration after initialization."""
        self.assert_valid()

    def __setattr__(self, name, value):
        """
        Sets an attribute, validates the whole configuration, and rolls the change
        back (re-raising the AssertionError) when validation fails.
        """
        not_set = object()
        # Only the instance dict is consulted: attributes not yet assigned on the
        # instance fall back to the class-level dataclass defaults.
        previous = self.__dict__.get(name, not_set)
        super().__setattr__(name, value)
        try:
            self.assert_valid()
        except AssertionError:
            if previous is not_set:
                # First assignment of this attribute: remove it so the class default applies again.
                self.__dict__.pop(name, None)
            else:
                super().__setattr__(name, previous)
            raise


In [57]:
## How the dividends timeseries will work:
## If discrete:
    ## All constant(+...) will cache up to < today
    ## All non-constant will not cache
## If continuous:
    ## Rely on MarketTimeseries to provide continuous dividend yield history. It already caches.


    

In [58]:
@dataclass
class Result:
    """Common parent of the data-manager result containers; supplies a compact, overridable repr."""

    def _additional_repr_fields(self) -> Dict[str, Any]:
        """Hook for subclasses: name -> value pairs to show in the repr (none by default)."""
        return dict()
    
    def __repr__(self) -> str:
        """Render as ``ClassName(name=value, ...)`` using the pairs from ``_additional_repr_fields``."""
        extras = self._additional_repr_fields()
        pairs = extras.items() if extras else ()
        rendered = ", ".join(f"{name}={value!r}" for name, value in pairs)
        return f"{self.__class__.__name__}({rendered})"

@dataclass
class DividendsResult(Result):
    """Dividend data for a date range: either a discrete series or a continuous series, tagged by ``dividend_type``."""
    daily_discrete_dividends: Optional[pd.Series] = None
    daily_continuous_dividends: Optional[pd.Series] = None
    dividend_type: Optional[DivType] = None
    key: Optional[str] = None
    undo_adjust: Optional[bool] = None

    def __repr__(self) -> str:
        # Defined explicitly so @dataclass does not generate its own field-dumping repr.
        return super().__repr__()
    
    def is_empty(self) -> bool:
        """Return True when the series matching ``dividend_type`` is None or empty, or when the type is neither DISCRETE nor CONTINUOUS."""
        if self.dividend_type == DivType.DISCRETE:
            selected = self.daily_discrete_dividends
        elif self.dividend_type == DivType.CONTINUOUS:
            selected = self.daily_continuous_dividends
        else:
            return True
        return selected is None or selected.empty
    
    def _additional_repr_fields(self) -> Dict[str, Any]:
        """Summary fields shown in the repr (the series themselves are left out)."""
        summary: Dict[str, Any] = {}
        summary["dividend_type"] = self.dividend_type
        summary["key"] = self.key
        summary["is_empty"] = self.is_empty()
        summary["undo_adjust"] = self.undo_adjust
        return summary


In [59]:
class DividendDataManager(BaseDataManager):
    """Manages dividend data retrieval, caching, and schedule construction for a specific symbol."""
    CACHE_NAME: ClassVar[str] = "dividend_data_manager"
    DEFAULT_SERIES_ID: ClassVar["SeriesId"] = SeriesId.HIST
    CONFIG = OptionDataConfig()
    INSTANCES = {}

    def __new__(cls, symbol: str, *args: Any, **kwargs: Any) -> "DividendDataManager":
        """Per-symbol singleton: hand back the registered instance, creating it on first use.

        On first use for a symbol, its timeseries is loaded into ``TS`` before
        the instance is created and registered in ``cls.INSTANCES``.
        """
        try:
            return cls.INSTANCES[symbol]
        except KeyError:
            pass

        TS.load_timeseries(symbol, start_date=OPTION_TIMESERIES_START_DATE, end_date=datetime.now())
        cls.INSTANCES[symbol] = super(DividendDataManager, cls).__new__(cls)
        return cls.INSTANCES[symbol]
    
    def __init__(self, symbol: str, *, cache_spec: Optional[CacheSpec] = None, enable_namespacing: bool = False) -> None:
        """Set up the manager for ``symbol`` once; later constructions of the same instance are no-ops.

        Besides the base-class cache, a separate ``temp_cache`` is created with
        a one-day expiry that is cleared on exit.
        """
        already_set_up = getattr(self, "_initialized", False)
        if already_set_up:
            return

        self._initialized = True
        super().__init__(cache_spec=cache_spec, enable_namespacing=enable_namespacing)
        self.symbol = symbol

        temp_cache_options = dict(
            location=DM_GEN_PATH.as_posix(),
            fname="dividend_temp_cache",
            expire_days=1,
            clear_on_exit=True,
        )
        self.temp_cache: CustomCache = CustomCache(**temp_cache_options)

    ## General caching logic
    def cache_it(self, 
                 key: str, 
                 value: Any, 
                 *, 
                 expire: Optional[int] = None, 
                 _type: str = "discrete") -> None:
        """Store ``value`` under ``key``; discrete schedules are merged and never keep future dates.

        For ``_type == "discrete"`` the value is treated as an iterable of
        entries exposing ``.date``. Entries dated after today are dropped, the
        rest are merged with whatever is already cached (a new entry replaces a
        cached one on the same date) and stored sorted by date. Any other
        ``_type`` is written to the cache unchanged.
        """
        if _type != "discrete":
            # Non-discrete payloads are stored verbatim.
            self.set(key, value, expire=expire)
            return

        previous = self.get(key, default=None)
        cutoff = datetime.today().date()
        realised = [item for item in value if item.date <= cutoff]

        if previous is None:
            to_store = realised
        else:
            # Later items win, so freshly supplied entries override cached ones.
            by_date = {}
            for item in previous + realised:
                by_date[item.date] = item
            to_store = sorted(by_date.values(), key=lambda item: item.date)

        self.set(key, to_store, expire=expire)

    ## Dividend yield history retrieval for continuous dividends. Already cached in MarketTimeseries.
    def get_div_yield_history(self, symbol: str, skip_preload_check: bool = False) -> pd.Series:
        """Return the ``dividend_yield`` attribute of the ``TS`` timeseries for ``symbol``.

        Note that the lookup uses the ``symbol`` argument, not ``self.symbol``.
        """
        return TS.get_timeseries(symbol, skip_preload_check=skip_preload_check).dividend_yield

    ## Discrete dividend schedule retrieval with caching.
    def get_discrete_dividend_schedule(
        self,
        *,
        end_date: Union[str, datetime, pd.Timestamp],
        start_date: Union[str, datetime, pd.Timestamp],
        valuation_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
    ) -> Tuple[List[ScheduleEntry], str]:
        """Return the discrete dividend schedule between two dates, using the cache when it covers them.

        Args:
            end_date: Last date of the requested window (string, datetime or Timestamp).
            start_date: First date of the requested window (string, datetime or Timestamp).
            valuation_date: Optional valuation date forwarded to the schedule builder.

        Returns:
            ``(entries, key)`` where ``entries`` is the list of schedule entries
            and ``key`` is the cache key used for this symbol/method/lookback.

        The cache is considered to cover the request only when its earliest
        entry is on or before ``start_date`` and its latest entry is on or
        after ``end_date``. Otherwise the schedule is rebuilt and handed to
        ``cache_it`` for merging.
        """
        # Parse the bounds once so every comparison below works for string,
        # datetime and Timestamp inputs alike. Previously the cache-hit branch
        # called strptime on the raw arguments and failed for non-strings.
        start_dt = pd.to_datetime(start_date).date()
        end_dt = pd.to_datetime(end_date).date()

        ticker = self.symbol
        method = self.CONFIG.default_forecast_method.value
        lookback_years = self.CONFIG.default_lookback_years
        key = self.make_key(
            symbol=ticker,
            artifact_type=ArtifactType.DIVS,
            series_id=SeriesId.HIST,
            method=method,
            lookback_years=lookback_years,
            current_state="schedule",
            interval=Interval.NA,
            vendor="yfinance"
        )

        available_schedule = self.get(key, default=None)
        if available_schedule:
            logger.info(f"Cache hit for key: {key}")
            cached_dates = [entry.date for entry in available_schedule]
            fully_covered = min(cached_dates) <= start_dt and max(cached_dates) >= end_dt
            if fully_covered:
                logger.info(f"Cache fully covers requested date range. Key: {key}")

                ## Filter to requested date range
                filtered_schedule = [e for e in available_schedule if start_dt <= e.date <= end_dt]
                return filtered_schedule, key

            logger.info(f"Cache partially covers requested date range. Key: {key}. Fetching missing data.")

        # Cache miss or partial coverage: rebuild for the requested window.
        schedule = get_vectorized_dividend_scehdule(
            tickers=[ticker],
            end_dates=[end_date],
            start_dates=[start_date],
            method=method,
            lookback_years=lookback_years,
            valuation_dates=[valuation_date] if valuation_date else None,
        )
        raw_schedule = schedule[0].schedule
        self.cache_it(key, raw_schedule, _type="discrete")

        return raw_schedule, key

    ## Switcher to choose between constructing all the way or using cached pieces
    def _get_discrete_schedule_timeseries(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        maturity_date: Union[datetime, str],
        div_type: Optional[DivType] = None,
        undo_adjust: bool = True,
    ) -> Tuple[pd.Series, str]:
        """Build (or extend from cache) the daily series of dividend schedules up to maturity.

        Args:
            start_date: First valuation date requested.
            end_date: Last valuation date requested.
            maturity_date: Fixed horizon of every daily schedule; must be >= ``start_date``.
            div_type: Validated via ``DivType(...)`` but otherwise unused here.
            undo_adjust: When True, each schedule is multiplied by the symbol's
                split factor for that valuation date.

        Returns:
            ``(series, key)``: the sanitized series and its cache key.

        Raises:
            ValueError: If ``maturity_date`` precedes ``start_date``.

        When the cache only partially covers the request, only the missing
        window is rebuilt and merged into the cached series, so valid cached
        rows are never overwritten by a schedule fetched for a later start.
        """
        logger.info(f"Fetching discrete dividend schedule timeseries for {self.symbol} from {start_date} to {end_date} with maturity {maturity_date}")
        # Kept for validation: DivType(...) rejects unknown values.
        div_type = DivType(div_type) if div_type is not None else self.CONFIG.dividend_type
        is_partial = False
        start_dt = pd.to_datetime(start_date).date()
        end_dt = pd.to_datetime(end_date).date()
        mat_dt = pd.to_datetime(maturity_date).date()
        start_str = start_dt.strftime("%Y-%m-%d")
        end_str = end_dt.strftime("%Y-%m-%d")
        mat_str = mat_dt.strftime("%Y-%m-%d")

        if mat_dt < start_dt:
            logger.error(f"Maturity date {mat_dt} is before start date {start_dt}")
            raise ValueError("maturity_date must be >= start_date")

        key = self.make_key(
            symbol=self.symbol,
            artifact_type=ArtifactType.DIVS,
            series_id=SeriesId.HIST,
            method=self.CONFIG.default_forecast_method.value,
            lookback_years=self.CONFIG.default_lookback_years,
            current_state="schedule_timeseries",
            interval=Interval.EOD,
            undo_adjust=undo_adjust,
            maturity=mat_str,
        )

        # Window of valuation dates that actually has to be (re)built.
        build_start_dt, build_end_dt = start_dt, end_dt

        cached_series = self.get(key, default=None)
        if cached_series is not None:
            logger.info(f"Cache hit for discrete schedule timeseries key: {key}")
            missing_dates = get_missing_dates(
                cached_series,
                start_str,
                end_str
            )
            if not missing_dates:
                logger.info(f"Cache fully covers requested date range for timeseries. Key: {key}")
                in_window = (
                    (cached_series.index >= pd.to_datetime(start_date))
                    & (cached_series.index <= pd.to_datetime(end_date))
                )
                return cached_series[in_window], key

            logger.info(f"Cache partially covers requested date range for timeseries. Key: {key}. Fetching missing dates: {missing_dates}")
            # Narrow BOTH the event fetch and the daily rebuild to the missing
            # window. Rebuilding the full range from a schedule that starts at
            # min(missing) would drop earlier dividends and then overwrite the
            # correct cached rows during the keep='last' merge below.
            build_start_dt = pd.to_datetime(min(missing_dates)).date()
            build_end_dt = pd.to_datetime(max(missing_dates)).date()
            is_partial = True
        else:
            logger.info(f"No cache found for discrete schedule timeseries key: {key}. Building from scratch.")

        # Fetch ONCE: all events from the first date to build up to maturity.
        fetch_start_str = build_start_dt.strftime("%Y-%m-%d")
        full_schedule, _ = self.get_discrete_dividend_schedule(
            start_date=fetch_start_str,
            end_date=mat_str,
            valuation_date=fetch_start_str,
        )

        # One Schedule per business day in the build window, skipping holidays.
        series = {}
        date_range = pd.date_range(start=build_start_dt, end=build_end_dt, freq="B").strftime("%Y-%m-%d")
        for d in date_range:
            if d in HOLIDAY_SET:
                continue
            d_date = datetime.strptime(d, "%Y-%m-%d").date()

            # slice_schedule presumably keeps the events between d_date and
            # maturity -- TODO confirm against its definition.
            series[d_date] = Schedule(slice_schedule(full_schedule, d_date, mat_dt))
        data = pd.Series(series, name="dividend_schedule")

        # Back-adjust to represent cashflows as of valuation date. Ie undoing splits
        if undo_adjust:
            data = data.to_frame()
            split_factors = TS._split_factor[self.symbol].copy()
            data["split_factor"] = split_factors
            data["dividend_schedule"] = data["dividend_schedule"] * data["split_factor"]
            data = data["dividend_schedule"]

        if is_partial:
            # The cached series is compared against Timestamps above, so bring
            # the freshly built rows onto a datetime index before merging;
            # otherwise duplicate dates would not be recognised as duplicates.
            data.index = pd.to_datetime(data.index)
            merged = pd.concat([cached_series, data])
            data = merged[~merged.index.duplicated(keep='last')]

        data = _data_structure_sanitize(data, start_date, end_date)

        self.set(key, data, expire=86400/2)  # 12 hours expiry for timeseries cache
        return data, key

    def get_schedule_timeseries(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        maturity_date: Union[datetime, str],
        div_type: Optional[DivType] = None,
        undo_adjust: bool = True,
    ) -> DividendsResult:
        """
        Returns a DAILY series (indexed by date) where each value is the dividend schedule
        from that valuation date up to maturity_date.

        - start_date/end_date define the valuation date range
        - maturity_date is the fixed horizon (e.g., option expiry)
        - div_type falls back to ``self.CONFIG.dividend_type`` when None
        - undo_adjust is forwarded to the discrete builder and recorded on the result

        For DISCRETE the result carries ``daily_discrete_dividends`` and the cache key;
        for CONTINUOUS it carries the yield history filtered to the date range and no key.
        """

        div_type = DivType(div_type) if div_type is not None else self.CONFIG.dividend_type
        result = DividendsResult()
        result.dividend_type = div_type
        result.undo_adjust = undo_adjust

        if div_type == DivType.DISCRETE:
            data, key = self._get_discrete_schedule_timeseries(
                start_date=start_date,
                end_date=end_date,
                maturity_date=maturity_date,
                div_type=div_type,
                undo_adjust=undo_adjust,
            )
            data.index = pd.to_datetime(data.index)
            data.index.name = "datetime"
            in_window = (
                (data.index >= pd.to_datetime(start_date))
                & (data.index <= pd.to_datetime(end_date))
            )
            data = data[in_window].sort_index()
            # De-duplicate by valuation date. Series.drop_duplicates() compares
            # the VALUES (the Schedule objects), which would either do nothing
            # or drop legitimate days that share the same schedule.
            data = data[~data.index.duplicated(keep="last")]
            result.daily_discrete_dividends = data
            result.key = key

        elif div_type == DivType.CONTINUOUS:
            start_str = pd.to_datetime(start_date).strftime("%Y-%m-%d") if isinstance(start_date, datetime) else start_date
            end_str = pd.to_datetime(end_date).strftime("%Y-%m-%d") if isinstance(end_date, datetime) else end_date
            yield_history = self.get_div_yield_history(self.symbol, skip_preload_check=True)
            filtered = yield_history[(yield_history.index >= start_str) & (yield_history.index <= end_str)]
            result.daily_continuous_dividends = filtered
            result.key = None
        return result
        
    ## RT Enabled
    def get_schedule(
        self,
        valuation_date: Union[datetime, str],
        maturity_date: Union[datetime, str],
        div_type: Optional[DivType] = None,
        undo_adjust: bool = True,
    ) -> DividendsResult:
        """Dividend data for one valuation date out to maturity.

        DISCRETE: a one-row series keyed by the valuation-date string whose
        value is the schedule, scaled by that date's split factor when
        ``undo_adjust`` is True. CONTINUOUS: the yield history between the two
        dates, with no cache key. Any other type raises ``ValueError``.
        """
        div_type = DivType(div_type) if div_type is not None else self.CONFIG.dividend_type

        val_str = valuation_date.strftime("%Y-%m-%d") if isinstance(valuation_date, datetime) else valuation_date
        mat_str = maturity_date.strftime("%Y-%m-%d") if isinstance(maturity_date, datetime) else maturity_date

        discrete = None
        continuous = None

        if div_type == DivType.DISCRETE:
            entries, key = self.get_discrete_dividend_schedule(
                start_date=val_str,
                end_date=mat_str,
                valuation_date=val_str,  # optional, but consistent
            )
            split_factor = (
                TS._split_factor[self.symbol].loc[pd.to_datetime(val_str)]
                if undo_adjust
                else 1.0
            )
            adjusted = Schedule(schedule=[entry * split_factor for entry in entries])
            discrete = pd.Series({val_str: adjusted})
        elif div_type == DivType.CONTINUOUS:
            history = self.get_div_yield_history(self.symbol)
            lower = pd.to_datetime(valuation_date)
            upper = pd.to_datetime(maturity_date)
            continuous = history[(history.index >= lower) & (history.index <= upper)]
            key = None
        else:
            raise ValueError(f"Unsupported dividend type: {div_type}")

        return DividendsResult(
            daily_discrete_dividends=discrete,
            daily_continuous_dividends=continuous,
            dividend_type=div_type,
            key=key,
            undo_adjust=undo_adjust,
        )

    def offload(self, *args: Any, **kwargs: Any) -> None:
        """Placeholder hook: nothing is offloaded, a notice is printed instead.

        All positional and keyword arguments are accepted and ignored.
        """
        notice = f"No offload logic implemented for {self.CACHE_NAME}"
        print(notice)






In [60]:
# Smoke test: fetch the AAPL discrete dividend schedule for 2024-2025.
# The call returns a (schedule entries, cache key) tuple.
testdiv = DividendDataManager(symbol="AAPL")
testdiv.get_discrete_dividend_schedule(
    start_date="2024-01-01",
    end_date="2025-12-31",
)

2026-01-18 19:43:34 [test] trade.__init__ INFO: Signal function for `_on_exit` added to signal number 15.
2026-01-18 19:43:34 [test] trade.__init__ INFO: Signal function for `_on_exit` added to signal number 2.
2026-01-18 19:43:34 [test] trade.__init__ INFO: Exit handler `_on_exit` registered for normal program exit.
2026-01-18 19:43:34 [test] __main__ INFO: Cache hit for key: symbol:AAPL|interval:na|artifact_type:divs|series_id:hist|current_state:SCHEDULE|lookback_years:1|method:CONSTANT|vendor:YFINANCE
2026-01-18 19:43:34 [test] __main__ INFO: Cache partially covers requested date range. Key: symbol:AAPL|interval:na|artifact_type:divs|series_id:hist|current_state:SCHEDULE|lookback_years:1|method:CONSTANT|vendor:YFINANCE. Fetching missing data.
2026-01-18 19:43:34 [test] trade.optionlib.assets.dividend INFO: Using dual projection method for ticker AAPL
2026-01-18 19:43:35 [test] trade.optionlib.assets.dividend INFO: Expected Dividend Size before adjustment: 17, for original valuation:

([<ScheduleEntry: 2024-02-09 - 0.24>,
  <ScheduleEntry: 2024-05-10 - 0.25>,
  <ScheduleEntry: 2024-08-12 - 0.25>,
  <ScheduleEntry: 2024-11-08 - 0.25>,
  <ScheduleEntry: 2025-02-10 - 0.25>,
  <ScheduleEntry: 2025-05-12 - 0.26>,
  <ScheduleEntry: 2025-08-11 - 0.26>,
  <ScheduleEntry: 2025-11-10 - 0.26>],
 'symbol:AAPL|interval:na|artifact_type:divs|series_id:hist|current_state:SCHEDULE|lookback_years:1|method:CONSTANT|vendor:YFINANCE')

In [61]:
# Daily discrete schedule series for AAPL with a fixed maturity.
# NOTE: CONFIG is a class attribute of DividendDataManager, so this assignment
# changes the default dividend type for every manager instance, not just testdiv.
testdiv.CONFIG.dividend_type = DivType.DISCRETE
d = testdiv.get_schedule_timeseries(
    start_date="2025-01-01",
    end_date="2026-01-14",
    maturity_date="2026-10-29",
    undo_adjust=True,
)

# d2 = testdiv.get_schedule(
#     valuation_date="2026-01-15",
#     maturity_date="2026-05-31",
#     undo_adjust=False,
# )
print("Discrete Dividends Schedule Timeseries:")
print(d.daily_discrete_dividends)
# print("Discrete Dividends Schedule at specific valuation date:")
# print(d2.daily_discrete_dividends)

2026-01-18 19:43:35 [test] __main__ INFO: Fetching discrete dividend schedule timeseries for AAPL from 2025-01-01 to 2026-01-14 with maturity 2026-10-29


2026-01-18 19:43:35 [test] __main__ INFO: Cache hit for discrete schedule timeseries key: symbol:AAPL|interval:eod|artifact_type:divs|series_id:hist|current_state:SCHEDULE_TIMESERIES|lookback_years:1|maturity:2026-10-29|method:CONSTANT|undo_adjust:1
2026-01-18 19:43:35 [test] __main__ INFO: Cache fully covers requested date range for timeseries. Key: symbol:AAPL|interval:eod|artifact_type:divs|series_id:hist|current_state:SCHEDULE_TIMESERIES|lookback_years:1|maturity:2026-10-29|method:CONSTANT|undo_adjust:1
Discrete Dividends Schedule Timeseries:
datetime
2025-01-02    ((2025-02-10, 0.25), (2025-05-12, 0.26), (2025...
2025-01-03    ((2025-02-10, 0.25), (2025-05-12, 0.26), (2025...
2025-01-06    ((2025-02-10, 0.25), (2025-05-12, 0.26), (2025...
2025-01-07    ((2025-02-10, 0.25), (2025-05-12, 0.26), (2025...
2025-01-08    ((2025-02-10, 0.25), (2025-05-12, 0.26), (2025...
                                    ...                        
2026-01-08    ((2026-02-10, 0.26), (2026-05-10, 0.26),

In [62]:
# Inspect which symbols currently have entries in TS._spot.
TS._spot.keys()

['AAPL', 'NVDA', 'TSLA', 'COST', 'AMZN', 'META', 'AMD', 'SBUX', 'NFLX', 'BA']

In [63]:
# Show the repr of the DividendsResult from the previous query.
d

DividendsResult(dividend_type=<DivType.DISCRETE: 'discrete'>, key='symbol:AAPL|interval:eod|artifact_type:divs|series_id:hist|current_state:SCHEDULE_TIMESERIES|lookback_years:1|maturity:2026-10-29|method:CONSTANT|undo_adjust:1', is_empty=False, undo_adjust=True)

In [64]:
# Same query path, continuous-yield variant. As above, CONFIG is class-level,
# so this switches the default dividend type for all manager instances.
testdiv.CONFIG.dividend_type = DivType.CONTINUOUS
d = testdiv.get_schedule_timeseries(
    start_date="2022-10-08",
    end_date="2025-10-31",
    maturity_date="2025-10-31",
)
d.daily_continuous_dividends

2022-10-10    0.001665
2022-10-11    0.001683
2022-10-12    0.001690
2022-10-13    0.001635
2022-10-14    0.001690
                ...   
2025-10-27    0.000968
2025-10-28    0.000967
2025-10-29    0.000965
2025-10-30    0.000959
2025-10-31    0.000963
Length: 800, dtype: float64

## Rates Data

In [65]:
import yfinance as yf
def deannualize(annual_rate, periods=365):
    """Per-period compounding rate equivalent to ``annual_rate`` over ``periods`` periods.

    ``annual_rate`` is a decimal fraction (0.05 for 5%), not a percentage.
    """
    growth_factor = 1 + annual_rate
    exponent = 1 / periods
    return growth_factor ** exponent - 1

@dataclass
class RatesResult(Result):
    """Risk-free rate series for a date range."""
    daily_risk_free_rates: Optional[pd.Series] = None

    def __repr__(self) -> str:
        # Declared explicitly so @dataclass keeps the base-class repr.
        return super().__repr__()

    def is_empty(self) -> bool:
        """True when no rate series is attached or the series has no rows."""
        rates = self.daily_risk_free_rates
        if rates is None:
            return True
        return rates.empty

    def _additional_repr_fields(self):
        """Only the emptiness flag is shown in ``repr``."""
        return dict(is_empty=self.is_empty())
    

class RatesDataManager(BaseDataManager):
    """Singleton manager for risk-free rate data from treasury bills (^IRX)."""
    CACHE_NAME: ClassVar[str] = "rates_data_manager"
    DEFAULT_SERIES_ID: ClassVar["SeriesId"] = SeriesId.HIST
    INSTANCE = None
    DEFAULT_YFINANCE_TICKER = "^IRX"  # 13 WEEK TREASURY BILL
    CONFIG: OptionDataConfig = OptionDataConfig()
    
    def __new__(
        cls,
        *,
        cache_spec: Optional[CacheSpec] = None,
        enable_namespacing: bool = False,
    ) -> "RatesDataManager":
        """Singleton accessor: create the instance on first call, reuse it afterwards."""
        if cls.INSTANCE is None:
            cls.INSTANCE = super(RatesDataManager, cls).__new__(cls)
        return cls.INSTANCE
    
    def __init__(self, *, cache_spec: Optional[CacheSpec] = None, enable_namespacing: bool = False) -> None:
        """Run base-class initialisation once; repeat constructions of the singleton are no-ops."""
        was_initialised = getattr(self, "_init_called", False)
        if not was_initialised:
            self._init_called = True
            super().__init__(cache_spec=cache_spec, enable_namespacing=enable_namespacing)

    def get_rate(
        self,
        date: Union[datetime, str],
        interval: Interval = Interval.EOD,
        str_interval: Optional[str] = None,
    ) -> RatesResult:
        """Risk-free rate for one date, as a series of at most one row.

        If ``is_available_on_date`` rejects the date, a warning is logged and
        an empty float series is returned. Otherwise the timeseries getter is
        queried for that single day and the first row is kept.
        """
        requested_day = to_datetime(date).date()
        if not is_available_on_date(requested_day):
            logger.warning(f"Requested date {date} is not a business day or is a US holiday. Returning empty RatesResult.")
            return RatesResult(daily_risk_free_rates=pd.Series(dtype=float))

        if isinstance(date, datetime):
            date_str = pd.to_datetime(date).strftime("%Y-%m-%d")
        else:
            date_str = date

        window = self.get_risk_free_rate_timeseries(
            start_date=date_str,
            end_date=date_str,
            interval=interval,
            str_interval=str_interval,
        )
        rate = window.daily_risk_free_rates
        has_rows = rate is not None and not rate.empty
        if has_rows:
            rate = rate.iloc[:1]

        return RatesResult(daily_risk_free_rates=rate)

    def get_risk_free_rate_timeseries(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        interval: Interval = Interval.EOD,
        str_interval: Optional[str] = None,
    ) -> RatesResult:
        """Annualized risk-free rates between two dates, served from cache where possible.

        A cached series that has no missing dates in the window is sanitized
        and returned directly. Otherwise the span from the earliest to the
        latest missing date (or the whole window when nothing is cached) is
        downloaded, merged over the cached series with the downloaded rows
        winning on duplicate index values, cached, and sanitized to the
        requested window.

        ``str_interval``, when given, overrides the yfinance interval derived
        from ``interval`` ("1d" for EOD, otherwise "30m").
        """

        def _day_string(value):
            # datetime instances are formatted; anything else is passed through untouched.
            if isinstance(value, datetime):
                return pd.to_datetime(value).strftime("%Y-%m-%d")
            return value

        start_str = _day_string(start_date)
        end_str = _day_string(end_date)

        ## Make cache key
        key = self.make_key(
            symbol=self.DEFAULT_YFINANCE_TICKER,
            artifact_type=ArtifactType.RATES,
            series_id=SeriesId.HIST,
            interval=interval,
        )

        ## Determine yfinance interval
        if str_interval:
            fn_interval = str_interval
        else:
            fn_interval = "1d" if interval == Interval.EOD else "30m"

        ## Check cache and whether it covers the requested range
        series = self.get(key, default=None)

        if series is None:
            logger.info(f"No cache found for risk-free rate timeseries key: {key}. Fetching from source.")
        else:
            logger.info(f"Cache hit for risk-free rate timeseries key: {key}")
            missing = get_missing_dates(
                series,
                pd.to_datetime(start_date).strftime("%Y-%m-%d"),
                pd.to_datetime(end_date).strftime("%Y-%m-%d"),
            )

            if not missing:
                logger.info(f"Cache fully covers requested date range for risk-free rate timeseries. Key: {key}")
                trimmed = _data_structure_sanitize(
                    series,
                    start=start_str,
                    end=end_str,
                )
                return RatesResult(daily_risk_free_rates=trimmed)

            ## Only the span of missing dates is downloaded
            start_date = min(missing)
            end_date = max(missing)
            logger.info(f"Cache partially covers requested date range for risk-free rate timeseries. Key: {key}. Fetching missing dates: {missing}")

        # Fetch rates data
        downloaded = self._query_yfinance(
            start_date=start_date,
            end_date=end_date,
            interval=fn_interval,
        )
        rates_data = downloaded["annualized"]

        if series is not None:
            # Merge with existing cached series; downloaded rows win on duplicates
            combined = pd.concat([series, rates_data])
            rates_data = combined[~combined.index.duplicated(keep='last')]

        ## Cache the updated series
        self.cache_it(key, rates_data)

        ## Sanitize before returning so only the requested range is handed back
        rates_data = _data_structure_sanitize(
            rates_data,
            start=start_str,
            end=end_str,
        )

        return RatesResult(daily_risk_free_rates=rates_data)

    def cache_it(self, key: str, value: Any, *, expire: Optional[int] = None) -> None:
        """Stores a rate timeseries by delegating to ``_data_structure_cache_it``.

        NOTE(review): merging with the existing entry and excluding today's
        partial data are presumably done inside the helper; that is not
        visible from this method -- confirm against its definition.
        """
        ## Since it is a timeseries, we will append to existing if exists
        _data_structure_cache_it(self, key, value, expire=expire)

        
    def _query_yfinance(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        interval: str,
    ) -> pd.DataFrame:
        """Download treasury bill quotes from yfinance and return them as rates.

        Args:
            start_date: First date wanted; the download starts 5 days earlier.
            end_date: Last date wanted; the download ends 5 days later.
            interval: yfinance interval string (e.g. "1d", "30m").

        Returns:
            DataFrame indexed by "Datetime" with columns ``name``,
            ``description``, ``daily`` and ``annualized``. ``annualized`` is the
            quoted close divided by 100 (a decimal fraction) and ``daily`` is
            that decimal rate de-annualized over 365 periods.
        """

        ## Date buffer to ensure we get all data
        start_date = to_datetime(start_date) - pd.Timedelta(days=5)
        end_date = to_datetime(end_date) + pd.Timedelta(days=5)

        # Use the same ticker constant that the cache key is built from.
        ticker = self.DEFAULT_YFINANCE_TICKER

        data_min = yf.download(
            ticker,
            start=start_date,
            end=end_date,
            interval=interval,
            progress=False,
            multi_level_index=False,
        )

        data_min.columns = data_min.columns.str.lower()
        # The close is quoted in percent (3.5 means 3.5%). Convert to a decimal
        # first: deannualize expects a decimal rate, and feeding it the percent
        # figure overstated the daily rate.
        data_min["annualized"] = data_min["close"] / 100
        data_min["daily"] = data_min["annualized"].apply(deannualize)
        data_min["name"] = ticker
        data_min["description"] = "13 WEEK TREASURY BILL"
        data_min.index.name = "Datetime"
        data_min = data_min[["name", "description", "daily", "annualized"]]
        # Bounds here are the buffered ones, so the buffer days are kept.
        data_min = data_min[(data_min.index >= pd.to_datetime(start_date)) & (data_min.index <= pd.to_datetime(end_date))]
        return data_min


In [66]:
# Smoke test: annualized risk-free rates for a two-business-day window.
rt_manager = RatesDataManager()

rates_result = rt_manager.get_risk_free_rate_timeseries(
    start_date="2026-01-09",
    end_date="2026-01-12",
)
rates_result.daily_risk_free_rates

2026-01-18 19:43:36 [test] __main__ INFO: Cache hit for risk-free rate timeseries key: symbol:^IRX|interval:eod|artifact_type:rates|series_id:hist
2026-01-18 19:43:36 [test] __main__ INFO: Cache fully covers requested date range for risk-free rate timeseries. Key: symbol:^IRX|interval:eod|artifact_type:rates|series_id:hist
Sanitizing data from 2026-01-09 to 2026-01-12...


datetime
2026-01-09    0.03513
2026-01-12    0.03533
Name: annualized, dtype: float64

In [67]:
# Single-date lookup; the result is a series with at most one row.
rt_manager.get_rate(
    date=pd.Timestamp("2026-01-13"),
).daily_risk_free_rates

2026-01-18 19:43:36 [test] __main__ INFO: Cache hit for risk-free rate timeseries key: symbol:^IRX|interval:eod|artifact_type:rates|series_id:hist
2026-01-18 19:43:36 [test] __main__ INFO: Cache fully covers requested date range for risk-free rate timeseries. Key: symbol:^IRX|interval:eod|artifact_type:rates|series_id:hist
Sanitizing data from 2026-01-13 to 2026-01-13...


datetime
2026-01-13    0.0356
Name: annualized, dtype: float64

## Forward Price (Mostly for black scholes)

### Forward DataManager

In [68]:
@dataclass
class ForwardResult(Result):
    """Forward price series under either the discrete or the continuous dividend model."""
    daily_discrete_forward: Optional[pd.Series] = None
    daily_continuous_forward: Optional[pd.Series] = None
    dividend_type: Optional[DivType] = None
    key: Optional[str] = None
    dividend_result: Optional[DividendsResult] = None

    def __repr__(self) -> str:
        # Declared explicitly so @dataclass keeps the base-class repr.
        return super().__repr__()

    def is_empty(self) -> bool:
        """True when the forward series matching ``dividend_type`` is absent or has no rows.

        Also True when ``dividend_type`` is neither DISCRETE nor CONTINUOUS.
        """
        if self.dividend_type == DivType.DISCRETE:
            payload = self.daily_discrete_forward
        elif self.dividend_type == DivType.CONTINUOUS:
            payload = self.daily_continuous_forward
        else:
            return True
        return payload is None or payload.empty

    def _additional_repr_fields(self) -> Dict[str, Any]:
        """Fields shown in ``repr``: dividend type, cache key and emptiness."""
        return dict(
            dividend_type=self.dividend_type,
            key=self.key,
            is_empty=self.is_empty(),
        )


In [69]:
class ForwardDataManager(BaseDataManager):
    """Manages forward price computation and caching for a specific symbol using spot, rates, and dividends."""
    CACHE_NAME: ClassVar[str] = "forward_data_manager"
    DEFAULT_SERIES_ID: ClassVar["SeriesId"] = SeriesId.HIST
    INSTANCES = {}
    # CONFIG = ForwardsConfig()

    def __new__(cls, symbol: str, *args: Any, **kwargs: Any) -> "ForwardDataManager":
        """Return the per-symbol singleton, building it on first request.

        The first construction for a symbol also preloads its market
        timeseries from ``OPTION_TIMESERIES_START_DATE`` up to now.
        """
        try:
            return cls.INSTANCES[symbol]
        except KeyError:
            pass

        # First time this symbol is seen: preload data, then register the instance.
        TS.load_timeseries(symbol, start_date=OPTION_TIMESERIES_START_DATE, end_date=datetime.now())
        created = super().__new__(cls)
        cls.INSTANCES[symbol] = created
        return created

    def __init__(
        self,
        symbol: str,
        *,
        cache_spec: Optional[CacheSpec] = None,
        enable_namespacing: bool = False,
    ) -> None:
        """Initialize the manager once per symbol instance.

        ``__new__`` hands back the same object for repeated constructions of a
        symbol, so ``__init__`` runs again on an already configured instance;
        the ``_initialized`` flag turns those repeat calls into no-ops.

        Args:
            symbol: Ticker this manager serves.
            cache_spec: Optional cache configuration forwarded to the base class.
            enable_namespacing: Forwarded to the base class.
        """
        if getattr(self, "_initialized", False):
            return

        super().__init__(cache_spec=cache_spec, enable_namespacing=enable_namespacing)
        self.symbol = symbol
        # Flag the instance only after setup succeeded: if the base initializer
        # raises, the cached singleton is not left marked as initialized while
        # missing its cache/symbol, and the next construction retries the setup.
        self._initialized = True

    def _normalize_inputs(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        maturity_date: Union[datetime, str],
        div_type: Optional[DivType],
    ) -> Tuple[DivType, date, date, date, str, str, str]:
        """Resolve the dividend type and return each date both as an object and as text.

        Strings are parsed as ``YYYY-MM-DD``; non-string inputs are passed
        through unchanged. A missing ``div_type`` defaults to ``DivType.DISCRETE``.

        Returns:
            ``(div_type, start, end, maturity, start_str, end_str, maturity_str)``.
        """
        day_format = "%Y-%m-%d"

        def _as_datetime(value):
            # Only strings need parsing; datetime-like inputs are used as given.
            return datetime.strptime(value, day_format) if isinstance(value, str) else value

        resolved_type = DivType.DISCRETE if div_type is None else DivType(div_type)

        start_dt, end_dt, mat_dt = (
            _as_datetime(raw) for raw in (start_date, end_date, maturity_date)
        )
        start_str, end_str, mat_str = (
            datetime.strftime(moment, day_format) for moment in (start_dt, end_dt, mat_dt)
        )
        return resolved_type, start_dt, end_dt, mat_dt, start_str, end_str, mat_str

    def _build_key(self, *, mat_str: str, div_type: DivType, use_chain_spot: bool) -> str:
        """Build the cache key for this symbol's forward series.

        The key varies with maturity, dividend model and which spot series
        (chain vs. regular) the forwards were computed from.
        """
        key_parts = dict(
            symbol=self.symbol,
            artifact_type=ArtifactType.FWD,
            series_id=SeriesId.HIST,
            maturity=mat_str,
            div_type=div_type.value,
            use_chain_spot=use_chain_spot,
            interval=Interval.EOD,
        )
        return self.make_key(**key_parts)

    def _try_get_cached(
        self,
        *,
        key: str,
        start_str: str,
        end_str: str,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        div_type: DivType,
    ) -> Tuple[Optional[pd.Series], bool, str, str, Optional[ForwardResult]]:
        """Look up ``key`` in the cache and report how much of the window it covers.

        Returns:
            ``(cached_series, partial_hit, start, end, result)`` where

            * cache miss: ``(None, False, start_str, end_str, None)``;
            * full hit: the sanitized series, ``False``, the unchanged window and
              a ready-to-return ``ForwardResult``;
            * partial hit: the raw cached series, ``True``, the min/max of the
              missing dates as the window still to compute, and ``None``.

        ``start_date`` and ``end_date`` are accepted but not used here.
        """
        cached_series = self.get(key, default=None)
        if cached_series is None:
            return None, False, start_str, end_str, None

        missing = get_missing_dates(cached_series, _start=start_str, _end=end_str)
        if missing:
            logger.info(
                f"Cache partially covers requested date range for forward timeseries. "
                f"Key: {key}. Fetching missing dates: {missing}"
            )
            return cached_series, True, min(missing), max(missing), None

        # Full coverage: trim to the requested window and wrap in a result.
        logger.info(f"Cache hit for forward timeseries key: {key}")
        trimmed = _data_structure_sanitize(
            cached_series,
            start=start_str,
            end=end_str,
        )

        result = ForwardResult()
        series_field = (
            "daily_discrete_forward" if div_type == DivType.DISCRETE else "daily_continuous_forward"
        )
        setattr(result, series_field, trimmed)
        result.dividend_type = div_type
        result.key = key
        return trimmed, False, start_str, end_str, result

    def _get_dividend_result(
        self,
        *,
        start_str: str,
        end_str: str,
        mat_str: str,
        div_type: DivType,
        dividend_result: Optional[DividendsResult],
        use_chain_spot: bool,
    ) -> DividendsResult:
        """Return usable dividend data, fetching it when the caller supplied none.

        Raises:
            ValueError: If the dividend data is empty, or if its ``undo_adjust``
                flag disagrees with ``use_chain_spot``.
        """
        resolved = dividend_result
        if resolved is None:
            dividend_manager = DividendDataManager(symbol=self.symbol)
            resolved = dividend_manager.get_schedule_timeseries(
                start_date=start_str,
                end_date=end_str,
                maturity_date=mat_str,
                div_type=div_type,
                # Chain spot is paired with back-adjusted dividends.
                undo_adjust=use_chain_spot,
            )

        if resolved.is_empty():
            raise ValueError("Dividend result is empty. Cannot compute forward prices without dividend information.")

        adjustment_matches = resolved.undo_adjust == use_chain_spot
        if not adjustment_matches:
            raise ValueError("Mismatch between dividend_result.undo_adjust and use_chain_spot. They must be the same.")

        return resolved

    def _load_spot(self, *, use_chain_spot: bool, spot: Optional[TimeseriesData] = None) -> pd.Series:
        """Return the ``close`` column of the chain-spot or regular spot frame.

        When ``spot`` is not supplied, the symbol's timeseries is read from ``TS``.
        """
        timeseries = spot if spot is not None else TS.get_timeseries(self.symbol, skip_preload_check=True)
        price_frame = timeseries.chain_spot if use_chain_spot else timeseries.spot
        return price_frame["close"]

    def _load_rates(self, *, start_str: str, end_str: str, rates: Optional[RatesResult] = None) -> pd.Series:
        """Return daily risk-free rates restricted to ``[start_str, end_str]``.

        A caller-supplied ``RatesResult`` is used as-is; otherwise the EOD
        series is requested from ``RatesDataManager``. Either way the series is
        filtered to the inclusive window.
        """
        if rates is None:
            rates = RatesDataManager().get_risk_free_rate_timeseries(
                start_date=start_str,
                end_date=end_str,
                interval=Interval.EOD,
            )
        daily_rates = rates.daily_risk_free_rates

        window_start = pd.to_datetime(start_str)
        window_end = pd.to_datetime(end_str)
        in_window = (daily_rates.index >= window_start) & (daily_rates.index <= window_end)
        return daily_rates[in_window]

    def _align_3(
        self, spot: pd.Series, rates: pd.Series, third: pd.Series, *, third_name: str
    ) -> Tuple[pd.Series, pd.Series, pd.Series]:
        """Aligns three series to common dates and validates no NaNs."""
        idx = spot.index.intersection(rates.index).intersection(third.index)

        spot = spot.reindex(idx)
        rates = rates.reindex(idx)
        third = third.reindex(idx)

        if rates.isna().any():
            raise ValueError("NaNs in rates after alignment.")
        if third.isna().any():
            raise ValueError(f"NaNs in {third_name} after alignment.")

        return spot, rates, third

    def _compute_forward_discrete(
        self,
        *,
        spot: pd.Series,
        rates: pd.Series,
        discrete_divs: pd.Series,  # series of Schedule objects
        mat_dt: date,
    ) -> pd.Series:
        """Compute one forward price per valuation date under the discrete-dividend model.

        The inputs are consumed positionally (``tolist``), so they are expected
        to be aligned on the same index already. The result is indexed by
        ``discrete_divs.index``.
        """
        valuation_index = discrete_divs.index

        pv_divs = vectorized_discrete_pv(
            schedules=discrete_divs.to_list(),
            r=rates.tolist(),
            _valuation_dates=valuation_index.tolist(),
            _end_dates=[mat_dt] * len(discrete_divs),
        )
        # A scalar PV is wrapped so the forward call always gets a sequence.
        if isinstance(pv_divs, (int, float)):
            pv_divs = [pv_divs]

        # Time to maturity in years, from whole calendar days.
        years_to_maturity = [
            (mat_dt - valuation_day).days * SECONDS_IN_DAY / SECONDS_IN_YEAR
            for valuation_day in valuation_index
        ]

        forwards = vectorized_forward_discrete(
            S=spot.tolist(),
            r=rates.tolist(),
            T=years_to_maturity,
            pv_divs=pv_divs,
        )
        return pd.Series(data=forwards, index=valuation_index)

    def _compute_forward_continuous(
        self,
        *,
        spot: pd.Series,
        rates: pd.Series,
        continuous_divs: pd.Series,  # series of dividend yields
        mat_dt: date,
    ) -> pd.Series:
        """Compute one forward price per valuation date under the continuous-yield model.

        The inputs are consumed positionally (``tolist``), so they are expected
        to be aligned on the same index already. The result is indexed by
        ``continuous_divs.index``.
        """
        valuation_index = continuous_divs.index

        q_factor = get_vectorized_continuous_dividends(
            div_rates=continuous_divs.tolist(),
            _valuation_dates=valuation_index.tolist(),
            _end_dates=[mat_dt] * len(continuous_divs),
        )

        # Time to maturity in years, from whole calendar days.
        years_to_maturity = [
            (mat_dt - valuation_day).days * SECONDS_IN_DAY / SECONDS_IN_YEAR
            for valuation_day in valuation_index
        ]

        forwards = vectorized_forward_continuous(
            S=spot.tolist(),
            r=rates.tolist(),
            T=years_to_maturity,
            q_factor=q_factor,
        )
        return pd.Series(data=forwards, index=valuation_index)

    def _merge_partial(self, cached_series: pd.Series, forward_series: pd.Series) -> pd.Series:
        """Merges newly computed data with cached data, keeping latest values."""
        merged = pd.concat([cached_series, forward_series])
        forward_series = merged[~merged.index.duplicated(keep="last")]
        return forward_series



    def get_forward_timeseries(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        maturity_date: Union[datetime, str],
        div_type: Optional[DivType] = None,
        spot: Optional[TimeseriesData] = None,
        rates: Optional[RatesResult] = None,
        *,
        dividend_result: Optional[DividendsResult] = None,
        use_chain_spot: bool = True,
    ) -> ForwardResult:
        """
        Return a daily series of forward prices to a fixed maturity.

        Each value is the forward price from that valuation date to
        ``maturity_date``.

        - ``start_date`` / ``end_date`` bound the valuation dates.
        - ``maturity_date`` is the fixed horizon (e.g. option expiry).
        - ``spot``, ``rates`` and ``dividend_result`` are optional overrides;
          anything not supplied is loaded by the manager.

        A fully cached window is returned straight from the cache. On a partial
        hit only the missing span is computed, merged with the cached values
        and re-cached (12 hour expiry).

        Raises:
            ValueError: If maturity precedes the start date, the dividend data
                is unusable, or the dividend type is unsupported.
        """
        # The window is narrowed below on a partial cache hit; keep the caller's
        # original bounds for the final trim.
        requested_start, requested_end = start_date, end_date
        result = ForwardResult()

        div_type, start_dt, end_dt, mat_dt, start_str, end_str, mat_str = self._normalize_inputs(
            start_date=start_date,
            end_date=end_date,
            maturity_date=maturity_date,
            div_type=div_type,
        )
        if mat_dt < start_dt:
            raise ValueError("maturity_date must be >= start_date")

        key = self._build_key(mat_str=mat_str, div_type=div_type, use_chain_spot=use_chain_spot)

        cached_series, partial_hit, start_str, end_str, cached_result = self._try_get_cached(
            key=key,
            start_str=start_str,
            end_str=end_str,
            start_date=start_date,
            end_date=end_date,
            div_type=div_type,
        )
        if cached_result is not None:
            return cached_result

        # From here on start_str/end_str describe the span that still has to be computed.
        dividend_result = self._get_dividend_result(
            start_str=start_str,
            end_str=end_str,
            mat_str=mat_str,
            div_type=div_type,
            dividend_result=dividend_result,
            use_chain_spot=use_chain_spot,
        )
        spot = self._load_spot(use_chain_spot=use_chain_spot, spot=spot)
        rates = self._load_rates(start_str=start_str, end_str=end_str, rates=rates)

        is_discrete = div_type == DivType.DISCRETE
        if is_discrete:
            spot, rates, schedules = self._align_3(
                spot=spot,
                rates=rates,
                third=dividend_result.daily_discrete_dividends,
                third_name="discrete dividend schedules",
            )
            forward_series = self._compute_forward_discrete(
                spot=spot,
                rates=rates,
                discrete_divs=schedules,
                mat_dt=mat_dt,
            )
        elif div_type == DivType.CONTINUOUS:
            spot, rates, yields = self._align_3(
                spot=spot,
                rates=rates,
                third=dividend_result.daily_continuous_dividends,
                third_name="div yields",
            )
            forward_series = self._compute_forward_continuous(
                spot=spot,
                rates=rates,
                continuous_divs=yields,
                mat_dt=mat_dt,
            )
        else:
            raise ValueError(f"Unsupported dividend type: {div_type}")

        result.dividend_result = dividend_result
        result.dividend_type = div_type
        result.key = key

        if partial_hit:
            forward_series = self._merge_partial(cached_series=cached_series, forward_series=forward_series)

        self.cache_it(key, forward_series, expire=86400 / 2)  # 12 hours expiry

        # Cache the full merged series, but hand back only the requested window.
        forward_series = _data_structure_sanitize(
            forward_series,
            start=requested_start,
            end=requested_end,
        )
        if is_discrete:
            result.daily_discrete_forward = forward_series
        else:
            result.daily_continuous_forward = forward_series

        return result

    def make_key(self, *, symbol, interval=None, artifact_type=None, series_id=None, **extra_parts):
        """Build a cache key by forwarding every argument to the base-class ``make_key``."""
        key = super().make_key(
            symbol=symbol,
            interval=interval,
            artifact_type=artifact_type,
            series_id=series_id,
            **extra_parts,
        )
        return key

    def cache_it(self, key, value, *, expire=None):
        """Store a forward timeseries under ``key`` via ``_data_structure_cache_it``.

        The storage semantics live entirely in that helper (presumably it
        appends to an existing entry rather than replacing it; confirm there).
        """
        # Timeseries values are handed to the shared helper instead of being set directly.
        _data_structure_cache_it(self, key, value, expire=expire)


    def get_forward(
        self,
        date: Union[datetime, str],
        maturity_date: Union[datetime, str],
        div_type: Optional[DivType] = None,
        dividend_result: Optional[DividendsResult] = None,
        spot: Optional[TimeseriesData] = None,
        rates: Optional[RatesResult] = None,
        *,
        use_chain_spot: bool = True,
    ) -> ForwardResult:
        """
        Return the forward price for a single valuation date.

        Delegates to ``get_forward_timeseries`` with a one-day window
        (``start == end == date``). A missing ``div_type`` defaults to
        ``DivType.DISCRETE``.
        """
        day_format = "%Y-%m-%d"
        resolved_type = DivType.DISCRETE if div_type is None else DivType(div_type)

        # datetime inputs are rendered as YYYY-MM-DD text; anything else is passed through.
        valuation_day = date.strftime(day_format) if isinstance(date, datetime) else date
        maturity_day = (
            maturity_date.strftime(day_format) if isinstance(maturity_date, datetime) else maturity_date
        )

        return self.get_forward_timeseries(
            start_date=valuation_day,
            end_date=valuation_day,
            maturity_date=maturity_day,
            div_type=resolved_type,
            use_chain_spot=use_chain_spot,
            dividend_result=dividend_result,
            spot=spot,
            rates=rates,
        )

        

    def offload(self, *args: Any, **kwargs: Any) -> None:
        """
        Placeholder offload hook: nothing is offloaded, a notice is printed instead.
        """
        notice = f"No offload logic implemented for {self.CACHE_NAME}"
        print(notice)


In [70]:
# Create (or reuse) the per-symbol forward manager for COST.
fwd_test = ForwardDataManager(symbol="COST")


In [71]:

# Discrete-dividend forwards for COST over 2026-01-10..2026-01-14, maturing 2026-01-20.
# Spot and rates are passed in explicitly; use_chain_spot=False selects the regular `spot` close series.
fwd_discrete = fwd_test.get_forward_timeseries(
    start_date="2026-01-10",
    end_date="2026-01-14",
    maturity_date="2026-01-20",
    div_type=DivType.DISCRETE,
    use_chain_spot=False,
    spot=TS.get_timeseries("COST", skip_preload_check=True),
    rates=RatesDataManager().get_risk_free_rate_timeseries(
        start_date="2026-01-10",
        end_date="2026-01-14",
    ),
)

# Continuous-yield variant, currently disabled.
# fwd_cont = fwd_test.get_forward_timeseries(
#     start_date="2025-01-02",
#     end_date="2026-01-15",
#     maturity_date="2026-01-02",
#     div_type=DivType.CONTINUOUS,
#     use_chain_spot=False,
# )
fwd_discrete.daily_discrete_forward

2026-01-18 19:43:37 [test] __main__ INFO: Cache hit for risk-free rate timeseries key: symbol:^IRX|interval:eod|artifact_type:rates|series_id:hist
2026-01-18 19:43:37 [test] __main__ INFO: Cache fully covers requested date range for risk-free rate timeseries. Key: symbol:^IRX|interval:eod|artifact_type:rates|series_id:hist
Sanitizing data from 2026-01-10 to 2026-01-14...
2026-01-18 19:43:37 [test] __main__ INFO: Cache hit for forward timeseries key: symbol:COST|interval:eod|artifact_type:forward|series_id:hist|div_type:DISCRETE|maturity:2026-01-20|use_chain_spot:0
Sanitizing data from 2026-01-10 to 2026-01-14...


datetime
2026-01-12    943.810580
2026-01-13    942.573305
2026-01-14    951.536662
dtype: float64

In [72]:
# Build single-date inputs (dividend schedule, rate, spot) and pass them to get_forward as overrides.
# NOTE(review): `fwd_test` was created for COST, but the spot series here is loaded for AAPL, and the
# log output of this cell shows `testdiv` resolving an AAPL dividend key. The displayed value equals
# the COST forward for 2026-01-14 from the previous cell, which suggests the result was served from
# the COST cache rather than computed from these inputs. Confirm the intended symbol.
div = testdiv.get_schedule(
    valuation_date="2026-01-14",
    maturity_date="2027-01-02",
    div_type=DivType.DISCRETE,
    undo_adjust=False,
)

rate = rt_manager.get_rate(
    date="2026-01-14",
)

spot = TS.get_timeseries("AAPL", skip_preload_check=True, start_date="2026-01-14", end_date="2026-01-14")

fwd_test.get_forward(
    date="2026-01-14",
    maturity_date="2026-01-20",
    div_type=DivType.DISCRETE,
    use_chain_spot=False,
    dividend_result=div,
    spot=spot,
    rates=rate
).daily_discrete_forward

2026-01-18 19:43:38 [test] __main__ INFO: Cache hit for key: symbol:AAPL|interval:na|artifact_type:divs|series_id:hist|current_state:SCHEDULE|lookback_years:1|method:CONSTANT|vendor:YFINANCE
2026-01-18 19:43:38 [test] __main__ INFO: Cache partially covers requested date range. Key: symbol:AAPL|interval:na|artifact_type:divs|series_id:hist|current_state:SCHEDULE|lookback_years:1|method:CONSTANT|vendor:YFINANCE. Fetching missing data.
2026-01-18 19:43:38 [test] trade.optionlib.assets.dividend INFO: Using dual projection method for ticker AAPL
2026-01-18 19:43:38 [test] trade.optionlib.assets.dividend INFO: Expected Dividend Size before adjustment: 12, for original valuation: 4. Size from historical divs: 8
2026-01-18 19:43:38 [test] trade.optionlib.assets.dividend INFO: Expected Dividend Size to be projected: 4
2026-01-18 19:43:38 [test] trade.optionlib.assets.dividend INFO: Projected Dividend List: [0.26, 0.26, 0.26, 0.26]
2026-01-18 19:43:38 [test] trade.optionlib.assets.dividend INFO:

datetime
2026-01-14    951.536662
dtype: float64

## Equity Market Timeseries

In [73]:
@dataclass
class SpotResult(Result):
    """Contains spot price data with optional split adjustment information.

    ``daily_spot`` holds the price series, ``undo_adjust`` records whether the
    chain-spot series was requested for it, and ``key`` is the cache key
    (currently always ``None`` because spot results are not cached).
    """
    daily_spot: Optional[pd.Series] = None
    undo_adjust: Optional[bool] = None
    key: Optional[str] = None

    def is_empty(self) -> bool:
        """Checks if spot data is missing or empty.

        Provided for parity with the other result containers, which all expose
        ``is_empty()``.
        """
        return self.daily_spot is None or self.daily_spot.empty

    def _additional_repr_fields(self) -> Dict[str, Any]:
        """Provides spot-specific fields for string representation."""
        return {
            "key": self.key,
            "is_empty": self.is_empty(),
            "undo_adjust": self.undo_adjust,
        }

    def __repr__(self) -> str:
        # Defined explicitly so @dataclass does not generate its own __repr__
        # and the base Result representation is kept.
        return super().__repr__()


In [74]:
class SpotDataManager(BaseDataManager):
    """Manages spot price retrieval for a specific symbol with split adjustment support.

    One instance exists per symbol (see ``__new__``); prices are read from the
    shared ``TS`` market timeseries rather than from this manager's own cache.
    """
    CACHE_NAME: ClassVar[str] = "spot_data_manager"
    DEFAULT_SERIES_ID: ClassVar["SeriesId"] = SeriesId.HIST
    INSTANCES = {}

    def __new__(cls, symbol: str, *args: Any, **kwargs: Any) -> "SpotDataManager":
        """Returns cached instance for symbol, creating new one if needed.

        The first construction for a symbol also preloads its market
        timeseries from ``OPTION_TIMESERIES_START_DATE`` up to now.
        """
        if symbol not in cls.INSTANCES:
            TS.load_timeseries(symbol, start_date=OPTION_TIMESERIES_START_DATE, end_date=datetime.now())
            instance = super(SpotDataManager, cls).__new__(cls)
            cls.INSTANCES[symbol] = instance
        return cls.INSTANCES[symbol]

    def __init__(self, symbol: str, *, cache_spec: Optional[CacheSpec] = None, enable_namespacing: bool = False) -> None:
        """Initializes manager once per symbol instance.

        ``__new__`` returns the same object for repeated constructions, so
        Python calls ``__init__`` on it again each time; the ``_initialized``
        flag turns those repeat calls into no-ops.
        """
        if getattr(self, "_initialized", False):
            return
        super().__init__(cache_spec=cache_spec, enable_namespacing=enable_namespacing)
        self.symbol = symbol
        # Without setting the flag the guard above never fires and every
        # SpotDataManager(symbol) call re-runs the base initializer on the
        # shared instance.
        self._initialized = True

    def get_spot_timeseries(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        undo_adjust: bool = True,
    ) -> SpotResult:
        """Returns spot or chain_spot price series for date range from MarketTimeseries.

        Args:
            start_date: First date of the requested window.
            end_date: Last date of the requested window.
            undo_adjust: When True the ``chain_spot`` close series is returned,
                otherwise the regular ``spot`` close series.

        Returns:
            A ``SpotResult`` whose ``daily_spot`` is trimmed to the window and
            whose ``key`` is ``None`` (no caching for spot data yet).
        """
        timeseries = TS.get_timeseries(self.symbol, skip_preload_check=True, start_date=start_date, end_date=end_date)
        if undo_adjust:
            spot_series = timeseries.chain_spot["close"]
        else:
            spot_series = timeseries.spot["close"]

        spot_series = _data_structure_sanitize(
            spot_series,
            start=start_date,
            end=end_date,
        )
        result = SpotResult()
        key = None  # No caching key for now
        result.daily_spot = spot_series
        result.undo_adjust = undo_adjust
        result.key = key
        return result

    def get_at_time(
            self,
            date: Union[datetime, str],
    ) -> AtIndexResult:
        """Returns spot data at a specific datetime from MarketTimeseries."""
        return TS.get_at_index(sym=self.symbol, index=date)


In [75]:
# Fetch AAPL closes for 2026-01-10..2026-01-14; undo_adjust=True selects the chain_spot series.
test_spot = SpotDataManager(symbol="AAPL")
spot_result = test_spot.get_spot_timeseries(
    start_date="2026-01-10",
    end_date="2026-01-14",
    undo_adjust=True,
)
spot_result.daily_spot

Sanitizing data from 2026-01-10 to 2026-01-14...


datetime
2026-01-12    260.250000
2026-01-13    261.049988
2026-01-14    259.959991
Name: close, dtype: float64

## Option Spot

In [76]:


@dataclass
class OptionSpotResult(Result):
    """Option price timeseries for one contract, plus where it came from.

    ``daily_option_spot`` is a frame indexed by date; ``close`` and
    ``midpoint`` expose the matching columns. ``key`` is the cache key and
    ``endpoint_source`` the endpoint the data was requested from.
    """
    daily_option_spot: Optional[pd.DataFrame] = None
    key: Optional[str] = None
    endpoint_source: Optional[OptionSpotEndpointSource] = None

    @property
    def close(self) -> pd.Series:
        """The ``close`` column, or an empty float series when there is no data."""
        if self.is_empty():
            return pd.Series(name="close", index=pd.DatetimeIndex([]), dtype=float)
        return self.daily_option_spot["close"]

    @property
    def midpoint(self) -> pd.Series:
        """The ``midpoint`` column, or an empty float series when there is no data."""
        if self.is_empty():
            return pd.Series(name="midpoint", index=pd.DatetimeIndex([]), dtype=float)
        return self.daily_option_spot["midpoint"]

    def is_empty(self) -> bool:
        """Return True when the frame is missing or has no rows/columns."""
        frame = self.daily_option_spot
        return frame is None or frame.empty

    def _additional_repr_fields(self) -> Dict[str, Any]:
        """Extra name/value pairs contributed to the string representation."""
        return dict(
            key=self.key,
            is_empty=self.is_empty(),
            endpoint_source=self.endpoint_source,
        )

    def __repr__(self) -> str:
        """Use the base Result representation instead of the dataclass default."""
        return super().__repr__()
    


    

In [77]:
class OptionSpotDataManager(BaseDataManager):
    """Manages option spot price retrieval for a specific symbol from Thetadata API."""
    CACHE_NAME: str = "option_spot_manager"
    DEFAULT_SERIES_ID: str = SeriesId.HIST
    CONFIG = OptionDataConfig()
    INSTANCES = {}

    def __init__(
        self, symbol: str, *, cache_spec: Optional[CacheSpec] = None, enable_namespacing: bool = False
    ) -> None:
        """Set up the base manager's cache and bind this manager to ``symbol``.

        Args:
            symbol: Underlying ticker whose option prices are served.
            cache_spec: Optional cache configuration forwarded to the base class.
            enable_namespacing: Forwarded to the base class.
        """
        super().__init__(cache_spec=cache_spec, enable_namespacing=enable_namespacing)
        self.symbol = symbol

    def _sync_date(
        self,
        start_date: DATE_HINT,
        end_date: DATE_HINT,
        strike: Optional[float] = None,
        expiration: Optional[Union[datetime, str]] = None,
        right: Optional[str] = None,
    ) -> Tuple[DATE_HINT, DATE_HINT]:
        """Clamp the requested window to the dates ``list_dates`` reports for the contract.

        The start is moved forward to the earliest listed date and the end is
        moved back to the latest listed date when the request extends beyond them.

        Returns:
            ``(start_date, end_date)`` after clamping.
        """
        listed_dates = to_datetime(
            list_dates(
                symbol=self.symbol,
                exp=expiration,
                right=right,
                strike=strike,
            )
        )
        earliest, latest = min(listed_dates), max(listed_dates)
        return max(earliest, start_date), min(end_date, latest)
    
    def get_option_spot(
        self,
        date: Union[datetime, str],
        *,
        strike: Optional[float] = None,
        expiration: Optional[Union[datetime, str]] = None,
        right: Optional[str] = None,
        opttick: Optional[str] = None,
        endpoint_source: Optional[OptionSpotEndpointSource] = None,
    ) -> OptionSpotResult:
        """Return the option price data for a single date.

        Delegates to ``get_option_spot_timeseries`` with a one-day window
        (``start == end == date``).
        """
        if isinstance(date, datetime):
            single_day = pd.to_datetime(date).strftime("%Y-%m-%d")
        else:
            # Non-datetime input (e.g. a date string) is passed through as given.
            single_day = date

        return self.get_option_spot_timeseries(
            start_date=single_day,
            end_date=single_day,
            strike=strike,
            expiration=expiration,
            right=right,
            opttick=opttick,
            endpoint_source=endpoint_source,
        )

    def get_option_spot_timeseries(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        *,
        strike: Optional[float] = None,
        expiration: Optional[Union[datetime, str]] = None,
        right: Optional[str] = None,
        opttick: Optional[str] = None,
        endpoint_source: Optional[OptionSpotEndpointSource] = None,
    ) -> OptionSpotResult:
        """Return the option price timeseries for one contract over a date window.

        The contract is identified either by ``strike``/``expiration``/``right``
        or by ``opttick`` (resolved through ``_handle_opttick_param``). When
        ``endpoint_source`` is omitted the class ``CONFIG`` default is used.

        A fully cached window is returned directly. Otherwise the (missing part
        of the) window is fetched, merged with any cached rows, cached again and
        trimmed to the originally requested window.
        """
        if endpoint_source is None:
            endpoint_source = self.CONFIG.option_spot_endpoint_source

        strike, right, _resolved_symbol, expiration = _handle_opttick_param(
            strike=strike,
            right=right,
            symbol=self.symbol,
            exp=expiration,
            opttick=opttick,
        )

        # Clamp the working window to the contract's listed dates; the string
        # bounds taken from the packet still describe the caller's request.
        date_packet = DateRangePacket(start_date=start_date, end_date=end_date)
        start_date, end_date = self._sync_date(
            start_date=date_packet.start_date,
            end_date=date_packet.end_date,
            strike=float(strike),
            expiration=expiration,
            right=right,
        )
        start_str, end_str = date_packet.start_str, date_packet.end_str

        key = self.make_key(
            symbol=self.symbol,
            artifact_type=ArtifactType.OPTION_SPOT,
            series_id=SeriesId.HIST,
            endpoint_source=endpoint_source.value,
            interval=Interval.EOD,
            strike=strike,
            right=right,
            expiration=expiration,
        )

        def _as_result(frame: pd.DataFrame) -> OptionSpotResult:
            # Wrap a frame together with the key/endpoint it belongs to.
            packaged = OptionSpotResult()
            packaged.daily_option_spot = frame
            packaged.key = key
            packaged.endpoint_source = endpoint_source
            return packaged

        # The cache check may narrow the window to the span still missing.
        cached_data, is_partial, start_date, end_date = _check_cache_for_timeseries_data_structure(
            key=key,
            self=self,
            start_dt=start_date,
            end_dt=end_date,
        )

        if cached_data is not None and not is_partial:
            logger.info(f"Cache hit for option spot timeseries key: {key}")
            return _as_result(cached_data)

        if is_partial:
            logger.info(f"Cache partially covers requested date range for option spot timeseries. Key: {key}. Fetching missing dates.")
        else:
            logger.info(f"No cache found for option spot timeseries key: {key}. Fetching from source.")

        fetched_data = self._query_thetadata_api(
            start_date=start_date,
            end_date=end_date,
            endpoint_source=endpoint_source,
            strike=strike,
            expiration=expiration,
            right=right,
        )

        # On a partial hit, freshly fetched rows replace cached rows for the same date.
        if cached_data is not None and is_partial:
            combined = pd.concat([cached_data, fetched_data])
            fetched_data = combined[~combined.index.duplicated(keep='last')]

        fetched_data.index = default_timestamp(fetched_data.index)

        # No explicit expiry is passed here; the helper's default applies.
        _data_structure_cache_it(self, key, fetched_data)

        # Trim to the window the caller originally asked for.
        fetched_data = _data_structure_sanitize(
            fetched_data,
            start=start_str,
            end=end_str,
        )
        return _as_result(fetched_data)


    def _query_thetadata_api(
        self,
        start_date: Union[datetime, str],
        end_date: Union[datetime, str],
        endpoint_source: OptionSpotEndpointSource,
        strike: Optional[float] = None,
        expiration: Optional[Union[datetime, str]] = None,
        right: Optional[str] = None,
    ) -> pd.DataFrame:
        """Fetch option price data for one contract from the selected endpoint.

        ``OptionSpotEndpointSource.EOD`` goes through ``retrieve_eod_ohlc``; any
        other source goes through ``quote_to_eod_patch`` in OHLC format (and
        logs the request first).
        """
        if endpoint_source == OptionSpotEndpointSource.EOD:
            return retrieve_eod_ohlc(
                symbol=self.symbol,
                start_date=start_date,
                end_date=end_date,
                strike=float(strike),
                exp=expiration,
                right=right,
            )

        # Every non-EOD source is served from the quote endpoint.
        logger.info(f"Fetching option spot data from Thetadata Quote endpoint for {self.symbol} from {start_date} to {end_date}.")
        return quote_to_eod_patch(
            symbol=self.symbol,
            start_date=start_date,
            end_date=end_date,
            strike=float(strike),
            exp=expiration,
            right=right,
            ohlc_format=True,
        )

In [78]:
# Quote-endpoint prices for the AAPL 2028-03-17 200 call over 2026-01-10..2026-01-14.
spot_option_manager = OptionSpotDataManager(symbol="AAPL")
data = spot_option_manager.get_option_spot_timeseries(
    start_date="2026-01-10",
    end_date="2026-01-14",
    endpoint_source=OptionSpotEndpointSource.QUOTE,
    strike=200,
    expiration="2028-03-17",
    right="C",
)
data.daily_option_spot

2026-01-18 19:43:39 [test] __main__ INFO: Cache hit for timeseries data structure key: symbol:AAPL|interval:eod|artifact_type:option_spot|series_id:hist|endpoint_source:QUOTE|expiration:2028-03-17|right:C|strike:200
Sanitizing data from 2026-01-10 00:00:00 to 2026-01-14 00:00:00...
2026-01-18 19:43:39 [test] __main__ INFO: Cache hit for option spot timeseries key: symbol:AAPL|interval:eod|artifact_type:option_spot|series_id:hist|endpoint_source:QUOTE|expiration:2028-03-17|right:C|strike:200


Unnamed: 0_level_0,open,high,low,close,volume,bid_size,closebid,ask_size,closeask,midpoint,weighted_midpoint
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2026-01-12,0.0,88.225,0.0,87.2,,31,86.8,29,87.6,87.2,87.186667
2026-01-13,0.0,88.2,0.0,87.85,,7,87.35,12,88.35,87.85,87.981579
2026-01-14,0.0,88.3,0.0,87.125,,17,86.55,16,87.7,87.125,87.107576


In [79]:
# Same contract for a single date via the EOD endpoint; display the midpoint series.
# NOTE: rebinds `spot_option_manager` and `data` from the previous cell.
spot_option_manager = OptionSpotDataManager(symbol="AAPL")
data = spot_option_manager.get_option_spot(
    date="2026-01-12",
    endpoint_source=OptionSpotEndpointSource.EOD,
    strike=200,
    expiration="2028-03-17",
    right="C",
)
data.midpoint

2026-01-18 19:43:39 [test] __main__ INFO: Cache hit for timeseries data structure key: symbol:AAPL|interval:eod|artifact_type:option_spot|series_id:hist|endpoint_source:EOD|expiration:2028-03-17|right:C|strike:200
Sanitizing data from 2026-01-12 00:00:00 to 2026-01-12 00:00:00...
2026-01-18 19:43:39 [test] __main__ INFO: Cache hit for option spot timeseries key: symbol:AAPL|interval:eod|artifact_type:option_spot|series_id:hist|endpoint_source:EOD|expiration:2028-03-17|right:C|strike:200


datetime
2026-01-12    87.2
Name: midpoint, dtype: float64

In [None]:
# List AAPL contracts from 2026-01-16 and show the first 50 strike-200 rows ordered by expiration.
c = list_contracts(symbol="AAPL", start_date="2026-01-16")
c.sort_values(by="expiration").query("strike == 200").head(50)

Unnamed: 0,root,expiration,strike,right
2874,AAPL,20260116,200.0,C
2875,AAPL,20260116,200.0,P
2960,AAPL,20260123,200.0,P
2962,AAPL,20260123,200.0,C
2880,AAPL,20260130,200.0,P
2879,AAPL,20260130,200.0,C
2965,AAPL,20260206,200.0,P
2966,AAPL,20260206,200.0,C
2888,AAPL,20260213,200.0,C
2887,AAPL,20260213,200.0,P


## Vol Manager

In [80]:
@dataclass
class VolatilityResult(Result):
    """Holds a volatility timeseries together with its key."""

    # Volatility values as a pandas Series; None when nothing has been set.
    timeseries: Optional[pd.Series] = None
    # Key associated with this result — presumably the cache key; TODO confirm.
    key: Optional[str] = None

    def is_empty(self) -> bool:
        """Return True when no timeseries is set or the series has no entries."""
        series = self.timeseries
        if series is None:
            return True
        return series.empty

    def _additional_repr_fields(self) -> Dict[str, Any]:
        """Supply the volatility-specific entries for the string representation."""
        return dict(key=self.key, is_empty=self.is_empty())

    def __repr__(self) -> str:
        # Defined explicitly so that @dataclass does not generate its own
        # field-based __repr__; the base class representation is used instead.
        return super().__repr__()



In [82]:
def time_distance_helper(start: datetime, end: datetime) -> float:
    """Return the time between ``start`` and ``end`` expressed in years.

    Both arguments are passed through ``to_datetime`` first. Only the whole-day
    part of the difference is used (``timedelta.days``), so any intraday
    remainder is dropped before converting to a year fraction.

    Args:
        start: Beginning of the interval.
        end: End of the interval.

    Returns:
        Whole days between the two dates, converted to seconds and divided by
        ``SECONDS_IN_YEAR``.
    """
    whole_days = (to_datetime(end) - to_datetime(start)).days
    elapsed_seconds = whole_days * SECONDS_IN_DAY
    return elapsed_seconds / SECONDS_IN_YEAR

In [83]:
# Assemble per-valuation-date inputs for pricing the AAPL 200-strike call
# expiring 2026-07-17: option midpoints, dividend schedules, rates, spot and
# the constant per-row lists (strike, right, sigma, dividend type).
ts_start = "2025-01-01"
ts_end = "2026-01-18"
expiration = "2026-07-17"

# Option midpoint series over the requested window.
option_spot_ts = OptionSpotDataManager(symbol="AAPL").get_option_spot_timeseries(
    start_date=ts_start,
    end_date=ts_end,
    strike=200,
    expiration=expiration,
    right="C",
)
market_price = option_spot_ts.midpoint

# All other series are requested over the span actually covered by the prices.
first_price_date = market_price.index.min()
last_price_date = market_price.index.max()

dividends = DividendDataManager(symbol="AAPL").get_schedule_timeseries(
    start_date=first_price_date,
    end_date=last_price_date,
    maturity_date=expiration,
    div_type=DivType.DISCRETE,
)
daily_schedules = dividends.daily_discrete_dividends
n_valuation_dates = len(daily_schedules)

dividends_res = vector_convert_to_time_frac(
    schedules=daily_schedules,
    valuation_dates=daily_schedules.index.tolist(),
    end_dates=[to_datetime(expiration)] * n_valuation_dates,
)

# Flat 0.2 volatility for every converted schedule.
sigma = [0.2] * len(dividends_res)

rates_ts = RatesDataManager().get_risk_free_rate_timeseries(
    start_date=first_price_date,
    end_date=last_price_date,
)
r = rates_ts.daily_risk_free_rates

# Year fraction from each valuation date to expiration.
T = [
    time_distance_helper(start=valuation_date, end=expiration)
    for valuation_date in daily_schedules.index
]

spot_ts = SpotDataManager(symbol="AAPL").get_spot_timeseries(
    start_date=first_price_date,
    end_date=last_price_date,
)
S0 = spot_ts.daily_spot

# Constant inputs, one entry per valuation date.
right = ["c"] * n_valuation_dates
dividend_type = [DivType.DISCRETE.value] * n_valuation_dates
K = [200.0] * n_valuation_dates


2026-01-18 19:43:41 [test] __main__ INFO: Cache hit for timeseries data structure key: symbol:AAPL|interval:eod|artifact_type:option_spot|series_id:hist|endpoint_source:EOD|expiration:2026-07-17|right:C|strike:200
Sanitizing data from 2025-11-20 00:00:00 to 2026-01-16 00:00:00...
2026-01-18 19:43:41 [test] __main__ INFO: Cache hit for option spot timeseries key: symbol:AAPL|interval:eod|artifact_type:option_spot|series_id:hist|endpoint_source:EOD|expiration:2026-07-17|right:C|strike:200
2026-01-18 19:43:41 [test] __main__ INFO: Fetching discrete dividend schedule timeseries for AAPL from 2025-11-20 00:00:00 to 2026-01-16 00:00:00 with maturity 2026-07-17
2026-01-18 19:43:41 [test] __main__ INFO: Cache hit for discrete schedule timeseries key: symbol:AAPL|interval:eod|artifact_type:divs|series_id:hist|current_state:SCHEDULE_TIMESERIES|lookback_years:1|maturity:2026-07-17|method:CONSTANT|undo_adjust:1
2026-01-18 19:43:41 [test] __main__ INFO: Cache fully covers requested date range for t

In [91]:
# Price a single valuation date (position `i` in the dividend-schedule index)
# with the scalar CRR pricer, using N=100 and American exercise.
i = 20
# NOTE(review): `date` is not used by this cell; the valuation date is taken
# from position `i` of the dividend-schedule index — confirm whether this
# string is still needed elsewhere.
date = "2026-01-06"
valuation_date = dividends.daily_discrete_dividends.index[i]
crr_binomial_pricing(
    K=K[i],
    T=T[i],
    sigma=sigma[i],
    r=r.loc[valuation_date],
    S0=S0.loc[valuation_date],
    # Pass the scalar for this row, like every other argument; the original
    # passed the whole per-date list here.
    dividend_type=dividend_type[i],
    dividends=dividends_res[i].schedule,
    option_type=right[i],
    N=100,
    american=True,
)

77.34927314808915

In [86]:
# Back out the CRR implied volatility for the valuation date at position `i`
# from that date's market midpoint.
iv_valuation_date = dividends.daily_discrete_dividends.index[i]
estimate_crr_implied_volatility(
    S=S0.loc[iv_valuation_date],
    K=K[i],
    T=T[i],
    r=r.loc[iv_valuation_date],
    market_price=market_price.loc[iv_valuation_date],
    dividend_type=dividend_type[i],
    q=dividends_res[i].schedule,
    option_type=right[i],
    N=100,
)

np.float64(0.35225717942950563)

In [87]:
from trade.helpers.helper import get_parrallel_apply, runProcesses, runThreads

# Positional window applied to the per-date input lists in the vectorized
# pricing call; the upper bound is clipped to the list length when slicing.
s = slice(0, 272)

# Only the last expression of a cell is displayed, so the bare `s`, `len(r)`,
# `r` and `S0` expressions that used to precede this line were no-ops and
# have been removed.
dividends.daily_discrete_dividends

datetime
2025-11-20    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-11-21    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-11-24    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-11-25    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-11-26    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-11-28    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-01    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-02    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-03    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-04    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-05    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-08    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-09    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-10    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-11    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-12    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-15    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2025-12-16    ((2026-02-10, 0.26), (2026-05-10, 0.26))
2

In [88]:
# Price every valuation date inside window `s` with one vectorized CRR call.
# Arguments are positional, so their order below must not change.
n_rows = len(K)
steps_per_row = [250] * n_rows
american_per_row = [True] * n_rows
zero_yield_per_row = [0.0] * n_rows
schedule_per_row = [dividends_res[j].schedule for j in range(len(dividends_res))]

vector_crr_binomial_pricing(
    K[s],  # K
    T[s],  # T
    sigma[s],  # sigma
    r.tolist()[s],  # r
    steps_per_row[s],  # N
    S0.tolist()[s],  # S0
    right[s],  # option_type
    american_per_row[s],  # american
    zero_yield_per_row[s],  # dividend_yield
    schedule_per_row[s],  # dividends
    dividend_type[s],  # dividend_type
)

[71.02630033668731,
 76.09229303220053,
 80.37353532681382,
 81.38808716012909,
 81.96444947401316,
 83.13808721068547,
 87.27711472403652,
 90.29754247077118,
 88.20660139858977,
 84.73446831932573,
 82.80205029879087,
 81.87362207160093,
 81.16549041598502,
 82.68146918792709,
 81.88873890121882,
 82.06495491180878,
 77.88984162045772,
 78.37087626259574,
 75.61148315977978,
 75.91054322876582,
 77.35073526416885,
 74.62624448060924,
 75.99617143789405,
 77.41758579513007,
 76.9557065376728,
 77.24149177132259,
 76.54931516941679,
 75.3282815804333,
 74.4296114685626,
 70.64548361128149,
 65.81246752064467,
 63.79651442825223,
 62.50227051241733,
 62.80793623535227,
 63.61869929257374,
 64.40448392746192,
 63.31367744975112,
 61.581577922293064,
 58.94114722939313]