In [4]:
import os
from pathlib import Path
import options_wizard as ow
import polars as pl
from dotenv import load_dotenv
load_dotenv()


True

In [5]:

def load_data(**kwargs) -> ow.DataType:
    """Load the commodity futures parquet file for one ticker as a lazy frame.

    The directory is taken from the ``CMDTY_PATH`` environment variable
    (``.env`` files are honoured via ``load_dotenv``). If the variable holds
    several ``os.pathsep``-separated entries, only the first one is used.
    The file for a ticker is the ``.parquet`` file whose name, with
    ``_FUT.parquet`` removed, equals the ticker.

    Keyword Args:
        tick: Ticker to load (e.g. ``"CC"``). Defaults to ``None``, which
            never matches a file.

    Returns:
        ``ow.DataType`` wrapping a ``polars.LazyFrame`` in which
        ``LAST_TRADEABLE_DT`` and ``FUT_NOTICE_FIRST`` are parsed from
        ``%d/%m/%Y`` strings into dates and ``Date`` is cast to ``pl.Date``.

    Raises:
        ValueError: If no parquet file for ``tick`` exists in the directory.
    """
    import os
    from dotenv import load_dotenv
    import polars as pl

    tick = kwargs.get("tick", None)

    load_dotenv()
    cmdty_path = os.getenv("CMDTY_PATH", "").split(os.pathsep)[0]

    df = None
    with os.scandir(cmdty_path) as entries:
        for entry in entries:
            if not (entry.is_file() and entry.name.endswith(".parquet")):
                continue
            # Compare against the bare file name rather than splitting the
            # full path on a backslash, so the lookup works on every OS.
            if entry.name.replace("_FUT.parquet", "") == tick:
                df = pl.scan_parquet(entry.path)
                break

    if df is None:
        raise ValueError(f"Tick {tick} not found in CMDTY_PATH")

    # strict=False: values that do not match the format become null
    # instead of raising.
    df = df.with_columns(
        pl.col("LAST_TRADEABLE_DT").str.strptime(pl.Date, format="%d/%m/%Y", strict=False),
        pl.col("FUT_NOTICE_FIRST").str.strptime(pl.Date, format="%d/%m/%Y", strict=False),
        pl.col("Date").cast(pl.Date),
    )

    return ow.DataType(data=df, tick=tick)

def days_to_anchor(data: ow.DataType, **kwargs) -> ow.DataType:
    """Add calendar-position columns and the distance to each contract's last tradeable date.

    A calendar is built from ``ow.market_dates`` for the single exchange found
    in ``EXCH_CODE``, spanning the earliest ``Date`` to the latest
    ``LAST_TRADEABLE_DT``. Each calendar date gets a 0-based position, and both
    ``Date`` and ``LAST_TRADEABLE_DT`` are mapped to the position of the
    nearest calendar date on or before them (backward as-of join).

    Added columns:
        DateIdx: calendar position of ``Date``.
        LtdIdx: calendar position of ``LAST_TRADEABLE_DT``.
        DAYS_TO_ANCHOR: ``LtdIdx - DateIdx`` as a signed integer.
        DAYS_TO_FRONT_ANCHOR: minimum ``DAYS_TO_ANCHOR`` across all rows
            sharing the same ``Date``.

    Rows whose ``Date`` or ``LAST_TRADEABLE_DT`` has no calendar date on or
    before it are dropped.

    Args:
        data: Wrapper whose ``_data`` is a ``polars.LazyFrame`` with at least
            ``Date``, ``LAST_TRADEABLE_DT`` and ``EXCH_CODE`` columns.

    Keyword Args:
        tick: Ticker stored on the returned wrapper. Defaults to ``None``.

    Returns:
        ``ow.DataType`` wrapping the augmented lazy frame (left sorted by
        ``LAST_TRADEABLE_DT``).

    Note:
        ``.item()`` on the ``EXCH_CODE`` selection raises if the frame holds
        anything other than exactly one distinct exchange code.
    """
    import pandas as pd
    import polars as pl
    import options_wizard as ow

    df = data._data
    tick = kwargs.get("tick", None)

    min_day = pd.Timestamp(
        df.select(pl.col("Date").min()).collect().item()
    )
    max_day = pd.Timestamp(
        df.select(pl.col("LAST_TRADEABLE_DT").max()).collect().item()
    )

    exchange = (
        df.select(pl.col("EXCH_CODE"))
        .unique()
        .collect()
        .item()
    )

    # NOTE(review): presumably the exchange's trading sessions between the
    # two bounds -- confirm against options_wizard.
    dates = ow.market_dates(
        exchange=exchange,
        lower=min_day,
        upper=max_day
    )

    # Sort BEFORE numbering so the index is a true chronological ordinal even
    # if market_dates does not return its dates in ascending order.
    calendar = (
        pl.LazyFrame({"Date": dates})
        .with_columns(pl.col("Date").cast(pl.Date))
        .sort("Date")
        .with_row_index("TradeDateIdx")
    )

    # join_asof requires both sides sorted on the join key.
    df = df.sort("Date")

    df = (
        df.join_asof(
            calendar,
            on="Date",
            strategy="backward"
        )
        .filter(pl.col("TradeDateIdx").is_not_null())
        .rename({"TradeDateIdx": "DateIdx"})
    )

    calendar_ltd = (
        calendar
        .rename({
            "Date": "LAST_TRADEABLE_DT",
            "TradeDateIdx": "LtdIdx"
        })
        .sort("LAST_TRADEABLE_DT")
    )

    df = df.sort("LAST_TRADEABLE_DT")

    df = (
        df.join_asof(
            calendar_ltd,
            on="LAST_TRADEABLE_DT",
            strategy="backward"
        )
        .filter(pl.col("LtdIdx").is_not_null())
    )

    # The row index is unsigned; cast to a signed type before subtracting so
    # a row dated after its last tradeable date yields a negative number
    # rather than wrapping around to a huge positive one.
    df = df.with_columns(
        (
            pl.col("LtdIdx").cast(pl.Int64) - pl.col("DateIdx").cast(pl.Int64)
        ).alias("DAYS_TO_ANCHOR")
    )
    df = df.with_columns(
        pl.col("DAYS_TO_ANCHOR")
        .min()
        .over("Date")
        .alias("DAYS_TO_FRONT_ANCHOR")
    )
    return ow.DataType(data=df, tick=tick)

def curve_structure(data: ow.DataType, **kwargs) -> ow.DataType:
    """Tag each row with its curve position and relative curvature for its date.

    For every ``Date`` the distinct ``DAYS_TO_ANCHOR`` values are ranked in
    ascending order (one ``PX_SETTLE`` per tenor, the last one seen in the
    group). The lowest tenor is labelled ``"front"``, the highest ``"back"``
    and everything else ``"int"``. Where both neighbouring tenors exist, a
    second difference of price with respect to tenor on the unevenly spaced
    grid is computed, scaled by ``252 ** 2`` and divided by the point's own
    price.

    Args:
        data: Wrapper whose ``_data`` is a ``polars.LazyFrame`` containing
            ``Date``, ``DAYS_TO_ANCHOR`` and ``PX_SETTLE``.

    Keyword Args:
        tick: Ticker stored on the returned wrapper. Defaults to ``None``.

    Returns:
        ``ow.DataType`` wrapping the input frame left-joined with
        ``CURVE_POS`` and ``REL_CURVATURE`` on ``Date`` / ``DAYS_TO_ANCHOR``.
        ``REL_CURVATURE`` is null wherever a neighbour is missing.
    """
    import polars as pl
    import options_wizard as ow

    source = data._data
    tick = kwargs.get("tick", None)

    join_keys = ["Date", "DAYS_TO_ANCHOR"]
    price = pl.col("PX_SETTLE")
    tenor = pl.col("DAYS_TO_ANCHOR")
    tenor_rank = pl.col("TenorIdx")

    # One price per (Date, tenor), ordered along the curve, with a per-date
    # 0-based rank of the tenor.
    per_tenor = (
        source
        .select(join_keys + ["PX_SETTLE"])
        .group_by(join_keys)
        .agg(price.last())
        .sort(join_keys)
        .with_columns(
            pl.row_index().over("Date").alias("TenorIdx")
        )
    )

    # Front is checked first, so a date with a single tenor is "front".
    position = (
        pl.when(tenor_rank == 0)
        .then(pl.lit("front"))
        .when(tenor_rank == pl.max("TenorIdx").over("Date"))
        .then(pl.lit("back"))
        .otherwise(pl.lit("int"))
        .alias("CURVE_POS")
    )
    per_tenor = per_tenor.with_columns([position])

    # Next (p) and previous (m) point on the same date's curve.
    adjacent = [
        price.shift(-1).over("Date").alias("f_p"),
        price.shift(1).over("Date").alias("f_m"),
        tenor.shift(-1).over("Date").alias("T_p"),
        tenor.shift(1).over("Date").alias("T_m"),
    ]
    per_tenor = per_tenor.with_columns(adjacent)

    # Tenor gaps to each neighbour and across both.
    spacing = [
        (pl.col("T_p") - tenor).alias("dt_fwd"),
        (tenor - pl.col("T_m")).alias("dt_bwd"),
        (pl.col("T_p") - pl.col("T_m")).alias("dt_span"),
    ]
    per_tenor = per_tenor.with_columns(spacing)

    has_both_sides = pl.all_horizontal([
        pl.col("f_p").is_not_null(),
        pl.col("f_m").is_not_null(),
        pl.col("dt_fwd") > 0,
        pl.col("dt_bwd") > 0,
    ])
    slope_fwd = (pl.col("f_p") - price) / pl.col("dt_fwd")
    slope_bwd = (price - pl.col("f_m")) / pl.col("dt_bwd")
    # Non-uniform second difference, rescaled by 252 squared.
    second_diff = 2.0 / pl.col("dt_span") * (
        slope_fwd
        -
        slope_bwd
    ) * (252.0 ** 2)

    per_tenor = per_tenor.with_columns(
        pl.when(has_both_sides)
        .then(second_diff)
        .otherwise(None)
        .alias("CURVATURE")
    )

    # Divide by the point's own price to make the measure scale-free.
    per_tenor = per_tenor.with_columns(
        (pl.col("CURVATURE") / price).alias("REL_CURVATURE")
    )

    # Only the labels and the final measure are merged back.
    to_merge = per_tenor.select(join_keys + ["CURVE_POS", "REL_CURVATURE"])

    merged = source.join(
        to_merge,
        on=join_keys,
        how="left"
    )

    return ow.DataType(data=merged, tick=tick)


# Run the three pipeline stages for one ticker. The ticker is defined once so
# the stages cannot drift apart, and each stage keeps its own name so it can
# be inspected separately; `data` is the final result used by later cells.
TICK = "CC"

futures_raw = load_data(tick=TICK)
futures_anchored = days_to_anchor(futures_raw, tick=TICK)
data = curve_structure(futures_anchored, tick=TICK)

In [6]:
# Keep only interior curve points. Filtering inside the lazy query (before
# collect) avoids materialising the front/back rows just to discard them.
out = data._data.filter(pl.col("CURVE_POS") == "int").collect()
out["REL_CURVATURE"].describe()

statistic,value
str,f64
"""count""",17897.0
"""null_count""",0.0
"""mean""",0.01201
"""std""",0.309006
"""min""",-2.93395
"""25%""",-0.077484
"""50%""",-0.017291
"""75%""",0.038237
"""max""",4.588169


In [7]:
import polars as pl

# One row per Date holding the list of DAYS_TO_ANCHOR values found on that
# date. Inside group_by().agg() a bare column already aggregates to a list,
# so no explicit implode is needed. The sort is part of the lazy query.
lf = (
    data._data
    .group_by("Date")
    .agg(pl.col("DAYS_TO_ANCHOR").alias("days_to_anchor"))
    .sort("Date")
)

df = lf.collect()

# Print each date followed by the DAYS_TO_ANCHOR values for its contracts
for date, values in df.iter_rows():
    print(date, values)

2008-01-02 [51, 94, 139, 182, 247, 310]
2008-01-03 [50, 93, 138, 181, 246, 309]
2008-01-04 [49, 92, 137, 180, 245, 308]
2008-01-07 [48, 91, 136, 179, 244, 307]
2008-01-08 [47, 90, 135, 178, 243, 306]
2008-01-09 [46, 89, 134, 177, 242, 305]
2008-01-10 [45, 88, 133, 176, 241, 304]
2008-01-11 [44, 87, 132, 175, 240, 303]
2008-01-14 [43, 86, 131, 174, 239, 302]
2008-01-15 [42, 85, 130, 173, 238, 301]
2008-01-16 [41, 84, 129, 172, 237, 300]
2008-01-17 [40, 83, 128, 171, 236, 299]
2008-01-18 [39, 82, 127, 170, 235, 298]
2008-01-22 [37, 80, 125, 168, 233, 296]
2008-01-23 [36, 79, 124, 167, 232, 295]
2008-01-24 [35, 78, 123, 166, 231, 294]
2008-01-25 [34, 77, 122, 165, 230, 293]
2008-01-28 [33, 76, 121, 164, 229, 292]
2008-01-29 [32, 75, 120, 163, 228, 291]
2008-01-30 [31, 74, 119, 162, 227, 290]
2008-01-31 [30, 73, 118, 161, 226, 289]
2008-02-01 [29, 72, 117, 160, 225, 288]
2008-02-04 [28, 71, 116, 159, 224, 287]
2008-02-05 [27, 70, 115, 158, 223, 286]
2008-02-06 [26, 69, 114, 157, 222, 285]


In [None]:
for day = in 