# futcurves — real SR3 data test

End-to-end test of the library using **real Databento SR3 data** stored on Google Drive.

1. Authenticate Google Drive & download SR3 parquet
2. Parse contracts into `meta` and `panel` DataFrames
3. Build rolling universe
4. Build strip curve + holdings (smoothstep roll)
5. Inspect roll blending around a real roll date
6. Generate contract-level orders
7. Visualise curve, term structure, returns

**Before publishing:** remove `credentials.json`, `token.pickle`, and any hardcoded API keys.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, io, pickle, re
from datetime import datetime

from futcurves import (
    RollPolicy,
    build_rolling_universe,
    build_strip_curve,
    position_to_contract_orders,
)

## 1. Google Drive — download SR3 parquet

This pulls `sr3_curve_daily_3years.parquet` from the `Databento_Data` folder on your Drive.
If you already have it locally, skip the download cell.

In [None]:
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

# OAuth scope requested for the Drive client.
# NOTE(review): `drive.file` is presumably limited to files this app created or
# opened — confirm the data folder is visible under this scope.
SCOPES = ["https://www.googleapis.com/auth/drive.file"]

def authenticate_google_drive():
    """Return an authenticated Drive v3 client, caching credentials on disk.

    Credentials are read from ``token.pickle`` when that file exists. If they
    are missing or not valid they are either refreshed (expired, with a
    refresh token available) or obtained through the local-server OAuth flow
    configured by ``credentials.json``. The resulting credentials are written
    back to ``token.pickle`` before the client is built.
    """
    token_path = "token.pickle"

    creds = None
    if os.path.exists(token_path):
        # Caution: unpickling runs arbitrary code, so only load a token file
        # that this notebook wrote itself.
        with open(token_path, "rb") as token_file:
            creds = pickle.load(token_file)

    # Fast path: cached credentials are still usable, nothing to persist.
    if creds and creds.valid:
        return build("drive", "v3", credentials=creds)

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        oauth_flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
        creds = oauth_flow.run_local_server(port=0)

    # Persist the new or refreshed credentials for the next run.
    with open(token_path, "wb") as token_file:
        pickle.dump(creds, token_file)
    return build("drive", "v3", credentials=creds)


def download_from_drive(service, file_name, folder_name="Databento_Data"):
    """Download ``file_name`` from a named Drive folder into the working directory.

    The folder is looked up by name and the file is looked up by name among
    that folder's children; when several entries match, the first one returned
    by the API is used.

    The payload is streamed into ``<file_name>.part`` and only renamed to
    ``file_name`` once every chunk has arrived, so an interrupted download
    never leaves a truncated file that looks like a complete local copy.

    Args:
        service: Drive v3 client, as returned by ``authenticate_google_drive``.
        file_name: Name of the file on Drive; also used as the local path.
        folder_name: Name of the Drive folder that contains the file.

    Returns:
        ``file_name``, the local path of the downloaded file.

    Raises:
        FileNotFoundError: If the folder, or the file inside it, is not found.
    """
    q = f"name='{folder_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
    folders = service.files().list(q=q, spaces="drive", fields="files(id)").execute().get("files", [])
    if not folders:
        raise FileNotFoundError(f"Folder '{folder_name}' not found on Drive")
    folder_id = folders[0]["id"]

    q = f"name='{file_name}' and '{folder_id}' in parents and trashed=false"
    files = service.files().list(q=q, spaces="drive", fields="files(id)").execute().get("files", [])
    if not files:
        raise FileNotFoundError(f"File '{file_name}' not found in '{folder_name}'")

    request = service.files().get_media(fileId=files[0]["id"])
    partial_path = f"{file_name}.part"
    try:
        # The context manager guarantees the handle is closed (and flushed)
        # before the rename, and also when a chunk request fails.
        with io.FileIO(partial_path, "wb") as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
        os.replace(partial_path, file_name)
    except BaseException:
        # Drop the incomplete file (also on Ctrl-C), then let the error propagate.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise

    print(f"Downloaded {file_name}")
    return file_name


# Authenticate once up front; the download step below reuses this client.
# This runs the OAuth flow (or loads cached credentials) even when the parquet
# file is already present locally.
drive_service = authenticate_google_drive()
print("Google Drive authenticated")

In [None]:
PARQUET_FILE = "sr3_curve_daily_3years.parquet"

# Reuse a local copy when one exists; otherwise fetch it from Drive first.
have_local_copy = os.path.exists(PARQUET_FILE)
if not have_local_copy:
    download_from_drive(drive_service, PARQUET_FILE)

df_raw = pd.read_parquet(PARQUET_FILE)

# Quick sanity summary of what was loaded.
n_rows = len(df_raw)
print(f"Loaded {n_rows:,} rows")
first_ts, last_ts = df_raw.index.min(), df_raw.index.max()
print(f"Date range: {first_ts} to {last_ts}")
column_names = df_raw.columns.tolist()
print(f"Columns: {column_names}")
n_symbols = df_raw["symbol"].nunique()
print(f"Unique symbols: {n_symbols}")
df_raw.head()

## 2. Parse into `meta` and `panel`

Filter to single-leg quarterly contracts (symbols matching `^SR3[HMUZ]\d$`), then build the `meta` and `panel` DataFrames that `futcurves` expects.

In [None]:
# Filter to single-leg quarterly contracts
mask = df_raw["symbol"].str.match(r"^SR3[HMUZ]\d$")
df = df_raw[mask].copy()
print(f"After filter: {len(df):,} rows, {df['symbol'].nunique()} contracts")
print(f"Contracts: {sorted(df['symbol'].unique())}")

In [None]:
# Quarterly month codes -> calendar month number.
MONTH_MAP = {"H": 3, "M": 6, "U": 9, "Z": 12}


def sr3_expiry(symbol: str, decade_start: int = 2020) -> pd.Timestamp:
    """Estimate expiry as the 3rd Wednesday of the contract month.

    NOTE(review): SR3 contracts may be named for the month in which their
    reference quarter *starts*; if so, the true final settlement falls roughly
    three months after this estimate. Confirm against the exchange spec.

    Args:
        symbol: Contract symbol laid out as ``SR3`` + month code + one year
            digit, e.g. ``"SR3H5"``. The character at index 3 is looked up in
            ``MONTH_MAP``; the character at index 4 is the last digit of the
            year.
        decade_start: First year of the decade the single year digit belongs
            to. The default of 2020 resolves ``"SR3H5"`` to March 2025.

    Returns:
        Timestamp (midnight) of the third Wednesday of the contract month.

    Raises:
        KeyError: If the month code is not a key of ``MONTH_MAP``.
        IndexError: If ``symbol`` has fewer than five characters.
        ValueError: If the year character is not a digit.
    """
    month = MONTH_MAP[symbol[3]]
    year = decade_start + int(symbol[4])

    first_of_month = pd.Timestamp(year=year, month=month, day=1)
    # weekday(): Monday=0 ... Wednesday=2. The modulo gives the number of days
    # from the 1st to the first Wednesday (0 when the 1st is a Wednesday).
    days_to_first_wed = (2 - first_of_month.weekday()) % 7
    # Two more weeks lands on the third Wednesday.
    return first_of_month + pd.Timedelta(days=days_to_first_wed + 14)


# One row per contract, ordered by symbol string.
contracts = sorted(df["symbol"].unique())
expiries = [sr3_expiry(symbol) for symbol in contracts]
meta = pd.DataFrame({"contract": contracts, "expiry": expiries})

# Last trade date is taken as 2 business days before the estimated expiry.
# NOTE(review): confirm this offset against the exchange's SR3 contract spec.
two_bdays = pd.tseries.offsets.BDay(2)
meta["last_trade_date"] = meta["expiry"] - two_bdays

print(f"meta: {len(meta)} contracts")
meta.head(10)

In [None]:
# Build panel: ts, contract, price
df["date"] = pd.to_datetime(df.index).normalize()
if df["date"].dt.tz is not None:
    df["date"] = df["date"].dt.tz_localize(None)

panel = df[["date", "symbol", "close"]].rename(
    columns={"date": "ts", "symbol": "contract", "close": "price"}
).reset_index(drop=True)

print(f"panel: {len(panel):,} rows")
print(f"Date range: {panel['ts'].min().date()} to {panel['ts'].max().date()}")
panel.head()

## 3. Build the rolling universe

In [None]:
n_positions = 20

# Span the universe over the full date range present in the panel, passed as
# ISO "YYYY-MM-DD" strings.
panel_dates = panel["ts"]
start = panel_dates.min().date().isoformat()
end = panel_dates.max().date().isoformat()

universe = build_rolling_universe(meta, start, end, n_positions=n_positions)
print(f"universe shape: {universe.shape}")
print(f"{start} to {end}")
universe.head(10)

In [None]:
# When does the front contract roll?
front = universe[1]
roll_dates = front[front != front.shift()].dropna()
print(f"{len(roll_dates)} front contract rolls:")
for d, c in roll_dates.items():
    print(f"  {d.date()}  -> {c}")

## 4. Build strip curve + holdings

In [None]:
# Roll configuration: "smoothstep" weighting with a 7-business-day roll window.
# NOTE(review): roll_end_offset_bdays=2 presumably ends the roll 2 business days
# ahead of a contract date held in `meta` — confirm against RollPolicy's docs.
roll_policy = RollPolicy(
    weight_fn="smoothstep",
    roll_window_bdays=7,
    roll_end_offset_bdays=2,
)

# Build the strip curve prices together with the holdings behind each position.
curve_px, holdings = build_strip_curve(
    panel, universe, meta, n_positions=n_positions, roll_policy=roll_policy
)

# Sanity check: shape and total count of missing prices across all positions.
print(f"curve_px shape: {curve_px.shape}")
print(f"NaN cells: {curve_px.isna().sum().sum()}")
curve_px.head(10)

## 5. Inspect roll blending around a real roll date

In [None]:
# Take the second entry of roll_dates (or the only one, if there is just one)
# and print the position-1 holdings on each business day around it.
second_entry = 1 if len(roll_dates) > 1 else len(roll_dates) - 1
roll_d = roll_dates.index[second_entry]

# Window: 10 business days before the roll date to 3 business days after it.
bday = pd.tseries.offsets.BDay
window = pd.date_range(start=roll_d - bday(10), end=roll_d + bday(3), freq="B")

print(f"Roll window around {roll_d.date()}\n")
for day in window:
    if day in holdings:
        pos1_holdings = holdings[day].get(1, {})
        parts = "  ".join(
            f"{contract}:{weight:.3f}" for contract, weight in pos1_holdings.items()
        )
        print(f"  {day.date()}  pos1: {parts}")

In [None]:
# Blend weights table for position 1
blend_data = []
for d in window:
    if d in holdings:
        h = holdings[d].get(1, {})
        blend_data.append({"date": d, **{f"w({c})": w for c, w in h.items()}})

blend_df = pd.DataFrame(blend_data).set_index("date").fillna(0)
blend_df

## 6. Position signal -> contract orders

In [None]:
# Use the last date on the curve as the example trade date.
example_date = curve_px.index[-1]
notional = 1_000_000.0

# Translate a negative target notional (presumably a short) at several curve
# positions into contract-level orders and print each result.
for pos in [1, 5, 10, 20]:
    orders = position_to_contract_orders(
        holdings, example_date, position=pos, target_notional=-notional
    )
    print(f"pos {pos:>2d}: {orders}")

print(f"\nDate: {example_date.date()}")

In [None]:
# Same conversion for position 5, requesting the dataclass form of the result
# via as_dataclasses=True, then printing each returned object.
order_objs = position_to_contract_orders(
    holdings, example_date, position=5, target_notional=-notional, as_dataclasses=True
)
for o in order_objs:
    print(o)

## 7. Visualise

In [None]:
# Strip curve time series
fig, ax = plt.subplots(figsize=(14, 5))
for pos in [1, 5, 10, 15, 20]:
    ax.plot(curve_px.index, curve_px[pos], label=f"pos {pos}", alpha=0.8)
ax.set_title("SR3 strip curve (real data, smoothstep roll)")
ax.set_ylabel("price")
ax.legend(ncol=5)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Term structure snapshots (evenly spaced)
n_snapshots = 6
snap_idx = np.linspace(0, len(curve_px) - 1, n_snapshots, dtype=int)

fig, ax = plt.subplots(figsize=(10, 5))
for i in snap_idx:
    d = curve_px.index[i]
    ax.plot(
        range(1, n_positions + 1),
        curve_px.iloc[i].values,
        marker="o", markersize=4, label=str(d.date()),
    )
ax.set_xlabel("position")
ax.set_ylabel("price")
ax.set_title("SR3 term structure snapshots")
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Returns heatmap
rets = curve_px.pct_change().dropna()

fig, ax = plt.subplots(figsize=(14, 5))
vbound = rets.values[np.isfinite(rets.values)]
vlim = np.percentile(np.abs(vbound), 99)
im = ax.pcolormesh(
    rets.index, rets.columns, rets.values.T,
    cmap="RdBu_r", vmin=-vlim, vmax=vlim, shading="auto",
)
fig.colorbar(im, ax=ax, label="daily return")
ax.set_ylabel("position")
ax.set_title("SR3 strip curve daily returns")
plt.tight_layout()
plt.show()

In [None]:
# Position return correlations
corr = rets.corr()

fig, ax = plt.subplots(figsize=(8, 7))
im = ax.imshow(corr.values, cmap="RdBu_r", vmin=corr.values.min(), vmax=1.0)
ax.set_xticks(range(n_positions))
ax.set_xticklabels(range(1, n_positions + 1))
ax.set_yticks(range(n_positions))
ax.set_yticklabels(range(1, n_positions + 1))
ax.set_title("Position return correlations")
fig.colorbar(im, ax=ax)
plt.tight_layout()
plt.show()

In [None]:
# Compare roll weight functions
u = np.linspace(0, 1, 200)
smoothstep = 3 * u**2 - 2 * u**3
linear = u
k = 10.0
logistic_raw = 1 / (1 + np.exp(-k * (u - 0.5)))
v0 = 1 / (1 + np.exp(-k * (-0.5)))
v1 = 1 / (1 + np.exp(-k * (0.5)))
logistic = np.clip((logistic_raw - v0) / (v1 - v0), 0, 1)

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(u, linear, label="linear")
ax.plot(u, smoothstep, label="smoothstep", linewidth=2)
ax.plot(u, logistic, label="logistic (k=10)", linestyle="--")
ax.set_xlabel("roll progress (u)")
ax.set_ylabel("weight on next contract")
ax.set_title("Roll weight functions")
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## Cleanup

Delete the downloaded parquet to save local space.

In [None]:
# Remove the local parquet copy if it is present; a later run will download it
# again because the load cell only fetches the file when it is missing.
if os.path.exists(PARQUET_FILE):
    os.remove(PARQUET_FILE)
    print(f"Deleted {PARQUET_FILE}")