## SkewSentry Tutorial Notebook

This notebook shows how to use SkewSentry to check training ↔ serving feature parity end-to-end using the programmatic API.

What you'll do:
- Create a small example dataset
- Load a FeatureSpec from YAML
- Use PythonFunctionAdapter for offline/online feature functions
- Run the parity check and view text/HTML/JSON reports

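Prerequisites:
- You have this repo installed in editable mode (e.g., `uv pip install -e ".[dev]"`).
- The notebook's working directory is the one that contains `examples/`, because the paths used below (`examples/simple`, `artifacts/`) are relative.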


In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

from skewsentry.adapters.python_func import PythonFunctionAdapter
from skewsentry.runner import run_check
from skewsentry.spec import FeatureSpec

# --- Configuration ---------------------------------------------------------
# Relative paths: the notebook is expected to run from the directory that
# contains `examples/`.
ROOT = Path('examples/simple')
SPEC_PATH = ROOT / 'features.yml'

# Adapter targets handed to PythonFunctionAdapter in a later cell.
# NOTE(review): the strings look like "module:function" references to the
# example modules under ROOT -- confirm against the adapter's documentation.
OFFLINE = 'offline_features:build_features'
ONLINE = 'online_features:get_features'

# Seed NumPy's global RNG early. The dataset below is hard-coded, so the seed
# only matters for code run later that draws from np.random.
np.random.seed(0)

# --- Example dataset -------------------------------------------------------
# Seven rows across three users; each row carries a timestamp, a price,
# a quantity and a country code.
df = pd.DataFrame({
    'user_id': [1, 1, 2, 2, 3, 3, 3],
    'ts': pd.to_datetime([
        '2024-01-01', '2024-01-02',
        '2024-01-01', '2024-01-03',
        '2024-01-01', '2024-01-02', '2024-01-03',
    ]),
    'price': [10, 10, 5, 5, 1, 1, 1],
    'qty': [1, 2, 2, 2, 1, 1, 1],
    'country': ['UK', 'UK', 'US', 'US', 'DE', 'DE', 'DE'],
})

# Last expression of the cell: rich preview of the first rows.
df.head()


In [None]:
# Load the feature spec from its YAML file.
spec = FeatureSpec.from_yaml(str(SPEC_PATH))

# Make the example modules importable in this notebook session.
# The absolute path keeps the entry valid if the working directory changes
# later, and the membership check stops re-runs of this cell from stacking
# duplicate entries on sys.path.
example_dir = str(ROOT.resolve())
if example_dir not in sys.path:
    sys.path.insert(0, example_dir)

# Wrap the offline (training) and online (serving) feature functions.
offline = PythonFunctionAdapter(OFFLINE)
online = PythonFunctionAdapter(ONLINE)

# Create the report directory up front so writing the HTML/JSON outputs cannot
# fail on a missing folder; this is a no-op if the directory already exists.
Path('artifacts').mkdir(parents=True, exist_ok=True)

# Run the parity check on the example frame and write both report files.
report = run_check(
    spec=spec,
    data=df,
    offline=offline,
    online=online,
    sample=None,  # NOTE(review): presumably "use every row, no sub-sampling" -- confirm
    html_out='artifacts/parity_report.html',
    json_out='artifacts/parity_report.json',
)

# Cell output: the report's `ok` attribute and the 'failing_features' entry
# of its summary.
report.ok, report.summary['failing_features']


In [None]:
# Render the report as plain text and print it for a quick read
text_summary = report.to_text()
print(text_summary)
