# 🛰️ Deployment — Model & **Data Monitoring** (SageMaker)

**Purpose:** Enable *production-grade monitoring* for the model deployed in `deployment_example.ipynb`:
- Turn on **data capture** at the endpoint
- Create **Model Monitor** schedules (Data Quality; optional: Model Quality)
- Generate/attach **baselines** (statistics + constraints)
- Inspect recent executions & **constraint violations**
- Add/propagate **lineage tags** to the endpoint

> Works in **SageMaker Studio – Code Editor** or locally (with AWS creds). Uses the same endpoint/models (CatBoost/XGBoost) you deployed earlier.

## 🧰 Prerequisites
- You already ran your prior **deployment** notebook and have a live SageMaker **endpoint**.
- You packaged your model with a valid **inference contract** (root `inference.py` & `requirements.txt`).
- (Optional) You produced a **baseline dataset** CSV (header=True) from your training/validation data.

If you haven't enabled data capture yet, this notebook will **update the endpoint config** to enable it.

## ♻️ Reproducibility & Environment Capture

In [None]:
import os, sys, json, platform, random, hashlib
from datetime import datetime
import numpy as np
import pandas as pd

# Timezone-aware clock: datetime.utcnow() is deprecated since Python 3.12 and
# returns a naive value, so the UTC timestamp is taken from an aware datetime.
from datetime import timezone

# Fixed seed so any random sampling done with `random` / NumPy is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Run identity: a UTC timestamp plus a short hash of timestamp + seed.
RUN_TS = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
RUN_ID = hashlib.sha1(f"{RUN_TS}-{SEED}".encode()).hexdigest()[:10]

# Artifacts go to $ARTIFACT_DIR when set, otherwise to a per-run folder.
ARTIFACT_DIR = os.environ.get('ARTIFACT_DIR', f"artifacts/monitoring_run_{RUN_TS}_{RUN_ID}")
os.makedirs(ARTIFACT_DIR, exist_ok=True)

# Snapshot of the interpreter, platform and key package versions for this run.
env_info = {
    'python': sys.version,
    'platform': platform.platform(),
    'timestamp_utc': RUN_TS,
    'seed': SEED,
    'packages': {'pandas': pd.__version__, 'numpy': np.__version__}
}
with open(os.path.join(ARTIFACT_DIR, 'env_info.json'), 'w', encoding='utf-8') as f:
    json.dump(env_info, f, indent=2)
env_info

## ⚙️ Configuration
Single source of truth for endpoint, capture, and monitor settings. **Adjust names/paths as needed.**

In [None]:
from pathlib import Path

# Values read more than once while assembling the configuration.
_env_bucket = os.getenv('SM_DEFAULT_BUCKET', '')
_capture_day = datetime.utcnow().strftime('%Y%m%d')

# --- AWS account / session settings ---------------------------------------
_aws_cfg = {
    'region': os.getenv('AWS_REGION', 'eu-west-1'),
    'role': os.getenv('SM_EXECUTION_ROLE', ''),  # if empty, we'll resolve from sagemaker
    'default_bucket': os.getenv('SM_DEFAULT_BUCKET', ''),
}

# --- Endpoint, lineage tags and data capture -------------------------------
_lineage_tags = {
    'candidate_run_id': os.getenv('CANDIDATE_RUN_ID', ''),
    'data_version': os.getenv('DATA_VERSION', ''),
    'model_family': os.getenv('MODEL_FAMILY', ''),  # e.g., catboost|xgboost|sklearn
}
_data_capture_cfg = {
    'enable': True,
    'capture_percentage': 100,           # 0-100
    'kms_key_id': os.getenv('CAPTURE_KMS_KEY', ''),
    # Defaults to a dated folder under the bucket named by SM_DEFAULT_BUCKET.
    's3_prefix': os.getenv('CAPTURE_S3_PREFIX', f"s3://{_env_bucket}/data-capture/{_capture_day}"),
}
_deployment_cfg = {
    # Endpoint deployed previously (Champion or Candidate)
    'endpoint_name': os.getenv('ENDPOINT_NAME', 'mlops-demo-endpoint'),
    # Optional: tags propagated onto the endpoint
    'lineage_tags': _lineage_tags,
    'data_capture': _data_capture_cfg,
}

# --- Model Monitor settings --------------------------------------------------
_monitoring_cfg = {
    'enable': True,
    'instance_type': 'ml.m5.large',
    'volume_size_gb': 30,
    'max_runtime_seconds': 3600,
    'schedule_cron': 'cron(0 */4 * * ? *)',   # every 4 hours
    # Provide either a ready CSV (with header) or let this notebook export one from parquet
    'baseline_dataset_uri': os.getenv('BASELINE_DATASET_URI', ''),
    'fallback_parquet_uri': os.getenv('BASELINE_PARQUET_URI', ''),  # e.g., s3://.../processed/dataset.parquet
    'baseline_sample_rows': 25000,
    'problem_type': 'BinaryClassification',  # for ModelQuality monitor
    'ground_truth_s3_uri': os.getenv('GROUND_TRUTH_S3_URI', ''),    # CSV with ground-truth labels matching capture
}

CONFIG = {
    'aws': _aws_cfg,
    'deployment': _deployment_cfg,
    'monitoring': _monitoring_cfg,
}
CONFIG

## 🔌 AWS Session & Endpoint
Resolve session, role, and sanity-check that the endpoint exists.

In [None]:
import boto3, sagemaker
from sagemaker import Session
from sagemaker.session import production_variant
from botocore.exceptions import ClientError

# Resolve the SageMaker session and the clients shared by the remaining cells.
region = CONFIG['aws']['region']
sm_sess = sagemaker.Session()
sm_client = sm_sess.sagemaker_client
runtime_sm = sm_sess.sagemaker_runtime_client
# Explicit CONFIG values win; otherwise fall back to what the SDK resolves.
role = CONFIG['aws']['role'] or sagemaker.get_execution_role()
bucket = CONFIG['aws']['default_bucket'] or sm_sess.default_bucket()

endpoint_name = CONFIG['deployment']['endpoint_name']
# The session above is built with default settings, so its region can differ
# from the configured one; report the mismatch instead of printing a region
# that the API calls do not actually use.
session_region = sm_sess.boto_region_name
print('Region:', region)
if session_region != region:
    print(f"⚠️ CONFIG region '{region}' differs from the session region '{session_region}'; "
          f"API calls in this notebook go to '{session_region}'.")
print('Role  :', role)
print('Bucket:', bucket)
print('Endpoint:', endpoint_name)

# Validate endpoint exists; `desc` is reused later for the endpoint ARN.
try:
    desc = sm_client.describe_endpoint(EndpointName=endpoint_name)
    print('Endpoint status:', desc['EndpointStatus'])
except ClientError as e:
    # Chain the original error so the AWS error code stays visible in the traceback.
    raise RuntimeError(f"Endpoint '{endpoint_name}' not found or not accessible: {e}") from e

## 🎯 Add/Update Lineage Tags on Endpoint

In [None]:
# Only lineage entries that carry a value become tags; empty ones are dropped.
tags = []
for tag_key, tag_value in CONFIG['deployment']['lineage_tags'].items():
    if tag_value:
        tags.append({'Key': tag_key, 'Value': tag_value})

if not tags:
    print('No lineage tags provided; skipping.')
else:
    # `desc` is the describe_endpoint response obtained when the endpoint was validated.
    sm_client.add_tags(ResourceArn=desc['EndpointArn'], Tags=tags)
    applied = {entry['Key']: entry['Value'] for entry in tags}
    print('✅ Added/updated lineage tags on endpoint:', applied)

## 📡 Ensure **Data Capture** Is Enabled
If disabled, update the endpoint configuration to turn it on.

In [None]:
from sagemaker.model_monitor import DataCaptureConfig

cap_cfg = CONFIG['deployment']['data_capture']
capture_prefix = cap_cfg['s3_prefix']
if capture_prefix.startswith('s3:///'):
    # The default prefix is built from SM_DEFAULT_BUCKET; when that variable is
    # unset the bucket part is empty. Fill in the bucket resolved for this session.
    capture_prefix = f"s3://{bucket}/{capture_prefix[len('s3:///'):]}"
    cap_cfg['s3_prefix'] = capture_prefix  # later cells read the prefix from CONFIG

# Discover current config
ep_desc = sm_client.describe_endpoint(EndpointName=endpoint_name)
epc_name = ep_desc['EndpointConfigName']
epc_desc = sm_client.describe_endpoint_config(EndpointConfigName=epc_name)
# A DataCaptureConfig block can exist with capture switched off, so look at the flag itself.
already_enabled = bool(epc_desc.get('DataCaptureConfig', {}).get('EnableCapture'))

if already_enabled:
    print('ℹ️ Data capture already enabled on the endpoint config:', epc_name)
elif not cap_cfg['enable']:
    # Nothing to turn on: avoid a pointless endpoint update.
    print('Data capture is disabled in CONFIG; endpoint config left unchanged.')
else:
    print('Enabling data capture on a new endpoint config…')
    # Recreate endpoint config with capture
    variants = epc_desc['ProductionVariants']
    # Endpoint config names are limited to 63 characters; trim the base name if needed.
    name_suffix = f"-cap-{RUN_ID}"
    new_epc_name = f"{epc_name[:63 - len(name_suffix)]}{name_suffix}"

    data_capture = {
        'EnableCapture': True,
        'InitialSamplingPercentage': int(cap_cfg['capture_percentage']),
        'DestinationS3Uri': capture_prefix,
        'CaptureOptions': [{'CaptureMode': 'Input'}, {'CaptureMode': 'Output'}],
        'CaptureContentTypeHeader': {'CsvContentTypes': ['text/csv'], 'JsonContentTypes': ['application/json']}
    }
    # boto3 rejects None for string parameters, so the key is added only when configured.
    if cap_cfg['kms_key_id']:
        data_capture['KmsKeyId'] = cap_cfg['kms_key_id']

    create_kwargs = {
        'EndpointConfigName': new_epc_name,
        'ProductionVariants': variants,
        'DataCaptureConfig': data_capture,
    }
    # Carry over the storage-volume encryption key of the existing config, if any.
    if epc_desc.get('KmsKeyId'):
        create_kwargs['KmsKeyId'] = epc_desc['KmsKeyId']

    sm_client.create_endpoint_config(**create_kwargs)
    sm_client.update_endpoint(EndpointName=endpoint_name, EndpointConfigName=new_epc_name)
    print('⏳ Updating endpoint to new config with capture… (check console for progress)')

print('Capture S3 prefix:', capture_prefix)

## 🧱 Prepare / Locate a **Baseline Dataset**
Model Monitor needs a representative CSV with header for **Data Quality** baselines.
- If you already have one, set `CONFIG['monitoring']['baseline_dataset_uri']` to S3 path.
- Otherwise, we can **export a CSV** from a processed parquet (sample).

In [None]:
import s3fs

mon_cfg = CONFIG['monitoring']
baseline_uri = mon_cfg['baseline_dataset_uri']

if baseline_uri:
    # A ready-made CSV was configured; use it as is.
    print('Using provided baseline CSV:', baseline_uri)
elif mon_cfg['fallback_parquet_uri']:
    # No CSV configured: sample the processed parquet and export it as CSV.
    print('🔁 Building a baseline CSV from parquet sample…')
    df_parq = pd.read_parquet(mon_cfg['fallback_parquet_uri'])
    # Simple sample; consider stratified/time-based sampling for your case
    n = min(len(df_parq), mon_cfg['baseline_sample_rows'])
    df_sample = df_parq.sample(n=n, random_state=SEED)
    baseline_uri = f"s3://{bucket}/monitoring/baseline/{RUN_TS}/baseline.csv"
    df_sample.to_csv(baseline_uri, index=False)
    print('✅ Baseline CSV written to:', baseline_uri)
else:
    print('⚠️ No baseline provided and no parquet fallback set. You can still create a schedule without baselines, but it is recommended to provide them.')

## 📈 Create **Data Quality** Monitoring Schedule (DefaultModelMonitor)
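Starts a baseline-suggestion job (when a baseline dataset is available) and creates a recurring schedule. The baseline job runs asynchronously, so the schedule is created without statistics/constraints attached; point the schedule at the baseline outputs once the job has finished.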

In [None]:
from sagemaker.model_monitor import DefaultModelMonitor, DatasetFormat

dq_settings = CONFIG['monitoring']

if not (dq_settings['enable'] and CONFIG['deployment']['data_capture']['enable']):
    print('Monitoring disabled or data capture off.')
else:
    try:
        # Processing resources used by both the baseline job and the scheduled runs.
        mon = DefaultModelMonitor(
            role=role,
            instance_count=1,
            instance_type=dq_settings['instance_type'],
            volume_size_in_gb=dq_settings['volume_size_gb'],
            max_runtime_in_seconds=dq_settings['max_runtime_seconds'],
            sagemaker_session=sm_sess,
        )
        stats_s3, cons_s3 = None, None
        if baseline_uri:
            print('Suggesting baselines from:', baseline_uri)
            baseline_output = f"s3://{bucket}/monitoring/baseline/{RUN_TS}"
            # wait=False: the job is only started here, it is not awaited.
            baseline_job = mon.suggest_baseline(
                baseline_dataset=baseline_uri,
                dataset_format=DatasetFormat.csv(header=True),
                output_s3_uri=baseline_output,
                wait=False
            )
            print('Baseline suggestion started. Job name:', baseline_job.job_name)
            # We won't block on baseline job; schedule can be created without explicit stats/constraints
        monitored_endpoint = CONFIG['deployment']['endpoint_name']
        schedule_name = f"monitor-{monitored_endpoint}"
        mon.create_monitoring_schedule(
            monitor_schedule_name=schedule_name,
            endpoint_input=monitored_endpoint,
            statistics=None,   # could point to baseline output s3 uris once completed
            constraints=None,
            schedule_cron_expression=dq_settings['schedule_cron']
        )
        print('✅ Monitoring schedule created:', schedule_name)
    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print('⚠️ Model Monitor setup skipped/failed:', e)

## (Optional) 🧪 Model Quality Monitor
Requires **ground-truth labels** uploaded to S3. Only enable if you have a reliable feedback pipeline.

In [None]:
from sagemaker.model_monitor import ModelQualityMonitor

gt_uri = CONFIG['monitoring']['ground_truth_s3_uri']
if CONFIG['monitoring']['enable'] and gt_uri:
    try:
        # The monitor object only describes the processing resources; the problem
        # type belongs to the schedule (see create_monitoring_schedule below).
        mq = ModelQualityMonitor(
            role=role,
            instance_count=1,
            instance_type=CONFIG['monitoring']['instance_type'],
            volume_size_in_gb=CONFIG['monitoring']['volume_size_gb'],
            max_runtime_in_seconds=CONFIG['monitoring']['max_runtime_seconds'],
            sagemaker_session=sm_sess,
        )
        mq_schedule = f"model-quality-{CONFIG['deployment']['endpoint_name']}"
        # NOTE(review): depending on the endpoint's output format, Model Quality may
        # need an EndpointInput carrying inference/probability attributes instead of
        # the bare endpoint name — confirm against your inference contract.
        mq.create_monitoring_schedule(
            monitor_schedule_name=mq_schedule,
            endpoint_input=CONFIG['deployment']['endpoint_name'],
            ground_truth_input=gt_uri,
            problem_type=CONFIG['monitoring']['problem_type'],
            schedule_cron_expression=CONFIG['monitoring']['schedule_cron']
        )
        print('✅ ModelQuality schedule created:', mq_schedule)
    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print('⚠️ ModelQuality setup skipped/failed:', e)
else:
    print('ModelQuality disabled or ground truth not provided.')

## 🔍 Inspect Schedules & Latest Execution

In [None]:
def list_schedules(prefix: str):
    """Return the monitoring schedules whose name contains ``prefix`` as a DataFrame.

    The frame has one row per schedule summary returned by a single
    ``list_monitoring_schedules`` call and is empty when nothing matches.
    """
    response = sm_client.list_monitoring_schedules(NameContains=prefix)
    summaries = response.get('MonitoringScheduleSummaries', [])
    return pd.DataFrame(summaries)

# Schedules created for this endpoint by the Data Quality cell share this prefix.
schedule_prefix = f"monitor-{endpoint_name}"
sched_df = list_schedules(schedule_prefix)
sched_df

In [None]:
def latest_exec(sched_name: str):
    """Describe a monitoring schedule.

    Returns a ``(status, last_execution)`` pair: the schedule status and the
    last execution summary, which is ``None`` when the response has none.
    """
    described = sm_client.describe_monitoring_schedule(MonitoringScheduleName=sched_name)
    return described['MonitoringScheduleStatus'], described.get('LastMonitoringExecutionSummary')

if sched_df.empty:
    print('No schedules found (yet).')
else:
    # Inspect the first schedule in the listing.
    sname = sched_df.iloc[0]['MonitoringScheduleName']
    status, last = latest_exec(sname)
    print('Schedule:', sname, '| Status:', status)
    print('Last execution summary:')
    # default=str renders values json cannot serialise natively (e.g. datetimes).
    print(json.dumps(last if last else {}, indent=2, default=str))

## 🧾 Read Constraint Violations (if any)

In [None]:
import re, io, gzip
import botocore

def read_s3_json(s3_uri):
    """Fetch an S3 object and parse it as JSON, transparently handling gzip.

    Args:
        s3_uri: Object location in the form ``s3://bucket/key``.

    Returns:
        The decoded JSON value, or ``None`` when ``s3_uri`` is empty or not of
        the form ``s3://bucket/key``, or when the object body is not valid
        (optionally gzip-compressed) JSON.

    Errors raised by the S3 read itself are not caught here and propagate to
    the caller.
    """
    import zlib

    # Validate the URI first so malformed input never triggers an AWS call.
    m = re.match(r's3://([^/]+)/(.+)$', s3_uri or '')
    if not m:
        return None
    bucket_name, key = m.group(1), m.group(2)
    body = boto3.resource('s3').Object(bucket_name, key).get()['Body'].read()

    if body[:2] == b'\x1f\x8b':  # gzip magic number
        try:
            body = gzip.decompress(body)
        except (OSError, EOFError, zlib.error):
            return None
    try:
        return json.loads(body)
    except ValueError:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return None

def show_latest_violations(sched_name: str):
    """Print the output location and constraint violations of the latest execution.

    The last execution summary of a schedule references the processing job that
    ran it; the job's output configuration holds the S3 location of the report.
    When a ``constraint_violations.json`` file is readable there, each violation
    is printed.

    Args:
        sched_name: Name of the monitoring schedule to inspect.
    """
    res = sm_client.describe_monitoring_schedule(MonitoringScheduleName=sched_name)
    last = res.get('LastMonitoringExecutionSummary') or {}
    job_arn = last.get('ProcessingJobArn')
    if not job_arn:
        print('No execution outputs yet.')
        return

    # The job name is the last path segment of the processing-job ARN.
    job_name = job_arn.rsplit('/', 1)[-1]
    job = sm_client.describe_processing_job(ProcessingJobName=job_name)
    outputs = job.get('ProcessingOutputConfig', {}).get('Outputs', [])
    rep = next(
        (o['S3Output']['S3Uri'] for o in outputs if o.get('S3Output', {}).get('S3Uri')),
        None,
    )
    if not rep:
        print('No execution outputs yet.')
        return
    print('Execution status:', last.get('MonitoringExecutionStatus'))
    print('Execution output S3:', rep)

    try:
        report = read_s3_json(f"{rep.rstrip('/')}/constraint_violations.json")
    except ClientError as e:
        # Typically the file is simply absent (no violations, or the run failed).
        print('No readable constraint_violations.json at that location:', e)
        return

    violations = report.get('violations', []) if isinstance(report, dict) else []
    if not violations:
        print('No constraint violations reported.')
        return
    print(f'{len(violations)} constraint violation(s):')
    for v in violations:
        print(' -', v.get('feature_name'), '|', v.get('constraint_check_type'), '|', v.get('description'))

if not sched_df.empty:
    show_latest_violations(sched_df.iloc[0]['MonitoringScheduleName'])
else:
    print('No schedules to inspect.')

## 🧭 Peek at Captured Records (sample)

In [None]:
import s3fs, glob
from urllib.parse import urlparse

import gzip

cap_uri = CONFIG['deployment']['data_capture']['s3_prefix']
print('Capture prefix:', cap_uri)
fs = s3fs.S3FileSystem()
try:
    # Dedicated names: `bucket` is the default bucket used by earlier cells and
    # must not be overwritten with the capture bucket.
    parsed_cap = urlparse(cap_uri)
    cap_bucket, cap_key_prefix = parsed_cap.netloc, parsed_cap.path.strip('/')
    if not cap_bucket:
        raise ValueError(f"capture prefix '{cap_uri}' has no bucket")
    # This lists objects, but the exact path includes endpoint name/date; we glob broadly
    cap_root = f"{cap_bucket}/{cap_key_prefix}" if cap_key_prefix else cap_bucket
    objs = fs.glob(f"{cap_root}/**/*.jsonl*")
    print('Found captured files:', len(objs))
    if objs:
        with fs.open(objs[0], 'rb') as f:
            raw = f.read()
        try:
            txt = raw.decode('utf-8')
        except UnicodeDecodeError:
            # Not plain text: assume the file is gzip-compressed.
            txt = gzip.decompress(raw).decode('utf-8')
        print('--- SAMPLE CAPTURED RECORD ---')
        print('\n'.join(txt.splitlines()[:3]))
    else:
        print('No capture files yet. Send traffic to the endpoint and re-run.')
except Exception as e:
    # Best-effort peek: report the problem and let the notebook continue.
    print('Could not list/read capture files:', e)

## 🧹 (Optional) Clean Up
Helpers to delete schedules. **Do not run in production without a plan.**

In [None]:
def delete_schedule(name: str):
    """Delete the monitoring schedule ``name``.

    A ``ClientError`` from the API is reported with a printed message instead
    of being raised.
    """
    try:
        sm_client.delete_monitoring_schedule(MonitoringScheduleName=name)
    except ClientError as e:
        print('Skip/delete failed:', e)
    else:
        print('Deleted schedule:', name)

# Example:
# delete_schedule(f"monitor-{endpoint_name}")

## ✅ Summary
- Data capture **enabled** or verified
- Data Quality **schedule** created (and Model Quality optional)
- **Baselines** suggested (if dataset provided)
- **Lineage tags** attached to endpoint
- Utilities to **inspect executions** and **peek captures**

Next steps: wire **Model Monitor** baselines & constraints into your **validation gates**, configure **bias/explainability** monitors, and connect CloudWatch alarms to operational SLOs.


## 🧠 Bias & Explainability Monitors (SageMaker Clarify)

Set up **Model Bias** and **Model Explainability** monitors using SageMaker Clarify.
- **Bias monitor** compares baseline/captured data across selected facets (e.g., `gender`, `age_bucket`) and label.
- **Explainability monitor** computes **SHAP** at scheduled intervals for drift in feature attributions.

> Provide minimal config in `CONFIG['clarify']` or this section will skip with guidance.


In [None]:

# Clarify configuration block
def _csv_env(name, default=''):
    """Split a comma-separated environment variable into its non-empty items."""
    return [item for item in os.getenv(name, default).split(',') if item]

_label_header = os.getenv('LABEL_HEADER')

_clarify_defaults = {
    'enable_bias': True,
    'enable_explainability': True,
    # Label column name in *ground truth* for bias; for model quality/bias (post-training) you need ground truth.
    'label': os.getenv('LABEL_COL', ''),
    # Facet (protected) columns to analyze fairness across (must be present in baseline/capture/ground-truth)
    'facet_cols': _csv_env('FACET_COLS', 'sex,age_bucket'),
    # CSV headers for inference payloads (if capturing CSV)
    'headers': _csv_env('CLARIFY_HEADERS'),
    # Inference config (content-type etc.); adjust to your endpoint contract
    'predictor_config': {
        'content_type': os.getenv('PRED_CONTENT_TYPE', 'text/csv'),  # or 'application/json'
        'accept_type': os.getenv('PRED_ACCEPT', 'application/json'),
        'probability_attribute': os.getenv('PRED_PROBA_ATTR', ''),   # e.g., 'probabilities' for JSON
        'label_headers': [_label_header] if _label_header else [],
    },
    # SHAP baseline sample size
    'explainability': {
        'shap_baseline_rows': int(os.getenv('SHAP_BASELINE_ROWS', '200')),
        'seed': SEED,
    },
}

# setdefault keeps a 'clarify' section that is already present in CONFIG.
CONFIG.setdefault('clarify', _clarify_defaults)
CONFIG['clarify']



### Create Model **Bias** Monitoring Schedule

Requires:
- `CONFIG['monitoring']['baseline_dataset_uri']` (CSV w/ header) **and/or** a capture dataset with ground-truth mapping
- `CONFIG['clarify']['label']` and `facet_cols`
- Ground truth CSV in `CONFIG['monitoring']['ground_truth_s3_uri']`


In [None]:

from sagemaker.model_monitor import ModelBiasMonitor

clar = CONFIG['clarify']
gt_uri = CONFIG['monitoring']['ground_truth_s3_uri']
bias_sched_name = f"bias-{CONFIG['deployment']['endpoint_name']}"

# A bias schedule needs ground truth, the label column and at least one facet column.
bias_ready = (
    CONFIG['monitoring']['enable']
    and clar.get('enable_bias')
    and gt_uri
    and clar.get('label')
    and clar.get('facet_cols')
)

if bias_ready:
    try:
        from sagemaker.clarify import BiasConfig
        from sagemaker.model_monitor import BiasAnalysisConfig

        bias_mon = ModelBiasMonitor(
            role=role,
            instance_count=1,
            instance_type=CONFIG['monitoring']['instance_type'],
            volume_size_in_gb=CONFIG['monitoring']['volume_size_gb'],
            max_runtime_in_seconds=CONFIG['monitoring']['max_runtime_seconds'],
            sagemaker_session=sm_sess
        )
        # Configure bias (post-training bias with ground truth)
        facets = clar['facet_cols']
        bias_cfg = BiasConfig(
            label_values_or_threshold=[1],           # adjust for your positive class
            facet_name=facets[0] if len(facets) == 1 else facets,
            facet_values_or_threshold=None,
        )
        # The schedule takes a single analysis_config object; the separate
        # bias/model/predicted-label arguments belong to baselining, not scheduling.
        # NOTE(review): `headers` presumably has to list the label column as well
        # as the features — confirm against your capture layout.
        analysis_cfg = BiasAnalysisConfig(
            bias_cfg,
            headers=clar.get('headers') or None,
            label=clar['label'],
        )
        bias_mon.create_monitoring_schedule(
            monitor_schedule_name=bias_sched_name,
            analysis_config=analysis_cfg,
            endpoint_input=CONFIG['deployment']['endpoint_name'],
            ground_truth_input=gt_uri,
            schedule_cron_expression=CONFIG['monitoring']['schedule_cron']
        )
        print("✅ Bias monitoring schedule created:", bias_sched_name)
    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print("⚠️ Bias monitor setup skipped/failed:", e)
else:
    print("Bias monitor disabled or missing label/facet columns/ground truth.")



### Create Model **Explainability** (SHAP) Monitoring Schedule

Computes SHAP values on a sample of captured requests to track changes in feature importance patterns.


In [None]:

from sagemaker.model_monitor import ModelExplainabilityMonitor
from sagemaker.clarify import SHAPConfig, ModelConfig

clar = CONFIG['clarify']
expl_sched_name = f"explain-{CONFIG['deployment']['endpoint_name']}"

if CONFIG['monitoring']['enable'] and clar.get('enable_explainability'):
    try:
        from sagemaker.model_monitor import ExplainabilityAnalysisConfig

        monitored_endpoint = CONFIG['deployment']['endpoint_name']
        # ModelConfig needs the name of the SageMaker model; take it from the
        # first production variant of the endpoint's current configuration.
        expl_epc_name = sm_client.describe_endpoint(EndpointName=monitored_endpoint)['EndpointConfigName']
        expl_variants = sm_client.describe_endpoint_config(EndpointConfigName=expl_epc_name)['ProductionVariants']
        expl_model_name = expl_variants[0]['ModelName']

        shap_cfg = SHAPConfig(
            baseline=None,  # no explicit SHAP baseline supplied; Clarify derives one
            num_samples=clar['explainability']['shap_baseline_rows'],
            agg_method='mean_abs',  # default aggregation
            use_logit=False,
            seed=clar['explainability']['seed']
        )
        model_cfg = ModelConfig(
            model_name=expl_model_name,
            instance_type=CONFIG['monitoring']['instance_type'],
            instance_count=1,
            accept_type=clar['predictor_config']['accept_type'],
            content_type=clar['predictor_config']['content_type'],
        )
        # The schedule takes a single analysis_config object that bundles the
        # SHAP settings with the model description.
        analysis_cfg = ExplainabilityAnalysisConfig(
            explainability_config=shap_cfg,
            model_config=model_cfg,
            headers=clar.get('headers') or None,
            label_headers=clar['predictor_config'].get('label_headers') or None,
        )
        exp_mon = ModelExplainabilityMonitor(
            role=role,
            instance_count=1,
            instance_type=CONFIG['monitoring']['instance_type'],
            volume_size_in_gb=CONFIG['monitoring']['volume_size_gb'],
            max_runtime_in_seconds=CONFIG['monitoring']['max_runtime_seconds'],
            sagemaker_session=sm_sess
        )
        exp_mon.create_monitoring_schedule(
            monitor_schedule_name=expl_sched_name,
            analysis_config=analysis_cfg,
            endpoint_input=monitored_endpoint,
            schedule_cron_expression=CONFIG['monitoring']['schedule_cron']
        )
        print("✅ Explainability monitoring schedule created:", expl_sched_name)
    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print("⚠️ Explainability monitor setup skipped/failed:", e)
else:
    print("Explainability monitor disabled.")



## 📊 Time‑based Drift: **Temporal Distribution** Checks (from Baseline or Capture)

If your **training (baseline) data** contain a timestamp column, we compare the distributions of its numeric features against recently **captured** requests using **PSI** and the **KS** statistic. Results are saved to artifacts.


In [None]:

import re, json, gzip
import numpy as np
import pandas as pd
from scipy.stats import ks_2samp

def compute_psi(expected: np.ndarray, actual: np.ndarray, buckets=10) -> float:
    """Population Stability Index of ``actual`` relative to ``expected``.

    NaNs are dropped from both arrays. Bin edges are the distinct quantiles of
    ``expected`` (``buckets`` equal-probability cuts), and both samples are
    counted into those bins. Bin shares are floored at 1e-6 so the logarithm
    stays finite.

    Returns:
        The PSI as a float, or NaN when either sample has fewer than 10
        non-NaN values.
    """
    ref = expected[~np.isnan(expected)]
    cur = actual[~np.isnan(actual)]
    if min(len(ref), len(cur)) < 10:
        return np.nan

    # quantile bins off expected
    edges = np.unique(np.quantile(ref, np.linspace(0, 1, buckets + 1)))
    inner_edges = edges[1:-1]
    n_bins = len(edges) - 1

    def _bin_shares(values):
        # side='right' puts a value equal to an edge into the upper bin.
        idx = np.searchsorted(inner_edges, values, side='right')
        counts = np.bincount(idx, minlength=n_bins).astype(float)
        return np.maximum(counts / counts.sum(), 1e-6)

    ref_share = _bin_shares(ref)
    cur_share = _bin_shares(cur)
    return float(np.sum((cur_share - ref_share) * np.log(cur_share / ref_share)))

def coerce_timestamp(s: pd.Series):
    """Return ``s`` as a datetime Series, turning unparseable values into NaT.

    Args:
        s: Series holding timestamps, either already datetime-typed or in a
            form ``pd.to_datetime`` can parse.

    Returns:
        ``s`` itself when it already has a datetime dtype; otherwise a new
        Series with the same index where values that cannot be parsed are NaT.
        If the conversion fails as a whole, an all-NaT Series on the same index
        is returned.
    """
    if pd.api.types.is_datetime64_any_dtype(s):
        return s
    try:
        return pd.to_datetime(s, errors='coerce')
    except Exception:
        # Best-effort helper: keep the index so the result still aligns with `s`.
        return pd.Series(pd.NaT, index=s.index, dtype='datetime64[ns]')

def load_baseline_df(uri: str) -> pd.DataFrame:
    """Load the baseline dataset behind ``uri``.

    URIs ending in ``.csv`` are read as CSV and URIs containing ``.parquet``
    are read as parquet. An empty URI or any other extension yields an empty
    DataFrame.
    """
    if uri and uri.endswith('.csv'):
        return pd.read_csv(uri)
    if uri and '.parquet' in uri:
        return pd.read_parquet(uri)
    return pd.DataFrame()

from io import StringIO


def _capture_request_frame(rec, headers):
    """Turn one data-capture record into a DataFrame of request features.

    The payload is read from ``captureData.endpointInput.data``; for other
    layouts the ``request.body`` field is used as a fallback. JSON payloads are
    flattened with ``pd.json_normalize``; CSV payloads are parsed without a
    header row and named after ``headers``. Returns ``None`` when the record
    carries no usable payload.
    """
    endpoint_input = rec.get('captureData', {}).get('endpointInput', {})
    payload = endpoint_input.get('data')
    encoding = str(endpoint_input.get('encoding') or '').upper()
    if payload is None:
        payload = rec.get('request', {}).get('body', '')

    try:
        if isinstance(payload, (dict, list)):
            return pd.json_normalize(payload)
        if not isinstance(payload, str) or not payload.strip() or encoding == 'BASE64':
            return None
        if encoding == 'JSON' or payload.lstrip()[:1] in ('{', '['):
            return pd.json_normalize(json.loads(payload))
        if headers and ',' in payload:
            rows = pd.read_csv(StringIO(payload), header=None)
            keep = min(len(headers), rows.shape[1])
            rows = rows.iloc[:, :keep]
            rows.columns = list(headers[:keep])
            return rows
    except (ValueError, TypeError, AttributeError, NotImplementedError):
        # Payload does not match the expected contract; skip this record.
        return None
    return None


# 1) Load baseline and infer timestamp column
# A dedicated name keeps the `baseline_uri` used by the Data Quality cell intact.
drift_baseline_uri = CONFIG['monitoring'].get('baseline_dataset_uri') or CONFIG['monitoring'].get('fallback_parquet_uri')
df_base = load_baseline_df(drift_baseline_uri)
ts_col = None
if not df_base.empty:
    for c in df_base.columns:
        # First column that is datetime-typed or whose name mentions time/date.
        if pd.api.types.is_datetime64_any_dtype(df_base[c]) or re.search(r'(time|date)', str(c), re.I):
            ts_col = c
            break

if ts_col is not None:
    df_base[ts_col] = pd.to_datetime(df_base[ts_col], errors='coerce')
    base_recent = df_base.dropna(subset=[ts_col]).copy()
    # Choose a few numeric columns for drift checks
    num_cols = [c for c in base_recent.columns if c != ts_col and pd.api.types.is_numeric_dtype(base_recent[c])]
    num_cols = num_cols[:10]  # cap for brevity
    print('Timestamp column:', ts_col, '| Features checked:', num_cols)

    # 2) Load a small sample from data capture to compare
    # Dedicated names: `bucket` is the default bucket used by earlier cells.
    cap_location = CONFIG['deployment']['data_capture']['s3_prefix'].replace('s3://', '', 1)
    cap_bucket, _, cap_key_prefix = cap_location.partition('/')
    cap_key_prefix = cap_key_prefix.strip('/')
    capture_headers = CONFIG.get('clarify', {}).get('headers', [])

    frames = []
    if cap_bucket:
        import s3fs
        fs = s3fs.S3FileSystem()
        cap_root = f"{cap_bucket}/{cap_key_prefix}" if cap_key_prefix else cap_bucket
        # Sorting the listing and keeping its tail selects the last few files in path order.
        keys = sorted(fs.glob(f"{cap_root}/**/*.jsonl*"))[-5:]
        for k in keys:
            with fs.open(k, 'rb') as f:
                raw = f.read()
            try:
                txt = raw.decode('utf-8')
            except UnicodeDecodeError:
                # Not plain text: assume the file is gzip-compressed.
                txt = gzip.decompress(raw).decode('utf-8')
            for line in txt.splitlines():
                if not line.strip():
                    continue
                frame = _capture_request_frame(json.loads(line), capture_headers)
                if frame is not None and not frame.empty:
                    frames.append(frame)
    else:
        print('⚠️ Capture prefix has no bucket; skipping capture sample.')
    df_cap = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if not df_cap.empty and ts_col in df_cap.columns:
        df_cap[ts_col] = pd.to_datetime(df_cap[ts_col], errors='coerce')

    # 3) Drift between baseline and captured values, per common numeric feature
    results = []
    if not base_recent.empty and not df_cap.empty:
        # align columns
        common = [c for c in num_cols if c in df_cap.columns]
        for c in common:
            # Non-numeric captured values become NaN instead of aborting the cell.
            base_vals = pd.to_numeric(base_recent[c], errors='coerce').to_numpy(dtype=float, na_value=np.nan)
            cap_vals = pd.to_numeric(df_cap[c], errors='coerce').to_numpy(dtype=float, na_value=np.nan)
            base_clean = base_vals[~np.isnan(base_vals)]
            cap_clean = cap_vals[~np.isnan(cap_vals)]
            if base_clean.size == 0 or cap_clean.size == 0:
                continue  # ks_2samp cannot handle an empty sample
            psi = compute_psi(base_vals, cap_vals, buckets=10)
            ks = ks_2samp(base_clean, cap_clean).statistic
            results.append({'feature': c, 'psi': psi, 'ks_stat': float(ks)})
    drift_df = pd.DataFrame(results)
    out = Path(ARTIFACT_DIR) / 'temporal_drift_summary.csv'
    if not drift_df.empty:
        drift_df.to_csv(out, index=False)
        print('✅ Temporal drift summary saved to:', out)
    else:
        print('ℹ️ Temporal drift summary not computed (insufficient capture or no common columns).')
else:
    print('No timestamp column detected in baseline; temporal drift check skipped.')
