# 📡 Monitoring & Explainability — **Exercise Notebook** (SageMaker)

**Goal:** Configure **data capture**, **data quality**, **bias**, **explainability (SHAP)**, and **temporal drift** checks for a deployed endpoint.

> Fill in every `# <- TODO ✏️` to customize this notebook for your endpoint and datasets.


## 🧰 Prereqs (run once per kernel if needed)
- SageMaker Studio **Code Editor** kernel (Python 3.9+)
- IAM role with permissions for: SageMaker, S3, CloudWatch Logs, Model Monitor
- Deployed endpoint from your `deployment_example.ipynb`

Uncomment if packages are missing:
```python
# %pip install sagemaker boto3 awswrangler s3fs pandas pyarrow numpy scipy matplotlib
```


## ♻️ Reproducibility & Artifact Folder


In [None]:
import os, sys, json, platform, random, hashlib
from datetime import datetime
import numpy as np
import pandas as pd

# Seed both the stdlib and NumPy generators with the same fixed value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Run identifiers; together they name the default artifact folder.
RUN_TS = "20251016T225937Z"
RUN_ID = "286bf1016f"

# The ARTIFACT_DIR environment variable, when set, overrides the default folder.
_default_artifact_dir = f"artifacts/monitoring_run_{RUN_TS}_{RUN_ID}"
ARTIFACT_DIR = os.environ.get("ARTIFACT_DIR", _default_artifact_dir)
os.makedirs(ARTIFACT_DIR, exist_ok=True)

# Snapshot of the interpreter/platform, stored next to the other artifacts.
env_info = {
    "python": sys.version,
    "platform": platform.platform(),
    "timestamp_utc": RUN_TS,
    "seed": SEED,
}
_env_info_path = os.path.join(ARTIFACT_DIR, "env_info.json")
with open(_env_info_path, "w") as fh:
    fh.write(json.dumps(env_info, indent=2))

ARTIFACT_DIR


## ⚙️ Configuration (edit in ONE place)


In [None]:
from pathlib import Path

def _env(name, default=""):
    """Return environment variable *name*, or *default* when it is not set."""
    return os.environ.get(name, default)


def _env_list(name, default=""):
    """Split comma-separated environment variable *name*, dropping empty items."""
    return [item for item in _env(name, default).split(",") if item]


# --- Deployment: the endpoint to monitor and how its traffic is captured ---
_data_capture = {
    "enable": True,
    "sampling_percentage": 50,  # <- TODO ✏️ 1-100
    "s3_prefix": _env("CAPTURE_PREFIX", "s3://your-bucket/data-capture/your-endpoint/"),  # <- TODO ✏️
    "content_type": "text/csv",   # <- TODO ✏️ match your inference contract ('application/json' or 'text/csv')
    "kms_key_id": None,
}
_deployment = {
    "endpoint_name": _env("ENDPOINT_NAME", "your-endpoint-name"),  # <- TODO ✏️ set existing endpoint
    "data_capture": _data_capture,
}

# --- Monitoring: compute resources, schedule and input datasets ---
_monitoring = {
    "enable": True,
    "instance_type": "ml.m5.large",     # <- TODO ✏️ adjust instance size for monitors
    "volume_size_gb": 20,
    "max_runtime_seconds": 3600,
    "schedule_cron": "cron(0 2 * * ? *)",  # <- TODO ✏️ daily at 02:00 UTC; change as needed
    "baseline_dataset_uri": _env("BASELINE_URI"),  # <- TODO ✏️ CSV with header (recommended)
    "fallback_parquet_uri": _env("FALLBACK_PARQUET"),  # optional parquet for baseline build
    "ground_truth_s3_uri": _env("GROUND_TRUTH_URI"),  # <- TODO ✏️ required for bias/model quality
}

# --- Clarify: bias and explainability settings ---
_label_header = _env("LABEL_HEADER")
_predictor_config = {
    "content_type": _env("PRED_CONTENT_TYPE", "text/csv"),
    "accept_type": _env("PRED_ACCEPT", "application/json"),
    "probability_attribute": _env("PRED_PROBA_ATTR"),  # e.g., 'probabilities' (JSON)
    "label_headers": [_label_header] if _label_header else [],
}
_clarify = {
    "enable_bias": True,                # <- TODO ✏️ if you have ground truth
    "enable_explainability": True,
    "label": _env("LABEL_COL"),  # <- TODO ✏️ target/label column in ground-truth
    "facet_cols": _env_list("FACET_COLS", "sex,age_bucket"),  # <- TODO ✏️ protected attrs
    "headers": _env_list("CLARIFY_HEADERS"),  # <- TODO ✏️ request header names for CSV capture
    "predictor_config": _predictor_config,
    "explainability": {
        "shap_baseline_rows": int(_env("SHAP_BASELINE_ROWS", "200")),  # <- TODO ✏️
        "seed": 42,
    },
    "positive_class": 1,  # <- TODO ✏️ set positive class value for bias metrics
}

# Single configuration object read by every later cell.
CONFIG = {
    "deployment": _deployment,
    "monitoring": _monitoring,
    "clarify": _clarify,
}

CONFIG


## 🔌 AWS & SageMaker Session


In [None]:
import boto3, sagemaker
from sagemaker import get_execution_role

# One SageMaker session is created here and reused by the cells below.
sm_sess = sagemaker.Session()
region = boto3.Session().region_name

# If the execution role cannot be resolved (any exception), fall back to the
# SAGEMAKER_ROLE_ARN environment variable, or an empty string when unset.
try:
    role = get_execution_role()
except Exception:
    role = os.environ.get("SAGEMAKER_ROLE_ARN", "")

print(f"Region: {region}")
print(f"Role: {role or '<unset>'}")


## 🧲 Data Capture — Enable/Update on Endpoint


In [None]:
from sagemaker.session import ProductionVariant
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.predictor import Predictor

# Endpoint and capture settings come from the single CONFIG object; `ep` and
# `cap_cfg` are reused by the monitoring cells further down.
ep = CONFIG["deployment"]["endpoint_name"]
cap_cfg = CONFIG["deployment"]["data_capture"]
print("Endpoint:", ep)

if cap_cfg["enable"]:
    try:
        capture_content_type = cap_cfg["content_type"]
        dcc = DataCaptureConfig(
            enable_capture=True,
            sampling_percentage=cap_cfg["sampling_percentage"],
            destination_s3_uri=cap_cfg["s3_prefix"],
            kms_key_id=cap_cfg["kms_key_id"],
            capture_options=["REQUEST", "RESPONSE"],
            # Only the configured content type is declared, so the capture
            # encoding matches the endpoint's inference contract.
            csv_content_types=["text/csv"] if capture_content_type == "text/csv" else None,
            json_content_types=["application/json"] if capture_content_type == "application/json" else None,
        )
        # The boto3 SageMaker client has no `update_endpoint_data_capture`
        # operation. The SDK-supported way to change capture settings on a live
        # endpoint is Predictor.update_data_capture_config, which applies `dcc`
        # to the endpoint.
        Predictor(endpoint_name=ep, sagemaker_session=sm_sess).update_data_capture_config(
            data_capture_config=dcc
        )
        print("✅ Data capture updated for:", ep)
        print("   Destination:", cap_cfg["s3_prefix"])
    except Exception as e:
        # Best-effort notebook cell: report the failure and let later cells run.
        print("⚠️ Could not update data capture:", e)
else:
    print("Data capture disabled in CONFIG.")


## 🔎 Data Quality Monitor (DefaultModelMonitor)


In [None]:
from sagemaker.model_monitor import DefaultModelMonitor, DatasetFormat

# Create a scheduled data-quality monitor for the endpoint. When a baseline
# dataset is configured, its statistics/constraints are computed first and
# attached to the schedule so captured traffic is checked against them.
if CONFIG["monitoring"]["enable"] and cap_cfg["enable"]:
    try:
        mon = DefaultModelMonitor(
            role=role,
            instance_count=1,
            instance_type=CONFIG["monitoring"]["instance_type"],
            volume_size_in_gb=CONFIG["monitoring"]["volume_size_gb"],
            max_runtime_in_seconds=CONFIG["monitoring"]["max_runtime_seconds"],
            sagemaker_session=sm_sess
        )
        baseline = CONFIG["monitoring"]["baseline_dataset_uri"]
        baseline_statistics = None
        baseline_constraints = None
        if baseline:
            print("ℹ️ Baseline suggestion started:", baseline)
            # Wait for the baselining job: its outputs are needed right below.
            # With wait=False they would not exist yet and the schedule would
            # be created without any constraints to evaluate.
            mon.suggest_baseline(
                baseline_dataset=baseline,
                dataset_format=DatasetFormat.csv(header=True),
                output_s3_uri=f"s3://{sm_sess.default_bucket()}/monitoring/baseline/{RUN_TS}",
                wait=True,
                logs=False
            )
            baseline_statistics = mon.baseline_statistics()
            baseline_constraints = mon.suggested_constraints()
        schedule_name = f"data-quality-{ep}"
        mon.create_monitoring_schedule(
            monitor_schedule_name=schedule_name,
            endpoint_input=ep,
            statistics=baseline_statistics,    # None when no baseline dataset is configured
            constraints=baseline_constraints,  # None when no baseline dataset is configured
            schedule_cron_expression=CONFIG["monitoring"]["schedule_cron"]
        )
        print("✅ Data Quality monitoring schedule created:", schedule_name)
    except Exception as e:
        # Best-effort notebook cell: report the failure and let later cells run.
        print("⚠️ Data Quality monitor setup skipped/failed:", e)
else:
    print("Monitoring disabled or data capture off.")


## ⚖️ Bias Monitor (Clarify)


In [None]:
from sagemaker.model_monitor import ModelBiasMonitor
from sagemaker.clarify import BiasConfig, ModelPredictedLabelConfig, ModelConfig

# Classes needed to describe a *scheduled* bias analysis; imported here so the
# cell stays self-contained.
from sagemaker.model_monitor import BiasAnalysisConfig, EndpointInput

clar = CONFIG["clarify"]
gt_uri = CONFIG["monitoring"]["ground_truth_s3_uri"]
bias_sched_name = f"bias-{ep}"

if CONFIG["monitoring"]["enable"] and clar.get("enable_bias") and gt_uri and clar.get("label"):
    try:
        facets = clar["facet_cols"]
        if not facets:
            # BiasConfig needs at least one facet; fail with a clear message.
            raise ValueError("CONFIG['clarify']['facet_cols'] is empty; set at least one facet column")

        bias_mon = ModelBiasMonitor(
            role=role,
            instance_count=1,
            instance_type=CONFIG["monitoring"]["instance_type"],
            volume_size_in_gb=CONFIG["monitoring"]["volume_size_gb"],
            max_runtime_in_seconds=CONFIG["monitoring"]["max_runtime_seconds"],
            sagemaker_session=sm_sess
        )

        bias_cfg = BiasConfig(
            label_values_or_threshold=[clar.get("positive_class", 1)],  # <- TODO ✏️ set your positive class if not 1
            # Analyze every configured facet, not only the first one.
            facet_name=facets if len(facets) > 1 else facets[0],
            facet_values_or_threshold=None,
        )
        # create_monitoring_schedule takes the bias settings wrapped in an
        # analysis config; it has no bias_config/model_config keyword arguments.
        analysis_cfg = BiasAnalysisConfig(
            bias_cfg,
            headers=clar["headers"] or None,
            label=clar["label"],
        )
        # The endpoint input says which captured window to merge with ground
        # truth and where the prediction lives in the captured output.
        # NOTE(review): the default window assumes the daily cron from CONFIG;
        # override via optional CONFIG['clarify'] keys if the cadence changes.
        endpoint_in = EndpointInput(
            endpoint_name=ep,
            destination="/opt/ml/processing/input/endpoint",
            start_time_offset=clar.get("start_time_offset", "-P1D"),
            end_time_offset=clar.get("end_time_offset", "-PT0H"),
            inference_attribute=clar["predictor_config"].get("inference_attribute") or None,
            probability_attribute=clar["predictor_config"].get("probability_attribute") or None,
        )
        bias_mon.create_monitoring_schedule(
            monitor_schedule_name=bias_sched_name,
            endpoint_input=endpoint_in,
            ground_truth_input=gt_uri,
            analysis_config=analysis_cfg,
            schedule_cron_expression=CONFIG["monitoring"]["schedule_cron"]
        )
        print("✅ Bias monitoring schedule created:", bias_sched_name)
        print("   Ground truth:", gt_uri)
        print("   Facets:", clar["facet_cols"])
    except Exception as e:
        # Best-effort notebook cell: report the failure and let later cells run.
        print("⚠️ Bias monitor setup skipped/failed:", e)
else:
    print("Bias monitor disabled or missing label/ground truth.  # <- TODO ✏️ set CONFIG['clarify'] and ground truth")


## 🧩 Explainability Monitor (SHAP)


In [None]:
from sagemaker.model_monitor import ModelExplainabilityMonitor
from sagemaker.clarify import SHAPConfig

# Imported here so the cell does not depend on the import line of another cell.
from sagemaker.clarify import ModelConfig
from sagemaker.model_monitor import ExplainabilityAnalysisConfig

expl_sched_name = f"explain-{ep}"

if CONFIG["monitoring"]["enable"] and clar.get("enable_explainability"):
    try:
        shap_cfg = SHAPConfig(
            baseline=None,
            num_samples=clar["explainability"]["shap_baseline_rows"],
            agg_method="mean_abs",
            use_logit=False,
            seed=clar["explainability"]["seed"]
        )

        # ModelConfig needs the name of the model to explain. Resolve it from
        # the endpoint's current configuration instead of passing None.
        sm_api = sm_sess.sagemaker_client
        endpoint_config_name = sm_api.describe_endpoint(EndpointName=ep)["EndpointConfigName"]
        variants = sm_api.describe_endpoint_config(
            EndpointConfigName=endpoint_config_name
        )["ProductionVariants"]
        model_name = variants[0].get("ModelName") if variants else None
        if not model_name:
            raise ValueError(f"Could not resolve a model name for endpoint {ep!r}")

        model_cfg = ModelConfig(
            model_name=model_name,
            instance_type=CONFIG["monitoring"]["instance_type"],
            instance_count=1,
            accept_type=clar["predictor_config"]["accept_type"],
            content_type=clar["predictor_config"]["content_type"],
        )
        # create_monitoring_schedule takes the SHAP and model settings wrapped
        # in an analysis config; it has no explainability_config/model_config
        # keyword arguments.
        analysis_cfg = ExplainabilityAnalysisConfig(
            explainability_config=shap_cfg,
            model_config=model_cfg,
            headers=clar["headers"] or None,
        )
        exp_mon = ModelExplainabilityMonitor(
            role=role,
            instance_count=1,
            instance_type=CONFIG["monitoring"]["instance_type"],
            volume_size_in_gb=CONFIG["monitoring"]["volume_size_gb"],
            max_runtime_in_seconds=CONFIG["monitoring"]["max_runtime_seconds"],
            sagemaker_session=sm_sess
        )
        exp_mon.create_monitoring_schedule(
            monitor_schedule_name=expl_sched_name,
            endpoint_input=ep,
            analysis_config=analysis_cfg,
            schedule_cron_expression=CONFIG["monitoring"]["schedule_cron"]
        )
        print("✅ Explainability monitoring schedule created:", expl_sched_name)
    except Exception as e:
        # Best-effort notebook cell: report the failure and let later cells run.
        print("⚠️ Explainability monitor setup skipped/failed:", e)
else:
    print("Explainability monitor disabled.  # <- TODO ✏️ set CONFIG['clarify']['enable_explainability']=True")


## ⏱️ Temporal Drift Checks (PSI & KS)


In [None]:
import re, json, gzip
import numpy as np
import pandas as pd
from scipy.stats import ks_2samp

def compute_psi(expected: np.ndarray, actual: np.ndarray, buckets=10) -> float:
    """Return the Population Stability Index of *actual* relative to *expected*.

    Bin edges are the unique quantiles of *expected*; values of *actual*
    outside the baseline range fall into the first or last bin.

    Args:
        expected: Baseline sample (array-like of numbers); NaNs are ignored.
        actual: Sample to compare (array-like of numbers); NaNs are ignored.
        buckets: Number of quantile buckets requested for the baseline.

    Returns:
        The PSI as a float, or NaN when either sample has fewer than 10
        non-NaN values.
    """
    # Accept lists/Series as well as ndarrays; boolean masking needs an array.
    expected = np.asarray(expected, dtype=float)
    actual = np.asarray(actual, dtype=float)
    expected = expected[~np.isnan(expected)]
    actual = actual[~np.isnan(actual)]
    if len(expected) < 10 or len(actual) < 10:
        return np.nan

    quantiles = np.linspace(0, 1, buckets + 1)
    cuts = np.unique(np.quantile(expected, quantiles))
    if len(cuts) < 2:
        # Constant baseline: quantile binning would collapse to a single bin
        # and report PSI = 0 for any input. Use three bins instead
        # (below / equal to / above the constant) so a shift is still visible.
        constant = cuts[0]
        edges = np.array([constant, np.nextafter(constant, np.inf)])
    else:
        # Inner edges only, so out-of-range values land in the outer bins.
        edges = cuts[1:-1]
    n_bins = len(edges) + 1

    expected_bins = np.digitize(expected, edges, right=False)
    actual_bins = np.digitize(actual, edges, right=False)
    e_counts = np.bincount(expected_bins, minlength=n_bins).astype(float)
    a_counts = np.bincount(actual_bins, minlength=n_bins).astype(float)
    # Floor the proportions so empty bins do not produce log(0) or division by zero.
    e_prop = np.clip(e_counts / e_counts.sum(), 1e-6, None)
    a_prop = np.clip(a_counts / a_counts.sum(), 1e-6, None)
    psi = np.sum((a_prop - e_prop) * np.log(a_prop / e_prop))
    return float(psi)

def load_df(uri: str) -> pd.DataFrame:
    """Load the dataset at *uri* into a DataFrame.

    A URI ending in ``.csv`` is read as CSV; otherwise a URI containing
    ``.parquet`` is read as Parquet. An empty URI or any other kind of URI
    yields an empty DataFrame.
    """
    reader = None
    if uri:
        if uri.endswith(".csv"):
            reader = pd.read_csv
        elif ".parquet" in uri:
            reader = pd.read_parquet
    if reader is None:
        return pd.DataFrame()
    return reader(uri)

# Stdlib helpers used only by this drift check.
import base64
import io


def _capture_request_payload(rec):
    """Return the request payload of one capture record, decoded when possible.

    Handles the SageMaker data-capture layout
    (``captureData.endpointInput.data`` plus an ``encoding`` field) and falls
    back to the simpler ``request.body`` layout. Returns None when the payload
    cannot be decoded.
    """
    endpoint_input = rec.get("captureData", {}).get("endpointInput")
    if not isinstance(endpoint_input, dict):
        return rec.get("request", {}).get("body", "")
    data = endpoint_input.get("data", "")
    encoding = endpoint_input.get("encoding")
    if isinstance(data, str) and encoding == "BASE64":
        try:
            data = base64.b64decode(data).decode("utf-8")
        except (ValueError, UnicodeDecodeError):
            return None
    elif isinstance(data, str) and encoding == "JSON":
        try:
            data = json.loads(data)
        except ValueError:
            return None
    return data


def _payload_to_frame(payload, headers):
    """Convert one request payload into a DataFrame, or None if unusable.

    CSV text is parsed without a header row and its columns are named
    positionally from *headers*; dict/list payloads are flattened with
    ``pd.json_normalize``.
    """
    if isinstance(payload, str):
        text = payload.strip()
        if not text or "," not in text or not headers:
            return None
        try:
            parsed = pd.read_csv(io.StringIO(text), header=None)
        except (pd.errors.ParserError, pd.errors.EmptyDataError, ValueError):
            return None
        # Positional mapping: extra columns beyond the known headers are dropped.
        parsed = parsed.iloc[:, :len(headers)]
        parsed.columns = headers[:parsed.shape[1]]
        return parsed
    if isinstance(payload, (dict, list)):
        try:
            return pd.json_normalize(payload)
        except (AttributeError, NotImplementedError, TypeError, ValueError):
            return None
    return None


# 1) Load baseline
baseline_uri = CONFIG["monitoring"].get("baseline_dataset_uri") or CONFIG["monitoring"].get("fallback_parquet_uri")
df_base = load_df(baseline_uri)
ts_col = None
if not df_base.empty:
    # look for time-like columns (datetime dtype, or "time"/"date" in the name)
    for c in df_base.columns:
        if pd.api.types.is_datetime64_any_dtype(df_base[c]) or re.search(r"(time|date)", str(c), flags=re.I):
            ts_col = c
            break

if ts_col is not None:
    df_base[ts_col] = pd.to_datetime(df_base[ts_col], errors="coerce")
    base_recent = df_base.dropna(subset=[ts_col]).copy()
    # Limit the check to the first 10 numeric feature columns.
    num_cols = [c for c in base_recent.columns if c != ts_col and pd.api.types.is_numeric_dtype(base_recent[c])][:10]
    print("Timestamp column:", ts_col)
    print("Numeric features to check:", num_cols)

    # 2) Load captured sample (JSONL in S3)
    import s3fs
    capture_headers = CONFIG["clarify"]["headers"]
    # Strip the scheme and any trailing slash so the glob has no empty path segment.
    cap_path = CONFIG["deployment"]["data_capture"]["s3_prefix"].replace("s3://", "", 1).rstrip("/")
    fs = s3fs.S3FileSystem()
    # Sort before slicing so "last five" is well defined.
    # NOTE(review): assumes capture keys sort chronologically — confirm.
    keys = sorted(fs.glob(f"{cap_path}/**/*.jsonl*"))[-5:]  # recent few
    frames = []
    for k in keys:
        with fs.open(k, "rb") as f:
            raw = f.read()
        if raw[:2] == b"\x1f\x8b":  # gzip magic number
            raw = gzip.decompress(raw)
        for line in raw.decode("utf-8").splitlines():
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip malformed lines instead of aborting the check
            if not isinstance(rec, dict):
                continue
            frame = _payload_to_frame(_capture_request_payload(rec), capture_headers)
            if frame is not None and not frame.empty:
                frames.append(frame)
    df_cap = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if not df_cap.empty and ts_col in df_cap.columns:
        df_cap[ts_col] = pd.to_datetime(df_cap[ts_col], errors="coerce")

    # 3) Drift metrics
    results = []
    if not base_recent.empty and not df_cap.empty:
        common = [c for c in num_cols if c in df_cap.columns]
        for c in common:
            # Coerce instead of astype(float): captured values may be text.
            base_vals = pd.to_numeric(base_recent[c], errors="coerce").to_numpy(dtype=float)
            cap_vals = pd.to_numeric(df_cap[c], errors="coerce").to_numpy(dtype=float)
            base_clean = base_vals[~np.isnan(base_vals)]
            cap_clean = cap_vals[~np.isnan(cap_vals)]
            if base_clean.size == 0 or cap_clean.size == 0:
                continue  # ks_2samp needs non-empty samples
            psi = compute_psi(base_vals, cap_vals, buckets=10)
            ks = ks_2samp(base_clean, cap_clean).statistic
            results.append({"feature": c, "psi": psi, "ks_stat": float(ks)})
    drift_df = pd.DataFrame(results)
    out = Path(ARTIFACT_DIR) / "temporal_drift_summary.csv"
    if not drift_df.empty:
        drift_df.to_csv(out, index=False)
        print("✅ Temporal drift summary saved:", out)
        display(drift_df.sort_values("psi", ascending=False).head(10))
    else:
        print("ℹ️ Temporal drift summary not computed (insufficient capture or columns mismatch).")
else:
    print("No timestamp column detected in baseline; temporal drift check skipped.  # <- TODO ✏️ ensure baseline has a time column")


## 📅 Schedules & Latest Executions


In [None]:
# Low-level SageMaker API client shared by the helpers in this cell.
sm = sm_sess.sagemaker_client

def list_schedules(prefix):
    """Return the names of all monitoring schedules whose name starts with *prefix*.

    The API filter is a "contains" match, so results are narrowed to real
    prefix matches here. All result pages are followed via ``NextToken``.
    """
    names = []
    request = {"MonitoringScheduleNameContains": prefix}
    while True:
        res = sm.list_monitoring_schedules(**request)
        names.extend(
            s["MonitoringScheduleName"]
            for s in res.get("MonitoringScheduleSummaries", [])
            if s["MonitoringScheduleName"].startswith(prefix)
        )
        token = res.get("NextToken")
        if not token:
            return names
        request["NextToken"] = token

# Print the schedule names found for each monitor family.
for pfx in ("data-quality-", "bias-", "explain-"):
    names = list_schedules(pfx)
    print(pfx, names)

def last_execution(name):
    """Return the most recently scheduled execution summary of schedule *name*.

    Returns None when the schedule has no executions yet. Indexing the
    summaries list directly would raise IndexError on an empty list.
    """
    res = sm.list_monitoring_executions(
        MonitoringScheduleName=name,
        SortBy="ScheduledTime",
        SortOrder="Descending",
        MaxResults=1,
    )
    summaries = res.get("MonitoringExecutionSummaries") or []
    return summaries[0] if summaries else None

# Collect every schedule name first, then report each one's latest execution.
_all_schedule_names = [
    sched
    for pfx in ("data-quality-", "bias-", "explain-")
    for sched in list_schedules(pfx)
]
for name in _all_schedule_names:
    ex = last_execution(name)
    if not ex:
        continue  # nothing to report for this schedule
    print(f"{name}: {ex['MonitoringExecutionStatus']} @ {ex['ScheduledTime']}")


## 🧹 (Optional) Pause / Delete Schedules


In [None]:
# ⚠️ Use with care
# Deletes every schedule returned by `list_schedules` through the SageMaker API client `sm`.
# for name in [*list_schedules("data-quality-"), *list_schedules("bias-"), *list_schedules("explain-")]:
#     try:
#         sm.delete_monitoring_schedule(MonitoringScheduleName=name)
#         print("Deleted:", name)
#     except Exception as e:
#         print("Failed to delete", name, e)


## ✅ Student Checklist
- [ ] Set **endpoint name** in `CONFIG['deployment']['endpoint_name']`  # <- TODO ✏️  
- [ ] Choose **data capture** prefix and **content type**  # <- TODO ✏️  
- [ ] Provide **baseline dataset** (CSV with header)  # <- TODO ✏️  
- [ ] (Bias) Provide **ground-truth S3** and **label/positive class**  # <- TODO ✏️  
- [ ] (Bias) List **facet columns** to analyze  # <- TODO ✏️  
- [ ] (Explainability) Set **SHAP sample size**  # <- TODO ✏️  
- [ ] (Temporal drift) Ensure the baseline has a **timestamp** column and set `CONFIG['clarify']['headers']` so captured CSV rows can be mapped to feature names  # <- TODO ✏️  
- [ ] Adjust **cron schedule** & **instance type** for monitors  # <- TODO ✏️  
- [ ] Run cells in order; inspect created schedules and outputs.
