# 03 · Compliance & Water Quality Index (WQI)
Evaluate compliance vs. thresholds and compute a simple WQI (illustrative).

In [None]:

import pandas as pd
import numpy as np
from pathlib import Path

# Inputs are read from ../data, resolved against the current working directory.
DATA_DIR = Path("../data")
samples_csv = DATA_DIR / "water_quality_samples.csv"
thresholds_csv = DATA_DIR / "lab_thresholds.csv"

# Sample measurements; the "timestamp" column is parsed into datetimes on load.
df = pd.read_csv(samples_csv, parse_dates=["timestamp"])
# Threshold table; the code below reads its parameter, min_ok and max_ok columns.
thr = pd.read_csv(thresholds_csv)

# Compliance table: starts with the sample identifiers only; one
# "<parameter>_ok" indicator column per thresholded parameter is added below.
comp = df.loc[:, ["timestamp", "station_id"]].copy()

def within(series, lo, hi):
    """Flag the values of *series* that lie in the closed interval [lo, hi].

    Returns an integer Series holding 1 where ``lo <= value <= hi`` and 0
    everywhere else. Missing values never satisfy either comparison, so they
    come back as 0.
    """
    at_or_above_lo = series >= lo
    at_or_below_hi = series <= hi
    return (at_or_above_lo & at_or_below_hi).astype(int)

# Score every thresholded parameter that is actually present in the samples.
# within() reports a missing measurement as 0 (out of range); mask those
# entries back to NaN so an unmeasured parameter is not counted as a
# violation. The WQI computation further down skips missing values the same way.
for p, lo, hi in zip(thr["parameter"], thr["min_ok"], thr["max_ok"]):
    if p in df.columns:
        comp[p + "_ok"] = within(df[p], lo, hi).where(df[p].notna())

# Percentage of the measured parameters that are within range, per sample.
# mean() skips NaN, so only measured parameters enter the denominator; a sample
# with no measured parameter at all gets NaN.
comp["compliance_pct"] = comp.filter(like="_ok").mean(axis=1) * 100

# Demo-only WQI weights, one per parameter (arbitrary values that add up to 1.0).
# The WQI below divides by the sum of the weights actually used, so a sample
# with missing parameters is still scored on a 0-100 scale.
weights = {
    "pH": 0.1,
    "turbidity_NTU": 0.1,
    "dissolved_oxygen_mgL": 0.15,
    "nitrate_mgL": 0.15,
    "phosphate_mgL": 0.1,
    "ammonia_mgL": 0.1,
    "e_coli_CFU_100mL": 0.2,
    "chlorine_mgL": 0.1,
}

def subindex(param, value, lo, hi, invert=False):
    """Map one measurement onto a 0-100 sub-index using the band [lo, hi].

    Inside the band the score rises linearly from 0 at ``lo`` towards 100 at
    ``hi``; with ``invert=True`` it falls from 100 towards 0 instead. Outside
    the band the score saturates at the nearest end of that ramp.

    Args:
        param: Parameter name. Accepted for the caller's convenience; it is
            not used in the calculation.
        value: The measured value.
        lo: Lower edge of the band.
        hi: Upper edge of the band.
        invert: Set to True when a higher measurement should score lower.

    Returns:
        The score clipped to [0, 100], or NaN when ``value`` is NaN.
    """
    if np.isnan(value):
        return np.nan

    # Saturation scores for readings below ``lo`` and above ``hi`` respectively.
    under, over = (100, 0) if invert else (0, 100)

    if value < lo:
        raw = under
    elif value > hi:
        raw = over
    else:
        # The tiny epsilon keeps the division finite when lo == hi.
        position = (value - lo) / (hi - lo + 1e-9)
        raw = 100 * (1 - position) if invert else 100 * position
    return np.clip(raw, 0, 100)

# Parameters scored with invert=True, i.e. a higher reading is worse:
# turbidity, the microbiological count and the nutrient/ammonia concentrations.
# Any parameter not listed here is scored with invert=False.
invert_map = dict.fromkeys(
    (
        "turbidity_NTU",
        "e_coli_CFU_100mL",
        "ammonia_mgL",
        "nitrate_mgL",
        "phosphate_mgL",
    ),
    True,
)

# Resolve once which parameters can contribute to the WQI: each needs a
# threshold row, a column in the samples and a weight. Doing this up front keeps
# the threshold / weight / invert lookups out of the per-sample work.
wqi_params = [
    (p, lo, hi, weights[p], invert_map.get(p, False))
    for p, lo, hi in zip(thr["parameter"], thr["min_ok"], thr["max_ok"])
    if p in df.columns and p in weights
]

# Per-sample running totals: weighted sub-index sum and sum of the weights used.
score_sum = np.zeros(len(df))
weight_sum = np.zeros(len(df))
for p, lo, hi, w, inv in wqi_params:
    si = np.array(
        [subindex(p, v, lo, hi, invert=inv) for v in df[p]], dtype=float
    )
    scored = ~np.isnan(si)
    # A NaN sub-index adds nothing to the score and nothing to the weight
    # total, so it does not dilute the weighted mean.
    score_sum += np.where(scored, w * si, 0.0)
    weight_sum += np.where(scored, w, 0.0)

# Weighted mean of the available sub-indices; NaN when a sample has none.
# errstate silences the 0/0 warning for those rows (np.where masks them anyway).
with np.errstate(divide="ignore", invalid="ignore"):
    wqi_scores = np.where(weight_sum > 0, score_sum / weight_sum, np.nan).tolist()

comp["WQI"] = wqi_scores
comp[["compliance_pct","WQI"]].describe()
