In [None]:
"""
Run synthetic traffic against a canary endpoint, recording per-request latency
and counting failed requests.

Usage (--duration is given in seconds):
    python online_eval.py --host http://canary.chatbot --duration 600 --rps 20
"""
import asyncio, aiohttp, json, time, random, argparse, pathlib, csv
from datetime import datetime, timedelta

def load_mix():
    """Load the canary traffic mix and the query pool of every slice.

    Reads ``canary_user_mix.json`` and, for each entry of its ``"mix"`` list,
    ``data/queries_<slice>.json``. Both paths are relative to the current
    working directory.

    Returns:
        tuple: ``(mix, pool)`` where ``mix`` is the list stored under the
        ``"mix"`` key and ``pool`` maps each slice name to the parsed content
        of that slice's query file.

    Raises:
        OSError: if one of the files cannot be opened.
        json.JSONDecodeError: if a file does not contain valid JSON.
        KeyError: if the ``"mix"`` key or an entry's ``"slice"`` key is missing.
    """
    # Context managers close every handle deterministically; the explicit
    # encoding keeps non-ASCII queries intact regardless of the locale default.
    with open("canary_user_mix.json", encoding="utf-8") as fh:
        mix = json.load(fh)["mix"]

    pool = {}
    for sl in mix:
        with open(f"data/queries_{sl['slice']}.json", encoding="utf-8") as fh:
            pool[sl["slice"]] = json.load(fh)
    return mix, pool

async def worker(session, host, mix, pool, rps, end):
    """Send paced requests to ``{host}/api/v1/answer`` until ``end``.

    Every iteration draws a slice according to the mix weights, picks a random
    query from that slice's pool, POSTs it as ``{"question": q}`` and records
    how long the request took.

    Args:
        session: object whose ``post(url, json=..., timeout=...)`` returns an
            async context manager yielding a response with ``status`` and
            ``text()`` (``main`` passes an ``aiohttp.ClientSession``).
        host: base URL the request path is appended to.
        mix: list of dicts with ``"slice"`` and ``"weight"`` keys.
        pool: mapping of slice name to a sequence of queries.
        rps: target requests per second for this worker; must be positive.
        end: naive UTC ``datetime`` deadline; it is compared against
            ``datetime.utcnow()``.

    Returns:
        tuple: ``(latencies, errors)`` where ``latencies`` is a list of
        ``(slice, latency_ms)`` pairs (one per request, failed ones included)
        and ``errors`` counts responses with status >= 500 plus requests that
        raised.

    Raises:
        ValueError: if ``rps`` is not positive.
    """
    if rps <= 0:
        raise ValueError(f"rps must be positive, got {rps!r}")

    # Loop-invariant values, computed once instead of on every request.
    slices = [m["slice"] for m in mix]
    weights = [m["weight"] for m in mix]
    interval = 1 / rps

    latencies, errors = [], 0
    while datetime.utcnow() < end:
        slice_ = random.choices(slices, weights=weights)[0]
        q = random.choice(pool[slice_])
        t0 = time.perf_counter()
        try:
            async with session.post(f"{host}/api/v1/answer",
                                    json={"question": q}, timeout=10) as resp:
                await resp.text()
                if resp.status >= 500:
                    errors += 1
        except Exception:
            # Deliberately best-effort: a request that raises is counted as
            # one error and the load generation keeps going.
            errors += 1
        elapsed = time.perf_counter() - t0
        latencies.append((slice_, elapsed * 1000))
        # Sleep only for what is left of this request's time slot; sleeping a
        # full 1/rps on top of the request time would undershoot the target
        # rate. sleep(0) still yields to the other workers.
        await asyncio.sleep(max(0.0, interval - elapsed))
    return latencies, errors

async def main(args):
    """Run the load test described by ``args`` and report the results.

    Starts ``args.concurrency`` workers that share one HTTP session and run
    for ``args.duration`` seconds, writes every ``(slice, latency_ms)`` sample
    to ``results/online_latencies.csv`` and prints the request count, error
    rate and p50/p95 latency.

    Args:
        args: namespace providing ``host``, ``duration`` (seconds), ``rps``
            (per worker) and ``concurrency``.
    """
    mix, pool = load_mix()
    end = datetime.utcnow() + timedelta(seconds=args.duration)
    async with aiohttp.ClientSession() as sess:
        # gather() runs all workers concurrently and returns one
        # (latencies, errors) pair per worker.
        results = await asyncio.gather(*(
            worker(sess, args.host, mix, pool, args.rps, end)
            for _ in range(args.concurrency)
        ))

    latencies_all, err = [], 0
    for lat, e in results:
        latencies_all.extend(lat)
        err += e

    # write raw CSV
    pathlib.Path("results").mkdir(exist_ok=True)
    with open("results/online_latencies.csv", "w", newline="") as f:
        w = csv.writer(f)
        w.writerow(["slice", "latency_ms"])
        w.writerows(latencies_all)

    # summary
    r_tot = len(latencies_all)
    if r_tot == 0:
        # Nothing was sent (e.g. zero duration or zero workers): there is no
        # error rate or percentile to compute, so report that and stop.
        print(f"Total requests=0, errors={err} (no requests were sent)")
        return
    values = [l for _, l in latencies_all]
    p95 = percentile(values, 95)
    p50 = percentile(values, 50)
    print(f"Total requests={r_tot}, errors={err} ({err/r_tot:.2%})")
    print(f"p50={p50:.0f} ms   p95={p95:.0f} ms")

def percentile(a, p):
    """Return the element of ``a`` at percentile ``p`` (no interpolation).

    The values are sorted and the element at index ``int(len(a) * p / 100)``
    is returned, with the index clamped to the valid range.

    Args:
        a: iterable of mutually comparable values; it is not modified.
        p: percentile, nominally between 0 and 100.

    Returns:
        One of the elements of ``a``, or ``float("nan")`` when ``a`` is empty.
    """
    ordered = sorted(a)
    if not ordered:
        # No data: return NaN instead of failing with an IndexError.
        return float("nan")
    k = int(len(ordered) * p / 100)
    # Clamp at both ends: p >= 100 would run past the last element, and a
    # negative p would otherwise wrap around through negative indexing.
    return ordered[max(0, min(k, len(ordered) - 1))]

def parse_duration(text):
    """Convert a ``--duration`` value to a positive number of whole seconds.

    Accepts a plain integer (seconds) or an integer followed by ``s``, ``m``
    or ``h``, e.g. ``600``, ``90s``, ``10m``, ``1h``.

    Raises:
        argparse.ArgumentTypeError: if the value is malformed or not positive.
    """
    units = {"s": 1, "m": 60, "h": 3600}
    raw = text.strip().lower()
    scale = 1
    if raw and raw[-1] in units:
        scale = units[raw[-1]]
        raw = raw[:-1]
    try:
        seconds = int(raw) * scale
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid duration {text!r} (use e.g. 600, 90s, 10m, 1h)") from None
    if seconds <= 0:
        raise argparse.ArgumentTypeError(f"duration must be positive, got {text!r}")
    return seconds


def positive_int(text):
    """Parse a strictly positive integer command-line value.

    Raises:
        argparse.ArgumentTypeError: if the value is not an integer > 0.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected a positive integer, got {text!r}") from None
    if value <= 0:
        raise argparse.ArgumentTypeError(f"expected a positive integer, got {text!r}")
    return value


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--host", required=True, help="http://canary.chatbot")
    # Zero or negative values are rejected at parse time; they would otherwise
    # only fail later, in the middle of the run or in the summary.
    ap.add_argument("--duration", type=parse_duration, default=600,
                    help="seconds, or a value with an s/m/h suffix (e.g. 10m)")
    ap.add_argument("--rps", type=positive_int, default=20,
                    help="requests per second / worker")
    ap.add_argument("--concurrency", type=positive_int, default=5,
                    help="# of async workers")
    args = ap.parse_args()
    asyncio.run(main(args))


In [None]:
# Install the HTTP client into the kernel's own environment (%pip targets the
# running kernel, unlike a bare shell `pip`).
%pip install -q aiohttp

# Run the load test as a shell command; --duration is in seconds (15 minutes).
!python online_eval.py --host http://canary.chatbot --duration 900 --rps 15 --concurrency 10

In [None]:
### Grafana Dashboard code

{
  "title": "Chatbot Canary – Online Evaluation",
  "time": { "from": "now-15m", "to": "now" },
  "refresh": "10s",
  "schemaVersion": 38,
  "version": 1,
  "panels": [
    {
      "title": "Live RPS",
      "type": "timeseries",
      "gridPos": { "x": 0, "y": 0, "w": 8, "h": 6 },
      "targets": [
        { "expr": "rate(http_requests_total{handler=\"/api/v1/answer\"}[30s])", "refId": "A" }
      ],
      "datasource": "Prometheus"
    },
    {
      "title": "Latency (p50 & p95 ms)",
      "description": "Latency quantiles over all series, converted from seconds to milliseconds.",
      "type": "timeseries",
      "gridPos": { "x": 8, "y": 0, "w": 8, "h": 6 },
      "targets": [
        {
          "expr": "histogram_quantile(0.5, sum by (le) (rate(chatbot_latency_seconds_bucket[30s])))*1000",
          "legendFormat": "p50", "refId": "A"
        },
        {
          "expr": "histogram_quantile(0.95, sum by (le) (rate(chatbot_latency_seconds_bucket[30s])))*1000",
          "legendFormat": "p95", "refId": "B"
        }
      ],
      "datasource": "Prometheus"
    },
    {
      "title": "Error Rate",
      "description": "Errors divided by requests over the last minute; a 0-1 ratio rendered as a percentage.",
      "type": "stat",
      "gridPos": { "x": 16, "y": 0, "w": 8, "h": 6 },
      "targets": [
        {
          "expr": "sum(rate(chatbot_errors_total[1m])) / sum(rate(http_requests_total{handler=\"/api/v1/answer\"}[1m]))",
          "refId": "A"
        }
      ],
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 0.01 },
              { "color": "red",    "value": 0.03 }
            ]
          }
        }
      }
    },
    {
      "title": "Requests by Slice (top 5)",
      "description": "Request rate per slice, limited to the five busiest slices.",
      "type": "piechart",
      "gridPos": { "x": 0, "y": 6, "w": 12, "h": 7 },
      "options": { "displayLabels": ["percent"], "legend": { "displayMode": "table" } },
      "targets": [
        {
          "expr": "topk(5, sum by(slice)(rate(chatbot_slice_total[1m])))",
          "legendFormat": "{{slice}}", "refId": "A"
        }
      ],
      "datasource": "Prometheus"
    },
    {
      "title": "Median Prediction Confidence",
      "type": "timeseries",
      "gridPos": { "x": 12, "y": 6, "w": 12, "h": 7 },
      "targets": [
        {
          "expr": "histogram_quantile(0.5, sum by (le) (rate(prediction_confidence_bucket[1m])))",
          "legendFormat": "p50 confidence", "refId": "A"
        }
      ],
      "datasource": "Prometheus"
    }
  ]
}
