# Evaluation Example
This notebook demonstrates how to run a set of cases with `batch_evaluate`, aggregate diagnoses from several runs with `WeightedVoter`, and plot accuracy against cost from `../example_results.csv`.

## Batch evaluation

In [None]:
from sdb.evaluation import batch_evaluate


def run_case(cid: str) -> dict[str, str]:
    """Return a stub result record for one case.

    Args:
        cid: Identifier of the case; copied into the record under ``"id"``.

    Returns:
        A dict with the case id and a hard-coded ``"flu"`` diagnosis.
    """
    record: dict[str, str] = {"id": cid}
    record["diagnosis"] = "flu"  # fixed placeholder; no real evaluation happens here
    return record

# Three example case identifiers, given as strings.
case_ids = [
    "1",
    "2",
    "3",
]

# NOTE(review): presumably batch_evaluate calls run_case once per id with at
# most `concurrency` calls in flight — confirm against sdb.evaluation.
results = batch_evaluate(
    case_ids,
    run_case,
    concurrency=2,
)

# Bare trailing expression so the notebook displays the returned value.
results

## Weighted voting

In [None]:
from sdb.ensemble import DiagnosisResult, WeightedVoter

# Three example diagnoses, each tagged with the run that produced it.
# Named `run_diagnoses` rather than `results` so this cell does not overwrite
# the `results` bound by the batch evaluation cell, which holds a different
# kind of value.
# NOTE(review): the second positional argument looks like a confidence score —
# confirm against the DiagnosisResult definition.
run_diagnoses = [
    DiagnosisResult("flu", 0.6, run_id="A"),
    DiagnosisResult("cold", 0.9, run_id="B"),
    DiagnosisResult("flu", 0.4, run_id="C"),
]

# Weights keyed by the same identifiers used as run_id above.
# NOTE(review): presumably WeightedVoter looks each result's weight up by its
# run_id — confirm against sdb.ensemble.
weights = {"A": 1.5, "B": 1.0, "C": 0.5}

voter = WeightedVoter()
print(voter.vote(run_diagnoses, weights=weights))

## Accuracy vs. cost

In [None]:
import csv
import matplotlib.pyplot as plt

# Collect the "cost" and "accuracy" columns as floats, keeping file order.
costs, accs = [], []
# newline="" is what the csv module asks for when it is handed a file object,
# so the reader itself handles line endings and newlines inside quoted fields.
# The explicit encoding keeps the read independent of the platform default.
with open("../example_results.csv", newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        costs.append(float(row["cost"]))
        accs.append(float(row["accuracy"]))

# Explicit figure/axes objects instead of pyplot's implicit current-axes state.
fig, ax = plt.subplots()
ax.plot(costs, accs, marker="o")
ax.set_xlabel("Cost")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy vs. Cost")
ax.grid(True)
plt.show()