# Group-classifier quick test (100 samples)
This notebook runs a quick accuracy check of the irregular-group classifier: first on 100 randomly sampled reference IDs, then on up to 20 samples from each of the five groups with the most IDs.

In [None]:

import json
import random
import app.utils as utils

# Location of the JSON file that maps each reference ID to its group label.
ID_TO_GROUP_PATH = "app/model_cache/irregular_id_to_group.json"

# JSON is UTF-8 by specification; naming the encoding explicitly keeps the
# load independent of the platform's default locale encoding.
with open(ID_TO_GROUP_PATH, "r", encoding="utf-8") as f:
    id_to_group = json.load(f)

# Load the classifier once; the evaluation cells pass `model` and
# `idx_to_class` to every prediction call.
model, idx_to_class = utils.load_irregular_model()


In [None]:

random.seed(42)  # fixed seed so the same IDs are drawn on every run

# Draw up to 100 IDs. Capping at the population size avoids the ValueError
# that random.sample raises when asked for more items than exist.
all_ids = list(id_to_group)
ids = random.sample(all_ids, min(100, len(all_ids)))
correct = 0

for sid in ids:
    # NOTE(review): get_spectrum_data appears to return (intensities,
    # wavenumbers, extra) while predict_irregular takes wavenumbers first --
    # confirm the ordering against app.utils.
    intensities, wns, _ = utils.get_spectrum_data(sid)
    pred_group, _ = utils.predict_irregular(wns, intensities, model=model, idx_to_class=idx_to_class)
    if pred_group == id_to_group[sid]:
        correct += 1

print(f"Correct: {correct} / {len(ids)}")
if ids:
    print(f"Accuracy: {correct/len(ids):.2%}")
else:
    # An empty mapping yields no samples; report that instead of dividing by zero.
    print("Accuracy: n/a (no samples)")


## Accuracy on top plastic groups

In [None]:

from collections import Counter

# Tally how many IDs belong to each group, then keep the labels of the five
# most frequent groups (most_common returns (label, count) pairs).
counts = Counter(id_to_group.values())
top_groups = [pair[0] for pair in counts.most_common(5)]
print("Top groups:", top_groups)


In [None]:

import random

# Re-seed with a fixed value so the random.sample draws made in this cell
# pick the same IDs on every run.
random.seed(42)

def eval_group_subset(groups, samples_per_group=20):
    """Print per-sample and aggregate classifier accuracy for selected groups.

    For each label in ``groups``, up to ``samples_per_group`` IDs carrying that
    label in ``id_to_group`` are drawn with ``random.sample``, classified with
    ``utils.predict_irregular``, and reported as one OK/MISS line each.

    Args:
        groups: Iterable of group labels to evaluate. Labels that have no IDs
            in ``id_to_group`` are skipped silently.
        samples_per_group: Maximum number of IDs tested per group.

    Returns:
        None. All results are written to stdout.
    """
    total = 0
    correct = 0
    for g in groups:
        ids = [sid for sid, grp in id_to_group.items() if grp == g]
        if not ids:
            continue
        # Never ask for more samples than the group has IDs.
        pick = random.sample(ids, min(samples_per_group, len(ids)))
        print(f"\nGroup: {g} (testing {len(pick)} samples)")
        for sid in pick:
            # NOTE(review): get_spectrum_data appears to return (intensities,
            # wavenumbers, extra) while predict_irregular takes wavenumbers
            # first -- confirm the ordering against app.utils.
            intensities, wns, _ = utils.get_spectrum_data(sid)
            pred_group, _ = utils.predict_irregular(wns, intensities, model=model, idx_to_class=idx_to_class)
            total += 1
            is_correct = pred_group == g
            if is_correct:
                correct += 1
            status = "OK" if is_correct else "MISS"
            print(f"  {status} | expected: {g} | predicted: {pred_group} | id: {sid}")
        # The counters are cumulative across groups, hence "so far".
        print(f"Group summary: {correct}/{total} correct so far")
    if total == 0:
        # Nothing was evaluated (no groups, no matching IDs, or zero samples
        # requested); report that instead of dividing by zero.
        print("\nOverall for selected groups: 0 / 0 (n/a)")
        return
    print(f"\nOverall for selected groups: {correct} / {total} ({correct/total:.2%})")

# Example run: score the most populated groups (top_groups), 20 samples each.
example_groups = top_groups

eval_group_subset(groups=example_groups, samples_per_group=20)
