# Eval

- Metrike: Precision, Recall, F1, Accuracy (s TN) pri pragu t

- Rangirne metrike med pozitivnimi: Acc@1, P@5, MRR



In [1]:

# Run configuration — adjust the paths below to point at your own run.

# Tag identifying this run; used when naming exported files.
RUN_TAG = "defibrilatorji"

# Input files: matcher predictions and the gold-standard mapping.
PREDS_JSON = "notebook_defibrilatorji/result/defibrilatorji_match.json"
GOLD_JSON = "notebook_defibrilatorji/gold/trbovlje_to_defibrilatorji.json"

# Names handed to the prediction loader as `score_field_options`.
SCORE_FIELDS = ["combined_score", "score"]

# Ranking cut-off (passed as `top_k` / `k`) and decision threshold (passed as `threshold`).
TOP_K = 5
THRESHOLD = 0.7


In [4]:

# Imports & helpers
import sys
from importlib import reload
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Make the shared helper module importable.
# The membership guard keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path on every execution.
# NOTE(review): the path is relative to the kernel's working directory, and this
# notebook takes its helpers from the "notebook_brezice" folder — confirm intended.
_HELPERS_DIR = "notebook_brezice"
if _HELPERS_DIR not in sys.path:
    sys.path.append(_HELPERS_DIR)

import json_eval_helpers as H
reload(H)  # pick up edits to the helper module without restarting the kernel

# List the public names exposed by the helper module.
print("Na voljo helper funkcije:", [n for n in dir(H) if not n.startswith("_")])


Na voljo helper funkcije: ['Dict', 'List', 'Optional', 'Set', 'Tuple', 'compute_metrics_from_maps', 'compute_open_world_metrics', 'decide_label_with_threshold', 'json', 'load_gold_json', 'load_predictions_json', 'load_predictions_with_scores', 'normalize']


In [5]:

# Load the gold mapping and the scored predictions.
gold_map = H.load_gold_json(GOLD_JSON)
pred_pairs_map = H.load_predictions_with_scores(
    PREDS_JSON, score_field_options=SCORE_FIELDS, top_k=TOP_K
)

# Show how many entries each mapping holds.
(len(gold_map), len(pred_pairs_map))


(8, 5)

In [6]:

# Open-world metrics: match/no-match decisions at THRESHOLD plus ranking among positives.
open_world_options = {
    "threshold": THRESHOLD,
    "k": TOP_K,
    "exclude_no_match_from_mrr": True,
}
open_world = H.compute_open_world_metrics(gold_map, pred_pairs_map, **open_world_options)
open_world


{'decision_metrics': {'threshold': 0.7,
  'TP': 1,
  'FP': 0,
  'FN': 0,
  'TN': 7,
  'precision': 1.0,
  'recall': 1.0,
  'f1': 1.0,
  'accuracy': 1.0},
 'ranking_metrics_among_positives': {'count_positives': 1,
  'acc_at_1': 1.0,
  'p_at_5': 0.6666666666666666,
  'mrr': 1.0},
 'details': [{'source': 'Naziv',
   'gold': ['LOKACIJA', 'NASLOV'],
   'chosen': 'NASLOV',
   'outcome': 'TP',
   'candidates': ['NASLOV', 'LOKACIJA', 'ZAP ŠT.'],
   'scores': [0.7033572001890702, 0.6343915224075317, 0.6128214359283447]},
  {'source': 'Lokacijska koordinata (GMAPS - WGS84)',
   'gold': [],
   'chosen': None,
   'outcome': 'TN',
   'candidates': [],
   'scores': []},
  {'source': 'Plačljivo',
   'gold': [],
   'chosen': None,
   'outcome': 'TN',
   'candidates': [],
   'scores': []},
  {'source': 'Cena',
   'gold': [],
   'chosen': None,
   'outcome': 'TN',
   'candidates': [],
   'scores': []},
  {'source': 'Število parkirnih mest',
   'gold': [],
   'chosen': None,
   'outcome': 'TN',
   'candi

In [7]:

# Closed-world comparison (Acc@1, P@5, MRR) — if the helper supports it.
# Strip the scores so each source maps to its list of candidate labels only.
pred_map_labels = {
    source: [label for (label, _score) in scored_pairs]
    for source, scored_pairs in pred_pairs_map.items()
}
closed_metrics = H.compute_metrics_from_maps(
    gold_map,
    pred_map_labels,
    k=TOP_K,
)
closed_metrics


{'count': 5,
 'acc_at_1': 0.2,
 'p_at_5': 0.2,
 'mrr': 0.2,
 'details': [{'source_column': 'Naziv',
   'gold': ['LOKACIJA', 'NASLOV', 'ZAP ŠT.'],
   'predictions': {'LOKACIJA', 'NASLOV'},
   'hit': True,
   'rank_hit': 1,
   'hits_in_k': 2},
  {'source_column': 'Lokacijska koordinata',
   'gold': ['LOKACIJA', 'NASLOV', 'ZAP ŠT.'],
   'predictions': [],
   'hit': False,
   'rank_hit': None,
   'hits_in_k': 0},
  {'source_column': 'Število parkirnih mest',
   'gold': ['LOKACIJA', 'NASLOV', 'ZAP ŠT.'],
   'predictions': set(),
   'hit': False,
   'rank_hit': None,
   'hits_in_k': 0},
  {'source_column': 'Število mest z električno polnilnico',
   'gold': ['LOKACIJA', 'NASLOV', 'ZAP ŠT.'],
   'predictions': set(),
   'hit': False,
   'rank_hit': None,
   'hits_in_k': 0},
  {'source_column': 'Število mest za invalide',
   'gold': ['LOKACIJA', 'NASLOV', 'ZAP ŠT.'],
   'predictions': set(),
   'hit': False,
   'rank_hit': None,
   'hits_in_k': 0}]}

In [8]:

# Per-column details as a table, previewed inline and exported to CSV.
details_df = pd.DataFrame(open_world["details"])
display(details_df.head(20))

# Create the export folder if needed, then write the full table there.
out_dir = Path("notebook_defibrilatorji/results_eval/results_eval_json")
out_dir.mkdir(parents=True, exist_ok=True)

out_csv = out_dir.joinpath(f"eval_json_details_{RUN_TAG}.csv")
details_df.to_csv(out_csv, index=False, encoding="utf-8")
print("Saved CSV:", out_csv)


Unnamed: 0,source,gold,chosen,outcome,candidates,scores
0,Naziv,"[LOKACIJA, NASLOV]",NASLOV,TP,"[NASLOV, LOKACIJA, ZAP ŠT.]","[0.7033572001890702, 0.6343915224075317, 0.612..."
1,Lokacijska koordinata (GMAPS - WGS84),[],,TN,[],[]
2,Plačljivo,[],,TN,[],[]
3,Cena,[],,TN,[],[]
4,Število parkirnih mest,[],,TN,"[ZAP ŠT., LOKACIJA, NASLOV]","[0.6368028470154466, 0.608995246887207, 0.6072..."
5,Električna polnilnica,[],,TN,[],[]
6,Število mest z električno polnilnico,[],,TN,"[ZAP ŠT., LOKACIJA, NASLOV]","[0.6374204449875409, 0.6119471669197082, 0.611..."
7,Število mest za invalide,[],,TN,"[ZAP ŠT., LOKACIJA, NASLOV]","[0.6407095974491489, 0.6146572589874267, 0.610..."


Saved CSV: notebook_defibrilatorji/results_eval/results_eval_json/eval_json_details_defibrilatorji.csv


In [9]:
# Summary of the key decision metrics.
dm = open_world["decision_metrics"]  # expected to be a dict

if not isinstance(dm, dict):
    # Unexpected shape: report the actual type and dump the raw value.
    print("Napaka: open_world['decision_metrics'] je tipa", type(dm))
    print(dm)
else:
    # Scores default to 0 when a key is absent; the counts are printed as returned.
    print(f"Precision: {dm.get('precision', 0):.3f}")
    print(f"Recall:    {dm.get('recall', 0):.3f}")
    print(f"F1 score:  {dm.get('f1', 0):.3f}")
    print(f"Accuracy:  {dm.get('accuracy', 0):.3f}")
    print()
    print(f"TP={dm.get('TP')}, FP={dm.get('FP')}, FN={dm.get('FN')}, TN={dm.get('TN')}")


Precision: 1.000
Recall:    1.000
F1 score:  1.000
Accuracy:  1.000

TP=1, FP=0, FN=0, TN=7


In [10]:
# Robust JSON export (handles Ellipsis, set/tuple, NaN, numpy/pandas types)
import json, datetime
from pathlib import Path

def to_jsonable(obj):
    """Recursively convert ``obj`` into a structure that ``json.dump`` accepts.

    Conversions applied:
      * ``Ellipsis`` and missing scalars (``None``, NaN, ``pd.NA``, ``pd.NaT``) -> ``None``
      * ``dict`` -> ``dict`` with converted values (keys are left untouched)
      * ``list`` / ``tuple`` / numpy array -> ``list`` of converted items
      * ``set`` / ``frozenset`` -> list of converted items, sorted by ``str(item)``
      * numpy scalar -> Python scalar via ``.item()``
      * ``pd.Timestamp`` -> ISO-8601 string

    Anything else is returned unchanged. numpy and pandas are optional; their
    conversions are skipped when the package cannot be imported.
    """
    # Ellipsis has no JSON counterpart.
    if obj is Ellipsis:
        return None

    # Containers are handled BEFORE any missing-value test. pd.isna() on a list
    # returns an array, so testing it first turned one-element lists such as
    # [nan] into None and emitted a truth-value warning for empty lists.
    if isinstance(obj, dict):
        return {k: to_jsonable(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [to_jsonable(x) for x in obj]
    if isinstance(obj, (set, frozenset)):
        # Sort for a stable output order; elements are sanitized first.
        return sorted((to_jsonable(x) for x in obj), key=str)

    # numpy: arrays become nested lists, scalars become Python scalars.
    try:
        import numpy as np
    except ImportError:
        np = None
    if np is not None:
        if isinstance(obj, np.ndarray):
            return to_jsonable(obj.tolist())
        if isinstance(obj, np.generic):
            # Keep going after unwrapping so a numpy NaN is still mapped to None.
            obj = obj.item()

    # Plain float NaN (NaN is the only float unequal to itself); works without pandas.
    if isinstance(obj, float) and obj != obj:
        return None

    # pandas: missing scalars -> None, timestamps -> ISO string.
    try:
        import pandas as pd
    except ImportError:
        pd = None
    if pd is not None:
        # Restrict isna() to scalars so it always yields a single boolean.
        if pd.api.types.is_scalar(obj) and pd.isna(obj):
            return None
        if isinstance(obj, pd.Timestamp):
            return obj.isoformat()

    # Everything else stays as-is (str, int, float, bool, None, ...).
    return obj

# Bundle the run parameters with both metric reports and write them to one JSON file.
OUT_JSON = "notebook_defibrilatorji/results_eval/eval_open_world_results.json"
Path(OUT_JSON).parent.mkdir(parents=True, exist_ok=True)  # ensure the target folder exists

# Start with the parameters of this run, then attach the two reports in order.
out = {
    "params": {
        "gold_json": GOLD_JSON,
        "preds_json": PREDS_JSON,
        "top_k": TOP_K,
        "threshold": THRESHOLD,
        "score_fields": SCORE_FIELDS,
        "timestamp": datetime.datetime.now().isoformat(),
    },
}
out["open_world"] = open_world
out["closed_world"] = closed_metrics

with open(OUT_JSON, "w", encoding="utf-8") as f:
    # Sanitize first so json.dump only sees JSON-compatible values.
    payload = to_jsonable(out)
    json.dump(payload, f, ensure_ascii=False, indent=2)

print("Saved JSON:", OUT_JSON)



Saved JSON: notebook_defibrilatorji/results_eval/eval_open_world_results.json


  if pd.isna(obj):
