# Eval

- Metrike: Precision, Recall, F1, Accuracy (s TN) pri pragu t

- Rangirne metrike med pozitivnimi: Acc@1, P@5, MRR



In [9]:

# Parameters — prilagodi poti
PREDS_JSON = "notebook_sevnica/result/sevnica_match.json"
GOLD_JSON = "notebook_sevnica/gold/trbovlje_to_sevnica.json"
TOP_K = 5
THRESHOLD = 0.7
SCORE_FIELDS = ["combined_score", "score"]
RUN_TAG = "sevnica"  # uporabi za imena izvozov


In [10]:

# Imports & helpers
import sys
from importlib import reload
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

sys.path.append("notebook_brezice")

import json_eval_helpers as H
reload(H)

print("Na voljo helper funkcije:", [n for n in dir(H) if not n.startswith("_")])


Na voljo helper funkcije: ['Dict', 'List', 'Optional', 'Set', 'Tuple', 'compute_metrics_from_maps', 'compute_open_world_metrics', 'decide_label_with_threshold', 'json', 'load_gold_json', 'load_predictions_json', 'load_predictions_with_scores', 'normalize']


In [11]:

# Naloži gold in predikcije
gold_map = H.load_gold_json(GOLD_JSON)

pred_pairs_map = H.load_predictions_with_scores(
    PREDS_JSON,
    score_field_options=SCORE_FIELDS,
    top_k=TOP_K
)

len(gold_map), len(pred_pairs_map)


(6, 7)

In [12]:

# Open-world metrike (odlocitve + rangiranje med pozitivnimi)
open_world = H.compute_open_world_metrics(
    gold_map,
    pred_pairs_map,
    threshold=THRESHOLD,
    k=TOP_K,
    exclude_no_match_from_mrr=True
)
open_world


{'decision_metrics': {'threshold': 0.7,
  'TP': 1,
  'FP': 5,
  'FN': 0,
  'TN': 0,
  'precision': 0.16666666666666666,
  'recall': 1.0,
  'f1': 0.2857142857142857,
  'accuracy': 0.16666666666666666},
 'ranking_metrics_among_positives': {'count_positives': 4,
  'acc_at_1': 0.25,
  'p_at_5': 0.15000000000000002,
  'mrr': 0.4375},
 'details': [{'source': 'Naziv',
   'gold': ['lokacija_parkirisca'],
   'chosen': 'lastnik',
   'outcome': 'FP',
   'candidates': ['lastnik',
    'opis_lokacije',
    'gps_parkirisca',
    'lokacija_parkirisca',
    'parcele'],
   'scores': [0.7429200887680053,
    0.7187737981478373,
    0.702597763663844,
    0.6970911622047424,
    0.6917900562286377]},
  {'source': 'Lokacijska koordinata (GMAPS - WGS84)',
   'gold': ['gps_parkirisca'],
   'chosen': 'lokacija_parkirisca',
   'outcome': 'FP',
   'candidates': ['lokacija_parkirisca',
    'gps_parkirisca',
    'opis_lokacije',
    'LAT',
    'LON'],
   'scores': [0.7649100865636553,
    0.7420986813657423,
    

In [13]:

# Closed-world primerjava (Acc@1, P@5, MRR) — če helper to podpira
pred_map_labels = {s: [lab for (lab, _sc) in pairs] for s, pairs in pred_pairs_map.items()}
closed_metrics = H.compute_metrics_from_maps(gold_map, pred_map_labels, k=TOP_K)
closed_metrics


{'count': 7,
 'acc_at_1': 0.42857142857142855,
 'p_at_5': 0.42857142857142855,
 'mrr': 0.42857142857142855,
 'details': [{'source_column': 'Naziv',
   'gold': ['gps_parkirisca',
    'lastnik',
    'lokacija_parkirisca',
    'opis_lokacije',
    'parcele'],
   'predictions': {'lokacija_parkirisca'},
   'hit': True,
   'rank_hit': 1,
   'hits_in_k': 1},
  {'source_column': 'Lokacijska koordinata (GMAPS - WGS84)',
   'gold': ['LAT',
    'LON',
    'gps_parkirisca',
    'lokacija_parkirisca',
    'opis_lokacije'],
   'predictions': {'gps_parkirisca'},
   'hit': True,
   'rank_hit': 1,
   'hits_in_k': 1},
  {'source_column': 'Plačljivo',
   'gold': ['LAT',
    'gps_parkirisca',
    'lastnik',
    'parcele',
    'st_parkirnih_mest'],
   'predictions': {'rezim_parkiranja'},
   'hit': False,
   'rank_hit': None,
   'hits_in_k': 0},
  {'source_column': 'Cena',
   'gold': ['LON',
    'gps_parkirisca',
    'oznacevanje_casa_modra_cona',
    'rezim_parkiranja',
    'st_parkirnih_mest'],
   'predic

In [14]:

# Detajli po stolpcih + CSV izvoz
details_df = pd.DataFrame(open_world["details"])
display(details_df.head(20))

out_dir = Path("notebook_sevnica/results_eval/results_eval_json")
out_dir.mkdir(parents=True, exist_ok=True)
out_csv = out_dir / f"eval_json_details_{RUN_TAG}.csv"
details_df.to_csv(out_csv, index=False, encoding="utf-8")
print("Saved CSV:", out_csv)


Unnamed: 0,source,gold,chosen,outcome,candidates,scores
0,Naziv,[lokacija_parkirisca],lastnik,FP,"[lastnik, opis_lokacije, gps_parkirisca, lokac...","[0.7429200887680053, 0.7187737981478373, 0.702..."
1,Lokacijska koordinata (GMAPS - WGS84),[gps_parkirisca],lokacija_parkirisca,FP,"[lokacija_parkirisca, gps_parkirisca, opis_lok...","[0.7649100865636553, 0.7420986813657423, 0.708..."
2,Plačljivo,[rezim_parkiranja],lastnik,FP,"[lastnik, parcele, gps_parkirisca, st_parkirni...","[0.7396677255630493, 0.7030855953693389, 0.675..."
3,Cena,[],rezim_parkiranja,FP,"[rezim_parkiranja, oznacevanje_casa_modra_cona...","[0.7262162053585052, 0.690419985594288, 0.6649..."
4,Število parkirnih mest,[st_parkirnih_mest],st_parkirnih_mest,TP,"[st_parkirnih_mest, gps_parkirisca, z.s., LAT,...","[0.8919463286033044, 0.7759173413117726, 0.678..."
5,Število mest za invalide,[],st_parkirnih_mest,FP,"[st_parkirnih_mest, rezim_parkiranja, z.s., LA...","[0.755936291159653, 0.7384845423698425, 0.6636..."


Saved CSV: notebook_sevnica/results_eval/results_eval_json/eval_json_details_sevnica.csv


In [15]:
# Povzetek ključnih metrik
dm = open_world["decision_metrics"]  # to mora biti slovar!

if isinstance(dm, dict):
    print(f"Precision: {dm.get('precision', 0):.3f}")
    print(f"Recall:    {dm.get('recall', 0):.3f}")
    print(f"F1 score:  {dm.get('f1', 0):.3f}")
    print(f"Accuracy:  {dm.get('accuracy', 0):.3f}")
    print()
    print(f"TP={dm.get('TP')}, FP={dm.get('FP')}, FN={dm.get('FN')}, TN={dm.get('TN')}")
else:
    print("Napaka: open_world['decision_metrics'] je tipa", type(dm))
    print(dm)


Precision: 0.167
Recall:    1.000
F1 score:  0.286
Accuracy:  0.167

TP=1, FP=5, FN=0, TN=0


In [16]:
# Robust JSON export (handles Ellipsis, set/tuple, NaN, numpy/pandas types)
import json, datetime
from pathlib import Path

def to_jsonable(obj):
    # 1) direct fixes
    if obj is Ellipsis:
        return None
    # numpy scalars -> Python
    try:
        import numpy as np
        if isinstance(obj, (np.generic,)):
            return obj.item()
    except Exception:
        pass
    # pandas timestamps -> isoformat
    try:
        import pandas as pd
        if isinstance(obj, pd.Timestamp):
            return obj.isoformat()
        # pandas NA/NaT/NaN -> None
        try:
            if pd.isna(obj):
                return None
        except Exception:
            pass
    except Exception:
        pass

    # 2) recursive structures
    if isinstance(obj, dict):
        return {k: to_jsonable(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [to_jsonable(x) for x in obj]
    if isinstance(obj, tuple):
        return [to_jsonable(x) for x in obj]
    if isinstance(obj, set):
        # sort for stability; also sanitize each element
        return sorted([to_jsonable(x) for x in obj], key=lambda x: str(x))

    # 3) everything else stays as-is (str, int, float, bool, None)
    return obj

OUT_JSON = "notebook_sevnica/results_eval/eval_open_world_results.json"
Path(OUT_JSON).parent.mkdir(parents=True, exist_ok=True)

out = {
    "params": {
        "gold_json": GOLD_JSON,
        "preds_json": PREDS_JSON,
        "top_k": TOP_K,
        "threshold": THRESHOLD,
        "score_fields": SCORE_FIELDS,
        "timestamp": datetime.datetime.now().isoformat()
    },
    "open_world": open_world,
    "closed_world": closed_metrics
}

with open(OUT_JSON, "w", encoding="utf-8") as f:
    json.dump(to_jsonable(out), f, ensure_ascii=False, indent=2)

print("Saved JSON:", OUT_JSON)



Saved JSON: notebook_sevnica/results_eval/eval_open_world_results.json


  if pd.isna(obj):
