# Eval

- Metrike odločanja (open-world): Precision, Recall, F1, Accuracy (vključno s TN) pri pragu t = `THRESHOLD`

- Rangirne metrike med pozitivnimi: Acc@1, P@5, MRR



In [44]:

# Parameters — adjust the paths for your run
RUN_TAG = "brezice"  # used in the names of exported files

# Input files: gold mapping and predicted matches
GOLD_JSON = "notebook_brezice/gold/trbovlje_to_brezice.json"
PREDS_JSON = "notebook_brezice/result/brezice_match.json"

# Evaluation settings
SCORE_FIELDS = ["combined_score", "score"]  # passed to the loader as score_field_options
THRESHOLD = 0.7  # decision threshold passed to the open-world metrics
TOP_K = 5  # number of candidates kept per source / k for the ranking metrics


In [46]:

# Imports & helpers
import sys
from importlib import reload
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Make the local helper module importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if "notebook_brezice" not in sys.path:
    sys.path.append("notebook_brezice")

import json_eval_helpers as H
# Re-import so that edits to the helper module are picked up without a kernel restart.
reload(H)

# List the public names exposed by the helper module.
print("Na voljo helper funkcije:", [n for n in dir(H) if not n.startswith("_")])


Na voljo helper funkcije: ['Dict', 'List', 'Optional', 'Set', 'Tuple', 'compute_metrics_from_maps', 'compute_open_world_metrics', 'decide_label_with_threshold', 'json', 'load_gold_json', 'load_predictions_json', 'load_predictions_with_scores', 'normalize']


In [47]:

# Load the gold mapping and the scored predictions
gold_map = H.load_gold_json(GOLD_JSON)
pred_pairs_map = H.load_predictions_with_scores(
    PREDS_JSON, score_field_options=SCORE_FIELDS, top_k=TOP_K
)

# Display how many entries each map contains (gold, predictions)
(len(gold_map), len(pred_pairs_map))


(8, 5)

In [48]:

# Open-world metrics (decisions + ranking among the positives)
open_world = H.compute_open_world_metrics(
    gold_map, pred_pairs_map,
    threshold=THRESHOLD, k=TOP_K, exclude_no_match_from_mrr=True,
)
# Last expression of the cell: show the full result dict
open_world


{'decision_metrics': {'threshold': 0.7,
  'TP': 2,
  'FP': 3,
  'FN': 1,
  'TN': 2,
  'precision': 0.4,
  'recall': 0.6666666666666666,
  'f1': 0.5,
  'accuracy': 0.5},
 'ranking_metrics_among_positives': {'count_positives': 5,
  'acc_at_1': 0.4,
  'p_at_5': 0.16,
  'mrr': 0.55},
 'details': [{'source': 'Naziv',
   'gold': ['lokacija_parkirisca'],
   'chosen': 'opis_lokacije',
   'outcome': 'FP',
   'candidates': ['opis_lokacije',
    'LON',
    'gps_parkirisca',
    'lokacija_parkirisca',
    'LAT'],
   'scores': [0.7149663845698038,
    0.7045562982559204,
    0.7004921997848309,
    0.6825292468070983,
    0.6303169131278992]},
  {'source': 'Lokacijska koordinata (GMAPS - WGS84)',
   'gold': ['gps_parkirisca'],
   'chosen': 'lokacija_parkirisca',
   'outcome': 'FP',
   'candidates': ['lokacija_parkirisca',
    'gps_parkirisca',
    'opis_lokacije',
    'LAT',
    'LON'],
   'scores': [0.7516873623643603,
    0.7418441695325514,
    0.7117719025611876,
    0.6774922895431519,
    0.6

In [49]:

# Closed-world comparison (Acc@1, P@5, MRR) — if the helper supports it
# Drop the scores: keep only the candidate labels, in their original order, per source.
pred_map_labels = {
    source: [label for label, _score in scored_pairs]
    for source, scored_pairs in pred_pairs_map.items()
}
# NOTE(review): in the recorded output the 'gold' field of each detail row holds
# the five candidate labels while 'predictions' holds a single label. The argument
# order here may be swapped relative to the helper's signature — confirm against
# json_eval_helpers.compute_metrics_from_maps before trusting these numbers.
closed_metrics = H.compute_metrics_from_maps(gold_map, pred_map_labels, k=TOP_K)
closed_metrics


{'count': 5,
 'acc_at_1': 0.8,
 'p_at_5': 0.8,
 'mrr': 0.8,
 'details': [{'source_column': 'Naziv',
   'gold': ['LAT',
    'LON',
    'gps_parkirisca',
    'lokacija_parkirisca',
    'opis_lokacije'],
   'predictions': {'lokacija_parkirisca'},
   'hit': True,
   'rank_hit': 1,
   'hits_in_k': 1},
  {'source_column': 'Lokacijska koordinata (GMAPS - WGS84)',
   'gold': ['LAT',
    'LON',
    'gps_parkirisca',
    'lokacija_parkirisca',
    'opis_lokacije'],
   'predictions': {'gps_parkirisca'},
   'hit': True,
   'rank_hit': 1,
   'hits_in_k': 1},
  {'source_column': 'Število parkirnih mest',
   'gold': ['LAT',
    'LON',
    'gps_parkirisca',
    'parkirni prostori',
    'parkirni prostori za invalide'],
   'predictions': {'parkirni prostori'},
   'hit': True,
   'rank_hit': 1,
   'hits_in_k': 1},
  {'source_column': 'Število mest z električno polnilnico',
   'gold': ['LAT',
    'LON',
    'opis_lokacije',
    'parkirni prostori',
    'parkirni prostori za invalide'],
   'predictions': 

In [50]:

# Per-column details + CSV export
details_df = pd.DataFrame(open_world["details"])
display(details_df.head(20))

# Create the export directory (and any missing parents) before writing
out_dir = Path("notebook_brezice/results_eval/results_eval_json")
out_dir.mkdir(parents=True, exist_ok=True)

# The export file name carries the run tag
out_csv = out_dir.joinpath(f"eval_json_details_{RUN_TAG}.csv")
details_df.to_csv(out_csv, index=False, encoding="utf-8")
print("Saved CSV:", out_csv)


Unnamed: 0,source,gold,chosen,outcome,candidates,scores
0,Naziv,[lokacija_parkirisca],opis_lokacije,FP,"[opis_lokacije, LON, gps_parkirisca, lokacija_...","[0.7149663845698038, 0.7045562982559204, 0.700..."
1,Lokacijska koordinata (GMAPS - WGS84),[gps_parkirisca],lokacija_parkirisca,FP,"[lokacija_parkirisca, gps_parkirisca, opis_lok...","[0.7516873623643603, 0.7418441695325514, 0.711..."
2,Plačljivo,[Parkrini režim],,FN,[],[]
3,Cena,[],,TN,[],[]
4,Število parkirnih mest,[parkirni prostori],parkirni prostori,TP,"[parkirni prostori, parkirni prostori za inval...","[0.8423683386582593, 0.793498350942836, 0.7738..."
5,Električna polnilnica,[],,TN,[],[]
6,Število mest z električno polnilnico,[],parkirni prostori za invalide,FP,"[parkirni prostori za invalide, parkirni prost...","[0.7477336586438692, 0.7223293441646503, 0.685..."
7,Število mest za invalide,[parkirni prostori za invalide],parkirni prostori za invalide,TP,"[parkirni prostori za invalide, opis_lokacije,...","[0.8586200865934479, 0.7293987138851269, 0.719..."


Saved CSV: notebook_brezice/results_eval/results_eval_json/eval_json_details_brezice.csv


In [51]:
# Summary of the key metrics
dm = open_world["decision_metrics"]  # this must be a dict!

if not isinstance(dm, dict):
    # Unexpected shape: report the actual type and dump the value for inspection
    print("Napaka: open_world['decision_metrics'] je tipa", type(dm))
    print(dm)
else:
    # Missing metrics are printed as 0.000
    print(f"Precision: {dm.get('precision', 0):.3f}")
    print(f"Recall:    {dm.get('recall', 0):.3f}")
    print(f"F1 score:  {dm.get('f1', 0):.3f}")
    print(f"Accuracy:  {dm.get('accuracy', 0):.3f}")
    print()
    # Raw confusion counts
    print(f"TP={dm.get('TP')}, FP={dm.get('FP')}, FN={dm.get('FN')}, TN={dm.get('TN')}")


Precision: 0.400
Recall:    0.667
F1 score:  0.500
Accuracy:  0.500

TP=2, FP=3, FN=1, TN=2


In [52]:
# Robust JSON export (handles Ellipsis, set/tuple, NaN, numpy/pandas types)
import json, datetime
from pathlib import Path

def to_jsonable(obj):
    """Recursively convert ``obj`` into a structure ``json.dump`` can serialise.

    Conversions applied:
      * ``dict``            -> dict with converted values (keys are left untouched)
      * ``list`` / ``tuple``-> list of converted items
      * ``set``/``frozenset``-> list of converted items, sorted by ``str()`` for stability
      * ``numpy.ndarray``   -> converted nested list
      * numpy scalar        -> the matching Python scalar
      * ``Ellipsis``, ``None``, NaN, ``pd.NA``, ``pd.NaT`` -> ``None``
      * ``pd.Timestamp``    -> ISO-8601 string
      * anything else       -> returned unchanged

    numpy and pandas are optional: if either cannot be imported, the
    corresponding conversions are simply skipped.

    Containers are processed *before* the missing-value check. Calling
    ``pd.isna`` on a list returns an array, whose truth value made one-element
    lists such as ``[nan]`` collapse to ``None`` and emitted a NumPy warning
    for empty lists.
    """
    # 1) Containers first, so the scalar checks below never see a list-like.
    if isinstance(obj, dict):
        return {k: to_jsonable(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [to_jsonable(x) for x in obj]
    if isinstance(obj, (set, frozenset)):
        # Sets are unordered; sort by string form for a stable output.
        return sorted((to_jsonable(x) for x in obj), key=str)

    # 2) Sentinels that have no JSON representation.
    if obj is None or obj is Ellipsis:
        return None

    # 3) numpy values -> plain Python (only a missing library is tolerated).
    try:
        import numpy as np
    except ImportError:
        np = None
    if np is not None:
        if isinstance(obj, np.ndarray):
            return to_jsonable(obj.tolist())
        if isinstance(obj, np.generic):
            # Fall through afterwards so that a numpy NaN is also mapped to None.
            obj = obj.item()

    # 4) pandas missing values and timestamps.
    try:
        import pandas as pd
    except ImportError:
        pd = None
    if pd is not None:
        # Restrict to scalars: for array-likes pd.isna returns an array.
        if pd.api.types.is_scalar(obj) and pd.isna(obj):
            return None
        if isinstance(obj, pd.Timestamp):
            return obj.isoformat()

    # 5) NaN check that also works when pandas is unavailable.
    if isinstance(obj, float) and obj != obj:
        return None

    # 6) Everything else stays as-is (str, int, float, bool, ...).
    return obj

# Destination of the combined results file.
# NOTE(review): unlike the CSV export, this file name does not include RUN_TAG.
OUT_JSON = "notebook_brezice/results_eval/eval_open_world_results.json"
Path(OUT_JSON).parent.mkdir(parents=True, exist_ok=True)

# Run configuration stored next to the metrics, plus the time of the export
run_params = {
    "gold_json": GOLD_JSON,
    "preds_json": PREDS_JSON,
    "top_k": TOP_K,
    "threshold": THRESHOLD,
    "score_fields": SCORE_FIELDS,
    "timestamp": datetime.datetime.now().isoformat(),
}
out = {"params": run_params, "open_world": open_world, "closed_world": closed_metrics}

# Sanitise first, then write UTF-8 JSON with non-ASCII characters kept as-is
with open(OUT_JSON, "w", encoding="utf-8") as fh:
    json.dump(to_jsonable(out), fh, ensure_ascii=False, indent=2)

print("Saved JSON:", OUT_JSON)



Saved JSON: notebook_brezice/results_eval/eval_open_world_results.json


  if pd.isna(obj):
