In [None]:
from pathlib import Path
from dataclasses import dataclass
import json
from typing import List, Dict, Tuple, Set

# ---------- Data structure ----------
@dataclass(frozen=True)
class BugFixInducingPair:
    """
    One (repository, fix commit, inducing commit) triple.

    The dataclass is frozen with the default ``eq=True``, so instances are
    immutable, compare by field values, and are hashable -- which is what lets
    them be collected into sets for de-duplication.
    """
    repo_name: str             # repository the fix commit belongs to
    fix_commit_hash: str       # hash of the bug-fixing commit
    inducing_commit_hash: str  # hash of one commit paired with that fix as inducing

# ---------- TP / FP counting ----------
def get_tp_fp(file_json: List[Dict]) -> Tuple[Set[BugFixInducingPair],
                                              Set[BugFixInducingPair]]:
    """
    Split the predicted (repo, fix, inducing) pairs of a parsed JSON list
    into true positives and false positives.

    Each record must provide ``repo_name``, ``fix_commit_hash`` and
    ``bug_commit_hash`` (the oracle hashes).  ``inducing_commit_hash`` is
    optional; when present it is iterated as a collection of mappings, each
    with a ``commit_hash`` key.  A predicted hash that occurs in the record's
    oracle yields a TP pair, any other predicted hash yields an FP pair.
    Repeated predictions of the same hash for a record collapse to one pair.

    Returns
    -------
    (tp, fp) : two sets of ``BugFixInducingPair``.
    """
    true_positives: Set[BugFixInducingPair] = set()
    false_positives: Set[BugFixInducingPair] = set()

    for record in file_json:
        repo_name = record["repo_name"]
        fix_hash = record["fix_commit_hash"]
        oracle_hashes = frozenset(record["bug_commit_hash"])

        # Building a set drops duplicated predictions within this record.
        predicted_hashes = {
            candidate["commit_hash"]
            for candidate in record.get("inducing_commit_hash", [])
        }

        true_positives.update(
            BugFixInducingPair(repo_name, fix_hash, commit)
            for commit in predicted_hashes & oracle_hashes
        )
        false_positives.update(
            BugFixInducingPair(repo_name, fix_hash, commit)
            for commit in predicted_hashes - oracle_hashes
        )

    return true_positives, false_positives

# ---------- Metric helpers ----------
def get_scores(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
    """
    Compute precision, recall and F1 from raw TP / FP / FN counts.

    Any ratio whose denominator is zero is reported as 0.0 rather than
    raising ``ZeroDivisionError``.

    Returns
    -------
    (precision, recall, f1)
    """
    def _ratio(numerator, denominator):
        # Guarded division: a zero denominator yields 0.0.
        if not denominator:
            return 0.0
        return numerator / denominator

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    f1 = _ratio(2 * precision * recall, precision + recall)
    return precision, recall, f1

# ----------------------- Comparison helper -----------------------
def compare_select_exclude(select_path: Path, exclude_path: Path) -> None:
    """
    Print TP, FP, Precision, Recall, F1 for the *selected* and *excluded*
    subsets, and show the differences computed AFTER rounding to 3 d.p.

    Parameters
    ----------
    select_path : Path
        JSON result file for the "selected" subset.  Its stem, with the
        ``_mv_select`` suffix removed, is printed as the table title.
    exclude_path : Path
        JSON result file for the "excluded" subset.

    Both files are read as UTF-8 and must parse to a list of records with the
    keys ``repo_name``, ``fix_commit_hash`` and ``bug_commit_hash`` (plus the
    optional ``inducing_commit_hash`` consumed by ``get_tp_fp``).

    The last printed column is EXCLUDED minus SELECTED.  Nothing is returned.
    """
    # ---------- helper ----------
    def _load_counts(p: Path) -> Tuple[int, int, int]:
        """Return (TP, FP, FN) counts for one result file."""
        with p.open(encoding="utf-8") as f:
            data = json.load(f)
        tp_set, fp_set = get_tp_fp(data)
        tp, fp = len(tp_set), len(fp_set)
        # Count the oracle as unique (repo, fix, inducing) triples -- the same
        # granularity at which TP pairs are de-duplicated.  Summing raw list
        # lengths would count a repeated oracle hash more than once and
        # inflate FN, lowering recall and F1.
        oracle_pairs = {
            (obj["repo_name"], obj["fix_commit_hash"], inducing)
            for obj in data
            for inducing in obj["bug_commit_hash"]
        }
        fn = len(oracle_pairs) - tp
        return tp, fp, fn

    # ---------- counts ----------
    tp_sel, fp_sel, fn_sel = _load_counts(select_path)
    tp_exc, fp_exc, fn_exc = _load_counts(exclude_path)

    # ---------- metrics (rounded first) ----------
    # Rounding before subtracting keeps each printed delta equal to the
    # difference of the two printed values on the same row.
    prec_sel, rec_sel, f1_sel = (round(x, 3) for x in get_scores(tp_sel, fp_sel, fn_sel))
    prec_exc, rec_exc, f1_exc = (round(x, 3) for x in get_scores(tp_exc, fp_exc, fn_exc))

    # ---------- output ----------
    print(f"=== {select_path.stem.replace('_mv_select','')} ===")
    print("            SELECTED      EXCLUDED      Δ(B−A)")
    print(f"TP        {tp_sel:7d}   {tp_exc:7d}   {tp_exc - tp_sel:+7d}")
    print(f"FP        {fp_sel:7d}   {fp_exc:7d}   {fp_exc - fp_sel:+7d}")
    print(f"Precision {prec_sel:7.3f}   {prec_exc:7.3f}   {prec_exc - prec_sel:+7.3f}")
    print(f"Recall    {rec_sel:7.3f}   {rec_exc:7.3f}   {rec_exc - rec_sel:+7.3f}")
    print(f"F1        {f1_sel:7.3f}   {f1_exc:7.3f}   {f1_exc - f1_sel:+7.3f}\n")


## 1. Developer-Informed Oracle

In [6]:
# ----------------------- File pairs -----------------------
# Two directories above the resolved current working directory.
BASE_DIR = Path().resolve().parent.parent

# Each tuple is (select file, exclude file) for one configuration.
PAIRS_DIO = [
    (
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/developer-informed-oracle/dio_bic_conf_4token_mv_select.json",
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/developer-informed-oracle/dio_bic_conf_4token_mv_exclude.json",
    ),
    (
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/developer-informed-oracle/dio_bic_conf_5token_mv_select.json",
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/developer-informed-oracle/dio_bic_conf_5token_mv_exclude.json",
    ),
]

# ----------------------- Run comparison -----------------------
for sel, exc in PAIRS_DIO:
    if sel.is_file() and exc.is_file():
        compare_select_exclude(sel, exc)
    else:
        # Report the incomplete pair and move on to the next one.
        print(f"[WARN] Missing pair:\n  {sel}\n  {exc}\n")


=== dio_bic_conf_4token ===
            SELECTED      EXCLUDED      Δ(B−A)
TP             44        44        +0
FP             30        30        +0
Precision   0.595     0.595    +0.000
Recall      0.579     0.579    +0.000
F1          0.587     0.587    +0.000

=== dio_bic_conf_5token ===
            SELECTED      EXCLUDED      Δ(B−A)
TP             43        43        +0
FP             32        30        -2
Precision   0.573     0.589    +0.016
Recall      0.566     0.566    +0.000
F1          0.570     0.577    +0.007



## 2. Defects4J

In [7]:
# ----------------------- File pairs -----------------------
# Two directories above the resolved current working directory.
BASE_DIR = Path().resolve().parent.parent

# (select file, exclude file) per configuration for the Defects4J results.
# Bound to its own name so this cell does not overwrite PAIRS_DIO, which holds
# the Developer-Informed Oracle files.
PAIRS_D4J = [
    (
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/defects4j/d4j_bic_conf_4token_mv_select.json",
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/defects4j/d4j_bic_conf_4token_mv_exclude.json",
    ),
    (
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/defects4j/d4j_bic_conf_5token_mv_select.json",
        BASE_DIR / "dataset/pyszz_v2/json-output-raw/discussion/defects4j/d4j_bic_conf_5token_mv_exclude.json",
    ),
]

# ----------------------- Run comparison -----------------------
for sel, exc in PAIRS_D4J:
    if not sel.is_file() or not exc.is_file():
        # Report the incomplete pair and move on to the next one.
        print(f"[WARN] Missing pair:\n  {sel}\n  {exc}\n")
        continue
    compare_select_exclude(sel, exc)


=== d4j_bic_conf_4token ===
            SELECTED      EXCLUDED      Δ(B−A)
TP             74        68        -6
FP             61        52        -9
Precision   0.548     0.567    +0.019
Recall      0.569     0.523    -0.046
F1          0.558     0.544    -0.014

=== d4j_bic_conf_5token ===
            SELECTED      EXCLUDED      Δ(B−A)
TP             75        70        -5
FP             60        51        -9
Precision   0.556     0.579    +0.023
Recall      0.577     0.538    -0.039
F1          0.566     0.558    -0.008

