# MV-SZZ Precision, Recall, F1

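This notebook computes true positives (TP), false positives (FP), precision, recall, and F1 for MV-SZZ (1–5 tokens) and the B-SZZ baselines (with and without majority voting) on: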

1. **Developer-Informed Oracle**  
2. **Defects4J**

The JSON files referenced here are generated by the replication scripts included in the package.

In [1]:
from pathlib import Path
from dataclasses import dataclass
import json
from typing import List, Dict, Tuple, Set

# ---------- Data structure ----------
@dataclass(eq=True, frozen=True)
class BugFixInducingPair:
    """One (repository, fix commit, inducing commit) triple.

    Instances are immutable and compare/hash by field value, so they can be
    collected in sets to count each triple only once.
    """
    # Value of the entry's "repo_name" field.
    repo_name: str
    # Hash of the bug-fixing commit.
    fix_commit_hash: str
    # Hash of a commit reported as having induced the bug.
    inducing_commit_hash: str

# ---------- TP / FP counting ----------
def get_tp_fp(file_json: List[Dict]) -> Tuple[Set[BugFixInducingPair],
                                              Set[BugFixInducingPair]]:
    """
    Classify every predicted bug-inducing commit as a true or false positive.

    Args:
        file_json: Parsed JSON list. Each entry must provide "repo_name",
            "fix_commit_hash" and "bug_commit_hash" (the oracle hashes for
            that fix). "inducing_commit_hash" holds the predictions as
            objects carrying a "commit_hash" key; it may be absent, null,
            or empty when nothing was predicted.

    Returns:
        ``(tp, fp)``: two sets of unique (repo, fix, inducing) pairs. A
        predicted hash is a TP when it appears in the entry's oracle list,
        otherwise it is an FP. Duplicate inducing hashes for the same fix
        commit are counted once.

    Raises:
        KeyError: if an entry lacks a required key, or a prediction object
            lacks "commit_hash".
    """
    tp: Set[BugFixInducingPair] = set()
    fp: Set[BugFixInducingPair] = set()

    for obj in file_json:
        repo = obj["repo_name"]
        fix = obj["fix_commit_hash"]
        oracle = set(obj["bug_commit_hash"])
        # dict.get only falls back to its default when the key is absent; an
        # explicit JSON null would come back as None and break the iteration
        # below, so normalise every "no predictions" form to an empty list.
        preds = obj.get("inducing_commit_hash") or []

        # The set comprehension de-duplicates repeated hashes for this fix.
        for h in {p["commit_hash"] for p in preds}:
            pair = BugFixInducingPair(repo, fix, h)
            (tp if h in oracle else fp).add(pair)

    return tp, fp

# ---------- Metric helpers ----------
def get_scores(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
    """
    Derive precision, recall and F1 from raw TP / FP / FN counts.

    Any ratio whose denominator is zero is reported as 0.0 instead of
    raising ZeroDivisionError.

    Returns:
        ``(precision, recall, f1)``.
    """
    predicted_total = tp + fp   # everything the tool reported
    oracle_total = tp + fn      # everything the oracle lists

    precision = 0.0 if predicted_total == 0 else tp / predicted_total
    recall = 0.0 if oracle_total == 0 else tp / oracle_total

    pr_sum = precision + recall
    if pr_sum == 0:
        # F1 is undefined when both precision and recall are zero.
        return precision, recall, 0.0

    return precision, recall, (2 * precision * recall) / pr_sum


## 1. Developer-Informed Oracle

In [2]:
# Two directory levels above the current working directory.
BASE_DIR = Path().resolve().parent.parent

# Directory holding the Developer-Informed Oracle result files.
DIO_DIR = BASE_DIR / "dataset/pyszz_v2/json-output-raw/rq2/developer-informed-oracle"

# Display label -> result file, in the order the results are printed.
EVAL_FILES_DIO = {
    "B-SZZ": DIO_DIR / "dio_bic_conf_original.json",
    "B-SZZ with Majority Voting": DIO_DIR / "dio_bic_conf_original_mv.json",
    **{
        f"MV-SZZ({n} token)": DIO_DIR / f"dio_bic_conf_{n}token_mv.json"
        for n in range(1, 6)
    },
}

for label, path in EVAL_FILES_DIO.items():
    # Report and skip result files that have not been generated.
    if not path.is_file():
        print(f"[WARN] missing → {label}: {path}")
        continue

    with path.open(encoding="utf-8") as f:
        data = json.load(f)

    tp_set, fp_set = get_tp_fp(data)
    tp, fp = len(tp_set), len(fp_set)

    # TP is counted over unique (repo, fix, hash) pairs, so the oracle total
    # must be counted the same way; summing raw list lengths would inflate FN
    # (and deflate recall) whenever an oracle hash or an entry is repeated.
    oracle_pairs = {
        (obj["repo_name"], obj["fix_commit_hash"], h)
        for obj in data
        for h in obj["bug_commit_hash"]
    }
    fn = len(oracle_pairs) - tp

    precision, recall, f1 = get_scores(tp, fp, fn)

    print(f"=== {label} ===")
    print(f"TP: {tp:5d}  FP: {fp:5d}")
    print(f"Precision: {precision:.3f}  Recall: {recall:.3f}  F1: {f1:.3f}\n")


=== B-SZZ ===
TP:    51  FP:   133
Precision: 0.277  Recall: 0.671  F1: 0.392

=== B-SZZ with Majority Voting ===
TP:    42  FP:    35
Precision: 0.545  Recall: 0.553  F1: 0.549

=== MV-SZZ(1 token) ===
TP:    30  FP:    30
Precision: 0.500  Recall: 0.395  F1: 0.441

=== MV-SZZ(2 token) ===
TP:    41  FP:    31
Precision: 0.569  Recall: 0.539  F1: 0.554

=== MV-SZZ(3 token) ===
TP:    43  FP:    30
Precision: 0.589  Recall: 0.566  F1: 0.577

=== MV-SZZ(4 token) ===
TP:    44  FP:    30
Precision: 0.595  Recall: 0.579  F1: 0.587

=== MV-SZZ(5 token) ===
TP:    43  FP:    32
Precision: 0.573  Recall: 0.566  F1: 0.570



## 2. Defects4J

In [3]:
# Two directory levels above the current working directory.
BASE_DIR = Path().resolve().parent.parent

# Directory holding the Defects4J result files.
D4J_DIR = BASE_DIR / "dataset/pyszz_v2/json-output-raw/rq2/defects4j"

# Display label -> result file, in the order the results are printed.
# NOTE(review): despite its name, this mapping holds the Defects4J files; the
# variable name is kept unchanged so any code reading it keeps working.
EVAL_FILES_DIO = {
    "B-SZZ": D4J_DIR / "d4j_bic_conf_original.json",
    "B-SZZ with Majority Voting": D4J_DIR / "d4j_bic_conf_original_mv.json",
    **{
        f"MV-SZZ({n} token)": D4J_DIR / f"d4j_bic_conf_{n}token_mv.json"
        for n in range(1, 6)
    },
}

for label, path in EVAL_FILES_DIO.items():
    # Report and skip result files that have not been generated.
    if not path.is_file():
        print(f"[WARN] missing → {label}: {path}")
        continue

    with path.open(encoding="utf-8") as f:
        data = json.load(f)

    tp_set, fp_set = get_tp_fp(data)
    tp, fp = len(tp_set), len(fp_set)

    # TP is counted over unique (repo, fix, hash) pairs, so the oracle total
    # must be counted the same way; summing raw list lengths would inflate FN
    # (and deflate recall) whenever an oracle hash or an entry is repeated.
    oracle_pairs = {
        (obj["repo_name"], obj["fix_commit_hash"], h)
        for obj in data
        for h in obj["bug_commit_hash"]
    }
    fn = len(oracle_pairs) - tp

    precision, recall, f1 = get_scores(tp, fp, fn)

    print(f"=== {label} ===")
    print(f"TP: {tp:5d}  FP: {fp:5d}")
    print(f"Precision: {precision:.3f}  Recall: {recall:.3f}  F1: {f1:.3f}\n")


=== B-SZZ ===
TP:    80  FP:   146
Precision: 0.354  Recall: 0.615  F1: 0.449

=== B-SZZ with Majority Voting ===
TP:    62  FP:    63
Precision: 0.496  Recall: 0.477  F1: 0.486

=== MV-SZZ(1 token) ===
TP:    54  FP:    37
Precision: 0.593  Recall: 0.415  F1: 0.489

=== MV-SZZ(2 token) ===
TP:    67  FP:    51
Precision: 0.568  Recall: 0.515  F1: 0.540

=== MV-SZZ(3 token) ===
TP:    72  FP:    58
Precision: 0.554  Recall: 0.554  F1: 0.554

=== MV-SZZ(4 token) ===
TP:    74  FP:    61
Precision: 0.548  Recall: 0.569  F1: 0.558

=== MV-SZZ(5 token) ===
TP:    75  FP:    60
Precision: 0.556  Recall: 0.577  F1: 0.566

