# N-token representation: Precision, Recall, F1

This notebook computes TP, FP, Precision, Recall, and F1 for N-token representation on:

1. **Developer-Informed Oracle**  
2. **Defects4J**

The JSON files referenced here are generated by the replication scripts included in the package.


In [1]:
from pathlib import Path
from dataclasses import dataclass
import json
from typing import List, Dict, Tuple, Set

# ---------- Data structure ----------
@dataclass(frozen=True)
class BugFixInducingPair:
    """A (repository, fix commit, predicted inducing commit) triple.

    The dataclass is frozen (and keeps the default ``eq=True``), so instances
    are hashable and can be de-duplicated by storing them in a ``set``.
    """
    repo_name: str             # value of the record's "repo_name" key
    fix_commit_hash: str       # value of the record's "fix_commit_hash" key
    inducing_commit_hash: str  # one predicted hash ("commit_hash" of a prediction entry)

# ---------- TP / FP counting ----------
def get_tp_fp(file_json: List[Dict]) -> Tuple[Set[BugFixInducingPair],
                                              Set[BugFixInducingPair]]:
    """
    Return unique TP and FP pairs for a parsed JSON list.

    Each record must provide ``repo_name``, ``fix_commit_hash`` and
    ``bug_commit_hash`` (the oracle hashes). ``inducing_commit_hash`` is
    optional: it may be absent, ``null`` or empty, in which case the record
    contributes no pairs. When present it is a list of objects, each with a
    ``commit_hash`` key.

    A predicted hash is a true positive when it appears in the record's
    oracle hashes and a false positive otherwise. Duplicate inducing hashes
    for the same fix commit are ignored.

    Raises:
        KeyError: if a record lacks a required key, or a prediction entry
            lacks ``commit_hash``.
    """
    tp: Set[BugFixInducingPair] = set()
    fp: Set[BugFixInducingPair] = set()

    for obj in file_json:
        repo = obj["repo_name"]
        fix = obj["fix_commit_hash"]
        oracle = set(obj["bug_commit_hash"])
        # A default in .get() only covers a *missing* key; a JSON `null`
        # is parsed as None and would break the iteration below.
        preds = obj.get("inducing_commit_hash") or []

        pred_hashes = {p["commit_hash"] for p in preds}   # de-duplicate

        for h in pred_hashes:
            pair = BugFixInducingPair(repo, fix, h)
            (tp if h in oracle else fp).add(pair)

    return tp, fp

# ---------- Metric helpers ----------
def get_scores(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
    """
    Compute (precision, recall, F1) from raw TP / FP / FN counts.

    Any ratio whose denominator is zero is reported as 0.0 rather than
    raising ZeroDivisionError.
    """
    def _ratio(numerator, denominator):
        # Guarded division: an empty denominator yields 0.0.
        if not denominator:
            return 0.0
        return numerator / denominator

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    f1 = _ratio(2 * precision * recall, precision + recall)
    return precision, recall, f1


## 1. Developer-Informed Oracle

In [2]:
# Two levels above the current working directory.
# NOTE(review): presumably the replication-package root when the notebook is
# launched from its own folder — confirm.
BASE_DIR = Path().resolve().parent.parent

# All Developer-Informed Oracle result files live in one directory and differ
# only in their suffix.
DIO_RESULTS_DIR = BASE_DIR / "dataset/pyszz_v2/json-output-raw/rq1/developer-informed-oracle"

EVAL_FILES_DIO = {
    "B-SZZ": DIO_RESULTS_DIR / "dio_bic_conf_original.json",
    **{
        f"{n}-token": DIO_RESULTS_DIR / f"dio_bic_conf_{n}token.json"
        for n in range(1, 6)
    },
}

for label, path in EVAL_FILES_DIO.items():
    if not path.is_file():
        print(f"[WARN] missing → {label}: {path}")
        continue

    with path.open(encoding="utf-8") as f:
        data = json.load(f)

    tp_set, fp_set = get_tp_fp(data)
    tp, fp = len(tp_set), len(fp_set)

    # TP counts *unique* (repo, fix, hash) triples, so the oracle total must be
    # counted the same way; summing raw list lengths would inflate FN whenever
    # an oracle hash or a whole record is duplicated.
    oracle_pairs = {
        (obj["repo_name"], obj["fix_commit_hash"], h)
        for obj in data
        for h in obj["bug_commit_hash"]
    }
    fn = len(oracle_pairs) - tp

    precision, recall, f1 = get_scores(tp, fp, fn)

    print(f"=== {label} ===")
    print(f"TP: {tp:5d}  FP: {fp:5d}")
    print(f"Precision: {precision:.3f}  Recall: {recall:.3f}  F1: {f1:.3f}\n")


=== B-SZZ ===
TP:    51  FP:   133
Precision: 0.277  Recall: 0.671  F1: 0.392

=== 1-token ===
TP:    43  FP:   131
Precision: 0.247  Recall: 0.566  F1: 0.344

=== 2-token ===
TP:    52  FP:   157
Precision: 0.249  Recall: 0.684  F1: 0.365

=== 3-token ===
TP:    53  FP:   164
Precision: 0.244  Recall: 0.697  F1: 0.362

=== 4-token ===
TP:    55  FP:   193
Precision: 0.222  Recall: 0.724  F1: 0.340

=== 5-token ===
TP:    56  FP:   197
Precision: 0.221  Recall: 0.737  F1: 0.340



## 2. Defects4J

In [3]:
# All Defects4J result files live in one directory and differ only in their
# suffix.
D4J_RESULTS_DIR = BASE_DIR / "dataset/pyszz_v2/json-output-raw/rq1/defects4j"

EVAL_FILES_D4J = {
    "B-SZZ": D4J_RESULTS_DIR / "d4j_bic_conf_original.json",
    **{
        f"{n}-token": D4J_RESULTS_DIR / f"d4j_bic_conf_{n}token.json"
        for n in range(1, 6)
    },
}

for label, path in EVAL_FILES_D4J.items():
    if not path.is_file():
        print(f"[WARN] missing → {label}: {path}")
        continue

    with path.open(encoding="utf-8") as f:
        data = json.load(f)

    tp_set, fp_set = get_tp_fp(data)
    tp, fp = len(tp_set), len(fp_set)

    # TP counts *unique* (repo, fix, hash) triples, so the oracle total must be
    # counted the same way; summing raw list lengths would inflate FN whenever
    # an oracle hash or a whole record is duplicated.
    oracle_pairs = {
        (obj["repo_name"], obj["fix_commit_hash"], h)
        for obj in data
        for h in obj["bug_commit_hash"]
    }
    fn = len(oracle_pairs) - tp

    precision, recall, f1 = get_scores(tp, fp, fn)

    print(f"=== {label} ===")
    print(f"TP: {tp:5d}  FP: {fp:5d}")
    print(f"Precision: {precision:.3f}  Recall: {recall:.3f}  F1: {f1:.3f}\n")


=== B-SZZ ===
TP:    80  FP:   146
Precision: 0.354  Recall: 0.615  F1: 0.449

=== 1-token ===
TP:    66  FP:    95
Precision: 0.410  Recall: 0.508  F1: 0.454

=== 2-token ===
TP:    83  FP:   125
Precision: 0.399  Recall: 0.638  F1: 0.491

=== 3-token ===
TP:    91  FP:   151
Precision: 0.376  Recall: 0.700  F1: 0.489

=== 4-token ===
TP:    95  FP:   173
Precision: 0.354  Recall: 0.731  F1: 0.477

=== 5-token ===
TP:    98  FP:   211
Precision: 0.317  Recall: 0.754  F1: 0.446

