In [None]:
import itertools
import json
import math
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import wilcoxon
from tqdm import tqdm

from lib.experiment_utils import *

# plt.rcParams.update({
#     "text.usetex": True,
#     # "font.family": "sans-serif",
#     # "font.sans-serif": ["Helvetica"]
# })

# input data
CORE_DATA_DIR     = "./data/Defects4J/core"
BIC_GT_DIR        = "./data/Defects4J/BIC_dataset"
BASELINE_DATA_DIR = "./data/Defects4J/baseline"
INDUSTRY_DATA_DIR = "./data/industry"

# output data
RESULTS_DIR = "./experiment_results/"
# Later cells write figures, tables and CSV files below RESULTS_DIR without
# creating it; make sure it exists so a fresh checkout survives "Run All".
os.makedirs(RESULTS_DIR, exist_ok=True)

# load core data
# fault_dirs maps (pid, vid) -> directory of that fault. Each directory name
# is split on '-' and the last character of the version part is dropped
# (presumably the trailing 'b' of names such as "Lang-46b" -- confirm
# against the data layout).
fault_dirs = {}
for fault in os.listdir(CORE_DATA_DIR):
    fault_dir = os.path.join(CORE_DATA_DIR, fault)
    if not os.path.isdir(fault_dir):
        # plain files next to the fault directories are ignored
        continue
    pid, vid = fault.split('-')
    fault_dirs[(pid, vid[:-1])] = fault_dir
print(f"The core data for {len(fault_dirs)} faults are loaded.")

# load BIC ground-truth data
# GT is used below as a DataFrame with (at least) the columns
# "pid", "vid" and "commit".
GT = load_BIC_GT(BIC_GT_DIR)

In [None]:
# Voting schemes, keyed by the name used in the hyperparameter grid.
# Every function receives one record `r` exposing the attributes `score`,
# `max_rank` and `dense_rank` and returns the weight of its vote.
def _score_over_max_rank(r):
    """Return r.score divided by r.max_rank."""
    return r.score/r.max_rank


def _one_over_max_rank(r):
    """Return the reciprocal of r.max_rank."""
    return 1/r.max_rank


def _score_over_dense_rank(r):
    """Return r.score divided by r.dense_rank."""
    return r.score/r.dense_rank


def _one_over_dense_rank(r):
    """Return the reciprocal of r.dense_rank."""
    return 1/r.dense_rank


def _raw_score(r):
    """Return r.score unchanged."""
    return r.score


def _constant_vote(r):
    """Return 1 regardless of the record."""
    return 1


# NOTE: the insertion order of the keys is the order in which the
# hyperparameter grid enumerates the voting schemes.
voting_functions = {
    'max-1': _score_over_max_rank,
    'max-0': _one_over_max_rank,
    'dense-1': _score_over_dense_rank,
    'dense-0': _one_over_dense_rank,
    'score': _raw_score,      # baseline
    'equal': _constant_vote,  # baseline
}

### Simple Demo :p

In [None]:
# Vote for the commits of Lang-46 with the score/max_rank scheme and show
# the resulting table.
demo_votes = vote_for_commits(
    fault_dirs[("Lang", "46")], "git", "Ochiai",
    voting_func=(lambda r: r.score/r.max_rank),
    decay=0.1,
    use_method_level_score=False,
    excluded=["5814f50"],
    adjust_depth=True,
)
display(demo_votes)
# bic: 868f697

# **Weighted Bisection Example (Math-87b)**

In [None]:
# Hyperparameter configuration read (as a global) by
# draw_score_distributions.
hp = dict(
    tool="git",
    formula="Ochiai",
    voting="max-0",
    decay=0.1,
    score_level="line",
    stage2=True,
)

def draw_score_distributions(pid, vid):
    """Plot the per-commit vote scores of one fault and simulate bisection.

    Reads the notebook globals `hp`, `GT`, `fault_dirs` and
    `voting_functions`. Opens a new matplotlib figure with one bar per
    commit in C_BIC (the BIC in red, every other commit in green) and leaves
    it open so that the caller can annotate and save it.

    Args:
        pid: project id, first component of the (pid, vid) key.
        vid: version id, second component of the (pid, vid) key.

    Returns:
        A tuple (BIC_index, standard_pivots, weighted_pivots): the position
        of the BIC inside C_BIC, followed by element [1] of the results of
        standard_bisection and weighted_bisection called with
        return_pivots=True.

    Raises:
        KeyError: if the fault is unknown to `GT` or `fault_dirs`.
        ValueError: if the BIC is not part of C_BIC.
    """
    fault = (pid, vid)
    BIC = GT.set_index(["pid", "vid"]).loc[fault, "commit"]
    fault_dir = fault_dirs[fault]

    # style-change commits are only collected (and excluded) when stage 2 is on
    style_change_commits = get_style_change_commits(
        fault_dir, hp["tool"], with_Rewrite=True) if hp["stage2"] else []

    vote_df = vote_for_commits(fault_dir, hp["tool"], hp["formula"],
        hp["decay"], voting_functions[hp["voting"]],
        use_method_level_score=(hp["score_level"] == "method"),
        excluded=style_change_commits)

    # C
    all_commits = get_all_commits(fault_dir)

    # C_BIC: commits that received a vote entry and are not style changes,
    # kept in the order returned by get_all_commits
    C_BIC = [
        c for c in all_commits
        if c in vote_df.index and c not in style_change_commits
    ]
    scores = [float(vote_df.loc[c, "vote"]) for c in C_BIC]
    BIC_index = C_BIC.index(BIC)

    plt.figure(figsize=(6, 2.5))
    plt.title(fault)
    plt.bar(range(len(scores)), scores, color=[
        "red" if i == BIC_index else "green" for i in range(len(scores))])
    plt.ylabel("Score")
    plt.xlabel("Commit Index (in Descending Order of Time)")
    # plt.yscale("log")

    return (
        BIC_index,
        standard_bisection(C_BIC, BIC, return_pivots=True)[1],
        weighted_bisection(C_BIC, scores, BIC, return_pivots=True)[1]
    )

# Draw the Math-87 score distribution, then annotate the figure with the
# pivot sequences of both bisection variants and save it.
BIC_index, standard_pivots, weighted_pivots = draw_score_distributions(
    "Math", "87")

arrow = '$\\rightarrow$'
standard_pivots = arrow.join(str(p) for p in standard_pivots)
weighted_pivots = arrow.join(str(p) for p in weighted_pivots)

for y, caption in (
    (2, f"Pivot (Standard Bisection): {standard_pivots}"),
    (1.5, f"Pivot (Weighted Bisection): {weighted_pivots}"),
):
    plt.text(7, y, caption)

# replaces the title set inside draw_score_distributions
plt.title(f"Math-87b (BIC index: {BIC_index})")
plt.savefig(os.path.join(RESULTS_DIR, "Math-87b.pdf"), bbox_inches="tight")
plt.show()

# **Research Question 1**

## RQ1-1: Search Space Analysis

In [None]:
# For every ground-truth fault that has core data, record the size of the
# three search spaces C, C_susp and C_BIC.
tool = "git"
ss_rows = []
for _, row in GT.iterrows():
    fault = (row.pid, row.vid)
    if fault in fault_dirs:
        fault_dir = fault_dirs[fault]
        C = get_all_commits(fault_dir)

        commit_df = load_commit_history(fault_dir, tool)
        C_susp = commit_df.commit_hash.unique()
        C_sc = get_style_change_commits(fault_dir, tool, with_Rewrite=True)

        n_susp = len(C_susp)
        # NOTE(review): |C_BIC| is computed as |C_susp| - |C_sc|, which is
        # only the size of C_susp \ C_sc if every style-change commit is in
        # C_susp -- confirm.
        ss_rows.append((row.pid, row.vid, len(C), n_susp, n_susp - len(C_sc)))

ss_df = pd.DataFrame(
    data=ss_rows,
    columns=["pid", "vid", "C", "C_susp", "C_BIC"],
)

# reduction ratio: mean of the per-fault size ratios relative to C
for label, column in (
    ("Reduction (C -> C_susp):", "C_susp"),
    ("Reduction (C -> C_BIC):", "C_BIC"),
):
    print(label, (ss_df[column]/ss_df["C"]).mean().round(3))

# draw figure: horizontal boxplot of the three search-space sizes
savepath = os.path.join(RESULTS_DIR, "RQ1_SS.pdf")

plt.figure(figsize=(5,1.5))
mean_size = ss_df[["C", "C_susp", "C_BIC"]].mean().values.round(1)
plt_df = ss_df.melt(
    id_vars=["pid", "vid"], var_name="Search Space", value_name="Size")
sns.boxplot(data=plt_df, y="Search Space", x="Size", orient="h")

# keep the tick positions chosen by seaborn, but relabel them with the
# math-mode name of each search space and its mean size
tick_locs, _ = plt.yticks()
tick_names = ["$C$", "$C_{susp}$", "$C_{BIC}$"]
tick_labels = [
    f"{name}\n(mean:{mean})" for name, mean in zip(tick_names, mean_size)
]
plt.yticks(tick_locs, tick_labels)

plt.xscale("log")
plt.xlabel("Size (log scale)")
plt.savefig(savepath, bbox_inches="tight")
print(f"Saved to {savepath}")

In [None]:
def join_cols(l, width):
    """Format the items of `l` as one row of LaTeX table cells.

    Each item is right-aligned in a field of `width` characters and the
    cells are joined with " & ".
    """
    cells = [f"{item:>{width}}" for item in l]
    return " & ".join(cells)

# Build a LaTeX tabular that reports, for a list of thresholds, how many
# subjects have |C_BIC| <= threshold. The thresholds are split over n_rows
# header/value row pairs.
table = ""
size_of_C_BIC = [1, 2, 3, 5, 10, 20, 30, 50, 100, 200, 300, 500, 600, 700]
n_rows = 2
row_len = math.ceil(len(size_of_C_BIC)/n_rows)
table += "\\toprule\n"
for row_idx in range(n_rows):
    sizes = size_of_C_BIC[row_idx*row_len:(row_idx+1)*row_len]
    table += "$|C_{BIC}|$ & " + join_cols([f"$\\leq {n}$" for n in sizes], width=10) + "\\\\\\midrule\n"
    # "\\#" (not "\#"): "\#" is an invalid escape sequence in a normal string
    # literal; the emitted text is unchanged.
    table += "\\# Subjects & " + join_cols(
        [(ss_df["C_BIC"] <= n).sum() for n in sizes], width=10) + "\\\\"
    # the last row pair closes the table, the others are followed by a rule
    table += "\\bottomrule\n" if row_idx + 1 == n_rows else "\\midrule\n"
savepath = os.path.join(RESULTS_DIR, "RQ1_C_BIC_size.tex")
tabular = "\\begin{tabular}{" + f"l|{'r'*row_len}" + "}\n" + table + "\\end{tabular}"
with open(savepath, "w") as f:
    f.write(tabular)
print(tabular)
print(f"Saved to {savepath}")

## RQ1-2: Ranking Performance Evaluation

### Our scoring model + max aggregation (baseline)

In [None]:
RANKING_DIR = os.path.join(RESULTS_DIR, "ranking")

def hyperparams_to_path(hp):
    """Return the CSV path that stores the ranking results of one configuration.

    As a side effect, the directory that will contain the file is created
    (including missing parents), so the returned path can be written to
    directly.

    Args:
        hp: dict with the keys "tool", "score_level", "formula", "model" and
            "stage2"; for the "voting" model also "voting" and "decay".

    Returns:
        "<RANKING_DIR>/<prefix>/<voting>-<decay>.csv" for the "voting" model
        and "<RANKING_DIR>/<prefix>.csv" for the "maxAggr" model, where
        <prefix> encodes tool, score level, formula, model and the
        "_C_BIC"/"_C_susp" suffix selected by hp["stage2"].

    Raises:
        ValueError: if hp["model"] is neither "voting" nor "maxAggr".
    """
    model_path = os.path.join(RANKING_DIR,
        f"{hp['tool']}_{hp['score_level']}_{hp['formula']}_{hp['model']}{'_C_BIC' if hp['stage2'] else '_C_susp'}")
    if hp['model'] == 'voting':
        # makedirs also creates RANKING_DIR itself when it is missing
        os.makedirs(model_path, exist_ok=True)
        filename = f"{hp['voting']}-{hp['decay']}.csv"
        return os.path.join(model_path, filename)
    elif hp['model'] == 'maxAggr':
        os.makedirs(RANKING_DIR, exist_ok=True)
        return model_path + ".csv"
    # previously fell through and returned None, which only failed later
    raise ValueError(f"Unknown scoring model: {hp['model']!r}")

# Hyperparameter grids. Each entry maps a hyperparameter name to the
# candidate values to try; all entries use the same keys.
hyperparams = [
    {
        "model": ["voting"], # scoring model
        "formula": ["Ochiai"], # SBFL formula
        "score_level": ["line"], # granularity of code elements [line|method]
        "tool": ["git"], # commit history retrieval tool [git|shovel]
        "voting": voting_functions, # iterated as the names of the voting schemes
        "decay": [0.0, 0.1, 0.2, 0.3], # lambdas for decay
        "stage2": [True, False], # style change filtering
    },
    {
        "model": ["maxAggr"],
        "formula": ["Ochiai"],
        "score_level": ["line"],
        "tool": ["git"],
        "voting": [None],
        "decay": [None],
        "stage2": [True, False],
    },
]

hp_names = list(hyperparams[0].keys())
hp_prod = []

# Cartesian product of every grid, with values ordered like hp_names.
# The loop variable must not be called `hyperparams`: that would replace
# the grid list by its last dict once the loop has finished.
for hp_grid in hyperparams:
    hp_prod += list(
        itertools.product(*[hp_grid[n] for n in hp_names])
    )

# Evaluate every hyperparameter configuration: compute the rank of the BIC
# for each ground-truth fault and store one CSV per configuration. Results
# already present in the CSV are reused instead of being recomputed.
print("Evaluating each hyperparam configuration..", flush=True)
errors = set()  # (pid, vid, "KeyError", message) for faults whose BIC got no rank
for hp_values in tqdm(hp_prod, colour="blue"):
    hp = dict(zip(hp_names, hp_values))
    savepath = hyperparams_to_path(hp)
    if os.path.exists(savepath):
        # load the cached results, indexed by (pid, vid) with vid as string
        old_rank_df = pd.read_csv(savepath)
        old_rank_df = old_rank_df.drop_duplicates()
        old_rank_df["vid"] = old_rank_df["vid"].astype(str)
        old_rank_df.set_index(["pid", "vid"], inplace=True)
    else:
        old_rank_df = None
    rank_rows = []

    for _, row in GT.iterrows():
        fault = (row.pid, row.vid)
        if fault not in fault_dirs:
            continue

        # skip faults whose cached row was computed for the same BIC
        if old_rank_df is not None \
            and fault in old_rank_df.index \
            and row.commit == old_rank_df.at[fault, "BIC"]:
            continue

        fault_dir = fault_dirs[fault]
        try:
            num_commits = get_the_number_of_total_commits(fault_dir)
        except FileNotFoundError:
            # the fault is silently skipped when this lookup hits a missing file
            continue

        # stage 2 = exclude style-change commits from the candidates
        if hp["stage2"]:
            style_change_commits = get_style_change_commits(
                fault_dir, hp["tool"], with_Rewrite=True)
        else:
            style_change_commits = []

        if hp["model"] == "voting":
            vote_df = vote_for_commits(fault_dir, hp["tool"], hp["formula"],
                hp["decay"], voting_functions[hp["voting"]],
                use_method_level_score=(hp["score_level"] == "method"),
                excluded=style_change_commits, adjust_depth=True)
        elif hp["model"] == "maxAggr":
            vote_df = max_aggr_for_commits(fault_dir, hp["tool"], hp["formula"],
                use_method_level_score=(hp["score_level"] == "method"),
                excluded=style_change_commits)       
        # NOTE(review): assumes vote_df still has one row per excluded
        # commit, so that subtracting them yields |C_BIC| -- confirm.
        num_candidates = vote_df.shape[0] - len(style_change_commits)

        # higher vote = better rank; ties share the largest rank of the group
        vote_df["rank"] = (-vote_df["vote"]).rank(method="max")

        try:
            rank_of_BIC = vote_df.loc[row.commit, "rank"]
        except KeyError as e:
            # the BIC has no row in vote_df: record it and move on
            errors.add((row.pid, row.vid, "KeyError", str(e)))
            continue
        # print(row.pid, row.vid, flush=True)
        result = [
            row.pid, row.vid, row.commit,
            rank_of_BIC, num_candidates, num_commits
        ]
        rank_rows.append(result)

    rank_df = pd.DataFrame(data=rank_rows,
        columns=["pid", "vid", "BIC", "rank", "num_candidates",
            "num_total_commits"])
    rank_df["rank"] = rank_df["rank"].astype(int)
    # display(rank_df)
    if old_rank_df is not None:
        # keep only cached rows whose fault is still part of GT ...
        old_rank_df = old_rank_df[old_rank_df.index.isin(
            list(GT[["pid", "vid"]].to_records(index=False)))]
        # ... and that were not recomputed in this run
        old_rank_df = old_rank_df[~old_rank_df.index.isin(
            list(rank_df[["pid", "vid"]].to_records(index=False)))]
        rank_df = pd.concat([old_rank_df.reset_index(), rank_df])
    rank_df.sort_values(by=["pid", "vid"], inplace=True)
    rank_df.to_csv(savepath, index=False)
print(f"Saved to {os.path.join(RESULTS_DIR, 'ranking')}")

### Random (baseline) + Worst 

In [None]:
# Analytic baselines for every search space: "Worst" ranks the BIC last
# (rank = size of the space), "Random" uses (size + 1)/2.
ss_cols = {
    # postfix: column
    "_C_BIC": "C_BIC",
    "_C_susp": "C_susp",
    "": "C",
}

for postfix, column in ss_cols.items():
    space_size = ss_df[column]
    methods = {
        'Worst': space_size,
        'Random': (space_size + 1)/2
    }
    for method, ranks in methods.items():
        # one row per subject, with its ground-truth commit as column "BIC"
        tmp_df = (
            ss_df[["pid", "vid"]]
            .join(GT.set_index(["pid", "vid"])[["commit"]], on=["pid", "vid"])
            .rename(columns={"commit": "BIC"})
        )
        tmp_df["rank"] = ranks
        tmp_df["num_candidates"] = space_size
        tmp_df["num_total_commits"] = ss_df["C"]
        savepath = os.path.join(RANKING_DIR, method + postfix + ".csv")
        tmp_df.to_csv(savepath, index=False)
        print(f"Saved to {savepath}")

### FBL-BERT (baseline)

In [None]:
# Collect the rank of the BIC according to FBL-BERT within each of the
# three search spaces (C, C_susp, C_BIC) for every baseline directory that
# contains a FBL-BERT ranking file.
RANKING_FILE_NAME = "ranking_INDEX_FBLBERT_RN_bertoverflow_QARC_q256_d230_dim128_cosine_q256_d230_dim128_commits_token.tsv"

tool = "git"

fbl_rows = []
for dirname in os.listdir(BASELINE_DATA_DIR):
    result_path = os.path.join(BASELINE_DATA_DIR, dirname, RANKING_FILE_NAME)
    if not os.path.exists(result_path):
        continue
    # raw string: "\w" and "\d" are invalid escapes in a normal string literal
    m = re.match(r"(\w+)-(\d+)b", dirname)
    if m is None:
        # not a "<pid>-<vid>b" directory; report it instead of crashing on
        # m.group()
        print(f"Skipping {dirname}: unexpected directory name")
        continue
    pid, vid = m.group(1), m.group(2)
    fault_dir = fault_dirs[(pid, vid)]
    commit_df = load_commit_history(fault_dir, tool)

    # load FBL-BERT ranking (column 2: commit, column 5: score)
    fdf = pd.read_csv(result_path, sep="\t", header=None)[[2, 5]]
    fdf.columns = ["commit", "score"]
    # shorten the hashes to 7 characters (the BIC and the search spaces are
    # compared against these shortened hashes below)
    fdf["commit"] = fdf["commit"].apply(lambda x: x[:7])

    # search space
    C = get_all_commits(fault_dir)
    C_susp = commit_df.commit_hash.unique().tolist()
    C_sc = get_style_change_commits(fault_dir, tool, with_Rewrite=True)
    C_BIC = [c for c in C_susp if c not in C_sc]

    # load BIC ground-truth
    BIC = GT[(GT.pid == pid) & (GT.vid == vid)].commit.values[0]

    if (fdf.commit == BIC).any():
        # the BIC is in the ranking retrieved by FBL-BERT: rank it within
        # the full ranking and within the rankings restricted to C_susp/C_BIC
        fdf["rank"] = (-fdf["score"]).rank(method="max").astype(int)

        fdf_susp = fdf[fdf.commit.isin(C_susp)].copy()
        fdf_susp["rank"] = (-fdf_susp["score"]).rank(method="max").astype(int)

        fdf_BIC = fdf[fdf.commit.isin(C_BIC)].copy()
        fdf_BIC["rank"] = (-fdf_BIC["score"]).rank(method="max").astype(int)

        rank_C = fdf[fdf.commit == BIC]["rank"].values[0]
        rank_C_susp = fdf_susp[fdf_susp.commit == BIC]["rank"].values[0]
        rank_C_BIC = fdf_BIC[fdf_BIC.commit == BIC]["rank"].values[0]

        fbl_rows.append([pid, vid, BIC, rank_C, rank_C_susp, rank_C_BIC])
    else:
        # Worst case: BIC is not in the retrieved ranking
        fbl_rows.append([pid, vid, BIC, len(C), len(C_susp), len(C_BIC)])

fbl_df = pd.DataFrame(fbl_rows,
    columns=["pid", "vid", "BIC", "rank_C", "rank_C_susp", "rank_C_BIC"])
fbl_df.sort_values(by=["pid", "vid"], inplace=True)
# attach the search-space sizes (columns C, C_susp, C_BIC)
fbl_df = fbl_df.join(ss_df.set_index(["pid", "vid"]), on=["pid", "vid"])
print(fbl_df.shape)

# Write one FBL-BERT ranking CSV per search space, with the same columns as
# the other ranking files (pid, vid, BIC, rank, num_candidates,
# num_total_commits).
ss_cols = {
    # postfix: column
    "_C_BIC": "C_BIC",
    "_C_susp": "C_susp",
    "": "C",
}

for postfix in ss_cols:
    space_col = ss_cols[postfix]
    # Build the columns one by one instead of select + rename: for the full
    # space (postfix "") the size column and the total-commit column are
    # both "C", so a select + rename selected "C" twice and renamed both
    # copies to "num_total_commits", dropping "num_candidates".
    tmp_df = fbl_df[["pid", "vid", "BIC"]].copy()
    tmp_df["rank"] = fbl_df["rank_" + space_col]
    tmp_df["num_candidates"] = fbl_df[space_col]
    tmp_df["num_total_commits"] = fbl_df["C"]
    savepath = os.path.join(RANKING_DIR, "FBL-BERT" + postfix + ".csv")
    tmp_df.to_csv(savepath, index=False)
    print(f"Saved to {savepath}")

### Bug2Commit

In [None]:
# Collect the rank of the BIC according to Bug2Commit within each of the
# three search spaces (C, C_susp, C_BIC) for every baseline directory that
# contains a Bug2Commit ranking file.
RANKING_FILE_NAME = "ranking_Bug2Commit.csv"

tool = "git"

b2c_rows = []
for dirname in os.listdir(BASELINE_DATA_DIR):
    result_path = os.path.join(BASELINE_DATA_DIR, dirname, RANKING_FILE_NAME)
    if not os.path.exists(result_path):
        continue
    # raw string: "\w" and "\d" are invalid escapes in a normal string literal
    m = re.match(r"(\w+)-(\d+)b", dirname)
    if m is None:
        # not a "<pid>-<vid>b" directory; report it instead of crashing on
        # m.group()
        print(f"Skipping {dirname}: unexpected directory name")
        continue
    pid, vid = m.group(1), m.group(2)
    fault_dir = fault_dirs[(pid, vid)]
    commit_df = load_commit_history(fault_dir, tool)

    # load Bug2Commit ranking (column 0: commit, column 3: score)
    bdf = pd.read_csv(result_path, header=None)[[0, 3]]
    bdf.columns = ["commit", "score"]
    # shorten the hashes to 7 characters (the BIC and the search spaces are
    # compared against these shortened hashes below)
    bdf["commit"] = bdf["commit"].apply(lambda x: x[:7])

    # search space
    C = get_all_commits(fault_dir)
    C_susp = commit_df.commit_hash.unique().tolist()
    C_sc = get_style_change_commits(fault_dir, tool, with_Rewrite=True)
    C_BIC = [c for c in C_susp if c not in C_sc]

    # load BIC ground-truth
    BIC = GT[(GT.pid == pid) & (GT.vid == vid)].commit.values[0]

    # rank within the full ranking and within the rankings restricted to
    # C_susp / C_BIC; ties share the largest rank of their group
    bdf["rank"] = (-bdf["score"]).rank(method="max").astype(int)

    bdf_susp = bdf[bdf.commit.isin(C_susp)].copy()
    bdf_susp["rank"] = (-bdf_susp["score"]).rank(method="max").astype(int)

    bdf_BIC = bdf[bdf.commit.isin(C_BIC)].copy()
    bdf_BIC["rank"] = (-bdf_BIC["score"]).rank(method="max").astype(int)

    # NOTE(review): unlike the FBL-BERT cell there is no fallback for a BIC
    # that is missing from the ranking; values[0] raises IndexError then.
    rank_C = bdf[bdf.commit == BIC]["rank"].values[0]
    rank_C_susp = bdf_susp[bdf_susp.commit == BIC]["rank"].values[0]
    rank_C_BIC = bdf_BIC[bdf_BIC.commit == BIC]["rank"].values[0]

    b2c_rows.append([pid, vid, BIC, rank_C, rank_C_susp, rank_C_BIC])


b2c_df = pd.DataFrame(b2c_rows,
    columns=["pid", "vid", "BIC", "rank_C", "rank_C_susp", "rank_C_BIC"])
b2c_df.sort_values(by=["pid", "vid"], inplace=True)
# attach the search-space sizes (columns C, C_susp, C_BIC)
b2c_df = b2c_df.join(ss_df.set_index(["pid", "vid"]), on=["pid", "vid"])
print(b2c_df.shape)

# Write one Bug2Commit ranking CSV per search space, with the same columns
# as the other ranking files (pid, vid, BIC, rank, num_candidates,
# num_total_commits).
ss_cols = {
    # postfix: column
    "_C_BIC": "C_BIC",
    "_C_susp": "C_susp",
    "": "C",
}

for postfix in ss_cols:
    space_col = ss_cols[postfix]
    # Build the columns one by one instead of select + rename: for the full
    # space (postfix "") the size column and the total-commit column are
    # both "C", so a select + rename selected "C" twice and renamed both
    # copies to "num_total_commits", dropping "num_candidates".
    tmp_df = b2c_df[["pid", "vid", "BIC"]].copy()
    tmp_df["rank"] = b2c_df["rank_" + space_col]
    tmp_df["num_candidates"] = b2c_df[space_col]
    tmp_df["num_total_commits"] = b2c_df["C"]
    savepath = os.path.join(RANKING_DIR, "Bug2Commit" + postfix + ".csv")
    tmp_df.to_csv(savepath, index=False)
    print(f"Saved to {savepath}")

### Comparison

In [None]:
# Locate the ranking CSV of every (voting scheme, lambda) pair evaluated on
# C_BIC and compute its mean reciprocal rank (MRR).
voting_dir = os.path.join(RANKING_DIR, "git_line_Ochiai_voting_C_BIC")

paths = {}
for lam in [0.0, 0.1, 0.2, 0.3]:
    # our voting schemes: tau in {max, dense} x alpha in {0, 1}
    for tau, alpha in itertools.product(["max", "dense"], [0, 1]):
        key = (f"$\\alpha={alpha}$, $\\tau={tau}$", lam, "ours")
        paths[key] = os.path.join(voting_dir, f"{tau}-{alpha}-{lam}.csv")
    # baseline voting schemes
    for vot in ["score", "equal"]:
        paths[(vot, lam, "baseline")] = os.path.join(
            voting_dir, f"{vot}-{lam}.csv")

MRR_rows = [
    [vot, lam, category, (1/pd.read_csv(csv_path)["rank"]).mean()]
    for (vot, lam, category), csv_path in paths.items()
]
MRR_df = pd.DataFrame(
    data=MRR_rows,
    columns=["voting", "lambda", "category", "MRR"],
)

# Plot MRR against lambda, one line (plus markers) per voting scheme.
plt.figure(figsize=(5, 3.5))
# legend/colour order: voting schemes sorted by their MRR at lambda = 0.1
hue_order = MRR_df[MRR_df["lambda"] == 0.1].sort_values(
    by="MRR", ascending=False).voting.tolist()
sns.lineplot(data=MRR_df, x="lambda", y="MRR", hue="voting", style="category",
    hue_order=hue_order)
ax = sns.scatterplot(data=MRR_df, x="lambda", y="MRR", hue="voting",
    style="category", hue_order=hue_order)
# Both plots add legend entries to the same axes; only the first legend_len
# entries are kept so that each one is shown once.
# NOTE(review): the "+ 2" presumably accounts for the two legend section
# titles ("voting" and "category") -- confirm with the seaborn version used.
legend_len = len(hue_order) + 2 + MRR_df.category.unique().shape[0]
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles[:legend_len], labels[:legend_len], loc=(1.01, 0.07))
# one x tick per evaluated lambda
plt.xticks(MRR_df["lambda"].unique().tolist())
plt.xlabel("$\\lambda$")

savepath = os.path.join(RESULTS_DIR, "RQ1_MRR.pdf")
plt.savefig(savepath, bbox_inches="tight")
print(f"Saved to {savepath}")

In [None]:
# Compare all ranking methods: MRR and acc@n (number of subjects whose BIC
# is ranked within the top n), read from the ranking CSVs written above.
paths = {
    "Ours": os.path.join(RANKING_DIR, "git_line_Ochiai_voting_C_BIC", "max-0-0.1.csv"),
    "Ours w/o Stage 2": os.path.join(RANKING_DIR, "git_line_Ochiai_voting_C_susp", "max-0-0.1.csv"),
    "Equal Voting": os.path.join(RANKING_DIR, "git_line_Ochiai_voting_C_BIC", "equal-0.1.csv"),
    "Max Aggr.": os.path.join(RANKING_DIR, "git_line_Ochiai_maxAggr_C_BIC.csv"),
    "FBL-BERT": os.path.join(RANKING_DIR, "FBL-BERT_C_BIC.csv"),
    "Bug2Commit": os.path.join(RANKING_DIR, "Bug2Commit_C_BIC.csv"),
    "Random": os.path.join(RANKING_DIR, "Random_C_BIC.csv"),
    "Worst": os.path.join(RANKING_DIR, "Worst_C_BIC.csv"),
    "FBL-BERT (entire commit)": os.path.join(RANKING_DIR, "FBL-BERT.csv"),
    "Bug2Commit (entire commit)": os.path.join(RANKING_DIR, "Bug2Commit.csv"),
    "Random (entire commit)": os.path.join(RANKING_DIR, "Random.csv"),
    "Worst (entire commit)": os.path.join(RANKING_DIR, "Worst.csv"),
}

N = [1, 2, 3, 5, 10] # for acc@n
eval_rows = []
for method in paths:
    rank_df = pd.read_csv(paths[method])

    # one table row: method name, MRR rounded to 3 digits, then acc@n
    eval_rows.append(
        [method]
        + [(1/rank_df["rank"]).mean().round(3)]
        + [int((rank_df["rank"] <= n).sum()) for n in N]
    )

eval_df = pd.DataFrame(data=eval_rows,
    columns=["method", "MRR"] + [f"acc@{n}" for n in N])
# best method first
eval_df = eval_df.sort_values(by="MRR", ascending=False)
savepath = os.path.join(RESULTS_DIR, "RQ1_ranking.tex")
eval_df.to_latex(savepath, index=False)
print(f"Saved to {savepath}")

eval_df

# **Research Question 2**

In [None]:
# Simulate standard and weighted bisection for every ground-truth fault.
# The result is cached in a CSV; delete the file to force a recomputation.
from scipy.stats import entropy
hp = {
    "tool": "git",
    "formula": "Ochiai",
    "voting": "max-0",
    "decay": 0.1,
    "score_level": "line",
    "stage2": True,
}

savepath = os.path.join(RESULTS_DIR, "RQ2_bisection_simulation.csv")

if os.path.exists(savepath):
    print(f"{savepath} exists")
    # read the very file whose existence was just checked
    simul_df = pd.read_csv(savepath)
    # read_csv parses numeric version ids as integers; the rest of the
    # notebook uses string vids
    simul_df["vid"] = simul_df["vid"].astype(str)
else:
    simul_rows = []

    for _, row in tqdm(GT.iterrows(), total=GT.shape[0]):
        fault = (row.pid, row.vid)

        if fault not in fault_dirs:
            continue

        BIC = row.commit
        fault_dir = fault_dirs[fault]

        # get commit_ranking
        style_change_commits = get_style_change_commits(
            fault_dir, hp["tool"], with_Rewrite=True) if hp["stage2"] else []

        vote_df = vote_for_commits(fault_dir, hp["tool"], hp["formula"],
            hp["decay"], voting_functions[hp["voting"]],
            use_method_level_score=(hp["score_level"] == "method"),
            excluded=style_change_commits)
        rank_of_BIC = vote_df.vote.rank(ascending=False, method="max")[BIC]

        # C
        all_commits = get_all_commits(fault_dir)

        # C_susp (in the order of all_commits) and the vote of each commit
        C_susp = [
            c for c in all_commits if c in vote_df.index
        ]
        scores = [float(vote_df.loc[c, "vote"]) for c in C_susp]

        # C_BIC
        C_BIC = [
            c for c in all_commits
            if c in vote_df.index and c not in style_change_commits
        ]
        # NOTE(review): the weighted bisection runs on C_susp with its
        # scores, while the standard bisection it is later compared with
        # runs on C_BIC -- presumably excluded commits carry a zero vote;
        # confirm.
        simul_rows.append([
            row.pid,
            row.vid,
            rank_of_BIC,
            standard_bisection(all_commits, BIC),
            standard_bisection(C_susp, BIC),
            standard_bisection(C_BIC, BIC),
            weighted_bisection(C_susp, scores, BIC)
        ])
        # print(simul_rows[-1])

    simul_df = pd.DataFrame(data=simul_rows, columns=["pid", "vid",
        "rank_of_BIC",
        "standard_bisection_on_C",
        "standard_bisection_on_C_susp",
        "standard_bisection_on_C_BIC",
        "weighted_bisection"]
    )
    simul_df.to_csv(savepath, index=False)
    print(f"Saved to {savepath}")

In [None]:
# Correlate the relative rank of the BIC (rank / |C_BIC|) with the number of
# iterations saved by the weighted bisection over the standard one on C_BIC.
simul_jdf = (
    simul_df
    .sort_values(by="rank_of_BIC")
    .join(ss_df.set_index(["pid", "vid"]), on=["pid", "vid"])
    .assign(
        saving=lambda d: d["standard_bisection_on_C_BIC"] - d["weighted_bisection"],
        rankPercentage=lambda d: d["rank_of_BIC"] / d["C_BIC"],
    )
)

correlation = simul_jdf[["rankPercentage", "saving"]].corr()
print(correlation)

In [None]:
# Compare the weighted bisection against two standard-bisection baselines:
# print win/draw/lose counts, the average reduction and a one-sided Wilcoxon
# signed-rank test, then draw the per-subject saving as a sorted bar chart.
w_col = "weighted_bisection"
for b_col in ["standard_bisection_on_C", "standard_bisection_on_C_BIC"]:
    print("lose,draw,win")
    print((
        (simul_df[b_col] <  simul_df[w_col]).sum(),
        (simul_df[b_col] == simul_df[w_col]).sum(),
        (simul_df[b_col] >  simul_df[w_col]).sum()
    ))

    cost_saving = simul_df[b_col] - simul_df[w_col]
    # round after subtracting (as the industrial cell does); rounding the
    # mean first prints float artefacts such as 0.29000000000000004
    print("Avg. Reduction", (1 - (simul_df[w_col]/simul_df[b_col]).mean()).round(2))
    # fractions of subjects with fewer / equal / more iterations
    reduced = (cost_saving > 0).mean()
    same = (cost_saving == 0).mean()
    increased = (cost_saving < 0).mean()

    # print(cost_saving.sort_values())

    plt.figure(figsize=(9, 2))
    plt.title("# saved search iterations by changing the search algorithm to the weighted bisection")

    # largest saving first, so wins / draws / losses form contiguous spans
    cost_saving = sorted(cost_saving.tolist(), reverse=True)

    # To confirm that the median of the differences can be assumed to be
    # positive, a one-sided test is used.
    w, p = wilcoxon(cost_saving, alternative='greater')
    print("Wilcoxon signed rank test", w, p)
    N = len(cost_saving)

    plt.bar(range(0, N), cost_saving,
        color=["red" if d < 0 else "green" for d in cost_saving])
    plt.axhline(0, color="black")

    plt.yticks(range(min(cost_saving), max(cost_saving)+1))

    # shade the "reduced" span green and the "increased" span red
    plt.axvspan(-0.5, N * reduced - 0.5, facecolor='green', alpha=0.1)
    plt.axvspan(N * (reduced + same)-0.5, N-0.5, facecolor='red', alpha=0.1)

    # label every span that covers more than 5% of the subjects
    if reduced > 0.05:
        plt.text(N * reduced/2 - 0.5, max(cost_saving)-1, f"{reduced*100:.1f}%", horizontalalignment="center")
    if same > 0.05:
        plt.text(N * (reduced + same/2) - 0.5, max(cost_saving)-1, f"{same*100:.1f}%", horizontalalignment="center")
    if increased > 0.05:
        plt.text(N * (reduced + same + increased/2) - 0.5, max(cost_saving)-1, f"{increased*100:.1f}%", horizontalalignment="center")

    plt.xlim((0-0.5, N-0.5))
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)

    # the average line is only drawn for the comparison against C
    if b_col == "standard_bisection_on_C":
        plt.axhline(np.mean(cost_saving), color="black", linestyle="--", label=f"Average Saved Iterations: {np.mean(cost_saving).round(1)}")
        print("Average # Saved Iterations", np.mean(cost_saving))
        plt.legend(loc="upper right")

    savepath = os.path.join(RESULTS_DIR,
        f"RQ2_cost_saving_by_{w_col}_compared_to_{b_col}.pdf")
    plt.savefig(savepath, bbox_inches="tight")
    print(f"Saved to {savepath}")
    plt.show()


# **Research Question 3**

In [None]:
# For every fault and for both in_class_only settings, record the
# method-level FL rank of each buggy method together with the rank of the
# BIC. The result is cached in a CSV; delete the file to force a
# recomputation.
from scipy.stats import entropy
hp = {
    "tool": "git",
    "formula": "Ochiai",
    "voting": "max-0",
    "decay": 0.1,
    "score_level": "line",
    "stage2": True,
}

savepath = os.path.join(RESULTS_DIR, "RQ3_FL_and_Fonte.csv")
if os.path.exists(savepath):
    print(f"{savepath} exists")
    fl_df = pd.read_csv(savepath)
    # read_csv parses numeric version ids as integers; the rest of the
    # notebook uses string vids
    fl_df["vid"] = fl_df["vid"].astype(str)
else:
    with open("data/Defects4J/buggy_methods.json", "r") as f:
        buggy_method_infos = json.load(f)

    fl_rows = []
    for _, row in tqdm(GT.iterrows(), total=GT.shape[0]):
        fault = (row.pid, row.vid)

        if fault not in fault_dirs:
            continue

        BIC = row.commit
        fault_dir = fault_dirs[fault]

        # get commit_ranking
        style_change_commits = get_style_change_commits(
            fault_dir, hp["tool"], with_Rewrite=True) if hp["stage2"] else []

        for in_class_only in [True, False]:
            vote_df = vote_for_commits(fault_dir, hp["tool"], hp["formula"],
                hp["decay"], voting_functions[hp["voting"]],
                use_method_level_score=(hp["score_level"] == "method"),
                excluded=style_change_commits, in_class_only=in_class_only)
            rank_of_BIC = vote_df.vote.rank(ascending=False, method="max")[BIC]

            path_to_coverage = os.path.join(fault_dir, "coverage.pkl")
            FL_results, cov_df = get_sbfl_scores_from_coverage(path_to_coverage,
                formula=hp["formula"], use_cache=False, in_class_only=in_class_only,
                return_coverage_matrix=True)
            # NOTE(review): presumably one row of cov_df per test -- confirm
            num_used_tests = cov_df.shape[0]

            # method-level FL: a method gets the maximum score of its
            # elements; ties share the largest rank of their group
            mFL = FL_results["score"].groupby(
                ["class_file", "method_name", "method_signature"]).max().to_frame()
            mFL["rank"] = mFL["score"].rank(
                ascending=False, method="max")
            mFL["rank_percentage"] = (mFL["rank"]-1)/mFL.shape[0]
            mFL = mFL.reset_index()
            # keep the text between the leading character and the first ')'
            # of the signature (presumably the argument list of a
            # "(args)ret" signature -- confirm)
            mFL["arg_types"] = mFL["method_signature"].apply(
                lambda s: s.split(')')[0][1:]
            )
            buggy_methods = [
                (bm["class_file"], bm["method_name"], bm["arg_types"])
                for bm in buggy_method_infos[f"{row.pid}-{row.vid}b"]
            ]
            if len(buggy_methods) == 0:
                print(f"{fault}: No buggy method info")
                continue
            mFL = mFL.set_index(["class_file", "method_name", "arg_types"])[
                ["rank", "rank_percentage"]]

            for bm in buggy_methods:
                if bm in mFL.index:
                    # .min(): several rows may share the same
                    # (class, method, arg_types) key
                    fl_rows.append([
                        row.pid, row.vid, in_class_only, num_used_tests, bm,
                        mFL.loc[bm, "rank"].min(),
                        mFL.loc[bm, "rank_percentage"].min(),
                        rank_of_BIC])
                else:
                    print(f"{fault}: {bm} is not in the coverage matrix")
                    try:
                        # debugging aid: show the ranked methods of the same
                        # class file
                        display(mFL.loc[bm[0]])
                    except KeyError:
                        # The class file is missing as well. A diagnostic
                        # must not abort the loop and lose the rows
                        # collected so far.
                        print(f"{fault}: class file {bm[0]} is not in the coverage matrix either")

    fl_df = pd.DataFrame(data=fl_rows,
        columns=["pid", "vid", "in_class_only", "num_used_tests", "buggy_method", "buggy_method_rank", "buggy_method_rank_percentage", "BIC_rank"])
    fl_df.to_csv(savepath, index=False)
    print(f"Saved to {savepath}")

In [None]:
# Column-wise minimum per (fault, in_class_only) group, i.e. the best
# buggy-method rank of each fault under each setting.
fl_mdf = fl_df.groupby(["pid", "vid", "in_class_only"]).min()

# One row per fault, columns (<metric>, in_class_only). pivot() only accepts
# keyword arguments since pandas 2.0.
pivoted = fl_mdf.reset_index().pivot(index=["pid", "vid"], columns="in_class_only")
# subjects whose buggy method ranks strictly better with in_class_only=False
increased = pivoted.index[pivoted[("buggy_method_rank", False)] < pivoted[("buggy_method_rank", True)]].tolist()
print(f"FL accuracy is increased in {len(increased)} subjects")

# BIC rank of those subjects: with_T_prime = in_class_only True,
# with_T = in_class_only False
BIC_rank = pivoted.loc[increased, ("BIC_rank", True)]
BIC_rank.name = "with_T_prime"
BIC_rank = BIC_rank.to_frame()
BIC_rank["with_T"] = pivoted.loc[increased, ("BIC_rank", False)]
BIC_rank["diff"] = BIC_rank["with_T"] - BIC_rank["with_T_prime"]


savepath = os.path.join(RESULTS_DIR, "RQ3_comparison.pdf")
plt.figure(figsize=(6, 2.5))
# diagonal: both settings rank the BIC equally
plt.plot([0, BIC_rank.max().max()], [0, BIC_rank.max().max()], color="grey",
    linestyle="dashed")
# green: BIC ranked better with T, grey: unchanged, red: ranked worse with T
sns.scatterplot(data=BIC_rank[BIC_rank["diff"] < 0],
    y="with_T_prime", x="with_T", color="green")
sns.scatterplot(data=BIC_rank[BIC_rank["diff"] == 0],
    y="with_T_prime", x="with_T", color="grey")
sns.scatterplot(data=BIC_rank[BIC_rank["diff"] > 0],
    y="with_T_prime", x="with_T", color="red")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("BIC rank w/ more accurate FL")
plt.ylabel("BIC rank w/ less accurate FL")
plt.savefig(savepath, bbox_inches="tight")
plt.show()
print(f"Saved to {savepath}")

# one-sided test: are the BIC ranks with T smaller than those with T'?
w, p = wilcoxon(BIC_rank["diff"], alternative='less')
print("Wilcoxon signed rank test", w, p)

for n in [1,2,3,5,10]:
    print(f"Acc@{n} {(BIC_rank['with_T'] <= n).sum()}/{len(increased)} -> {(BIC_rank['with_T_prime'] <= n).sum()}/{len(increased)}")

# Industrial Application

In [None]:
# Evaluate the rankings of the industrial batches: every CSV holds the
# scored changes of one batch, exactly one of which is marked as the BIC.
ind_rows = []
for filename in os.listdir(INDUSTRY_DATA_DIR):
    if not filename.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(INDUSTRY_DATA_DIR, filename), float_precision="high")
    assert df.is_BIC.sum() == 1
    # higher score = better rank; ties share the largest rank of their group
    df["rank"] = df.score.rank(ascending=False, method="max").astype(int)
    # order the changes from newest to oldest before simulating bisection
    df["submitted"] = pd.to_datetime(df["submitted"])
    df.sort_values(by="submitted", ascending=False, inplace=True)
    BIC = df[df.is_BIC == 1]["change_id"].values[0]
    BIC_rank = df[df.is_BIC == 1]["rank"].values[0]
    changes = df.change_id.tolist()
    scores = df.score.tolist()
    ind_rows.append([filename, BIC_rank, len(changes),
        standard_bisection(changes, BIC),
        weighted_bisection(changes, scores, BIC)])

ind_df = pd.DataFrame(ind_rows, columns=["filename", "rank", "total",
    "standard_bisection", "weighted_bisection"])
ind_df["saving"] = ind_df["standard_bisection"] - ind_df["weighted_bisection"]
print("============= Fonte ==============")
print(f"MRR: {(1/ind_df['rank']).mean():.3f}")
for n in [1,2,3,5,10]:
    print(f"acc@{n}: {(ind_df['rank'] <= n).sum()}/{ind_df.shape[0]} ({(ind_df['rank'] <= n).sum()/ind_df.shape[0]*100:.0f}%)")

print("============= Random ==============")

# the random baseline uses rank (batch size + 1)/2
ind_df["random_rank"] = (ind_df.total + 1)/2
print(f"MRR: {(1/ind_df['random_rank']).mean():.3f}")
for n in [1,2,3,5,10]:
    print(f"acc@{n}: {(ind_df['random_rank'] <= n).sum()}/{ind_df.shape[0]} ({(ind_df['random_rank'] <= n).sum()/ind_df.shape[0]*100:.0f}%)")

print("============= MRR Improvement ==============")
print((1/ind_df['rank']).mean()/(1/ind_df['random_rank']).mean())

print("============= Weighted Bisection ==============")
print("Lose", (ind_df.saving < 0).sum(), (ind_df.saving < 0).mean().round(2))   # Lose
print("Draw", (ind_df.saving == 0).sum(), (ind_df.saving == 0).mean().round(2)) # Draw
print("Win", (ind_df.saving > 0).sum(), (ind_df.saving > 0).mean().round(2))   # Win

print(f"Avg. # commits in a batch: {ind_df.total.mean()}")

# Printed explicitly: a bare expression in the middle of a cell is evaluated
# but never displayed (only the last expression of a cell is).
print("Avg. # iterations (standard, weighted, saving):",
    ind_df.standard_bisection.mean(), ind_df.weighted_bisection.mean(), ind_df.saving.mean())

"Avg. Reduction", (1 - (ind_df.weighted_bisection/ind_df.standard_bisection).mean()).round(2)