In [1]:
import os
import json
import pandas as pd

# Retrieving the Catalog of Bugs

In [2]:
# Each bug is described by a `<bug_id>.json` file in ./bug_data; the bug id is
# the file name with its extension stripped. Coverage pickles are skipped here.
all_bugs = [
    os.path.splitext(json_name)[0]
    for json_name in sorted(
        entry for entry in os.listdir("./bug_data") if entry.endswith(".json")
    )
]
print(f"# total bugs = {len(all_bugs)}")
print(all_bugs)

# total bugs = 54
['Lang-1', 'Lang-10', 'Lang-12', 'Lang-13', 'Lang-14', 'Lang-15', 'Lang-16', 'Lang-17', 'Lang-18', 'Lang-19', 'Lang-20', 'Lang-22', 'Lang-24', 'Lang-26', 'Lang-27', 'Lang-28', 'Lang-3', 'Lang-30', 'Lang-31', 'Lang-32', 'Lang-33', 'Lang-34', 'Lang-35', 'Lang-36', 'Lang-37', 'Lang-38', 'Lang-39', 'Lang-4', 'Lang-40', 'Lang-41', 'Lang-42', 'Lang-43', 'Lang-44', 'Lang-45', 'Lang-46', 'Lang-47', 'Lang-48', 'Lang-49', 'Lang-5', 'Lang-50', 'Lang-53', 'Lang-54', 'Lang-55', 'Lang-57', 'Lang-59', 'Lang-6', 'Lang-60', 'Lang-61', 'Lang-63', 'Lang-64', 'Lang-65', 'Lang-7', 'Lang-8', 'Lang-9']


# Retrieving Bug Information and Test Coverage

The essential details and test coverage data for each specific `bug_id` are stored in the files located at `./bug_data/{bug_id}.json` and `./bug_data/{bug_id}-cov.pkl`, respectively.

In this part, we are going to load the data into these two variables:

- **Bug Info**: The variable `bug_info` (`dict`) comprises details regarding the tests that have failed and the lines of code where the actual bug exists.
- **Coverage**: The variable `coverage` (`pd.DataFrame`) provides information about the code coverage for each test case.
  - Index: each row corresponds to a program line
  - Columns: each column corresponds to an individual test case
  - Value: `True` (covered), `False` (not covered)

The following Python code cells demonstrate how to load the data.

In [3]:
bug_id = "Lang-1"  # FIXME: set to the bug you want to analyze (any id listed in `all_bugs`)

# Both files of a bug live side by side in ./bug_data and share the bug id as prefix.
# Pass the directory and the file name as separate components so that
# os.path.join actually performs the join.
bug_info_path = os.path.join("./bug_data", f"{bug_id}.json")
coverage_path = os.path.join("./bug_data", f"{bug_id}-cov.pkl")

# JSON text is UTF-8; be explicit instead of relying on the platform default encoding.
with open(bug_info_path, "r", encoding="utf-8") as f:
    bug_info = json.load(f)
bug_info

{'bug_id': 'Lang-1',
 'failing_tests': ['org.apache.commons.lang3.math.NumberUtilsTest#TestLang747'],
 'buggy_lines': ['org.apache.commons.lang3.math$NumberUtils#createNumber(java.lang.String):467',
  'org.apache.commons.lang3.math$NumberUtils#createNumber(java.lang.String):468',
  'org.apache.commons.lang3.math$NumberUtils#createNumber(java.lang.String):471']}

In [4]:
# Load the per-test coverage matrix of the selected bug
# (rows: program lines, columns: test cases, values: covered or not).
# NOTE(review): `pd.read_pickle` unpickles the file, which can execute arbitrary
# code -- only load coverage files that come from a trusted source.
coverage = pd.read_pickle(coverage_path)
coverage

Unnamed: 0,org.apache.commons.lang3.ValidateTest#testNotBlankNotBlankStringWithNewlinesShouldNotThrow,org.apache.commons.lang3.ValidateTest#testNotBlankMsgEmptyStringShouldThrow,org.apache.commons.lang3.ValidateTest#testNotBlankMsgBlankStringShouldThrow,org.apache.commons.lang3.ValidateTest#testNotBlankReturnValues1,org.apache.commons.lang3.ValidateTest#testNotBlankReturnValues2,org.apache.commons.lang3.ValidateTest#testNotBlankBlankStringWithNewlinesShouldThrow,org.apache.commons.lang3.ValidateTest#testNotBlankMsgNotBlankStringWithWhitespacesShouldNotThrow,org.apache.commons.lang3.ValidateTest#testNotBlankNotBlankStringShouldNotThrow,org.apache.commons.lang3.ValidateTest#testNotBlankMsgNotBlankStringShouldNotThrow,org.apache.commons.lang3.ValidateTest#testNotBlankEmptyStringShouldThrow,...,org.apache.commons.lang3.math.NumberUtilsTest#testStringCreateNumberEnsureNoPrecisionLoss,org.apache.commons.lang3.math.NumberUtilsTest#testMaxByte_nullArray,org.apache.commons.lang3.math.NumberUtilsTest#testCreateDouble,org.apache.commons.lang3.math.NumberUtilsTest#testMinLong_emptyArray,org.apache.commons.lang3.reflect.ConstructorUtilsTest#testInvokeConstructor,org.apache.commons.lang3.reflect.ConstructorUtilsTest#testInvokeExactConstructor,org.apache.commons.lang3.reflect.MethodUtilsTest#testInvokeStaticMethod,org.apache.commons.lang3.reflect.MethodUtilsTest#testInvokeExactMethod,org.apache.commons.lang3.reflect.MethodUtilsTest#testInvokeMethod,org.apache.commons.lang3.reflect.MethodUtilsTest#testInvokeExactStaticMethod
org.apache.commons.lang3$StringUtils#StringUtils():192,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3$StringUtils#StringUtils():193,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3$StringUtils#<clinit>():181,True,True,True,True,True,True,True,True,True,True,...,True,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3$StringUtils#isEmpty(java.lang.CharSequence):217,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3$StringUtils#isNotEmpty(java.lang.CharSequence):236,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
org.apache.commons.lang3.math$NumberUtils#isNumber(java.lang.String):1412,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3.math$NumberUtils#isNumber(java.lang.String):1414,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3.math$NumberUtils#isNumber(java.lang.String):1417,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
org.apache.commons.lang3.math$NumberUtils#isNumber(java.lang.String):1420,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


# Tutorial: Computing Spectrum-based Fault Localization Scores

Spectrum-based Fault Localization (SBFL) is a family of techniques that leverage coverage information of test cases and their pass/fail outcomes to pinpoint potential faults in source code. 

Essentially, SBFL converts the test coverage and test results into a concept called the **Spectrum** which consists of four values: `(e_p, n_p, e_f, n_f)`. These values provide a concise summary of how each program element is covered by the tests:

- `e_p`: the number of passing tests that cover a particular program element.
- `n_p`: the number of passing tests that do not cover that program element.
- `e_f`: the number of failing tests that cover a particular program element.
- `n_f`: the number of failing tests that do not cover that program element.

SBFL formulas, such as `Ochiai` or `Tarantula`, use these Spectrum values to compute a suspiciousness score for each program element (the formulas are generally designed to give a higher score to a program element with higher `e_f` and lower `e_p`, i.e. one that is executed by many failing tests and few passing tests).

The example below illustrates how you can use our variables (`coverage` and `bug_info`) to obtain the Spectrum values and calculate the SBFL scores.

In [5]:
import numpy as np

def get_spectrum(coverage_df, failing_tests):
    """
    Build the program execution spectrum from a coverage matrix.

    Args:
        coverage_df: coverage table whose rows are program elements and whose
            columns are test cases; a truthy cell means "this test covers
            this element".
        failing_tests: collection of test names (matching the column labels
            of `coverage_df`) that failed; every other column is treated as
            a passing test.

    Return: (e_p, n_p, e_f, n_f), each with one entry per program element
        - e_p: number of passing tests that cover the element
        - n_p: number of passing tests that do not cover the element
        - e_f: number of failing tests that cover the element
        - n_f: number of failing tests that do not cover the element
    """
    failed_mask = np.array([name in failing_tests for name in coverage_df.columns])
    passed_mask = ~failed_mask

    # Transposed view: one row per test, one column per program element, so a
    # boolean mask over tests selects rows and summing collapses the tests.
    hits_per_test = coverage_df.values.T

    e_f = hits_per_test[failed_mask].sum(axis=0)
    e_p = hits_per_test[passed_mask].sum(axis=0)

    # Whatever a group of tests did not execute is the group size minus the hits.
    n_f = failed_mask.sum() - e_f
    n_p = passed_mask.sum() - e_p

    return e_p, n_p, e_f, n_f

def _safe_divide(numerator, denominator):
    """Element-wise ``numerator / denominator`` that yields 0.0 where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    # `np.divide(..., where=mask)` leaves masked-out positions untouched, so the
    # output buffer must be pre-filled; otherwise those entries are uninitialized.
    result = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
    np.divide(numerator, denominator, out=result, where=denominator != 0)
    # `[()]` turns a 0-d result back into a scalar and is a no-op view for n-d arrays.
    return result[()]


def sbfl(e_p, n_p, e_f, n_f, formula="Ochiai"):
    """
    Compute spectrum-based fault localization (suspiciousness) scores.

    Args:
        e_p: number of passing tests that cover each program element
        n_p: number of passing tests that do not cover each program element
        e_f: number of failing tests that cover each program element
        n_f: number of failing tests that do not cover each program element
        formula: one of "Ochiai", "Tarantula" or "Jaccard"

    Returns:
        Float scores with the same shape as the spectrum inputs. Wherever a
        formula would divide by zero (e.g. an element covered by no test, or
        a run without failing tests) the score is defined as 0.0 instead of
        NaN or an uninitialized value.

    Raises:
        ValueError: if `formula` is not one of the supported names.
    """
    e_p, n_p, e_f, n_f = (
        np.asarray(values, dtype=float) for values in (e_p, n_p, e_f, n_f)
    )

    if formula == "Ochiai":
        return _safe_divide(e_f, np.sqrt((e_f + n_f) * (e_f + e_p)))
    if formula == "Tarantula":
        r_f = _safe_divide(e_f, e_f + n_f)  # share of failing tests covering the element
        r_p = _safe_divide(e_p, e_p + n_p)  # share of passing tests covering the element
        return _safe_divide(r_f, r_f + r_p)
    if formula == "Jaccard":
        return _safe_divide(e_f, e_f + n_f + e_p)
    raise ValueError(f"Unknown formula: {formula}")

# Turn the coverage matrix and the list of failing tests into the four
# spectrum vectors (one entry per program line) ...
e_p, n_p, e_f, n_f = get_spectrum(
    coverage_df=coverage,
    failing_tests=bug_info["failing_tests"],
)
# ... and score every program line with the chosen SBFL formula.
sbfl_scores = sbfl(e_p=e_p, n_p=n_p, e_f=e_f, n_f=n_f, formula="Ochiai")
sbfl_scores

array([0.        , 0.        , 0.04920678, ..., 0.        , 0.        ,
       0.        ])

In [6]:
# One row per program line: its Ochiai score plus a flag telling whether the
# line is one of the known buggy lines of this bug.
score_df = pd.DataFrame({"Ochiai": sbfl_scores}, index=coverage.index)
score_df["is_buggy_line"] = score_df.index.isin(bug_info["buggy_lines"])
# Show the buggy lines first.
score_df.sort_values("is_buggy_line", ascending=False)

Unnamed: 0,Ochiai,is_buggy_line
org.apache.commons.lang3.math$NumberUtils#createNumber(java.lang.String):471,0.5,True
org.apache.commons.lang3.math$NumberUtils#createNumber(java.lang.String):468,0.5,True
org.apache.commons.lang3.math$NumberUtils#createNumber(java.lang.String):467,0.5,True
org.apache.commons.lang3$StringUtils#StringUtils():192,0.0,False
"org.apache.commons.lang3$StringUtils#abbreviate(java.lang.String,int,int):6304",0.0,False
...,...,...
"org.apache.commons.lang3$StringUtils#join(java.lang.Object[],char,int,int):3459",0.0,False
"org.apache.commons.lang3$StringUtils#join(double[],char):3428",0.0,False
"org.apache.commons.lang3$StringUtils#join(double[],char):3426",0.0,False
"org.apache.commons.lang3$StringUtils#join(double[],char):3425",0.0,False
