# Setup 

In this notebook, we assess the labeling error made when the existing brownness-based heuristic is used to label flaky jobs.

In [1]:
import importlib
import pandas as pd

from src.labeling import load_dataset
from src.utils import constants

# Invalidate importlib's finder caches, then re-execute the labeling module so
# that edits made to it on disk are picked up without restarting the kernel.
importlib.invalidate_caches()
# The trailing ";" suppresses the echoed module repr, which would otherwise
# store the author's absolute local path in the saved notebook output.
importlib.reload(load_dataset);

<module 'src.labeling.load_dataset' from '/home/henri/Documents/ETS/telus-efficient-flaky-job-failures-detection/src/labeling/load_dataset.py'>

### Labeling Error

In [2]:
def labeling_error(df:pd.DataFrame) -> float:
    """Return the mean absolute gap between the "brown" and "flaky" columns, in percent.

    Args:
        df: Frame holding numeric "brown" and "flaky" columns.
            NOTE(review): presumably both are 0/1 labels, in which case the
            result is the percentage of rows where the two labels disagree.

    Returns:
        mean(|brown - flaky|) multiplied by 100 and rounded to 2 decimals.
    """
    disagreement = df["brown"].sub(df["flaky"]).abs()
    mean_gap = float(disagreement.mean())
    return round(mean_gap * 100, 2)


results = []  # one summary dict per project
reasons = []  # one per-project frame of flaky_reason counts

# For every project: join the sampled jobs (which carry the heuristic "brown"
# label) with the "flaky"/"flaky_reason" columns returned by load_dataset,
# save the merged table, and collect summary statistics.
for project in constants.PROJECTS:
    df = pd.read_csv(f"../data/sampled/{project}.csv")
    labeled_df = load_dataset.load_dataset(f"../data/logs/raw/{project}/")
    labeled_df = labeled_df[["id", "flaky", "flaky_reason"]]
    if df.shape[0] != labeled_df.shape[0]:
        # Fail loudly: merely printing and leaving the loop would let the
        # following cells write incomplete results without any visible error.
        raise ValueError(
            f"ERROR: dataframe lengths don't match for project {project!r}: "
            f"{df.shape[0]} sampled rows vs {labeled_df.shape[0]} labeled rows"
        )
    df = df.merge(labeled_df, how="inner", on="id")
    df.to_csv(f"../data/labeled/{project}.csv", index=False)
    results.append(
        {
            "project": project,
            "size": df.shape[0],
            # Boolean-mask sums yield 0 for an absent class, whereas
            # value_counts()[label] raises KeyError when no row has that label.
            "n_brown": int((df["brown"] == 1).sum()),
            "n_not_brown": int((df["brown"] == 0).sum()),
            "n_flaky": int((df["flaky"] == 1).sum()),
            "n_not_flaky": int((df["flaky"] == 0).sum()),
            "error_rate": labeling_error(df)
        }
    )
    # Count occurrences of each reason code in 1..39 for this project.
    # NOTE(review): this counts over all rows of the project, not only rows
    # where "brown" != "flaky" — confirm that is intended, since the results
    # are later saved as "mislabeled" failure reasons.
    has_reason = df["flaky_reason"].isin(list(range(1, 40)))
    flaky_reasons = (
        df.loc[has_reason, "flaky_reason"]
        .value_counts()
        .rename_axis('reason_id')
        .reset_index(name='count')
    )
    flaky_reasons["project"] = project
    reasons.append(flaky_reasons)
    del df

In [3]:
# Turn the list of per-project summary dicts into a table (one row per
# project) and display it.
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,project,size,n_brown,n_not_brown,n_flaky,n_not_flaky,error_rate
0,A,366,77,289,175,191,26.78
1,B,346,184,162,240,106,16.18
2,C,347,36,311,173,174,39.48
3,D,369,52,317,237,132,50.14
4,E,331,64,267,206,125,42.9
5,veloren,366,81,285,141,225,16.39


In [4]:
# Save the per-project summary table as CSV, without the index column.
results_df.to_csv("../data/results/labeling_error_results.csv", index=False)

### Failure Reasons of Misclassified Job Failures

In [5]:
# Stack the per-project reason counts into one frame with a fresh 0..n-1
# index, keeping only the three reporting columns.
reason_columns = ["reason_id", "count", "project"]
reasons_df = pd.concat(reasons, axis=0).reset_index(drop=True)[reason_columns]
reasons_df

Unnamed: 0,reason_id,count,project
0,2,39,A
1,4,12,A
2,8,8,A
3,9,6,A
4,5,5,A
...,...,...,...
74,31,2,veloren
75,33,1,veloren
76,10,1,veloren
77,30,1,veloren


In [6]:
# Load the reason-code lookup table, keep only the code and its name, and
# rename "code" to "reason_id" so it shares a key name with reasons_df.
flakiness_causes = (
    pd.read_csv("../data/results/flakiness_causes.csv")
    .loc[:, ["code", "failure_reason"]]
    .rename(columns={"code": "reason_id"})
)
flakiness_causes

Unnamed: 0,reason_id,failure_reason
0,0,
1,1,host_resolution_failure
2,2,dependency_installation_failure
3,3,remote_call_timeout
4,4,buggy_dependency
5,5,container_platform_auth_failure
6,6,misconfigured_env_variable
7,7,repository_file_not_found
8,8,image_not_found
9,9,repository_access_denied


In [7]:
# Per reason code: total occurrences ("count") and the number of rows of
# reasons_df carrying that code ("project"). The failure-reason name is then
# attached via a left join, and rows are ordered from most to least frequent.
reasons_stats = (
    reasons_df
    .groupby("reason_id")
    .agg({"count": "sum", "project": "count"})
    .reset_index()
    .merge(flakiness_causes, on="reason_id", how="left")
    .sort_values(by="count", ascending=False)
    .loc[:, ["reason_id", "failure_reason", "count", "project"]]
)
reasons_stats

Unnamed: 0,reason_id,failure_reason,count,project
5,6,misconfigured_env_variable,273,4
4,5,container_platform_auth_failure,61,4
1,2,dependency_installation_failure,47,3
17,18,dependencies_conflict_error,32,1
28,29,db_table_undefined,29,1
12,13,container_not_found,25,4
22,23,unknown_issue,24,3
3,4,buggy_dependency,17,3
8,9,repository_access_denied,17,3
0,1,host_resolution_failure,15,5


In [8]:
# Save the aggregated reason statistics as CSV, without the index column.
reasons_stats.to_csv("../data/results/mislabeled_failure_reasons.csv", index=False)