In [2]:
import pandas as pd
import os
import re
from dataclasses import dataclass
import json
%matplotlib inline

In [3]:
jobname_regex = re.compile(r"(?<=: \[)(?P<project>[a-z-]+), (?P<noise_type>[a-z-]+)")

In [4]:
test_string = "Test Projects: [shiki, baseline]"

res = jobname_regex.search(test_string)

# Read in all Results

In [5]:
directory = "../artifacts"
jobs_dir = os.listdir(directory)
f"{len(jobs_dir)} jobs found"

'133 jobs found'

In [27]:
@dataclass
class TestResult:
    project: str
    noise_type: str
    dir: str
    test_results: dict[str, dict[str, int]]
    env_vars: dict[str, str]
    job_number: int

    test_df: pd.DataFrame = None

    def __post_init__(self):
        # Compute the test_df
        # Columns: test_name, PASSED, Failed, Skipped, TOTAL RUNS
        test_names = self.test_results.keys()
        passed = [test.get("PASSED", 0) for test in self.test_results.values()]
        failed = [test.get("FAILED", 0) for test in self.test_results.values()]
        skipped = [test.get("SKIPPED", 0) for test in self.test_results.values()]

        self.test_df = pd.DataFrame({
            "test_name": test_names,
            "PASSED": passed,
            "FAILED": failed,
            "SKIPPED": skipped
        })
        self.test_df["TOTAL RUNS"] = self.test_df["PASSED"] + self.test_df["FAILED"]


In [28]:
results: list[TestResult] = []
for job in jobs_dir:
    with open(os.path.join(directory, job, "jobname.txt")) as f:
        job_str = f.read()
    job_re = jobname_regex.search(job_str)

    with open(os.path.join(directory, job, "aggregate-test-results", "aggregated_test_results.json")) as f:
        test_results = json.load(f)

    with open(os.path.join(directory, job, "aggregate-test-results", "environment_variables.json")) as f:
        env_vars = json.load(f)

    results.append(TestResult(
        project=job_re.group("project"),
        noise_type=job_re.group("noise_type"),
        dir=os.path.join(directory, job),
        test_results=test_results,
        env_vars=env_vars,
        job_number=int(job)
    ))

In [29]:
f"parsed {len(results)} results"

'parsed 133 results'

In [30]:
x =results[1].test_df.sort_values("FAILED")

In [31]:
results[50].test_df

Unnamed: 0,test_name,PASSED,FAILED,SKIPPED,TOTAL RUNS
0,/administration-menu.spec.ts:17:2 › administra...,15,0,1,15
1,/administration-menu.spec.ts:24:2 › administra...,0,0,16,0
2,/administration-menu.spec.ts:31:2 › administra...,15,0,1,15
3,/administration-menu.spec.ts:40:3 › administra...,15,0,1,15
4,/administration.spec.ts:22:3 › administration ...,15,0,1,15
...,...,...,...,...,...
179,/video-conference.spec.ts:59:2 › video confere...,0,1,15,1
180,/video-conference.spec.ts:69:3 › video confere...,0,1,15,1
181,/video-conference.spec.ts:75:2 › video confere...,0,1,15,1
182,/video-conference.spec.ts:85:3 › video confere...,1,0,15,1


# Remove job with the same project and noise type

In [38]:
results_dict : dict[tuple[str, str], TestResult] = {}
for result in results:
    key = (result.project, result.noise_type)
    if key in results_dict:
        if results_dict[key].job_number < result.job_number:
            results_dict[key] = result
    else:
        results_dict[key] = result

In [39]:
f"Filtered {len(results) - len(results_dict)} results"

'Filtered 0 results'

In [40]:
results = list(results_dict.values())

# Single Analysis DF of all results

Columns: project
Rows: noise_type
Element: flakyness in %

## Formula

$$
flakyness = \begin{cases}
0, all\ tests\ failed\\
\frac{Failed\ Tests}{Total\ Test\ Runs}, else
\end{cases}
$$

In [41]:
# define a function to compute the flakyness
def compute_flakyness(df: pd.DataFrame) -> float:
    lambda x: 0 if x["PASSED"] == 0 else x["FAILED"] / x["TOTAL RUNS"]

    df["flakyness"] = df.apply(lambda x: 0 if x["PASSED"] == 0 else x["FAILED"] / x["TOTAL RUNS"], axis=1)
    return df["flakyness"].mean()

flakyness = [compute_flakyness(result.test_df) for result in results]

In [53]:
noise_types: set[str] = set()
projects: set[str] = set()

for (pj, nt) in results_dict:
    noise_types.add(nt)
    projects.add(pj)

print(f"Got noise_types: {noise_types} and projects {projects}")

Got noise_types: {'cpu', 'baseline', 'network', 'interrupt', 'packet', 'memory', 'disk', 'bandwidth'} and projects {'lenster', 'gatsby', 'dev-roadmap', 'logseq', 'remix', 'insomnia', 'ionic-framework', 'material-ui', 'storybook', 'quill', 'rocket-chat', 'playwright', 'docs', 'floating-ui', 'shiki'}


In [75]:
overview_dict: dict[str, list[float]] = {}

for noise in noise_types:
    for project in projects:
        curr_val: list[float] = overview_dict.get(project, [])

        val = float("nan")

        if (project, noise) in results_dict:
            val = compute_flakyness(results_dict[(project, noise)].test_df)
        else:
            f"Missing data for project {project} and noise type {noise}"
        curr_val.append(val)
        overview_dict[project] = curr_val

overview = pd.DataFrame(overview_dict, index=list(noise_types))

In [76]:
overview

Unnamed: 0,lenster,gatsby,dev-roadmap,logseq,remix,insomnia,ionic-framework,material-ui,storybook,quill,rocket-chat,playwright,docs,floating-ui,shiki
cpu,0.000106,0.0,0.060256,,0.000521,0.013593,0.000247,0.0,,0.08,0.016944,0.000186,0.0,0.0,0.0
baseline,0.000264,0.0,0.065385,0.123576,0.000429,0.00822,0.002079,,,0.013333,0.035184,7.5e-05,0.0,0.0,0.0
network,0.000785,0.0,0.078205,,0.000107,0.0,,0.000417,,0.0,0.017097,1.6e-05,0.0,0.0,0.0
interrupt,5.2e-05,0.0,0.055641,,,0.006944,0.000721,0.00125,,0.0,0.015985,6.4e-05,0.0,0.0,0.0
packet,0.019015,0.0,0.085897,,0.000475,0.010621,0.00052,0.358122,,0.513333,0.023977,0.000413,0.0,0.0,0.231667
memory,0.000105,0.0,0.078205,0.205375,,0.008239,0.000375,0.0,,0.01,0.017903,5.1e-05,0.0,0.0,0.0
disk,5.3e-05,0.0,0.061795,,0.000322,0.030093,,0.0,0.017771,0.07,0.027694,3.2e-05,0.0,0.0,0.0
bandwidth,0.0,0.0,0.085897,,0.000521,0.006944,,,,0.003333,0.203623,8.2e-05,0.0,0.0,0.0
