RQ1: Is there an association between repository activity characteristics (number of commits and number of contributors) and GUI end-to-end testing intensity (number of GUI test files and number of GUI tests) in non-trivial repositories?

In [154]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr

### Load CSV files

In [155]:
DATA_DIR = "E2EGit"  # change this


def _load_table(filename):
    """Read one CSV file from DATA_DIR and normalise its column names.

    Parameters
    ----------
    filename : str
        File name relative to DATA_DIR.

    Returns
    -------
    pandas.DataFrame
        The parsed table with leading/trailing whitespace stripped from
        every column name.
    """
    # low_memory=False lets pandas infer each column's dtype from the whole
    # file instead of chunk by chunk; this is the documented remedy for the
    # mixed-type warning that reading repository.csv appears to trigger.
    table = pd.read_csv(f"{DATA_DIR}/{filename}", low_memory=False)
    table.columns = table.columns.str.strip()
    return table


# Repository-level tables: their key column is called "name" on disk, so it is
# renamed to "repository_name" to match the GUI tables used for the joins.
repository = _load_table("repository.csv").rename(columns={"name": "repository_name"})
non_trivial = _load_table("non_trivial_repository.csv").rename(columns={"name": "repository_name"})

# GUI testing tables (column names are now stripped like the other tables).
gui_repo = _load_table("gui_testing_repo_details.csv")
gui_tests = _load_table("gui_testing_test_details.csv")

perf_tests = _load_table("performance_testing_test_details.csv")


  repository = pd.read_csv(f"{DATA_DIR}/repository.csv")


### Restrict to non-trivial repositories with GUI tests

In [156]:
# Inner-join the per-test GUI rows against the non-trivial repository names,
# so only tests belonging to a non-trivial repository remain.
non_trivial_names = non_trivial[["repository_name"]]
gui_tests_nt = pd.merge(gui_tests, non_trivial_names, how="inner", on="repository_name")

n_repos_with_gui_tests = gui_tests_nt["repository_name"].nunique()
print("Non-trivial repos with GUI tests:", n_repos_with_gui_tests)


Non-trivial repos with GUI tests: 472


In [None]:
# The repo-level GUI table already carries per-repository file/test counts,
# so it is used directly instead of grouping the per-test table.
gui_repo_nt = pd.merge(
    gui_repo,
    non_trivial[["repository_name"]],
    how="inner",
    on="repository_name",
)

# Keep the key plus the two intensity counts, under analysis-friendly names.
intensity_columns = ["repository_name", "number_of_files", "number_of_tests"]
gui_by_repo = gui_repo_nt.loc[:, intensity_columns].rename(
    columns={
        "number_of_files": "gui_test_files",
        "number_of_tests": "gui_tests",
    }
)

# Row count and distinct-repository count are printed side by side so a
# duplicated repository would show up as a mismatch.
print("Rows:", gui_by_repo.shape[0])
print("Unique repos:", gui_by_repo["repository_name"].nunique())


Rows: 472
Unique repos: 472


### Merge with repository activity data

In [158]:
# Attach every column of `repository` to the per-repo GUI counts. The inner
# join drops any repository that is missing from either side.
analysis_df = pd.merge(gui_by_repo, repository, how="inner", on="repository_name")

analysis_df.head()


Unnamed: 0,repository_name,gui_test_files,gui_tests,is_fork,commits,branches,releases,forks,main_language,default_branch,...,metrics,last_commit,last_commit_sha,has_wiki,is_archived,is_disabled,is_locked,languages,labels,topics
0,quantumlib/cirq,2,3,0.0,3756.0,89.0,25.0,961.0,Python,main,...,"language:TypeScript, commentLines:744, codeLin...",2024-03-29T07:41:23,2e2ad24380c18afe65f91eea9e2252363665bd1b,1.0,0.0,0.0,0.0,Python; Jupyter Notebook; TypeScript; Shell; D...,announce change; aqt; architecture/ion-trap; a...,cirq; nisq; quantum-algorithms; quantum-circui...
1,wandb/weave,31,31,0.0,2362.0,571.0,0.0,20.0,TypeScript,master,...,"language:Diff, commentLines:6, codeLines:5, bl...",2024-03-26T12:06:31,50611c029730e1be9675c09df2c495b2a849aeda,1.0,0.0,0.0,0.0,TypeScript; Python; C; Jupyter Notebook; HTML;...,bug; dependencies; documentation; duplicate; e...,
2,insightsoftwareconsortium/itk-wasm,37,92,0.0,3489.0,17.0,190.0,45.0,Python,main,...,"language:JSON, commentLines:0, codeLines:29476...",2024-03-26T11:06:27,607eb837124d02024ce6229880c96872db3dce0b,0.0,0.0,0.0,0.0,Python; TypeScript; C++; JavaScript; HTML; CMa...,bug; dependencies; duplicate; enhancement; goo...,asmjs; emscripten; imaging; input-output; insi...
3,codyogden/killedbygoogle,3,8,0.0,2013.0,4.0,0.0,365.0,TypeScript,main,...,"language:YAML, commentLines:0, codeLines:78, b...",2024-03-27T10:24:28,44bfc8873e4c37817641d8f56148d2b2de1a0bc2,0.0,0.0,0.0,0.0,TypeScript; JavaScript; CSS;,bug; dependencies; enhancement; hacktoberfest;...,front-end; google; hacktoberfest; json; open-d...
4,mattermost/mattermost,4,11,0.0,18316.0,688.0,515.0,6795.0,TypeScript,master,...,"language:XML, commentLines:1, codeLines:667, b...",2024-03-28T01:49:42,f34fac77311fca817306c46b37e3933d02e1ae95,0.0,0.0,0.0,0.0,TypeScript; Go; HTML; JavaScript; SCSS; Makefi...,1. ux review; 1: pm review; 2. editor review; ...,collaboration; golang; hacktoberfest; mattermo...


In [159]:
# Keep only repositories with at least one GUI test file and at least one GUI test.
has_gui_files = analysis_df["gui_test_files"].gt(0)
has_gui_tests = analysis_df["gui_tests"].gt(0)
analysis_df = analysis_df.loc[has_gui_files & has_gui_tests]


### Spearman correlations

In [160]:
def _spearman_pair(frame, x_col, y_col):
    """Spearman rank correlation between two columns of ``frame``.

    Rows in which either of the two columns is missing are dropped before
    the correlation is computed, so each pair uses its own complete cases.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding both columns.
    x_col, y_col : str
        Names of the two columns to correlate.

    Returns
    -------
    tuple
        ``(rho, p_value)`` exactly as produced by ``scipy.stats.spearmanr``.
    """
    paired = frame[[x_col, y_col]].dropna()
    rho, p_value = spearmanr(paired[x_col], paired[y_col])
    return rho, p_value


# Activity (commits, contributors) x intensity (gui_test_files, gui_tests).
rho_commits_files, p_commits_files = _spearman_pair(analysis_df, "commits", "gui_test_files")
rho_commits_tests, p_commits_tests = _spearman_pair(analysis_df, "commits", "gui_tests")
rho_contrib_files, p_contrib_files = _spearman_pair(analysis_df, "contributors", "gui_test_files")
rho_contrib_tests, p_contrib_tests = _spearman_pair(analysis_df, "contributors", "gui_tests")

# Display all eight values, ordered as (rho, p) per pair.
(
    rho_commits_files, p_commits_files,
    rho_commits_tests, p_commits_tests,
    rho_contrib_files, p_contrib_files,
    rho_contrib_tests, p_contrib_tests,
)


(0.15275694579741864,
 0.0008700484613597067,
 0.13386480242625695,
 0.0035717190169637205,
 0.08586018595659795,
 0.0623416856635347,
 0.1007771421331323,
 0.028581541613618477)

### Compact results table

In [161]:
from statsmodels.stats.multitest import multipletests

# One row per (activity, intensity) pair, in the order the correlations were computed.
results = pd.DataFrame({
    "activity": ["commits", "commits", "contributors", "contributors"],
    "intensity": ["gui_test_files", "gui_tests", "gui_test_files", "gui_tests"],
    "rho": [rho_commits_files, rho_commits_tests, rho_contrib_files, rho_contrib_tests],
    "p": [p_commits_files, p_commits_tests, p_contrib_files, p_contrib_tests],
})

# Multiple testing correction (recommended): element [1] of the
# multipletests result holds the adjusted p-values.
raw_p = results["p"]
results = results.assign(
    p_holm=multipletests(raw_p, method="holm")[1],
    p_bh=multipletests(raw_p, method="fdr_bh")[1],
)

results


Unnamed: 0,activity,intensity,rho,p,p_holm,p_bh
0,commits,gui_test_files,0.152757,0.00087,0.00348,0.00348
1,commits,gui_tests,0.133865,0.003572,0.010715,0.007143
2,contributors,gui_test_files,0.08586,0.062342,0.062342,0.062342
3,contributors,gui_tests,0.100777,0.028582,0.057163,0.038109


In [165]:
from scipy.stats import shapiro, normaltest

# Sample-size limits of the two normality tests.
SHAPIRO_MIN_N = 3      # shapiro raises ValueError below 3 observations
SHAPIRO_MAX_N = 5000   # above this SciPy warns the Shapiro p-value may be inaccurate
NORMALTEST_MIN_N = 8   # guard kept from the original cell; normaltest needs at least 8 observations

cols = ["commits", "contributors", "gui_test_files", "gui_tests"]

# One summary row per variable: sample size and the p-value of each test.
# A test that cannot run on the available sample is reported as NaN.
rows = []
for c in cols:
    x = analysis_df[c].dropna()
    n = len(x)

    if n >= SHAPIRO_MIN_N:
        # Cap the sample (with a fixed seed, so the result is reproducible)
        # to stay inside the range where the Shapiro p-value is reliable.
        x_shap = x.sample(SHAPIRO_MAX_N, random_state=0) if n > SHAPIRO_MAX_N else x
        sh_p = shapiro(x_shap)[1]
    else:
        sh_p = np.nan

    nt_p = normaltest(x)[1] if n >= NORMALTEST_MIN_N else np.nan

    rows.append({
        "var": c,
        "N": n,
        "shapiro_p": sh_p,
        "normaltest_p": nt_p,
    })

pd.DataFrame(rows)


Unnamed: 0,var,N,shapiro_p,normaltest_p
0,commits,472,1.749068e-32,2.618806e-91
1,contributors,472,4.4116700000000006e-23,3.3508529999999995e-19
2,gui_test_files,472,4.1320829999999996e-38,5.26856e-161
3,gui_tests,472,5.983352999999999e-38,3.190689e-143
