RQ2: To what extent do non-trivial open-source repositories that implement both GUI and performance end-to-end tests differ from those that implement only GUI end-to-end tests or only performance end-to-end tests, with respect to project activity metrics such as the number of commits, contributors, issues, and pull requests?

Import Required Libraries

In [20]:
!pip install pandas

import pandas as pd
from scipy import stats
import numpy as np



Load Repository CSV (General Info)

In [21]:
df_repository_general = pd.read_csv('E2EGit\\repository.csv')


df_repository_general = df_repository_general.rename(columns={'name': 'repository_name'})

# Keep only the columns you want
df_repository_general = df_repository_general[['repository_name', 'commits','contributors', 'total_issues', 'total_pull_requests']]

# Apply filtering conditions
df_filtered = df_repository_general[
    (df_repository_general['commits'] >= 2000) &
    (df_repository_general['contributors'] >= 10) &
    (df_repository_general['total_issues'] >= 100) &
    (df_repository_general['total_pull_requests'] >= 50)
].reset_index(drop=True)

df_filtered.head(10)

  df_repository_general = pd.read_csv('E2EGit\\repository.csv')


Unnamed: 0,repository_name,commits,contributors,total_issues,total_pull_requests
0,sparklemotion/nokogiri,7305.0,224.0,1953.0,1095.0
1,junit-team/junit4,2509.0,146.0,829.0,903.0
2,unclebob/fitnesse,6054.0,114.0,763.0,727.0
3,connectbot/connectbot,2207.0,57.0,527.0,855.0
4,bndtools/bndtools,3563.0,28.0,1291.0,643.0
5,caelum/vraptor,3448.0,60.0,455.0,209.0
6,maxcom/lorsource,8917.0,75.0,301.0,758.0
7,rzwitserloot/lombok,3243.0,106.0,2458.0,354.0
8,voldemort/voldemort,4263.0,57.0,136.0,371.0
9,jdbi/jdbi,5615.0,131.0,1051.0,1555.0


Load non_trivial_repository CSV

In [22]:
df_repository_non_trivial = pd.read_csv('E2EGit\\non_trivial_repository.csv')

df_repository_non_trivial = df_repository_non_trivial.rename(columns={'name': 'repository_name'})

df_repository_non_trivial = df_repository_non_trivial[['repository_name']]

Merge general info with non trivial repos

In [23]:
df_repository = pd.merge(df_filtered, df_repository_non_trivial, left_on='repository_name', right_on='repository_name', how='inner')

print(len(df_repository))

print(df_repository)

10727
                         repository_name  commits  contributors  total_issues  \
0                 sparklemotion/nokogiri   7305.0         224.0        1953.0   
1                      junit-team/junit4   2509.0         146.0         829.0   
2                      unclebob/fitnesse   6054.0         114.0         763.0   
3                  connectbot/connectbot   2207.0          57.0         527.0   
4                      bndtools/bndtools   3563.0          28.0        1291.0   
...                                  ...      ...           ...           ...   
10722             jeff-regier/Celeste.jl   2316.0          11.0         313.0   
10723                 denizyuret/Knet.jl   3086.0          33.0         495.0   
10724                numenta/htmresearch   5346.0          27.0         115.0   
10725          huaweicloud/ModelArts-Lab   2541.0         277.0         171.0   
10726  girlscript/winter-of-contributing  10156.0         416.0        4098.0   

       total_pull_req

Clean up the data

In [24]:
numeric_cols = ['commits', 'contributors', 'total_issues', 'total_pull_requests']

for col in numeric_cols:
    df_repository[col] = pd.to_numeric(df_repository[col], errors='coerce')

# Drop rows with NaN (caused by non-numeric values)
df_repository = df_repository.dropna(subset=numeric_cols)

print(df_repository)
print(len(df_repository))

                         repository_name  commits  contributors  total_issues  \
0                 sparklemotion/nokogiri   7305.0         224.0        1953.0   
1                      junit-team/junit4   2509.0         146.0         829.0   
2                      unclebob/fitnesse   6054.0         114.0         763.0   
3                  connectbot/connectbot   2207.0          57.0         527.0   
4                      bndtools/bndtools   3563.0          28.0        1291.0   
...                                  ...      ...           ...           ...   
10722             jeff-regier/Celeste.jl   2316.0          11.0         313.0   
10723                 denizyuret/Knet.jl   3086.0          33.0         495.0   
10724                numenta/htmresearch   5346.0          27.0         115.0   
10725          huaweicloud/ModelArts-Lab   2541.0         277.0         171.0   
10726  girlscript/winter-of-contributing  10156.0         416.0        4098.0   

       total_pull_requests 

Load Gui details CSV

In [25]:
df_gui_repo_details = pd.read_csv('E2EGit\gui_testing_repo_details.csv')

# Keep only the columns you want
df_gui_repo_details = df_gui_repo_details[['repository_name']]

print(df_gui_repo_details)
print(len(df_gui_repo_details))

                        repository_name
0                       quantumlib/cirq
1                           wandb/weave
2    insightsoftwareconsortium/itk-wasm
3              codyogden/killedbygoogle
4                 mattermost/mattermost
..                                  ...
467               wp-graphql/wp-graphql
468               reduxjs/redux-toolkit
469           facebook/create-react-app
470                            zkoss/zk
471                   grafana/pyroscope

[472 rows x 1 columns]
472


Load the performance details CSV

In [26]:
df_performance_test_details = pd.read_csv('E2EGit\performance_testing_test_details.csv')


# Keep only the columns you want
df_performance_test_details = df_performance_test_details[['repository_name']]

print(df_performance_test_details)
print(len(df_performance_test_details))

              repository_name
0               apache/roller
1    nysenate/openlegislation
2    nysenate/openlegislation
3    nysenate/openlegislation
4    nysenate/openlegislation
..                        ...
405  HumanSignal/label-studio
406  HumanSignal/label-studio
407  HumanSignal/label-studio
408       jetty/jetty.project
409       jetty/jetty.project

[410 rows x 1 columns]
410


Merge performance with gui testing to get repositories that implement both

In [27]:
df_both_tests = pd.merge(df_performance_test_details, df_gui_repo_details, left_on='repository_name', right_on='repository_name', how='inner')

print(df_both_tests)
print(len(df_both_tests))


             repository_name
0              apache/roller
1          apache/tapestry-5
2        zkoss/zkspreadsheet
3           eugenp/tutorials
4           eugenp/tutorials
..                       ...
85  HumanSignal/label-studio
86  HumanSignal/label-studio
87  HumanSignal/label-studio
88  HumanSignal/label-studio
89  HumanSignal/label-studio

[90 rows x 1 columns]
90


Get reposorties that implement GUI only

In [28]:
# Not merged repositories (exist in GUI but not in Performance)
df_gui_only = pd.merge(df_gui_repo_details, df_performance_test_details, on='repository_name', how='left', indicator=True)
df_gui_only = df_gui_only[df_gui_only['_merge'] == 'left_only'][['repository_name']]


print(df_gui_only)
print(len(df_gui_only))

                        repository_name
0                       quantumlib/cirq
1                           wandb/weave
2    insightsoftwareconsortium/itk-wasm
3              codyogden/killedbygoogle
4                 mattermost/mattermost
..                                  ...
544               wp-graphql/wp-graphql
545               reduxjs/redux-toolkit
546           facebook/create-react-app
547                            zkoss/zk
548                   grafana/pyroscope

[459 rows x 1 columns]
459


Get reposorties that implement Performance only

In [29]:
# Not merged repositories (exist in Performance but not in GUI)
df_perf_only = pd.merge(df_performance_test_details, df_gui_repo_details, on='repository_name', how='left', indicator=True)
df_perf_only = df_perf_only[df_perf_only['_merge'] == 'left_only'][['repository_name']]
print(df_perf_only)
print(len(df_perf_only))

                                    repository_name
1                          nysenate/openlegislation
2                          nysenate/openlegislation
3                          nysenate/openlegislation
4                          nysenate/openlegislation
5                          nysenate/openlegislation
..                                              ...
400  ballerina-platform/ballerina-performance-cloud
401  ballerina-platform/ballerina-performance-cloud
402  ballerina-platform/ballerina-performance-cloud
408                             jetty/jetty.project
409                             jetty/jetty.project

[320 rows x 1 columns]
320


Merge both Dataframe with the Dataframe that contains project activity

In [30]:
# merged repositories (exist in both GUI and Performance) with repository details
repo_both_with_repository_details = pd.merge(df_both_tests, df_repository, on='repository_name', how='inner')

repo_both_with_repository_details.head(10)

Unnamed: 0,repository_name,commits,contributors,total_issues,total_pull_requests
0,eugenp/tutorials,15392.0,321.0,733.0,15512.0
1,eugenp/tutorials,15392.0,321.0,733.0,15512.0
2,eugenp/tutorials,15392.0,321.0,733.0,15512.0
3,eugenp/tutorials,15392.0,321.0,733.0,15512.0
4,eugenp/tutorials,15392.0,321.0,733.0,15512.0
5,eugenp/tutorials,15392.0,321.0,733.0,15512.0
6,eugenp/tutorials,15392.0,321.0,733.0,15512.0
7,eugenp/tutorials,15392.0,321.0,733.0,15512.0
8,eugenp/tutorials,15392.0,321.0,733.0,15512.0
9,eugenp/tutorials,15392.0,321.0,733.0,15512.0


Not merged repositories (exist in GUI or Performance but not in both) with repository details

In [31]:
# not merged repositories (exist in GUI or Performance but not in both) with repository details

repo_gui_only_with_repository_details = pd.merge(df_gui_only, df_repository, on='repository_name', how='inner')
repo_perf_only_with_repository_details = pd.merge(df_perf_only, df_repository, on='repository_name', how='inner')

print(len(repo_gui_only_with_repository_details))
print(len(repo_perf_only_with_repository_details))


387
200


Add a column to each DataFrame to identify their type

In [32]:
repo_both_with_repository_details['test_type'] = 'Both'
repo_gui_only_with_repository_details['test_type'] = 'GUI'
repo_perf_only_with_repository_details['test_type'] = 'Performance'

Get the final DataFrame

In [33]:
df_all = pd.concat([repo_both_with_repository_details, repo_gui_only_with_repository_details, repo_perf_only_with_repository_details], ignore_index=True)

print(len(df_all))

674


##### Normality Tests

In [34]:
normality_results = {}

for test_type in df_all['test_type'].unique():
    print(f"{test_type}:")
    subset = df_all[df_all['test_type'] == test_type]
    normality_results[test_type] = {}
    
    for metric in numeric_cols:
        stat, p_value = stats.shapiro(subset[metric])
        is_normal = p_value > 0.05
        normality_results[test_type][metric] = is_normal
        print(f"  {metric:15s}: W={stat:.4f}, p={p_value:.4f} | "
              f"{'Normal ✓' if is_normal else 'Non-normal ✗'}")
    print()

# Check if we should use parametric or non-parametric tests
all_normal = all(all(normality_results[tt].values()) for tt in normality_results)
print(f"Recommendation: Use {'Parametric (t-test)' if all_normal else 'Non-parametric (Mann-Whitney U)'} tests")
print("However, we'll perform BOTH tests for comprehensive analysis.\n")

Both:
  commits        : W=0.6977, p=0.0000 | Non-normal ✗
  contributors   : W=0.6164, p=0.0000 | Non-normal ✗
  total_issues   : W=0.5144, p=0.0000 | Non-normal ✗
  total_pull_requests: W=0.6631, p=0.0000 | Non-normal ✗

GUI:
  commits        : W=0.5911, p=0.0000 | Non-normal ✗
  contributors   : W=0.8260, p=0.0000 | Non-normal ✗
  total_issues   : W=0.2263, p=0.0000 | Non-normal ✗
  total_pull_requests: W=0.5786, p=0.0000 | Non-normal ✗

Performance:
  commits        : W=0.9298, p=0.0000 | Non-normal ✗
  contributors   : W=0.8195, p=0.0000 | Non-normal ✗
  total_issues   : W=0.8080, p=0.0000 | Non-normal ✗
  total_pull_requests: W=0.7143, p=0.0000 | Non-normal ✗

Recommendation: Use Non-parametric (Mann-Whitney U) tests
However, we'll perform BOTH tests for comprehensive analysis.



In [35]:
summary_stats = df_all.groupby('test_type').agg({
    'commits': ['mean', 'median', 'std', 'count'],
    'contributors': ['mean', 'median', 'std'],
    'total_issues': ['mean', 'median', 'std'],
    'total_pull_requests': ['mean', 'median', 'std']
}).round(2)

print(summary_stats)

              commits                          contributors                 \
                 mean   median       std count         mean median     std   
test_type                                                                    
Both         10960.68   5690.0  11288.33    87       103.24   74.0   92.21   
GUI           9990.64   5690.0  12489.24   387       138.77   84.0  125.87   
Performance  11786.08  10582.0   6867.70   200       150.23   99.0  128.74   

            total_issues                   total_pull_requests          \
                    mean  median       std                mean  median   
test_type                                                                
Both             2047.72  1353.0   1974.08             3933.71  2307.0   
GUI              3255.45  1271.0  10012.98             4133.12  2293.0   
Performance      3680.16  1616.0   3528.24             4954.02  3481.5   

                      
                 std  
test_type             
Both         4951

In [43]:
gui_only = df_all[df_all['test_type'] == 'GUI']
perf_only = df_all[df_all['test_type'] == 'Performance']
both_tests = df_all[df_all['test_type'] == 'Both']

comparisons = [
    ('Both', 'GUI Only', both_tests, gui_only),
    ('Both', 'Performance Only', both_tests, perf_only),
    ('GUI Only', 'Performance Only', gui_only, perf_only)
]

mw_results = {}

for metric in numeric_cols:
    print(f"{metric.upper()}")
    print("-" * 80)
    mw_results[metric] = {}
    
    for label1, label2, data1, data2 in comparisons:
        values1 = data1[metric]
        values2 = data2[metric]
        
        # Mann-Whitney U test
        u_stat, p_value = stats.mannwhitneyu(values1, values2, alternative='two-sided')
        
        # Calculate effect size (rank-biserial correlation)
        n1, n2 = len(values1), len(values2)
        rank_biserial = 1 - (2*u_stat) / (n1 * n2)
        
        # Calculate medians
        median1 = values1.median()
        median2 = values2.median()
        median_diff = median1 - median2
        pct_diff = (median_diff / median2) * 100 if median2 != 0 else 0
        
        sig_marker = "***" if p_value < 0.001 else "**" if p_value < 0.01 else "*" if p_value < 0.05 else "ns"
        
        mw_results[metric][f"{label1}_vs_{label2}"] = {
            'u_stat': u_stat,
            'p_value': p_value,
            'rank_biserial': rank_biserial,
            'median_diff': median_diff,
            'pct_diff': pct_diff
        }
        
        print(f"  {label1} vs {label2}")
        print(f"    Median 1: {median1:.2f} | Median 2: {median2:.2f}")
        print(f"    Difference: {median_diff:+.2f} ({pct_diff:+.1f}%)")
        print(f"    U-statistic: {u_stat:.2f}")
        print(f"    P-value: {p_value:.4f} {sig_marker}")
        print(f"    Effect size (r): {rank_biserial:.3f}")
    
    print()

COMMITS
--------------------------------------------------------------------------------
  Both vs GUI Only
    Median 1: 5690.00 | Median 2: 5690.00
    Difference: +0.00 (+0.0%)
    U-statistic: 16772.50
    P-value: 0.9575 ns
    Effect size (r): 0.004
  Both vs Performance Only
    Median 1: 5690.00 | Median 2: 10582.00
    Difference: -4892.00 (-46.2%)
    U-statistic: 6421.00
    P-value: 0.0004 ***
    Effect size (r): 0.262
  GUI Only vs Performance Only
    Median 1: 5690.00 | Median 2: 10582.00
    Difference: -4892.00 (-46.2%)
    U-statistic: 25697.00
    P-value: 0.0000 ***
    Effect size (r): 0.336

CONTRIBUTORS
--------------------------------------------------------------------------------
  Both vs GUI Only
    Median 1: 74.00 | Median 2: 84.00
    Difference: -10.00 (-11.9%)
    U-statistic: 15054.00
    P-value: 0.1230 ns
    Effect size (r): 0.106
  Both vs Performance Only
    Median 1: 74.00 | Median 2: 99.00
    Difference: -25.00 (-25.3%)
    U-statistic: 7399.