In [24]:
import os
import pandas as pd
from scipy.stats import levene, f_oneway, ttest_ind, kruskal


In [6]:
# Load data
path = os.path.join(os.getcwd(), 'Final_results/confideces_results.csv')

df = pd.read_csv(path)

In [12]:
from scipy.stats import shapiro

# Shapiro-Wilk test for each row
shapiro_results = []

for index, row in df.iterrows():
    fold_values = row[['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values
    stat, p_value = shapiro(fold_values)
    shapiro_results.append({
        'Row Index': index,
        'Confidence Threshold': row['Confidence Threshold'],
        'Budget': row['Budget'],
        'W-Statistic': stat,
        'P-Value': p_value
    })

shapiro_results_df = pd.DataFrame(shapiro_results)

For all treshold for b = 2500 p-value>0.05, so the H0 is accepted, meaning the data is normally distributed.
For the rest the p-value is <0.05, so the H0 is rejected, meaning the data is not normally distributed.

In [5]:
shapiro_results_df

Unnamed: 0,Row Index,Confidence Threshold,Budget,W-Statistic,P-Value
0,0,0.9,500-2500,0.848005,0.188294
1,1,0.9,1000-5000,0.73493,0.021454
2,2,0.9,2000-10000,0.692233,0.007929
3,3,0.95,500-2500,0.967457,0.858695
4,4,0.95,1000-5000,0.91496,0.497964
5,5,0.95,2000-10000,0.892862,0.371698
6,6,0.97,500-2500,0.868355,0.259825
7,7,0.97,1000-5000,0.733437,0.020756
8,8,0.97,2000-10000,0.923549,0.553088


# B=2500

In [8]:
# Extract fold values for each confidence threshold for budget '500-2500'
data_500_2500 = {
    0.90: df[(df['Confidence Threshold'] == 0.90) & (df['Budget'] == '500-2500')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
    0.95: df[(df['Confidence Threshold'] == 0.95) & (df['Budget'] == '500-2500')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
    0.97: df[(df['Confidence Threshold'] == 0.97) & (df['Budget'] == '500-2500')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
}

In [None]:
# Levene's test for equal variances
stat, p_value = levene(data_500_2500[0.90], data_500_2500[0.95], data_500_2500[0.97])

Levene's Test: W=2.7458, p-value=0.1043
Variances are not significantly different


In [15]:
# Extract data for each confidence threshold
data_090 = data_500_2500[0.90]
data_095 = data_500_2500[0.95]
data_097 = data_500_2500[0.97]

# one-way ANOVA
stat, p_value = f_oneway(data_090, data_095, data_097)

print(f"One-Way ANOVA: F={stat:.4f}, p-value={p_value:.4f}")

One-Way ANOVA: F=12.8464, p-value=0.0010


In [None]:
# Define pairs for comparison
data_090 = data_500_2500[0.90]
data_095 = data_500_2500[0.95]
data_097 = data_500_2500[0.97]

pairs = [
    ("0.90 vs 0.95", data_090, data_095),
    ("0.90 vs 0.97", data_090, data_097),
    ("0.95 vs 0.97", data_095, data_097),
]

# Perform t-test for each pair
print("T-Test Results — Budget: 500-2500\n")
for label, group1, group2 in pairs:
    stat, p_value = ttest_ind(group1, group2, equal_var=True)
    print(f"{label}: t={stat:.4f}, p-value={p_value:.4f}")

T-Test Results — Budget: 500-2500

0.90 vs 0.95: t=-3.9535, p-value=0.0042
0.90 vs 0.97: t=-3.3523, p-value=0.0100
0.95 vs 0.97: t=1.5091, p-value=0.1697


There are differeneces for b=2500 betweeb 0.90 and 0.95 and 0.90 and 0.97, so the budget above 0.95 give better resluts than lower treholds 

# B=5000

In [31]:
# Extract fold values for each confidence threshold for budget 1000-5000
data_1000_5000 = {
    0.90: df[(df['Confidence Threshold'] == 0.90) & (df['Budget'] == '1000-5000')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
    0.95: df[(df['Confidence Threshold'] == 0.95) & (df['Budget'] == '1000-5000')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
    0.97: df[(df['Confidence Threshold'] == 0.97) & (df['Budget'] == '1000-5000')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
}

In [32]:
# Kruskal-Wallis test for budget 500-2500
stat, p_value = kruskal(data_1000_5000[0.9], data_1000_5000[0.95], data_1000_5000[0.97])

print(f"Kruskal-Wallis Test (1000-5000): H={stat:.4f}, p-value={p_value:.4f}")

Kruskal-Wallis Test (1000-5000): H=3.6600, p-value=0.1604


# B = 10000

In [33]:
# Extract fold values for each confidence threshold for budget 2000-10000
data_2000_1000 = {
    0.90: df[(df['Confidence Threshold'] == 0.90) & (df['Budget'] == '2000-10000')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
    0.95: df[(df['Confidence Threshold'] == 0.95) & (df['Budget'] == '2000-10000')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
    0.97: df[(df['Confidence Threshold'] == 0.97) & (df['Budget'] == '2000-10000')][['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5']].values.flatten(),
}

In [34]:
# Kruskal-Wallis test for budget 2000-1000
stat, p_value = kruskal(data_1000_5000[0.9], data_1000_5000[0.95], data_1000_5000[0.97])

print(f"Kruskal-Wallis Test (500-2500): H={stat:.4f}, p-value={p_value:.4f}")

Kruskal-Wallis Test (500-2500): H=3.6600, p-value=0.1604
