In [117]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd

# Download dataset id 94 (bound to `spambase`) through ucimlrepo
spambase = fetch_ucirepo(id=94)

# Features and targets as exposed by the fetched dataset object
X, y = spambase.data.features, spambase.data.targets

# One table holding the features and the target column(s) side by side
data = pd.concat([X, y], axis="columns")

In [118]:
# ten fold cv
from sklearn.model_selection import StratifiedKFold

# Shared splitter: 10 stratified folds, shuffled with a fixed seed so every
# model below is evaluated on the same splits.
skf = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [119]:
# random forest

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import time
import numpy as np

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Per-fold metrics for the random forest (one entry appended per fold)
rf_accuracy_scores = []
rf_f1_scores = []
rf_training_times = []

for train_idx, test_idx in skf.split(X, y):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    # y is selected with .iloc on a 2-D object; squeeze() collapses it to 1-D for fit/metrics
    y_train, y_test = y.iloc[train_idx].squeeze(), y.iloc[test_idx].squeeze()

    # Time only the fit call. perf_counter() is a monotonic, high-resolution
    # clock intended for measuring durations; time.time() is wall-clock time
    # and can jump or be too coarse for short intervals.
    start_time = time.perf_counter()
    rf_model.fit(X_train, y_train)
    rf_training_times.append(time.perf_counter() - start_time)

    # Predict on the held-out fold
    y_pred = rf_model.predict(X_test)

    # Score the fold
    rf_accuracy_scores.append(accuracy_score(y_test, y_pred))
    rf_f1_scores.append(f1_score(y_test, y_pred))



In [120]:
# naive bayes

from sklearn.naive_bayes import GaussianNB

nb_model = GaussianNB()

# Per-fold metrics for naive Bayes (one entry appended per fold)
nb_accuracy_scores = []
nb_f1_scores = []
nb_training_times = []

# Cross validation over the shared stratified splitter
for train_idx, test_idx in skf.split(X, y):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    # squeeze() collapses the selected targets to 1-D for fit/metrics
    y_train, y_test = y.iloc[train_idx].squeeze(), y.iloc[test_idx].squeeze()

    # Time only the fit call. perf_counter() is used instead of time.time()
    # because these fits are very short and need a monotonic,
    # high-resolution clock to be measured reliably.
    start_time = time.perf_counter()
    nb_model.fit(X_train, y_train)
    nb_training_times.append(time.perf_counter() - start_time)

    # Predict on the held-out fold and score it
    y_pred = nb_model.predict(X_test)

    nb_accuracy_scores.append(accuracy_score(y_test, y_pred))
    nb_f1_scores.append(f1_score(y_test, y_pred))

In [121]:
# Support Vector Machines

from sklearn.svm import SVC

# NOTE(review): SVC is used with its default settings on the raw features.
# SVMs are usually sensitive to feature scale, so a StandardScaler step may
# be worth evaluating — confirm whether the unscaled baseline is intended.
svm = SVC()

# Per-fold metrics for the SVM (one entry appended per fold)
svm_accuracy_scores = []
svm_f1_scores = []
svm_training_times = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    # squeeze() collapses the selected targets to 1-D for fit/metrics
    y_train, y_test = y.iloc[train_index].squeeze(), y.iloc[test_index].squeeze()

    # Time only the fit call with a monotonic, high-resolution clock
    # (time.time() is wall-clock time and unsuitable for measuring durations).
    start_time = time.perf_counter()
    svm.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Predict on the held-out fold and record the fold's metrics
    y_pred = svm.predict(X_test)
    svm_accuracy_scores.append(accuracy_score(y_test, y_pred))
    svm_f1_scores.append(f1_score(y_test, y_pred))
    svm_training_times.append(training_time)


In [147]:
# Step 2

# 2.1 training time

print("\nTraining Times:")

# Column label -> per-fold training times, in display order
training_time_by_model = {
    "Random Forest": rf_training_times,
    "Naive Bayes": nb_training_times,
    "Support Vector Machines": svm_training_times,
}

# Ten per-fold rows followed by two summary rows ("avg", "stdev")
results_training_time = pd.concat(
    [
        pd.DataFrame({"Fold": list(range(1, 11)), **training_time_by_model}),
        pd.DataFrame({
            "Fold": ["avg", "stdev"],
            **{
                model: [np.mean(values), np.std(values)]
                for model, values in training_time_by_model.items()
            },
        }),
    ],
    ignore_index=True,
)

print(results_training_time.to_string(index=False, float_format="%.4f"))



Training Times:
 Fold  Random Forest  Naive Bayes  Support Vector Machines
    1         0.3899       0.0023                   0.2445
    2         0.3842       0.0018                   0.2588
    3         0.3835       0.0018                   0.2466
    4         0.3800       0.0021                   0.2406
    5         0.3801       0.0019                   0.2426
    6         0.3755       0.0018                   0.2562
    7         0.3767       0.0019                   0.2476
    8         0.3770       0.0018                   0.2489
    9         0.3850       0.0021                   0.2492
   10         0.3771       0.0020                   0.2393
  avg         0.3809       0.0019                   0.2474
stdev         0.0044       0.0002                   0.0060


In [148]:
# 2.2 accuracy

print("\nAccuracy:")

# Column label -> per-fold accuracy, in display order
accuracy_by_model = {
    "Random Forest": rf_accuracy_scores,
    "Naive Bayes": nb_accuracy_scores,
    "Support Vector Machines": svm_accuracy_scores,
}

# Ten per-fold rows followed by two summary rows ("avg", "stdev")
results_accuracy = pd.concat(
    [
        pd.DataFrame({"Fold": list(range(1, 11)), **accuracy_by_model}),
        pd.DataFrame({
            "Fold": ["avg", "stdev"],
            **{
                model: [np.mean(values), np.std(values)]
                for model, values in accuracy_by_model.items()
            },
        }),
    ],
    ignore_index=True,
)

print(results_accuracy.to_string(index=False, float_format="%.4f"))


Accuracy:
 Fold  Random Forest  Naive Bayes  Support Vector Machines
    1         0.9523       0.8243                   0.7158
    2         0.9565       0.8196                   0.7196
    3         0.9609       0.8043                   0.7348
    4         0.9674       0.8217                   0.7109
    5         0.9457       0.8174                   0.7196
    6         0.9587       0.7891                   0.7391
    7         0.9565       0.8457                   0.6935
    8         0.9565       0.8261                   0.7065
    9         0.9435       0.8413                   0.7022
   10         0.9522       0.8130                   0.7065
  avg         0.9550       0.8203                   0.7148
stdev         0.0067       0.0156                   0.0134


In [155]:
# 2.3 F-measure

print("\nF-score:")

# Column label -> per-fold F1 score, in display order
fscore_by_model = {
    "Random Forest": rf_f1_scores,
    "Naive Bayes": nb_f1_scores,
    "Support Vector Machines": svm_f1_scores,
}

# Ten per-fold rows followed by two summary rows ("avg", "stdev")
results_fscore = pd.concat(
    [
        pd.DataFrame({"Fold": list(range(1, 11)), **fscore_by_model}),
        pd.DataFrame({
            "Fold": ["avg", "stdev"],
            **{
                model: [np.mean(values), np.std(values)]
                for model, values in fscore_by_model.items()
            },
        }),
    ],
    ignore_index=True,
)

print(results_fscore.to_string(index=False, float_format="%.4f"))


F-score:
 Fold  Random Forest  Naive Bayes  Support Vector Machines
    1         0.9389       0.8103                   0.5529
    2         0.9441       0.8074                   0.5597
    3         0.9500       0.7945                   0.5612
    4         0.9580       0.8102                   0.5723
    5         0.9311       0.8065                   0.5657
    6         0.9471       0.7810                   0.5652
    7         0.9448       0.8297                   0.5466
    8         0.9438       0.8095                   0.5659
    9         0.9266       0.8274                   0.5387
   10         0.9375       0.8018                   0.5455
  avg         0.9422       0.8078                   0.5574
stdev         0.0086       0.0135                   0.0104


In [156]:
# Drop the trailing "avg" and "stdev" summary rows so only per-fold rows are ranked
results_training_time_no_avg_stdev = results_training_time.iloc[:-2, :]

# Rank the algorithms within each fold (row-wise); ascending=True means the
# smallest training time receives rank 1.
# NOTE(review): ties receive the lowest rank (method='min'); the Friedman test
# is usually described with average ranks for ties — confirm this is intended.
ranks_training_time = results_training_time_no_avg_stdev.iloc[:, 1:].rank(axis=1, ascending=True, method='min')

# Show each measurement together with its rank, e.g. "0.3899 (3)"
results_training_time_with_ranks = results_training_time_no_avg_stdev.copy()
for col in ["Random Forest", "Naive Bayes", "Support Vector Machines"]:
    results_training_time_with_ranks[col] = results_training_time_with_ranks[col].apply(lambda x: f"{x:.4f}") + " (" + ranks_training_time[col].astype(int).astype(str) + ")"

# Average rank per algorithm, kept at full precision: this Series is passed to
# the Friedman and Nemenyi tests, so rounding here would distort the statistics.
# Rounding to two decimals happens only in the display row below.
average_ranks_training_time = ranks_training_time.mean()

# Display row holding the average ranks formatted to two decimals
average_ranks_training_time_row = pd.DataFrame({
    "Fold": ["avg rank"],
    "Random Forest": [f"{average_ranks_training_time['Random Forest']:.2f}"],
    "Naive Bayes": [f"{average_ranks_training_time['Naive Bayes']:.2f}"],
    "Support Vector Machines": [f"{average_ranks_training_time['Support Vector Machines']:.2f}"]
})

# Append the avg rank row below the per-fold rows
results_training_time_with_ranks = pd.concat([results_training_time_with_ranks, average_ranks_training_time_row], ignore_index=True)

print("\nTraining Times:")
print(results_training_time_with_ranks.to_string(index=False))



Training Times:
    Fold Random Forest Naive Bayes Support Vector Machines
       1    0.3899 (3)  0.0023 (1)              0.2445 (2)
       2    0.3842 (3)  0.0018 (1)              0.2588 (2)
       3    0.3835 (3)  0.0018 (1)              0.2466 (2)
       4    0.3800 (3)  0.0021 (1)              0.2406 (2)
       5    0.3801 (3)  0.0019 (1)              0.2426 (2)
       6    0.3755 (3)  0.0018 (1)              0.2562 (2)
       7    0.3767 (3)  0.0019 (1)              0.2476 (2)
       8    0.3770 (3)  0.0018 (1)              0.2489 (2)
       9    0.3850 (3)  0.0021 (1)              0.2492 (2)
      10    0.3771 (3)  0.0020 (1)              0.2393 (2)
avg rank          3.00        1.00                    2.00


In [157]:
# Drop the trailing "avg" and "stdev" summary rows so only per-fold rows are ranked
results_accuracy_no_avg_stdev = results_accuracy.iloc[:-2, :]

# Rank the algorithms within each fold (row-wise); ascending=False means the
# highest accuracy receives rank 1.
# NOTE(review): ties receive the lowest rank (method='min'); the Friedman test
# is usually described with average ranks for ties — confirm this is intended.
ranks_accuracy = results_accuracy_no_avg_stdev.iloc[:, 1:].rank(axis=1, ascending=False, method='min')

# Show each measurement together with its rank, e.g. "0.9523 (1)"
results_accuracy_with_ranks = results_accuracy_no_avg_stdev.copy()
for col in ["Random Forest", "Naive Bayes", "Support Vector Machines"]:
    results_accuracy_with_ranks[col] = results_accuracy_with_ranks[col].apply(lambda x: f"{x:.4f}") + " (" + ranks_accuracy[col].astype(int).astype(str) + ")"

# Average rank per algorithm, kept at full precision: this Series is passed to
# the Friedman and Nemenyi tests, so rounding here would distort the statistics.
# Rounding to two decimals happens only in the display row below.
average_ranks_accuracy = ranks_accuracy.mean()

# Display row holding the average ranks formatted to two decimals
average_ranks_accuracy_row = pd.DataFrame({
    "Fold": ["avg rank"],
    "Random Forest": [f"{average_ranks_accuracy['Random Forest']:.2f}"],
    "Naive Bayes": [f"{average_ranks_accuracy['Naive Bayes']:.2f}"],
    "Support Vector Machines": [f"{average_ranks_accuracy['Support Vector Machines']:.2f}"]
})

# Append the avg rank row below the per-fold rows
results_accuracy_with_ranks = pd.concat([results_accuracy_with_ranks, average_ranks_accuracy_row], ignore_index=True)

print("\nAccuracy with:")
print(results_accuracy_with_ranks.to_string(index=False))


Accuracy with:
    Fold Random Forest Naive Bayes Support Vector Machines
       1    0.9523 (1)  0.8243 (2)              0.7158 (3)
       2    0.9565 (1)  0.8196 (2)              0.7196 (3)
       3    0.9609 (1)  0.8043 (2)              0.7348 (3)
       4    0.9674 (1)  0.8217 (2)              0.7109 (3)
       5    0.9457 (1)  0.8174 (2)              0.7196 (3)
       6    0.9587 (1)  0.7891 (2)              0.7391 (3)
       7    0.9565 (1)  0.8457 (2)              0.6935 (3)
       8    0.9565 (1)  0.8261 (2)              0.7065 (3)
       9    0.9435 (1)  0.8413 (2)              0.7022 (3)
      10    0.9522 (1)  0.8130 (2)              0.7065 (3)
avg rank          1.00        2.00                    3.00


In [158]:
# Drop the trailing "avg" and "stdev" summary rows so only per-fold rows are ranked
results_fscore_no_avg_stdev = results_fscore.iloc[:-2, :]

# Rank the algorithms within each fold (row-wise); ascending=False means the
# highest F1 score receives rank 1.
# NOTE(review): ties receive the lowest rank (method='min'); the Friedman test
# is usually described with average ranks for ties — confirm this is intended.
ranks_fscore = results_fscore_no_avg_stdev.iloc[:, 1:].rank(axis=1, ascending=False, method='min')

# Show each measurement together with its rank, e.g. "0.9389 (1)"
results_fscore_with_ranks = results_fscore_no_avg_stdev.copy()
for col in ["Random Forest", "Naive Bayes", "Support Vector Machines"]:
    results_fscore_with_ranks[col] = results_fscore_with_ranks[col].apply(lambda x: f"{x:.4f}") + " (" + ranks_fscore[col].astype(int).astype(str) + ")"

# Average rank per algorithm, kept at full precision: this Series is passed to
# the Friedman and Nemenyi tests, so rounding here would distort the statistics.
# Rounding to two decimals happens only in the display row below.
average_ranks_fscore = ranks_fscore.mean()

# Display row holding the average ranks formatted to two decimals
average_ranks_fscore_row = pd.DataFrame({
    "Fold": ["avg rank"],
    "Random Forest": [f"{average_ranks_fscore['Random Forest']:.2f}"],
    "Naive Bayes": [f"{average_ranks_fscore['Naive Bayes']:.2f}"],
    "Support Vector Machines": [f"{average_ranks_fscore['Support Vector Machines']:.2f}"]
})

# Append the avg rank row below the per-fold rows
results_fscore_with_ranks = pd.concat([results_fscore_with_ranks, average_ranks_fscore_row], ignore_index=True)

print("\nF1-Score:")
print(results_fscore_with_ranks.to_string(index=False))


F1-Score:
    Fold Random Forest Naive Bayes Support Vector Machines
       1    0.9389 (1)  0.8103 (2)              0.5529 (3)
       2    0.9441 (1)  0.8074 (2)              0.5597 (3)
       3    0.9500 (1)  0.7945 (2)              0.5612 (3)
       4    0.9580 (1)  0.8102 (2)              0.5723 (3)
       5    0.9311 (1)  0.8065 (2)              0.5657 (3)
       6    0.9471 (1)  0.7810 (2)              0.5652 (3)
       7    0.9448 (1)  0.8297 (2)              0.5466 (3)
       8    0.9438 (1)  0.8095 (2)              0.5659 (3)
       9    0.9266 (1)  0.8274 (2)              0.5387 (3)
      10    0.9375 (1)  0.8018 (2)              0.5455 (3)
avg rank          1.00        2.00                    3.00


In [163]:
from scipy.stats import chi2

# step 3 friedman test

def friedman_test(average_ranks, ranks, n, k, alpha=0.05):
    """Compute a Friedman statistic and its chi-square critical value.

    The statistic is the ratio of

    * ``n * sum_j (R_j - R_mean) ** 2`` over the average ranks ``R_j``, and
    * ``sum_ij (r_ij - R_mean) ** 2 / (n * (k - 1))`` over every individual rank,

    where ``R_mean = (k + 1) / 2``.

    Parameters
    ----------
    average_ranks : array-like of float
        Average rank of each algorithm (list, NumPy array or pandas Series).
    ranks : array-like of float
        All individual ranks (pandas DataFrame, NumPy array or nested list).
    n : int
        Number of ranked rows (folds / data sets).
    k : int
        Number of algorithms being compared.
    alpha : float, default 0.05
        Significance level used for the critical value.

    Returns
    -------
    tuple
        ``(friedman_statistic, critical_value)``, where the critical value is
        the ``1 - alpha`` quantile of a chi-square distribution with ``k - 1``
        degrees of freedom.

    Raises
    ------
    ZeroDivisionError
        If ``n * (k - 1)`` is zero.
    """
    # Blank separator line; kept so the existing notebook output is unchanged.
    print()

    # Mean rank under the null hypothesis of no difference between algorithms
    mean_rank = (k + 1) / 2

    # Convert once so lists, Series and DataFrames are all handled positionally
    avg_rank_values = np.asarray(average_ranks, dtype=float)
    all_rank_values = np.asarray(ranks, dtype=float)

    # Spread of the average ranks around the mean rank, scaled by n
    between_groups = n * np.sum((avg_rank_values - mean_rank) ** 2)

    # Spread of every individual rank around the mean rank, scaled by 1 / (n (k - 1))
    scale = 1 / (n * (k - 1))
    total_spread = np.sum((all_rank_values - mean_rank) ** 2) * scale

    friedman_statistic = between_groups / total_spread

    degrees_of_freedom = k - 1
    critical_value = chi2.ppf(1 - alpha, degrees_of_freedom)

    return friedman_statistic, critical_value

# Friedman test on the training-time ranks (10 folds, 3 algorithms)
training_time_friedman_statistic, training_time_critical_value = friedman_test(
    average_ranks=average_ranks_training_time,
    ranks=ranks_training_time,
    n=10,
    k=3,
)

print("Training Time:")
print(f"Friedman Statistic: {training_time_friedman_statistic}")
print(f"Training Time Critical Value: {training_time_critical_value:.4f}")

# Differences are reported as significant only when the statistic exceeds the critical value
print(
    f"There are significant differences between algorithms ({training_time_friedman_statistic:.4f} > {training_time_critical_value:.4f})"
    if training_time_friedman_statistic > training_time_critical_value
    else f"There are no significant differences between algorithms ({training_time_friedman_statistic:.4f} < {training_time_critical_value:.4f})"
)

# step 4 nemenyi test

def nemenyi_test(q_alpha, k, n, average_ranks, algorithms):
    """Print a pairwise comparison of average ranks against a critical difference.

    The critical difference is ``q_alpha * sqrt(k * (k + 1) / (6 * n))``. Every
    pair ``(i, j)`` with ``i < j < k`` is reported as significantly different
    when the absolute difference of their average ranks exceeds that value.

    Parameters
    ----------
    q_alpha : float
        Critical value multiplier supplied by the caller.
    k : int
        Number of algorithms to compare.
    n : int
        Number of ranked rows (folds / data sets).
    average_ranks : array-like of float
        Average rank per algorithm, in the same order as ``algorithms``
        (list, NumPy array or pandas Series).
    algorithms : sequence of str
        Display names of the algorithms.

    Returns
    -------
    None
        Results are printed.
    """
    critical_difference = q_alpha * np.sqrt((k * (k + 1)) / (6 * n))
    print(f"Critical difference: {critical_difference}")

    # Access ranks strictly by position. Indexing a label-indexed Series with an
    # integer key (average_ranks[i]) is deprecated in pandas and emits a
    # FutureWarning; converting to an array keeps positional semantics.
    rank_values = np.asarray(average_ranks, dtype=float)

    for i in range(k):
        for j in range(i + 1, k):
            # Absolute difference between the two average ranks
            difference = abs(rank_values[i] - rank_values[j])

            # Significant only when strictly above the critical difference
            if difference > critical_difference:
                print(f"Significant difference between {algorithms[i]} and {algorithms[j]} (abs difference: {difference})")
            else:
                print(f"No significant difference between {algorithms[i]} and {algorithms[j]} (abs difference: {difference})")
    
print("\nNemenyi test")
# Pairwise comparison of the training-time average ranks.
# NOTE(review): q_alpha=2.343 is presumably the tabulated Nemenyi critical
# value for k=3 at alpha=0.05 — confirm against the table used.
nemenyi_test(
    q_alpha=2.343,
    k=3,
    n=10,
    average_ranks=average_ranks_training_time,
    algorithms=["Random Forest", "Naive Bayes", "Support Vector Machines"],
)


Training Time:
Friedman Statistic: 20.0
Training Time Critical Value: 5.9915
There are significant differences between algorithms (20.0000 > 5.9915)

Nemenyi test
Critical difference: 1.0478214542564015
Significant difference between Random Forest and Naive Bayes (abs difference: 2.0)
No significant difference between Random Forest and Support Vector Machines (abs difference: 1.0)
No significant difference between Naive Bayes and Support Vector Machines (abs difference: 1.0)


  difference = abs(average_ranks[i] - average_ranks[j])


In [164]:
# Accuracy: Friedman test on the accuracy ranks (10 folds, 3 algorithms)
accuracy_friedman_statistic, accuracy_critical_value = friedman_test(
                                                        average_ranks=average_ranks_accuracy, 
                                                        ranks=ranks_accuracy, n=10, k=3)
print("Accuracy:")
print(f"Friedman Statistic: {accuracy_friedman_statistic}")

# Label fixed: this cell reports the accuracy test, not training time
print(f"Accuracy Critical Value: {accuracy_critical_value:.4f}")

# Differences are reported as significant only when the statistic exceeds the critical value
if accuracy_friedman_statistic > accuracy_critical_value:
    print(f"There are significant differences between algorithms ({accuracy_friedman_statistic:.4f} > {accuracy_critical_value:.4f})")
else:
    print(f"There are no significant differences between algorithms ({accuracy_friedman_statistic:.4f} < {accuracy_critical_value:.4f})")

# Pairwise post-hoc comparison of the accuracy average ranks
nemenyi_test(q_alpha=2.343, k=3, n=10, 
             average_ranks=average_ranks_accuracy,
             algorithms=["Random Forest", "Naive Bayes", "Support Vector Machines"])





Accuracy:
Friedman Statistic: 20.0
Training Time Critical Value: 5.9915
There are significant differences between algorithms (20.0000 > 5.9915)
Critical difference: 1.0478214542564015
No significant difference between Random Forest and Naive Bayes (abs difference: 1.0)
Significant difference between Random Forest and Support Vector Machines (abs difference: 2.0)
No significant difference between Naive Bayes and Support Vector Machines (abs difference: 1.0)


  difference = abs(average_ranks[i] - average_ranks[j])


In [165]:
# F1 score: Friedman test on the F1 ranks (10 folds, 3 algorithms)
fscore_friedman_statistic, fscore_critical_value = friedman_test(
                                                        average_ranks=average_ranks_fscore, 
                                                        ranks=ranks_fscore, n=10, k=3)
print("F1 score:")
print(f"Friedman Statistic: {fscore_friedman_statistic}")

# Label fixed: this cell reports the F1 test, not training time
print(f"F1 score Critical Value: {fscore_critical_value:.4f}")

# Compare against this test's own critical value (previously the accuracy
# cell's variable was used, making the result depend on another cell).
if fscore_friedman_statistic > fscore_critical_value:
    print(f"There are significant differences between algorithms ({fscore_friedman_statistic:.4f} > {fscore_critical_value:.4f})")
else:
    print(f"There are no significant differences between algorithms ({fscore_friedman_statistic:.4f} < {fscore_critical_value:.4f})")

# Pairwise post-hoc comparison of the F1 average ranks
nemenyi_test(q_alpha=2.343, k=3, n=10, 
             average_ranks=average_ranks_fscore,
             algorithms=["Random Forest", "Naive Bayes", "Support Vector Machines"])



F1 score:
Friedman Statistic: 20.0
Training Time Critical Value: 5.9915
There are significant differences between algorithms (20.0000 > 5.9915)
Critical difference: 1.0478214542564015
No significant difference between Random Forest and Naive Bayes (abs difference: 1.0)
Significant difference between Random Forest and Support Vector Machines (abs difference: 2.0)
No significant difference between Naive Bayes and Support Vector Machines (abs difference: 1.0)


  difference = abs(average_ranks[i] - average_ranks[j])
