In [2]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import time
import numpy as np
import math

# Load the data
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data', header=None)

# Split the data into features and target
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Initialize the three algorithms
algorithms = [RandomForestClassifier(), LogisticRegression(), SVC()]

# Initialize the evaluation measures
training_times = []
accuracies = []
f_measures = []

# Initialize the cross-validation object
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Iterate through the folds of the cross-validation
for train_index, test_index in skf.split(X, y):
    # print(train_index,test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Iterate through the algorithms
    for algorithm in algorithms:
        # Fit the algorithm on the training data
        start = time.time()
        algorithm.fit(X_train, y_train)
        end = time.time()

        # Record the training time
        training_times.append(end - start)

        # Make predictions on the test data
        y_pred = algorithm.predict(X_test)

        # Record the accuracy and F-measure
        accuracies.append(accuracy_score(y_test, y_pred))
        f_measures.append(f1_score(y_test, y_pred))


fold = ['Fold 1', 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5',
        'Fold 6', 'Fold 7', 'Fold 8', 'Fold 9', 'Fold 10']
alg = ['Random Forest', 'Logistic Regression', 'SVC']

# Reshape the training times
training_times_array = np.array(training_times)
training_times_table = pd.DataFrame(
    training_times_array.reshape(10, 3), index=fold, columns=alg)

# Reshape the accuracies
accuracies_array = np.array(accuracies)
accuracies_table = pd.DataFrame(
    accuracies_array.reshape(10, 3), index=fold, columns=alg)

# Reshape the F-measures
f_measures_array = np.array(f_measures)
f_measures_table = pd.DataFrame(
    f_measures_array.reshape(10, 3), index=fold, columns=alg)


# Initialize a list of table names
table_names = ['Training Times', 'Accuracies', 'F-Measures']

# Initialize a list of tables
tables = [training_times_table, accuracies_table, f_measures_table]

# Iterate through the tables
for table_name, table in zip(table_names, tables):
    # Print the table name
    print(table_name + ":")
    
    # Compute the average and standard deviation
    table.loc['Average'] = table.mean(axis=0)
    table.loc['Standard Deviation'] = table.std(axis=0)
    
    # Print the table
    print(table)
    print()

training_times_table=training_times_table.drop(['Average','Standard Deviation'])
accuracies_table=accuracies_table.drop(['Average','Standard Deviation'])
f_measures_table=f_measures_table.drop(['Average','Standard Deviation'])


# Function for friedman test
def friedman_test(table,name, strat):
    # Create a copy of the table
    print(f"-----------------------------{name}-----------------------------")
    rankings_table = table.copy()
    k=rankings_table.shape[1]
    n=rankings_table.shape[0]
    # Iterate through the rows
    for i in range(rankings_table.shape[0]):
        # Get the values for the current row
        row = rankings_table.iloc[i, :]
        
        # Sort the values in ascending order
        sorted_row = row.sort_values(ascending=strat)
        
        # Create a ranking for each value
        ranking = {value: rank for rank, value in enumerate(sorted_row, start=1)}
        
        #Update the values in the table with the rankings
        for j in range(rankings_table.shape[1]):
            value = rankings_table.iloc[i, j]
            rankings_table.iloc[i, j] = f'{value:.6f} ({ranking[value]})'

    average_rankings = rankings_table.apply(lambda row: row.str.extract(r'\((\d+)\)').astype(int).mean(axis=0))
    total_avg= average_rankings.mean(axis=1).iloc[0]
    squared_differences = (average_rankings - total_avg)**2
    sum_squared_differences = (squared_differences.sum(axis=1).iloc[0])*n
    squ_sum=0
    for i in range(1,k+1):
      squ_sum= squ_sum+ ((i-total_avg)**2)*n 
    form_2= squ_sum/(n*(k-1))
    #friedman statistic calculation
    fried_stat=sum_squared_differences/form_2
    rankings_table = rankings_table.append(average_rankings, ignore_index=True)
    
    print(rankings_table)
    print(f"friedman statistic : {fried_stat}")
    critical_value=7.8 #The critical value for k = 3 and n = 10 at the alpha = 0.05 level is 7.8 as per textbook
    if critical_value<fried_stat:
      print(f'The critical value for k = 3 and n = 10 at the alpha = 0.05 level is 7.8 i.e {fried_stat} > 7.8 ' )
      print("The null hypothesis is rejected that is all Algorithms doesnot perform equally")
    else:
      print(f'The critical value for k = 3 and n = 10 at the alpha = 0.05 level is 7.8 i.e {fried_stat} < 7.8 ' )
      print("We Failed to reject null hypothesis that all Algorithms perform equally")

    #Nemenyi test
    Q=2.343 #textbook value(for alpha = 0.05 and k = 3 ,q alpha=2.343 )
    alpha = 0.05
    #Critical Difference = Q * sqrt((k*(k+1)) / (6*n))
    CD=Q*math.sqrt(k*(k+1)/(6*n))
    print("The critical difference is :",CD)
    # print(average_rankings)
    if abs(average_rankings.at[0,'Random Forest']-average_rankings.at[0,'Logistic Regression'])>CD:
      print("Random Forest and Logistic Regression do not perform equally ")

      
    if abs(average_rankings.at[0,'Random Forest']-average_rankings.at[0,'SVC'])>CD:
      print("Support vector Machine and Random Forest do not perform equally ")


    if abs(average_rankings.at[0,'Logistic Regression']-average_rankings.at[0,'SVC'])>CD:
      print("Logistic Regression and Support vector Machine do not perform equally ")

#friedman test and nemenyi test    
#true if ranking in ascending order else false 
training_times_rankings = friedman_test(training_times_table,'training times',True)
accuracies_rankings = friedman_test(accuracies_table,'Accuracies',False)
f_measures_rankings=friedman_test(f_measures_table,'F-measure',False)

Training Times:
                    Random Forest  Logistic Regression       SVC
Fold 1                   2.273245             0.121948  1.412198
Fold 2                   2.429039             0.180514  1.341377
Fold 3                   2.670274             0.195757  1.416729
Fold 4                   2.305401             0.123443  1.249013
Fold 5                   2.338433             0.135834  1.249249
Fold 6                   2.353570             0.217222  1.377870
Fold 7                   2.442711             0.154781  1.424545
Fold 8                   2.453760             0.186297  1.312816
Fold 9                   2.364666             0.139255  1.349983
Fold 10                  2.647831             0.174705  1.379654
Average                  2.427893             0.162976  1.351343
Standard Deviation       0.128241             0.031014  0.061092

Accuracies:
                    Random Forest  Logistic Regression       SVC
Fold 1                   0.950108             0.913232  0.919

  rankings_table = rankings_table.append(average_rankings, ignore_index=True)
  rankings_table = rankings_table.append(average_rankings, ignore_index=True)


   Random Forest Logistic Regression           SVC
0   2.273245 (3)        0.121948 (1)  1.412198 (2)
1   2.429039 (3)        0.180514 (1)  1.341377 (2)
2   2.670274 (3)        0.195757 (1)  1.416729 (2)
3   2.305401 (3)        0.123443 (1)  1.249013 (2)
4   2.338433 (3)        0.135834 (1)  1.249249 (2)
5   2.353570 (3)        0.217222 (1)  1.377870 (2)
6   2.442711 (3)        0.154781 (1)  1.424545 (2)
7   2.453760 (3)        0.186297 (1)  1.312816 (2)
8   2.364666 (3)        0.139255 (1)  1.349983 (2)
9   2.647831 (3)        0.174705 (1)  1.379654 (2)
10           3.0                 1.0           2.0
friedman statistic : 20.0
The critical value for k = 3 and n = 10 at the alpha = 0.05 level is 7.8 i.e 20.0 > 7.8 
The null hypothesis is rejected that is all Algorithms doesnot perform equally
The critical difference is : 1.0478214542564015
Random Forest and Logistic Regression do not perform equally 
-----------------------------Accuracies-----------------------------
   Random Fores

  rankings_table = rankings_table.append(average_rankings, ignore_index=True)
