## Standard Classification Algorithms without dMeans

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import random
import math
import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay, confusion_matrix, classification_report

# Silence FutureWarning, ConvergenceWarning and UserWarning for the rest of the notebook
for ignored_category in (FutureWarning, ConvergenceWarning, UserWarning):
    warnings.simplefilter(action='ignore', category=ignored_category)

In [2]:
# Load the Dataset
share_link = "https://drive.google.com/file/d/16YkA1qJ4FHcBIvXZc17ifKSUzb_Xihth/view?usp=sharing"
# The file id is the second-to-last '/'-separated segment of the share link
file_id = share_link.split('/')[-2]
url = "https://drive.google.com/uc?id=" + file_id

# Read the CSV (first row is the header) and show it as a DataFrame
dataset = pd.read_csv(url, header=0)
print("Dataframe visualization: ")
dataset

Dataframe visualization: 


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071
0,10.0,18.0,10.0,167.9,10.0,69.3,71.8,10.0,12.2,21.3,...,14.3,126.0,14.4,10.0,18.6,115.6,21.5,129.8,406.8,0
1,195.08,24.8,35.03,148.51,201.51,383.86,126.6,1445.4,116.1,491.04,...,99.97,58.25,236.01,141.16,58.38,173.17,19.15,85.01,330.86,0
2,200.6,144.75,10.0,354.78,30.22,65.74,22.32,160.32,22.55,504.75,...,50.63,229.28,24.16,83.77,77.02,10.0,72.72,10.0,17.87,0
3,133.38,178.0,43.22,158.48,99.18,32.91,20.36,41.74,10.0,205.24,...,44.99,82.91,10.0,69.53,76.13,10.0,10.0,10.0,68.88,0
4,50.82,14.08,20.2,75.3,177.54,257.13,122.24,863.44,178.97,116.32,...,210.4,47.75,60.41,209.18,39.18,10.0,40.0,25.71,70.41,0
5,1121.8,224.44,66.62,410.88,52.57,18.63,10.0,13.43,30.61,551.52,...,107.01,357.16,51.32,100.35,141.57,10.0,86.09,10.0,32.27,0
6,684.12,278.79,19.86,669.72,292.81,150.38,22.19,101.7,19.62,459.92,...,49.76,442.09,51.79,379.56,16.12,10.0,94.07,27.33,125.53,0
7,749.21,146.43,34.77,455.08,17.64,26.67,10.0,10.0,10.0,323.59,...,13.5,565.39,45.58,97.57,66.09,10.0,31.06,10.0,89.46,0
8,70.09,194.13,33.55,322.62,57.08,26.94,10.0,174.85,31.95,154.11,...,35.85,393.48,71.97,113.46,55.62,10.0,34.94,10.0,103.43,0
9,72.31,152.57,34.64,192.65,41.34,199.35,83.93,349.1,10.0,17.16,...,104.75,52.01,10.0,32.55,60.38,10.0,24.7,10.0,42.91,0


## KNN Classifier

In [3]:
from sklearn.neighbors import KNeighborsClassifier

def knn_classifier_with_kfcv(dataset, k_neighbors, num_folds, output_file):
    """Evaluate a k-nearest-neighbours classifier with shuffled k-fold cross-validation.

    The last column of ``dataset`` is used as the class label and every other
    column as a feature. Predictions from all test folds are pooled and one
    classification report is printed for the pooled predictions.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single label column.
    k_neighbors : int
        Value passed as ``n_neighbors`` to ``KNeighborsClassifier``.
    num_folds : int
        Value passed as ``n_splits`` to ``KFold``.
    output_file : str or None
        Path where the printed report is also written. Pass ``None`` or an
        empty string to skip writing.

    Returns
    -------
    dict
        ``'Accuracy'`` holds one accuracy value per fold, in fold order.
        ``'Relevance_Indices'`` is kept for interface compatibility and is
        never populated by this function.
    """
    # Assuming the last column contains the class labels
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    knn_classifier = KNeighborsClassifier(n_neighbors=k_neighbors)

    # Shuffled k-fold split with a fixed seed
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    results = {'Accuracy': [], 'Relevance_Indices': []}

    # Predictions and ground truth pooled over every test fold
    predicted_labels = []
    actual_labels = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        knn_classifier.fit(X_train, y_train)
        y_pred = knn_classifier.predict(X_test)

        # Per-fold accuracy is what the returned dictionary reports
        results['Accuracy'].append(float(accuracy_score(y_test, y_pred)))

        predicted_labels.extend(y_pred)
        actual_labels.extend(y_test)

    # Build one display name per class that actually occurs, instead of
    # assuming exactly two classes.
    class_values = sorted(set(actual_labels) | set(predicted_labels))
    labels = [f'Clase {value}' for value in class_values]
    report = classification_report(actual_labels, predicted_labels, target_names=labels)
    print(report)

    # Persist the same report the user sees on screen
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as report_file:
            report_file.write(report)

    return results

# Evaluate 1-NN with 10-fold cross-validation; the pooled classification report is printed
results = knn_classifier_with_kfcv(
    dataset,
    k_neighbors=1,
    num_folds=10,
    output_file='Normal_best_classification_report_1nn.txt',
)

              precision    recall  f1-score   support

     Clase 0       1.00      0.36      0.53        14
     Clase 1       0.61      1.00      0.76        14

    accuracy                           0.68        28
   macro avg       0.80      0.68      0.64        28
weighted avg       0.80      0.68      0.64        28



In [4]:
# Evaluate 3-NN with 10-fold cross-validation; the pooled classification report is printed
results = knn_classifier_with_kfcv(
    dataset,
    k_neighbors=3,
    num_folds=10,
    output_file='Normal_best_classification_report_3nn.txt',
)

              precision    recall  f1-score   support

     Clase 0       1.00      0.21      0.35        14
     Clase 1       0.56      1.00      0.72        14

    accuracy                           0.61        28
   macro avg       0.78      0.61      0.54        28
weighted avg       0.78      0.61      0.54        28



In [5]:
# Evaluate 5-NN with 10-fold cross-validation; the pooled classification report is printed
results = knn_classifier_with_kfcv(
    dataset,
    k_neighbors=5,
    num_folds=10,
    output_file='Normal_best_classification_report_5nn.txt',
)

              precision    recall  f1-score   support

     Clase 0       1.00      0.29      0.44        14
     Clase 1       0.58      1.00      0.74        14

    accuracy                           0.64        28
   macro avg       0.79      0.64      0.59        28
weighted avg       0.79      0.64      0.59        28



## SVM Classifier

In [6]:
from sklearn.svm import SVC

def svm_classifier_with_kfcv(dataset, num_folds, output_file):
    """Evaluate a default ``SVC`` with shuffled k-fold cross-validation.

    The last column of ``dataset`` is used as the class label and every other
    column as a feature. Predictions from all test folds are pooled and one
    classification report is printed for the pooled predictions.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single label column.
    num_folds : int
        Value passed as ``n_splits`` to ``KFold``.
    output_file : str or None
        Path where the printed report is also written. Pass ``None`` or an
        empty string to skip writing.

    Returns
    -------
    dict
        ``'Accuracy'`` holds one accuracy value per fold, in fold order.
        ``'Relevance_Indices'`` is kept for interface compatibility and is
        never populated by this function.
    """
    # Assuming the last column contains the class labels
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    svm_classifier = SVC()

    # Shuffled k-fold split with a fixed seed
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    results = {'Accuracy': [], 'Relevance_Indices': []}

    # Predictions and ground truth pooled over every test fold
    predicted_labels = []
    actual_labels = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        svm_classifier.fit(X_train, y_train)
        y_pred = svm_classifier.predict(X_test)

        # Per-fold accuracy is what the returned dictionary reports
        results['Accuracy'].append(float(accuracy_score(y_test, y_pred)))

        predicted_labels.extend(y_pred)
        actual_labels.extend(y_test)

    # Build one display name per class that actually occurs, instead of
    # assuming exactly two classes.
    class_values = sorted(set(actual_labels) | set(predicted_labels))
    labels = [f'Clase {value}' for value in class_values]
    report = classification_report(actual_labels, predicted_labels, target_names=labels)
    print(report)

    # Persist the same report the user sees on screen
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as report_file:
            report_file.write(report)

    return results

# Evaluate the SVM with 10-fold cross-validation; the pooled classification report is printed
results = svm_classifier_with_kfcv(
    dataset,
    num_folds=10,
    output_file='Normal_best_classification_report_svm.txt',
)

              precision    recall  f1-score   support

     Clase 0       1.00      0.86      0.92        14
     Clase 1       0.88      1.00      0.93        14

    accuracy                           0.93        28
   macro avg       0.94      0.93      0.93        28
weighted avg       0.94      0.93      0.93        28



## Random Forest Classifier

In [7]:
from sklearn.ensemble import RandomForestClassifier

def random_forest_classifier_with_kfcv(dataset, num_folds, output_file):
    """Evaluate a 10-tree random forest with shuffled k-fold cross-validation.

    The last column of ``dataset`` is used as the class label and every other
    column as a feature. Predictions from all test folds are pooled and one
    classification report is printed for the pooled predictions.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single label column.
    num_folds : int
        Value passed as ``n_splits`` to ``KFold``.
    output_file : str or None
        Path where the printed report is also written. Pass ``None`` or an
        empty string to skip writing.

    Returns
    -------
    dict
        ``'Accuracy'`` holds one accuracy value per fold, in fold order.
        ``'Relevance_Indices'`` is kept for interface compatibility and is
        never populated by this function.
    """
    # Assuming the last column contains the class labels
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    # Seed the forest so repeated runs give the same report; the fold split
    # below is already seeded with the same value.
    rf_classifier = RandomForestClassifier(n_estimators=10, random_state=42)

    # Shuffled k-fold split with a fixed seed
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    results = {'Accuracy': [], 'Relevance_Indices': []}

    # Predictions and ground truth pooled over every test fold
    predicted_labels = []
    actual_labels = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        rf_classifier.fit(X_train, y_train)
        y_pred = rf_classifier.predict(X_test)

        # Per-fold accuracy is what the returned dictionary reports
        results['Accuracy'].append(float(accuracy_score(y_test, y_pred)))

        predicted_labels.extend(y_pred)
        actual_labels.extend(y_test)

    # Build one display name per class that actually occurs, instead of
    # assuming exactly two classes.
    class_values = sorted(set(actual_labels) | set(predicted_labels))
    labels = [f'Clase {value}' for value in class_values]
    report = classification_report(actual_labels, predicted_labels, target_names=labels)
    print(report)

    # Persist the same report the user sees on screen
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as report_file:
            report_file.write(report)

    return results

# Evaluate the random forest with 10-fold cross-validation; the pooled classification report is printed
results = random_forest_classifier_with_kfcv(
    dataset,
    num_folds=10,
    output_file='Normal_best_classification_report_rf.txt',
)

              precision    recall  f1-score   support

     Clase 0       0.50      0.57      0.53        14
     Clase 1       0.50      0.43      0.46        14

    accuracy                           0.50        28
   macro avg       0.50      0.50      0.50        28
weighted avg       0.50      0.50      0.50        28



## AdaBoost Classifier

In [8]:
from sklearn.ensemble import AdaBoostClassifier

def adaboost_classifier_with_kfcv(dataset, num_folds, output_file):
    """Evaluate a 50-estimator AdaBoost classifier with shuffled k-fold cross-validation.

    The last column of ``dataset`` is used as the class label and every other
    column as a feature. Predictions from all test folds are pooled and one
    classification report is printed for the pooled predictions.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single label column.
    num_folds : int
        Value passed as ``n_splits`` to ``KFold``.
    output_file : str or None
        Path where the printed report is also written. Pass ``None`` or an
        empty string to skip writing.

    Returns
    -------
    dict
        ``'Accuracy'`` holds one accuracy value per fold, in fold order.
        ``'Relevance_Indices'`` is kept for interface compatibility and is
        never populated by this function.
    """
    # Assuming the last column contains the class labels
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    adaboost_classifier = AdaBoostClassifier(n_estimators=50, random_state=42)

    # Shuffled k-fold split with a fixed seed
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    results = {'Accuracy': [], 'Relevance_Indices': []}

    # Predictions and ground truth pooled over every test fold
    predicted_labels = []
    actual_labels = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        adaboost_classifier.fit(X_train, y_train)
        y_pred = adaboost_classifier.predict(X_test)

        # Per-fold accuracy is what the returned dictionary reports
        results['Accuracy'].append(float(accuracy_score(y_test, y_pred)))

        predicted_labels.extend(y_pred)
        actual_labels.extend(y_test)

    # Build one display name per class that actually occurs, instead of
    # assuming exactly two classes.
    class_values = sorted(set(actual_labels) | set(predicted_labels))
    labels = [f'Clase {value}' for value in class_values]
    report = classification_report(actual_labels, predicted_labels, target_names=labels)
    print(report)

    # Persist the same report the user sees on screen
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as report_file:
            report_file.write(report)

    return results

# Evaluate AdaBoost with 10-fold cross-validation; the pooled classification report is printed
results = adaboost_classifier_with_kfcv(
    dataset,
    num_folds=10,
    output_file='Normal_best_classification_report_adaboost.txt',
)

              precision    recall  f1-score   support

     Clase 0       0.71      0.71      0.71        14
     Clase 1       0.71      0.71      0.71        14

    accuracy                           0.71        28
   macro avg       0.71      0.71      0.71        28
weighted avg       0.71      0.71      0.71        28



## MLP Classifier

In [9]:
from sklearn.neural_network import MLPClassifier

def mlp_classifier_with_kfcv(dataset, num_folds, output_file):
    """Evaluate a small MLP classifier with shuffled k-fold cross-validation.

    The last column of ``dataset`` is used as the class label and every other
    column as a feature. Predictions from all test folds are pooled and one
    classification report is printed for the pooled predictions.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single label column.
    num_folds : int
        Value passed as ``n_splits`` to ``KFold``.
    output_file : str or None
        Path where the printed report is also written. Pass ``None`` or an
        empty string to skip writing.

    Returns
    -------
    dict
        ``'Accuracy'`` holds one accuracy value per fold, in fold order.
        ``'Relevance_Indices'`` is kept for interface compatibility and is
        never populated by this function.
    """
    # Assuming the last column contains the class labels
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    # hidden_layer_sizes=(20,) is a single hidden layer with 20 units.
    # NOTE(review): early_stopping holds out part of each training fold as a
    # validation set; with very few samples that leaves little data to
    # validate on -- confirm this is intended.
    mlp_classifier = MLPClassifier(
        hidden_layer_sizes=(20,),
        max_iter=1000,
        early_stopping=True,
        random_state=42,
    )

    # Shuffled k-fold split with a fixed seed
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    results = {'Accuracy': [], 'Relevance_Indices': []}

    # Predictions and ground truth pooled over every test fold
    predicted_labels = []
    actual_labels = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        mlp_classifier.fit(X_train, y_train)
        y_pred = mlp_classifier.predict(X_test)

        # Per-fold accuracy is what the returned dictionary reports
        results['Accuracy'].append(float(accuracy_score(y_test, y_pred)))

        predicted_labels.extend(y_pred)
        actual_labels.extend(y_test)

    # Build one display name per class that actually occurs, instead of
    # assuming exactly two classes.
    class_values = sorted(set(actual_labels) | set(predicted_labels))
    labels = [f'Clase {value}' for value in class_values]
    report = classification_report(actual_labels, predicted_labels, target_names=labels)
    print(report)

    # Persist the same report the user sees on screen
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as report_file:
            report_file.write(report)

    return results

# Evaluate the MLP with 10-fold cross-validation; the pooled classification report is printed
results = mlp_classifier_with_kfcv(
    dataset,
    num_folds=10,
    output_file='Normal_best_classification_report_mlp.txt',
)

              precision    recall  f1-score   support

     Clase 0       0.48      0.71      0.57        14
     Clase 1       0.43      0.21      0.29        14

    accuracy                           0.46        28
   macro avg       0.45      0.46      0.43        28
weighted avg       0.45      0.46      0.43        28



## Naive Bayes Classifier

In [10]:
from sklearn.naive_bayes import GaussianNB

def naive_bayes_classifier_with_kfcv(dataset, num_folds, output_file):
    """Evaluate a Gaussian Naive Bayes classifier with shuffled k-fold cross-validation.

    The last column of ``dataset`` is used as the class label and every other
    column as a feature. Predictions from all test folds are pooled and one
    classification report is printed for the pooled predictions.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single label column.
    num_folds : int
        Value passed as ``n_splits`` to ``KFold``.
    output_file : str or None
        Path where the printed report is also written. Pass ``None`` or an
        empty string to skip writing.

    Returns
    -------
    dict
        ``'Accuracy'`` holds one accuracy value per fold, in fold order.
        ``'Relevance_Indices'`` is kept for interface compatibility and is
        never populated by this function.
    """
    # Assuming the last column contains the class labels
    X = dataset.iloc[:, :-1]
    y = dataset.iloc[:, -1]

    nb_classifier = GaussianNB()

    # Shuffled k-fold split with a fixed seed
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    results = {'Accuracy': [], 'Relevance_Indices': []}

    # Predictions and ground truth pooled over every test fold
    predicted_labels = []
    actual_labels = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        nb_classifier.fit(X_train, y_train)
        y_pred = nb_classifier.predict(X_test)

        # Per-fold accuracy is what the returned dictionary reports
        results['Accuracy'].append(float(accuracy_score(y_test, y_pred)))

        predicted_labels.extend(y_pred)
        actual_labels.extend(y_test)

    # Build one display name per class that actually occurs, instead of
    # assuming exactly two classes.
    class_values = sorted(set(actual_labels) | set(predicted_labels))
    labels = [f'Clase {value}' for value in class_values]
    report = classification_report(actual_labels, predicted_labels, target_names=labels)
    print(report)

    # Persist the same report the user sees on screen
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as report_file:
            report_file.write(report)

    return results

# Evaluate Gaussian Naive Bayes with 10-fold cross-validation; the pooled classification report is printed
results = naive_bayes_classifier_with_kfcv(
    dataset,
    num_folds=10,
    output_file='Normal_best_classification_report_nb.txt',
)

              precision    recall  f1-score   support

     Clase 0       0.59      0.93      0.72        14
     Clase 1       0.83      0.36      0.50        14

    accuracy                           0.64        28
   macro avg       0.71      0.64      0.61        28
weighted avg       0.71      0.64      0.61        28

