## This approach involved training and evaluating our four classifiers on 25 distinct feature sets, each corresponding to a specific handwriting task in the DARWIN data set.

### Here we do it for the Random Forest classifier

In [7]:
'''
We extract the feature vectors from each of the 25 tasks.
'''
 
import pandas as pd
from ucimlrepo import fetch_ucirepo
 
# Fetch dataset 
darwin = fetch_ucirepo(id=732)
 
# Data (as pandas dataframes)
X = darwin.data.features
y = darwin.data.targets
 
X = X.drop(columns=['ID'])
 
# Number of attributes per task
num_attributes_per_task = 18
 
# Number of tasks
num_tasks = 25
 
# Create a dictionary to hold the DataFrames for each task
task_dfs = {}
 
# Create a dictionary to hold the labels for each task
task_labels = {}
 
# Iterate through the number of tasks
for i in range(num_tasks):
    # Column indices for the current task
    start_index = i * num_attributes_per_task
    end_index = start_index + num_attributes_per_task
    # Select columns for the current task
    task_columns = X.columns[start_index:end_index]
    # Create a DataFrame for the current task
    task_df = X[task_columns].copy()
    # Store the DataFrame in the dictionary with the key 'task_i'
    task_dfs[f'task_{i + 1}'] = task_df
    # Select labels for the current task
    task_labels[f'task_{i + 1}'] = y.copy()  # Labels are identical for all tasks, adjust if necessary

### Performing gird search to find the best parameters

There's a "Keyboard Interrupt" down below, as the script was taking too long to terminate.
We executed it on a different machine and then took the best parameters and we inserted it inside of the following sections of this notebook.
(Performance Evaluation using the 20 run method, and then using k-fold cross-validation)

In [4]:
'''
Grid search random forest
'''
 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
 
# Define the classifier
clf_rf = RandomForestClassifier(random_state=42)
 
# Define the parameter grid for RandomForestClassifier
param_grid_rf = {
   'max_depth': list(range(2, 11)),
   'n_estimators': list(range(50, 301, 50)),
   'bootstrap': [True, False],
   'min_samples_split': list(range(2, 6)),
   'min_samples_leaf': list(range(1, 21)),
   'criterion': ['gini', 'entropy']
}
 
# Configure Grid Search with 5-fold Cross Validation
grid_search_rf = GridSearchCV(estimator=clf_rf, param_grid=param_grid_rf, 
                             cv=5, n_jobs=-1, verbose=1, scoring='f1')
 
# Dictionary to store the best parameters for each task
best_params_per_task = {}
 
# Iterate through tasks
for task, task_df in task_dfs.items():
   X_task = task_df
   y_task = task_labels[task]
  
   # Split the data
   X_train, X_test, y_train, y_test = train_test_split(X_task, y_task, test_size=0.2, random_state=42, stratify=y_task)
  
   # Fit Grid Search
   grid_search_rf.fit(X_train, y_train)
  
   # Store the best parameters for this task
   best_params_per_task[task] = grid_search_rf.best_params_
 
   print(f"Best Parameters for {task}: {grid_search_rf.best_params_}")

Fitting 5 folds for each of 17280 candidates, totalling 86400 fits


KeyboardInterrupt: 

## The results given by the grid-search show that the optimal parameters are identical for each of the 25 tasks:

**best_parameters: {'bootstrap': True,
          'criterion': 'gini',
          'max_depth': 2,
          'min_samples_leaf': 1,
          'min_samples_split': 2,
          'n_estimators': 50
}**

## Performance Evaluation of Random Forest classifier, using the 20 runs method

In [8]:
''' Random Forest performance evaluation, using the 20 run method '''

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
 
# Number of runs
n_runs = 20
 
params = {'bootstrap': True,
          'criterion': 'gini',
          'max_depth': 2,
          'min_samples_leaf': 1,
          'min_samples_split': 2,
          'n_estimators': 50
}
 
# Dictionary to store performance metrics for each task
performance_metrics = {}
 
# Iterate through tasks
for task, task_df in task_dfs.items():
   X_task = task_df
   y_task = task_labels[task]
 
   accuracies = []
   precisions = []
   recalls = []
   f1_scores = []
   sensitivities = []
   specificities = []
 
   for run in range(n_runs):
       # Shuffle and split the data
       X_train, X_test, y_train, y_test = train_test_split(
           X_task, y_task, test_size=0.2, random_state=None, stratify=y_task
       )
 
       # Ensure y_train and y_test are 1D
       y_train = y_train.values.ravel()
       y_test = y_test.values.ravel()
 
       # Create Random Forest classifier with the best parameters
       clf_rf = RandomForestClassifier(**params, random_state=42)
 
       # Train the model
       clf_rf.fit(X_train, y_train)
 
       # Predict on test data
       y_pred = clf_rf.predict(X_test)
 
       # Calculate metrics
       accuracy = accuracy_score(y_test, y_pred)
       precision = precision_score(y_test, y_pred, average='binary', pos_label='P')  # Explicitly define pos_label
       recall = recall_score(y_test, y_pred, average='binary', pos_label='P')        # Explicitly define pos_label
       f1 = f1_score(y_test, y_pred, average='binary', pos_label='P')                # Explicitly define pos_label
 
       accuracies.append(accuracy)
       precisions.append(precision)
       recalls.append(recall)
       f1_scores.append(f1)
 
       # Calculate confusion matrix components
       cm = confusion_matrix(y_test, y_pred, labels=['H', 'P'])
       # Correctly identify the elements of the confusion matrix
       if cm.shape == (2, 2):  # Ensure it's a 2x2 matrix for binary classification
           tn, fp, fn, tp = cm.ravel()
           sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
           specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
       else:
           # If it's not a 2x2 matrix, something went wrong
           print(f"Unexpected confusion matrix shape: {cm.shape} for task {task}")
           sensitivity = specificity = 0
 
       sensitivities.append(sensitivity)
       specificities.append(specificity)
 
   # Calculate average metrics
   performance_metrics[task] = {
       'mean_accuracy': np.mean(accuracies),
       'mean_precision': np.mean(precisions),
       'mean_recall': np.mean(recalls),
       'mean_f1_score': np.mean(f1_scores),
       'mean_sensitivity': np.mean(sensitivities),
       'mean_specificity': np.mean(specificities),
   }
 
   print(f"Performance Metrics for {task}:")
   print(f"Mean Accuracy: {performance_metrics[task]['mean_accuracy']:.4f}")
   print(f"Mean Precision: {performance_metrics[task]['mean_precision']:.4f}")
   print(f"Mean Recall: {performance_metrics[task]['mean_recall']:.4f}")
   print(f"Mean F1 Score: {performance_metrics[task]['mean_f1_score']:.4f}")
   print(f"Mean Sensitivity: {performance_metrics[task]['mean_sensitivity']:.4f}")
   print(f"Mean Specificity: {performance_metrics[task]['mean_specificity']:.4f}")
   print("\n")

Performance Metrics for task_1:
Mean Accuracy: 0.6171
Mean Precision: 0.6536
Mean Recall: 0.5500
Mean F1 Score: 0.5939
Mean Sensitivity: 0.5500
Mean Specificity: 0.6882


Performance Metrics for task_2:
Mean Accuracy: 0.7143
Mean Precision: 0.7637
Mean Recall: 0.6778
Mean F1 Score: 0.7080
Mean Sensitivity: 0.6778
Mean Specificity: 0.7529


Performance Metrics for task_3:
Mean Accuracy: 0.7143
Mean Precision: 0.7652
Mean Recall: 0.6500
Mean F1 Score: 0.6991
Mean Sensitivity: 0.6500
Mean Specificity: 0.7824


Performance Metrics for task_4:
Mean Accuracy: 0.6829
Mean Precision: 0.7289
Mean Recall: 0.6139
Mean F1 Score: 0.6635
Mean Sensitivity: 0.6139
Mean Specificity: 0.7559


Performance Metrics for task_5:
Mean Accuracy: 0.7057
Mean Precision: 0.7766
Mean Recall: 0.6222
Mean F1 Score: 0.6816
Mean Sensitivity: 0.6222
Mean Specificity: 0.7941


Performance Metrics for task_6:
Mean Accuracy: 0.7086
Mean Precision: 0.7595
Mean Recall: 0.6583
Mean F1 Score: 0.6975
Mean Sensitivity: 0.6583
M

## Performance Evaluation of Random Forest classifier, using the k-fold cross-validation method

In [9]:
''' Random Forest performance evaluation using K-Fold cross-validation '''

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold

# Number of folds
n_splits = 5  # You can adjust the number of folds as needed

params = {
    'bootstrap': True,
    'criterion': 'gini',
    'max_depth': 2,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'n_estimators': 50
}

# Dictionary to store performance metrics for each task
performance_metrics = {}

# Iterate through tasks
for task, task_df in task_dfs.items():
    X_task = task_df
    y_task = task_labels[task]

    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []
    sensitivities = []
    specificities = []

    # Initialize KFold
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=None)

    for train_index, test_index in kf.split(X_task):
        X_train, X_test = X_task.iloc[train_index], X_task.iloc[test_index]
        y_train, y_test = y_task.iloc[train_index], y_task.iloc[test_index]

        # Ensure y_train and y_test are 1D
        y_train = y_train.values.ravel()
        y_test = y_test.values.ravel()

        # Create Random Forest classifier with the best parameters
        clf_rf = RandomForestClassifier(**params, random_state=42)

        # Train the model
        clf_rf.fit(X_train, y_train)

        # Predict on test data
        y_pred = clf_rf.predict(X_test)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='binary', pos_label='P')  # Explicitly define pos_label
        recall = recall_score(y_test, y_pred, average='binary', pos_label='P')        # Explicitly define pos_label
        f1 = f1_score(y_test, y_pred, average='binary', pos_label='P')                # Explicitly define pos_label

        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

        # Calculate confusion matrix components
        cm = confusion_matrix(y_test, y_pred, labels=['H', 'P'])
        # Correctly identify the elements of the confusion matrix
        if cm.shape == (2, 2):  # Ensure it's a 2x2 matrix for binary classification
            tn, fp, fn, tp = cm.ravel()
            sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
            specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        else:
            # If it's not a 2x2 matrix, something went wrong
            print(f"Unexpected confusion matrix shape: {cm.shape} for task {task}")
            sensitivity = specificity = 0

        sensitivities.append(sensitivity)
        specificities.append(specificity)

    # Calculate average metrics
    performance_metrics[task] = {
        'mean_accuracy': np.mean(accuracies),
        'mean_precision': np.mean(precisions),
        'mean_recall': np.mean(recalls),
        'mean_f1_score': np.mean(f1_scores),
        'mean_sensitivity': np.mean(sensitivities),
        'mean_specificity': np.mean(specificities),
    }

    print(f"Performance Metrics for {task}:")
    print(f"Mean Accuracy: {performance_metrics[task]['mean_accuracy']:.4f}")
    print(f"Mean Precision: {performance_metrics[task]['mean_precision']:.4f}")
    print(f"Mean Recall: {performance_metrics[task]['mean_recall']:.4f}")
    print(f"Mean F1 Score: {performance_metrics[task]['mean_f1_score']:.4f}")
    print(f"Mean Sensitivity: {performance_metrics[task]['mean_sensitivity']:.4f}")
    print(f"Mean Specificity: {performance_metrics[task]['mean_specificity']:.4f}")
    print("\n")


Performance Metrics for task_1:
Mean Accuracy: 0.6607
Mean Precision: 0.7158
Mean Recall: 0.5657
Mean F1 Score: 0.6252
Mean Sensitivity: 0.5657
Mean Specificity: 0.7701


Performance Metrics for task_2:
Mean Accuracy: 0.6961
Mean Precision: 0.7524
Mean Recall: 0.6214
Mean F1 Score: 0.6647
Mean Sensitivity: 0.6214
Mean Specificity: 0.7938


Performance Metrics for task_3:
Mean Accuracy: 0.6610
Mean Precision: 0.7292
Mean Recall: 0.5833
Mean F1 Score: 0.6348
Mean Sensitivity: 0.5833
Mean Specificity: 0.7663


Performance Metrics for task_4:
Mean Accuracy: 0.7183
Mean Precision: 0.7571
Mean Recall: 0.6596
Mean F1 Score: 0.6983
Mean Sensitivity: 0.6596
Mean Specificity: 0.7738


Performance Metrics for task_5:
Mean Accuracy: 0.7188
Mean Precision: 0.7607
Mean Recall: 0.6692
Mean F1 Score: 0.7063
Mean Sensitivity: 0.6692
Mean Specificity: 0.7788


Performance Metrics for task_6:
Mean Accuracy: 0.7183
Mean Precision: 0.7517
Mean Recall: 0.6694
Mean F1 Score: 0.7012
Mean Sensitivity: 0.6694
M