## This approach involved training and evaluating our four classifiers on 25 distinct feature sets, each corresponding to a specific handwriting task in the DARWIN data set.

### Here we do it for the Decision Tree classifier

In [5]:
'''
We extract the feature vectors from each of the 25 tasks.
'''
 
import pandas as pd
from ucimlrepo import fetch_ucirepo
 
# Fetch dataset 
darwin = fetch_ucirepo(id=732)
 
# Data (as pandas dataframes)
X = darwin.data.features
y = darwin.data.targets
 
X = X.drop(columns=['ID'])
 
# Number of attributes per task
num_attributes_per_task = 18
 
# Number of tasks
num_tasks = 25
 
# Create a dictionary to hold the DataFrames for each task
task_dfs = {}
 
# Create a dictionary to hold the labels for each task
task_labels = {}
 
# Iterate through the number of tasks
for i in range(num_tasks):
    # Column indices for the current task
    start_index = i * num_attributes_per_task
    end_index = start_index + num_attributes_per_task
    # Select columns for the current task
    task_columns = X.columns[start_index:end_index]
    # Create a DataFrame for the current task
    task_df = X[task_columns].copy()
    # Store the DataFrame in the dictionary with the key 'task_i'
    task_dfs[f'task_{i + 1}'] = task_df
    # Select labels for the current task
    task_labels[f'task_{i + 1}'] = y.copy()  # Labels are identical for all tasks, adjust if necessary

### Performing grid search to compute the best parameters

In [4]:
'''
grid search
'''
 
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pandas as pd
 
# Define the parameter grid
param_grid = {
   'criterion': ['gini', 'entropy'],
   'max_depth': list(range(2, 11)),
   'min_samples_split': list(range(2, 6)),
   'min_samples_leaf': list(range(2, 21, 2)),
   'max_leaf_nodes': list(range(2, 21, 2))
}
 
# Dictionary to store the best parameters for each task
best_params_per_task = {}
 
# Iterate through tasks
for task, task_df in task_dfs.items():
   X_task = task_df
   y_task = task_labels[task]
 
   # Split the data
   X_train, X_test, y_train, y_test = train_test_split(X_task, y_task, test_size=0.2, random_state=42, stratify=y_task)
 
   # Initialize Decision Tree Classifier
   clf = DecisionTreeClassifier(random_state=42)
 
   # Grid Search
   grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1, scoring='recall')
   grid_search.fit(X_train, y_train)
 
   # Store the best parameters for this task
   best_params_per_task[task] = grid_search.best_params_
 
   print(f"Best Parameters for {task}: {grid_search.best_params_}")

Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_1: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_2: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_3: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_4: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_5: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_6: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_7: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_8: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_9: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_10: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_11: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_12: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_13: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_14: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters for task_15: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_16: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_17: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_18: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_19: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_20: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_21: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_22: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_23: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits




Best Parameters for task_24: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits
Best Parameters for task_25: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}




## The results given by the grid-search show that the optimal parameters are identical for each of the 25 tasks:

**best_parameters: {'criterion': 'gini', 'max_depth': 2, 'max_leaf_nodes': 2, 'min_samples_leaf': 2, 'min_samples_split': 2}**

## Performance Evaluation of Decision Tree classifier, using the 20 runs method

In [6]:
'''
performance evaluation using 20 run method
'''
 
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
 
# Number of runs
n_runs = 20
 
# Dictionary to store performance metrics for each task
performance_metrics = {}

# Using the best parameters computed during grid-search
best_params = {
   'criterion': 'gini', 
   'max_depth': 2,
   'max_leaf_nodes': 2, 
   'min_samples_leaf': 2, 
   'min_samples_split': 2
}
 
# Iterate through tasks
for task, task_df in task_dfs.items():
    X_task = task_df
    y_task = task_labels[task]
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []
    sensitivities = []
    specificities = []
 
    for run in range(n_runs):
        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(X_task, y_task, test_size=0.2, random_state=None, stratify=y_task)
        # Create classifier with best parameters
        clf = DecisionTreeClassifier(**best_params)
        # Train the model
        clf.fit(X_train, y_train)
        # Predict
        y_pred = clf.predict(X_test)
        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, pos_label='P')
        recall = recall_score(y_test, y_pred, pos_label='P')
        f1 = f1_score(y_test, y_pred, pos_label='P')
        # Calculate confusion matrix components
        cm = confusion_matrix(y_test, y_pred, labels=['H', 'P'])
        tn, fp, fn, tp = cm.ravel()
        sensitivity = tp / (tp + fn) if (tp + fn) != 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
        # Append metrics
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)
        sensitivities.append(sensitivity)
        specificities.append(specificity)
    # Calculate average metrics
    performance_metrics[task] = {
        'mean_accuracy': np.mean(accuracies),
        'mean_precision': np.mean(precisions),
        'mean_recall': np.mean(recalls),
        'mean_f1_score': np.mean(f1_scores),
        'mean_sensitivity': np.mean(sensitivities),
        'mean_specificity': np.mean(specificities)
    }
 
    print(f"Performance Metrics for {task}:")
    print(f"Mean Accuracy: {performance_metrics[task]['mean_accuracy']:.4f}")
    print(f"Mean Precision: {performance_metrics[task]['mean_precision']:.4f}")
    print(f"Mean Recall: {performance_metrics[task]['mean_recall']:.4f}")
    print(f"Mean F1 Score: {performance_metrics[task]['mean_f1_score']:.4f}")
    print(f"Mean Sensitivity: {performance_metrics[task]['mean_sensitivity']:.4f}")
    print(f"Mean Specificity: {performance_metrics[task]['mean_specificity']:.4f}")
    print("\n")

Performance Metrics for task_1:
Mean Accuracy: 0.5871
Mean Precision: 0.6840
Mean Recall: 0.4000
Mean F1 Score: 0.4853
Mean Sensitivity: 0.4000
Mean Specificity: 0.7853


Performance Metrics for task_2:
Mean Accuracy: 0.6671
Mean Precision: 0.7383
Mean Recall: 0.5889
Mean F1 Score: 0.6304
Mean Sensitivity: 0.5889
Mean Specificity: 0.7500


Performance Metrics for task_3:
Mean Accuracy: 0.7171
Mean Precision: 0.8564
Mean Recall: 0.5389
Mean F1 Score: 0.6566
Mean Sensitivity: 0.5389
Mean Specificity: 0.9059


Performance Metrics for task_4:
Mean Accuracy: 0.6443
Mean Precision: 0.7198
Mean Recall: 0.5306
Mean F1 Score: 0.5875
Mean Sensitivity: 0.5306
Mean Specificity: 0.7647


Performance Metrics for task_5:
Mean Accuracy: 0.6757
Mean Precision: 0.7169
Mean Recall: 0.6389
Mean F1 Score: 0.6628
Mean Sensitivity: 0.6389
Mean Specificity: 0.7147


Performance Metrics for task_6:
Mean Accuracy: 0.7343
Mean Precision: 0.7857
Mean Recall: 0.6667
Mean F1 Score: 0.7148
Mean Sensitivity: 0.6667
M

## Performance Evaluation of Decision Tree classifier, using the k-fold cross-validation method

In [8]:
'''
Performance evaluation with K-Fold cross-validation
'''

import numpy as np
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Number of folds
n_splits = 5  # You can adjust the number of folds as needed

# Dictionary to store performance metrics for each task
performance_metrics = {}

# Using the best parameters computed during grid-search
best_params = {
    'criterion': 'gini', 
    'max_depth': 2,
    'max_leaf_nodes': 2, 
    'min_samples_leaf': 2, 
    'min_samples_split': 2
}

# Iterate through tasks
for task, task_df in task_dfs.items():
    X_task = task_df
    y_task = task_labels[task]

    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []
    sensitivities = []
    specificities = []

    # Initialize KFold
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=None)

    for train_index, test_index in kf.split(X_task):
        X_train, X_test = X_task.iloc[train_index], X_task.iloc[test_index]
        y_train, y_test = y_task.iloc[train_index], y_task.iloc[test_index]

        # Create classifier with best parameters
        clf = DecisionTreeClassifier(**best_params)

        # Train the model
        clf.fit(X_train, y_train)

        # Predict
        y_pred = clf.predict(X_test)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, pos_label='P')
        recall = recall_score(y_test, y_pred, pos_label='P')
        f1 = f1_score(y_test, y_pred, pos_label='P')

        # Calculate confusion matrix components
        cm = confusion_matrix(y_test, y_pred, labels=['H', 'P'])
        tn, fp, fn, tp = cm.ravel()

        # Calculate sensitivity and specificity
        sensitivity = tp / (tp + fn) if (tp + fn) != 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) != 0 else 0

        # Append metrics
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)
        sensitivities.append(sensitivity)
        specificities.append(specificity)

    # Calculate average metrics
    performance_metrics[task] = {
        'mean_accuracy': np.mean(accuracies),
        'mean_precision': np.mean(precisions),
        'mean_recall': np.mean(recalls),
        'mean_f1_score': np.mean(f1_scores),
        'mean_sensitivity': np.mean(sensitivities),
        'mean_specificity': np.mean(specificities)
    }

    print(f"Performance Metrics for {task}:")
    print(f"Mean Accuracy: {performance_metrics[task]['mean_accuracy']:.4f}")
    print(f"Mean Precision: {performance_metrics[task]['mean_precision']:.4f}")
    print(f"Mean Recall: {performance_metrics[task]['mean_recall']:.4f}")
    print(f"Mean F1 Score: {performance_metrics[task]['mean_f1_score']:.4f}")
    print(f"Mean Sensitivity: {performance_metrics[task]['mean_sensitivity']:.4f}")
    print(f"Mean Specificity: {performance_metrics[task]['mean_specificity']:.4f}")
    print("\n")


Performance Metrics for task_1:
Mean Accuracy: 0.5459
Mean Precision: 0.6383
Mean Recall: 0.3428
Mean F1 Score: 0.4127
Mean Sensitivity: 0.3428
Mean Specificity: 0.7560


Performance Metrics for task_2:
Mean Accuracy: 0.6780
Mean Precision: 0.7857
Mean Recall: 0.5498
Mean F1 Score: 0.6291
Mean Sensitivity: 0.5498
Mean Specificity: 0.8117


Performance Metrics for task_3:
Mean Accuracy: 0.7356
Mean Precision: 0.8642
Mean Recall: 0.5712
Mean F1 Score: 0.6870
Mean Sensitivity: 0.5712
Mean Specificity: 0.9035


Performance Metrics for task_4:
Mean Accuracy: 0.6499
Mean Precision: 0.6784
Mean Recall: 0.6380
Mean F1 Score: 0.6479
Mean Sensitivity: 0.6380
Mean Specificity: 0.6791


Performance Metrics for task_5:
Mean Accuracy: 0.6612
Mean Precision: 0.6697
Mean Recall: 0.6717
Mean F1 Score: 0.6659
Mean Sensitivity: 0.6717
Mean Specificity: 0.6557


Performance Metrics for task_6:
Mean Accuracy: 0.7358
Mean Precision: 0.8082
Mean Recall: 0.6617
Mean F1 Score: 0.7141
Mean Sensitivity: 0.6617
M