In [1]:
import numpy as np

In [6]:
# 5x5 confusion matrix of counts, stored as float64.
# As consumed by calculate_metrics: the diagonal holds the true positives,
# the rest of a column is counted as false positives and the rest of a row
# as false negatives. Row/column order follows the indices in class_mapping.
conf_matrix = np.array(
    [
        [15406, 92, 976, 70, 187],
        [1018, 25, 629, 25, 48],
        [2625, 120, 3748, 204, 306],
        [430, 23, 789, 54, 60],
        [1354, 37, 990, 43, 78],
    ],
    dtype=float,
)

In [23]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def calculate_metrics(confusion_matrices, class_mapping):
    """Print per-class precision, recall and F1-score as mean±std percentages.

    For every matrix and every class in ``class_mapping`` the diagonal entry
    is taken as the true positives, the remainder of the class's column as
    false positives and the remainder of its row as false negatives. The
    three metrics are then averaged across matrices; the spread is the
    population standard deviation (``np.std`` with its default ``ddof=0``).

    Parameters
    ----------
    confusion_matrices : sequence of 2-D arrays, or a single 2-D array
        One square confusion matrix per run/fold. A single 2-D matrix may be
        passed directly and is treated as a sequence of length one.
    class_mapping : dict
        Maps a display name to the class's row/column index in the matrices.
        It may cover only a subset of the classes and the indices need not
        be contiguous.

    Returns
    -------
    None
        The table is written to standard output only.
    """
    matrices = [np.asarray(cm) for cm in confusion_matrices]
    if matrices and matrices[0].ndim == 1:
        # Iterating a lone 2-D matrix yields its rows; use the matrix itself.
        matrices = [np.asarray(confusion_matrices)]

    # Size the result arrays by the largest class index rather than by the
    # number of mapped classes, so a partial or non-contiguous mapping does
    # not index past the end of the arrays.
    n_slots = max(class_mapping.values(), default=-1) + 1
    result_shape = (len(matrices), n_slots)

    # Initialize arrays to store metrics (one row per matrix).
    precision = np.zeros(result_shape)
    recall = np.zeros(result_shape)
    f1_score = np.zeros(result_shape)

    for i, cm in enumerate(matrices):
        for class_index in class_mapping.values():
            tp = cm[class_index, class_index]
            fp = np.sum(cm[:, class_index]) - tp
            fn = np.sum(cm[class_index, :]) - tp

            # Each ratio falls back to 0 when its denominator is not positive.
            p = _safe_ratio(tp, tp + fp)
            r = _safe_ratio(tp, tp + fn)

            precision[i, class_index] = p
            recall[i, class_index] = r
            f1_score[i, class_index] = _safe_ratio(2 * (p * r), p + r)

    # Calculate mean and standard deviation for each metric, in percent.
    mean_precision = np.mean(precision, axis=0) * 100
    std_precision = np.std(precision, axis=0) * 100
    mean_recall = np.mean(recall, axis=0) * 100
    std_recall = np.std(recall, axis=0) * 100
    mean_f1 = np.mean(f1_score, axis=0) * 100
    std_f1 = np.std(f1_score, axis=0) * 100

    # Print results
    print("Class\t\tPrecision\t\tRecall\t\t\tF1-Score")
    print("-" * 70)
    for class_name, class_index in class_mapping.items():
        print(f"{class_name}\t\t{mean_precision[class_index]:.2f}±{std_precision[class_index]:.2f}\t\t{mean_recall[class_index]:.2f}±{std_recall[class_index]:.2f}\t\t{mean_f1[class_index]:.2f}±{std_f1[class_index]:.2f}")

    
# Class label -> index of that class's row/column in the confusion matrix.
# Built by enumerating the labels in index order, so insertion order matches
# the indices.
class_mapping = {
    label: position
    for position, label in enumerate(("Awake", "NREM1", "NREM2", "NREM3", "REM"))
}

class_mapping

{'Awake': 0, 'NREM1': 1, 'NREM2': 2, 'NREM3': 3, 'REM': 4}

In [24]:
# Echo the matrix so its values are visible right before the metrics call.
conf_matrix

array([[15406.,    92.,   976.,    70.,   187.],
       [ 1018.,    25.,   629.,    25.,    48.],
       [ 2625.,   120.,  3748.,   204.,   306.],
       [  430.,    23.,   789.,    54.,    60.],
       [ 1354.,    37.,   990.,    43.,    78.]])

In [29]:
# Second matrix is the first one plus Gaussian noise (mean 10, unit variance)
# so the ± columns are non-zero. The generator is seeded so that a
# Restart & Run All reproduces the same table.
noise_rng = np.random.default_rng(42)
conf_matrix_noisy = conf_matrix + noise_rng.normal(loc=10, size=(5, 5))
calculate_metrics([conf_matrix, conf_matrix_noisy], class_mapping)

Class		Precision		Recall			F1-Score
----------------------------------------------------------------------
Awake		73.88±0.07		91.97±0.11		81.94±0.08
NREM1		9.38±0.97		1.72±0.28		2.90±0.45
NREM2		52.44±0.11		53.41±0.11		52.92±0.11
NREM3		13.91±0.27		4.23±0.25		6.48±0.32
REM		11.81±0.33		3.30±0.18		5.15±0.25


629.0