In [1]:
import numpy as np
import pandas as pd

In [2]:
np.random.seed(51)

number_of_samples = 10000

# Labels 1-6 are drawn with probabilities 60%, 20%, 10%, 5%, 2.5% and 2.5%,
# so the simulated data set is heavily imbalanced towards class 1.

id = range(number_of_samples)

classes = np.random.choice(
    [1, 2, 3, 4, 5, 6],
    size=number_of_samples,
    p=[0.6, 0.2, 0.1, 0.05, 0.025, 0.025],
)

# Probability, per true class, that the simulated model predicts that class
# correctly (class 1 -> 0.90 down to class 6 -> 0.15).
class_accuracies = dict(
    zip(
        (1, 2, 3, 4, 5, 6),
        (0.90, 0.65, 0.50, 0.40, 0.20, 0.15),
    )
)

def predict_with_class_weights(actual, accuracies):
    """Simulate classifier predictions with a per-class hit rate.

    For every true label, the label itself is returned with probability
    ``accuracies[label]``. Otherwise one of the *other* labels is drawn, with
    probability proportional to that other label's own accuracy.

    Parameters
    ----------
    actual : iterable
        True class labels. Every label must be a key of ``accuracies``.
    accuracies : dict
        Maps each class label to the probability of predicting it correctly.
        The set of candidate labels is taken from this mapping's keys (in
        insertion order), so any label set works, not only 1-6.

    Returns
    -------
    numpy.ndarray
        Simulated predictions, one per element of ``actual``.

    Raises
    ------
    KeyError
        If a label in ``actual`` is missing from ``accuracies``.

    Notes
    -----
    Uses the global ``np.random`` state: one ``rand()`` draw per sample plus
    one ``choice()`` draw per misprediction, in sample order.
    """
    labels = list(accuracies)

    # label -> (other labels, normalised weights). Filled lazily on the first
    # miss for a label so that no extra work (or division) happens for labels
    # that are never mispredicted, and nothing is recomputed per sample.
    alternatives = {}

    predictions = []
    for class_label in actual:
        class_accuracy = accuracies[class_label]
        if np.random.rand() < class_accuracy:
            predictions.append(class_label)
            continue

        if class_label not in alternatives:
            other_classes = [c for c in labels if c != class_label]
            other_weights = np.asarray([accuracies[c] for c in other_classes])
            alternatives[class_label] = (
                other_classes,
                other_weights / np.sum(other_weights),
            )

        other_classes, other_probs = alternatives[class_label]
        predictions.append(np.random.choice(other_classes, p=other_probs))

    return np.array(predictions)

# Draw one simulated prediction per sample using the per-class accuracies
predictions = predict_with_class_weights(classes, class_accuracies)

# Keep both label sequences as plain Python lists from here on
classes, predictions = list(classes), list(predictions)

In [3]:
# One row per sample: its id, its true class and its simulated predicted class

df = pd.DataFrame(
    dict(id=id, classes=classes, predicted_classes=predictions)
)

In [4]:
# Display the sample table (the rendered output elides the middle rows)
df

Unnamed: 0,id,classes,predicted_classes
0,0,2,2
1,1,1,1
2,2,1,1
3,3,2,3
4,4,1,1
...,...,...,...
9995,9995,3,3
9996,9996,1,1
9997,9997,1,6
9998,9998,2,3


In [5]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Overall multi-class accuracy: share of samples whose prediction equals the label
accuracy = accuracy_score(y_true=classes, y_pred=predictions)

# Confusion matrix: rows are true classes, columns are predicted classes
conf_matrix = confusion_matrix(y_true=classes, y_pred=predictions)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.7429
Confusion Matrix:
 [[5345  202  182  142   68   45]
 [ 297 1301  160  135   51   45]
 [ 199  147  502  103   39   28]
 [ 110   92   61  197   25   18]
 [  65   50   40   33   47    9]
 [  85   45   39   33   23   37]]


In [6]:
# Calculate accuracy, precision, recall, specificity, negative predictive value,
# and F1 score for each class

import numpy as np

def calculate_metrics(y_true, y_pred, zero_division=np.nan):
    """Compute binary classification metrics from 0/1 label vectors.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, where 1 marks the positive class and 0 the
        negative class.
    y_pred : array-like
        Predicted labels with the same encoding and shape as ``y_true``.
    zero_division : float, default ``np.nan``
        Value returned for any metric whose denominator is zero (for example
        precision when nothing was predicted positive, or every metric on
        empty input). The default keeps the NaN result of a plain 0/0 division
        but avoids numpy's RuntimeWarning; pass ``0.0`` to get zeros instead.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, specificity, npv, f1_score)``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    def _ratio(numerator, denominator):
        # Guarded division: a zero denominator means the metric is undefined.
        # A NaN denominator is truthy, so NaN inputs still propagate as NaN.
        return numerator / denominator if denominator else zero_division

    # Each count is computed once and shared by the metrics that need it.
    true_positives = np.sum((y_pred == 1) & (y_true == 1))
    true_negatives = np.sum((y_pred == 0) & (y_true == 0))

    accuracy = _ratio(np.sum(y_true == y_pred), y_true.size)
    precision = _ratio(true_positives, np.sum(y_pred == 1))
    recall = _ratio(true_positives, np.sum(y_true == 1))
    specificity = _ratio(true_negatives, np.sum(y_true == 0))
    npv = _ratio(true_negatives, np.sum(y_pred == 0))

    # Harmonic mean of precision and recall
    f1_score = _ratio(2 * precision * recall, precision + recall)

    return accuracy, precision, recall, specificity, npv, f1_score

list_classes = [1, 2, 3, 4, 5, 6]

# Collect one record per class and build the frame once at the end. Growing a
# DataFrame with pd.concat inside the loop copies it on every iteration, starts
# from an empty frame (deprecated concat behaviour in recent pandas) and leaves
# the columns with object dtype; a single construction gives numeric dtypes.
class_metric_rows = []

for c in list_classes:

    # One-vs-rest encoding: class c is the positive label (1), all others are 0
    y_true = [1 if y == c else 0 for y in classes]
    y_pred = [1 if y == c else 0 for y in predictions]

    # Calculate metrics for class c
    accuracy, precision, recall, specificity, npv, f1_score = calculate_metrics(
        y_true, y_pred
    )

    # "n" is the number of samples truly in class c, "np" the number predicted as c
    class_metric_rows.append({
        "class": c, "n": np.sum(y_true), "np": np.sum(y_pred),
        "accuracy": accuracy, "precision": precision, "recall": recall,
        "specificity": specificity, "npv": npv, "f1_score": f1_score})

accuracy_df = pd.DataFrame(
    class_metric_rows,
    columns=["class", "n", "np", "accuracy", "precision", "recall", "specificity", "npv", "f1_score"],
)

In [7]:
# Display the per-class metrics table (one row per class)
accuracy_df

Unnamed: 0,class,n,np,accuracy,precision,recall,specificity,npv,f1_score
0,1,5984,6101,0.8605,0.876086,0.893215,0.811753,0.836112,0.884568
1,2,1989,1837,0.8776,0.70822,0.654098,0.933092,0.915717,0.680084
2,3,1018,984,0.9002,0.510163,0.493124,0.946337,0.942768,0.501499
3,4,503,643,0.9248,0.306376,0.39165,0.953038,0.967297,0.343805
4,5,244,253,0.9597,0.185771,0.192623,0.978885,0.979789,0.189135
5,6,262,182,0.963,0.203297,0.141221,0.98511,0.977083,0.166667


In [27]:
num_classes = len(list_classes)


def _group_metrics_row(group):
    """Return one metrics record treating every class in ``group`` as positive.

    True and predicted labels are binarised to 1 when the label belongs to
    ``group`` and 0 otherwise, then scored with ``calculate_metrics``. "n" is
    the number of samples truly in the group and "np" the number predicted
    into it.
    """
    members = set(group)
    y_true = [1 if y in members else 0 for y in classes]
    y_pred = [1 if y in members else 0 for y in predictions]

    accuracy, precision, recall, specificity, npv, f1_score = calculate_metrics(
        y_true, y_pred
    )

    return {
        "class": group, "n": np.sum(y_true), "np": np.sum(y_pred),
        "accuracy": accuracy, "precision": precision, "recall": recall,
        "specificity": specificity, "npv": npv, "f1_score": f1_score}


# Leading groups: the first two classes, then the first three, ... up to all but the last
leading_groups = [list_classes[0:i + 1] for i in range(1, num_classes - 1)]

# Now do reverse order: all but the first class, ... down to the last two classes
trailing_groups = [list_classes[i:num_classes] for i in range(1, num_classes - 1)]

# Build the frame once from all records instead of concatenating row by row,
# which copies the frame on every iteration and starts from an empty frame.
accuracy_group_df = pd.DataFrame(
    [_group_metrics_row(group) for group in leading_groups + trailing_groups],
    columns=["class", "n", "np", "accuracy", "precision", "recall", "specificity", "npv", "f1_score"],
)

In [28]:
# Display the grouped-class metrics table (one row per class group)
accuracy_group_df

Unnamed: 0,class,n,np,accuracy,precision,recall,specificity,npv,f1_score
0,"[1, 2]",7973,7938,0.8379,0.900101,0.89615,0.608781,0.598448,0.898121
1,"[1, 2, 3]",8991,8922,0.8757,0.934208,0.927038,0.418236,0.391466,0.930609
2,"[1, 2, 3, 4]",9494,9565,0.9291,0.959226,0.9664,0.229249,0.266667,0.9628
3,"[1, 2, 3, 4, 5]",9738,9818,0.963,0.977083,0.98511,0.141221,0.203297,0.98108
4,"[2, 3, 4, 5, 6]",4016,3899,0.8605,0.836112,0.811753,0.893215,0.876086,0.823752
5,"[3, 4, 5, 6]",2027,2062,0.8379,0.598448,0.608781,0.89615,0.900101,0.603571
6,"[4, 5, 6]",1009,1078,0.8757,0.391466,0.418236,0.927038,0.934208,0.404408
7,"[5, 6]",506,435,0.9291,0.266667,0.229249,0.9664,0.959226,0.246546
