# Having different test sets: boulder and athlete specific

**The goal is to have different test sets to simulate how the model would perform on a boulder it hasn't seen before and also how it would perform on athletes it hasn't seen before.**

- Boulder specific test set: boulders W1, W2, W4 are used for training and boulder W3 for testing
- Athlete specific test set: training set with 17 athletes per boulder and 4 athletes in the test set. The athletes in the test set are: "Ai Mori", "Brooke Raboutou", "Oceania Mackenzie" and "Mia Krampl". There are 4 different nations and also a wide variability in athlete size. 

In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sns

import numpy as np

In [None]:
# Load the preprocessed labels-and-coordinates CSV into a DataFrame
data = pd.read_csv('../../data/dataframes/labels_and_coordinates_preprocessed.csv')

# Preview the first rows
data.head()

In [None]:
# Dataset dimensions as (number of rows, number of columns)
data.shape

In [None]:
# Distinct values of the 'camera' column
data['camera'].unique()

In [None]:
# Distinct values of the 'participant' column
data['participant'].unique()

In [None]:
# Distinct values of the 'boulder' column
data['boulder'].unique()

In [None]:
# Distinct values of the 'repetition' column
data['repetition'].unique()

In [None]:
# Count the missing (NaN) entries of every column
nan_counts = data.isna().sum()

# Show the per-column counts underneath a heading
print("NaN values in each column:", nan_counts, sep="\n")

# Adding up the per-column counts gives the total for the entire dataframe
total_nan_values = nan_counts.sum()
print(f"\nTotal number of NaN values in the dataframe: {total_nan_values}")

In [None]:
print(f"\nTotal number of frames before dropping NaN values: {data.shape[0]}")

# Drop, in place, every row (frame) that contains at least one NaN value
data.dropna(inplace=True)

# Report how many frames are left
print(f"\nTotal number of frames after dropping NaN values: {data.shape[0]}")

## 1 - Classic ML - Using boulder specific data splitting

In [None]:
# ----------- some pre-processing -----------------
# Manual integer codes for each categorical column -> boulder, camera, participant, repetition
boulder_mapping = {'W1': 1, 'W2': 2, 'W3': 3, 'W4': 4}

camera_mapping = {'Cam21': 21, 'Cam22': 22, 'Cam24': 24}

participant_mapping = {'Ai Mori': 1, 'Anastasia Sanders': 2, 'Ayala Kerem': 3, 'Brooke Raboutou': 4,
                       'Chaehyun Seo': 5, 'Helene Janicot': 6, 'Jain Kim': 7, 'Janja Garnbret': 8,
                       'Jessica Pilz': 9, 'Kyra Condie': 10, 'Laura Rogora': 11, 'Manon Hily': 12,
                       'Mia Krampl': 13, 'Miho Nonaka': 14, 'Molly Thompsonsmith': 15,
                       'Natalia Grossman': 16, 'Oceania Mackenzie': 17, 'Oriane Bertone': 18,
                       'Vita Lukan': 19, 'Yejoo Seo': 20, 'Zelia Avezou': 21}

repetition_mapping = {'V1': 1, 'V2': 2, 'V3': 3, 'V4': 4, 'V5': 5, 'V6': 6, 'V7': 7, 'V8': 8, 'V9': 9, 'V10': 10}

# Encode the categorical columns with the mappings above.
# A column is mapped only while it still holds its raw string codes: applying
# `.map()` to an already encoded (numeric) column would look up the integers in
# a dict keyed by strings and turn every value into NaN. That happens when this
# cell is executed twice on the same `data`.
categorical_mappings = {
    'boulder': boulder_mapping,
    'camera': camera_mapping,
    'participant': participant_mapping,
    'repetition': repetition_mapping,
}
for column, mapping in categorical_mappings.items():
    if not pd.api.types.is_numeric_dtype(data[column]):
        data[column] = data[column].map(mapping)

# Separate features and target variable
X = data.drop(columns=['frame', 'label'])
y = data['label']

# Split by boulders: W1, W2, W4 for training and W3 for testing
train_boulders = [1, 2, 4]  # W1, W2, W4
test_boulders = [3]         # W3

# Build each row mask once and reuse it for the features and the target,
# so X and y are guaranteed to be filtered with the same rows
is_train_row = X['boulder'].isin(train_boulders)
is_test_row = X['boulder'].isin(test_boulders)

X_train, y_train = X[is_train_row], y[is_train_row]
X_test, y_test = X[is_test_row], y[is_test_row]

# Ensure the datasets are not empty
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

In [None]:
# Preview the first rows of the test features
X_test.head()

In [None]:
# Boulder codes present in the training features
X_train['boulder'].unique()

In [None]:
# Participant codes present in the training features
X_train['participant'].unique()

In [None]:
# Boulder codes present in the test features
X_test['boulder'].unique()

In [None]:
# Participant codes present in the test features
X_test['participant'].unique()

In [None]:
# Display the training labels
y_train

In [None]:
# Display the test labels
y_test

### 1.1 - Training the different models: Logistic Regression, Decision Tree, KNN and Random Forest

The test set may contain labels that are not present in the training set, because the data was split according to the different boulders. The test set contains all frames of boulder W3, so it is possible that it includes movement patterns that do not occur in the training set, which contains boulders W1, W2 and W4.

In [None]:
# The four classifiers to compare, keyed by the name used in the printed reports
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "KNN": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier()
}

# Standardize the features: statistics come from the training set only and are
# then applied unchanged to the test set
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit every model and print its per-class report on the test set.
# Only Logistic Regression receives the standardized features; the other
# models are fitted and evaluated on the raw feature values.
# NOTE(review): KNN is distance-based and is trained on unscaled features here;
# confirm that this is intended.
for name, model in models.items():
    wants_scaled_input = name == "Logistic Regression"
    train_features = X_train_scaled if wants_scaled_input else X_train
    test_features = X_test_scaled if wants_scaled_input else X_test

    print(f"Training {name}...")
    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    print(f"Evaluating {name}...")
    # zero_division=0 reports 0.0 (without a warning) for classes that have no samples
    print(classification_report(y_test, y_pred, zero_division=0))

**The occurrence of many 0.00 values in the precision, recall, and f1-score columns of the classification report might indicate that the corresponding classes have no predicted samples or true samples. This situation can arise for several reasons:**
- **Class Imbalance:** If certain classes have very few instances in the dataset compared to others, the model might struggle to make accurate predictions for those classes.

- **Data Quality:** Poor data quality, such as missing or noisy data, can affect the model's ability to learn patterns for certain classes.

- **Model Complexity:** The chosen model might not be able to effectively capture the underlying patterns in the data, especially for minority classes.

### 1.2 - Evaluation

In [None]:
# --------------- evaluation plot -------------------------------
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Compute the plotted numbers from the fitted models instead of typing them in
# by hand: the tree-based models are created without a fixed seed, so manually
# copied values no longer match the printed reports after a re-run.
# This predicts on the test set once more for every model.

# Models that were fitted on the standardized features in the training cell;
# keep this in sync with the training loop.
scaled_input_models = {"Logistic Regression"}

# name -> [accuracy, weighted precision, weighted recall, weighted F1]
metrics = {}
for name, model in models.items():
    model_pred = model.predict(X_test_scaled if name in scaled_input_models else X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test, model_pred, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(y_test, model_pred)
    # Two decimals, the same precision the classification report prints
    metrics[name] = [round(float(value), 2) for value in (accuracy, precision, recall, f1)]

# Define the metrics labels
metric_labels = ["Accuracy", "Precision", "Recall", "F1-score"]

# One subplot per model; sizing by len(metrics) keeps the plot valid if a
# model is added or removed (4 models -> the same 20x5 figure as before)
fig, axes = plt.subplots(nrows=1, ncols=len(metrics), figsize=(5 * len(metrics), 5), squeeze=False)

teal_colors = ['#008080', '#009090', '#00A0A0', '#00B0B0']  # Teal color

for ax, (name, metric_values) in zip(axes.ravel(), metrics.items()):
    ax.bar(metric_labels, metric_values, color=teal_colors)
    ax.set_title(name)
    ax.set_ylim(0, 1)  # Setting y-axis limit to [0, 1] for better visualization
    ax.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

for name, metric_values in metrics.items():
    print(f"{name}: {metric_values}")

**The weighted average was used for the plot**

The weighted average takes into account the support (number of samples) for each class when calculating the average, which can be particularly useful when dealing with imbalanced datasets. Since the weighted average gives more weight to classes with larger support, it provides a more representative measure of the overall performance of the model.

#### Confusion matrix for the best classifier - Random Forest

In [None]:
# Confusion matrix of the best classifier (Random Forest).
# The predictions are requested from the fitted Random Forest explicitly
# instead of relying on whatever `y_pred` / `name` the last executed loop left
# behind in the notebook namespace.
best_model_name = "Random Forest"
y_pred = models[best_model_name].predict(X_test)  # this model was fitted on the unscaled features

# Fix the class order once and use it for the matrix AND the tick labels.
# The matrix spans every label that occurs in the true or the predicted values;
# taking the tick labels from y_test alone mislabels the axes as soon as the
# model predicts a class that does not occur in the test set.
cm_labels = sorted(set(y_test) | set(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=cm_labels, yticklabels=cm_labels)
plt.title(f'Confusion Matrix for {best_model_name}')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

## 2 - Classic ML - Using athlete specific data splitting

In [None]:
# ----------- some pre-processing -----------------
# Manual integer codes for each categorical column -> boulder, camera, participant, repetition
boulder_mapping = {'W1': 1, 'W2': 2, 'W3': 3, 'W4': 4}

camera_mapping = {'Cam21': 21, 'Cam22': 22, 'Cam24': 24}

participant_mapping = {'Ai Mori': 1, 'Anastasia Sanders': 2, 'Ayala Kerem': 3, 'Brooke Raboutou': 4,
                       'Chaehyun Seo': 5, 'Helene Janicot': 6, 'Jain Kim': 7, 'Janja Garnbret': 8,
                       'Jessica Pilz': 9, 'Kyra Condie': 10, 'Laura Rogora': 11, 'Manon Hily': 12,
                       'Mia Krampl': 13, 'Miho Nonaka': 14, 'Molly Thompsonsmith': 15,
                       'Natalia Grossman': 16, 'Oceania Mackenzie': 17, 'Oriane Bertone': 18,
                       'Vita Lukan': 19, 'Yejoo Seo': 20, 'Zelia Avezou': 21}

repetition_mapping = {'V1': 1, 'V2': 2, 'V3': 3, 'V4': 4, 'V5': 5, 'V6': 6, 'V7': 7, 'V8': 8, 'V9': 9, 'V10': 10}

# Encode the categorical columns with the mappings above.
# `data` may already have been encoded by an earlier cell of this notebook.
# Mapping an already numeric column again would look up integers in a dict
# keyed by strings and replace every value with NaN, which leaves the athlete
# masks below empty. So a column is mapped only while it is still non-numeric.
categorical_mappings = {
    'boulder': boulder_mapping,
    'camera': camera_mapping,
    'participant': participant_mapping,
    'repetition': repetition_mapping,
}
for column, mapping in categorical_mappings.items():
    if not pd.api.types.is_numeric_dtype(data[column]):
        data[column] = data[column].map(mapping)

# Separate features and target variable
X = data.drop(columns=['frame', 'label'])
y = data['label']

# Define the athletes for the test set
test_athletes = ['Ai Mori', 'Brooke Raboutou', 'Oceania Mackenzie', 'Mia Krampl']

# Get the participant IDs for the test athletes
test_athlete_ids = [participant_mapping[athlete] for athlete in test_athletes]

# Separate the data into training and test sets based on athletes.
# The mask is built once so that features and target are filtered identically.
is_test_row = X['participant'].isin(test_athlete_ids)

X_train, y_train = X[~is_test_row], y[~is_test_row]
X_test, y_test = X[is_test_row], y[is_test_row]

# Ensure the datasets are not empty
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

In [None]:
# Preview the first rows of the test features
X_test.head()

In [None]:
# Boulder codes present in the training features
X_train['boulder'].unique()

In [None]:
# Participant codes present in the training features
X_train['participant'].unique()

In [None]:
# Boulder codes present in the test features
X_test['boulder'].unique()

In [None]:
# Participant codes present in the test features
X_test['participant'].unique()

In [None]:
# Display the training labels
y_train

In [None]:
# Display the test labels
y_test

### 2.1 - Training the different models: Logistic Regression, Decision Tree, KNN and Random Forest

In [None]:
# The four classifiers to compare, keyed by the name used in the printed reports
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "KNN": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier()
}

# Standardize the features: statistics come from the training set only and are
# then applied unchanged to the test set
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit every model and print its per-class report on the test set.
# Only Logistic Regression receives the standardized features; the other
# models are fitted and evaluated on the raw feature values.
# NOTE(review): KNN is distance-based and is trained on unscaled features here;
# confirm that this is intended.
for name, model in models.items():
    wants_scaled_input = name == "Logistic Regression"
    train_features = X_train_scaled if wants_scaled_input else X_train
    test_features = X_test_scaled if wants_scaled_input else X_test

    print(f"Training {name}...")
    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    print(f"Evaluating {name}...")
    # zero_division=0 reports 0.0 (without a warning) for classes that have no samples
    print(classification_report(y_test, y_pred, zero_division=0))

**Interpreting the results:**
- **Logistic Regression:** Achieved relatively high precision and recall for some classes like "no_movement_of_interest" and "before_start_position" but lower performance for others.
- **Decision Tree:** Shows varying performance across different classes, with lower precision and recall compared to Logistic Regression.
- **KNN:** Generally lower precision, recall, and accuracy compared to other classifiers, indicating weaker performance overall.
- **Random Forest:** Achieved the highest accuracy among the classifiers, with relatively high precision and recall for many classes, especially for "no_movement_of_interest" and "before_start_position".

### 2.2 - Evaluation

In [None]:
# --------------- evaluation plot -------------------------------
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Compute the plotted numbers from the fitted models instead of typing them in
# by hand: the tree-based models are created without a fixed seed, so manually
# copied values no longer match the printed reports after a re-run.
# This predicts on the test set once more for every model.

# Models that were fitted on the standardized features in the training cell;
# keep this in sync with the training loop.
scaled_input_models = {"Logistic Regression"}

# name -> [accuracy, weighted precision, weighted recall, weighted F1]
metrics = {}
for name, model in models.items():
    model_pred = model.predict(X_test_scaled if name in scaled_input_models else X_test)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test, model_pred, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(y_test, model_pred)
    # Two decimals, the same precision the classification report prints
    metrics[name] = [round(float(value), 2) for value in (accuracy, precision, recall, f1)]

# Define the metrics labels
metric_labels = ["Accuracy", "Precision", "Recall", "F1-score"]

# One subplot per model; sizing by len(metrics) keeps the plot valid if a
# model is added or removed (4 models -> the same 20x5 figure as before)
fig, axes = plt.subplots(nrows=1, ncols=len(metrics), figsize=(5 * len(metrics), 5), squeeze=False)

teal_colors = ['#008080', '#009090', '#00A0A0', '#00B0B0']  # Teal color

for ax, (name, metric_values) in zip(axes.ravel(), metrics.items()):
    ax.bar(metric_labels, metric_values, color=teal_colors)
    ax.set_title(name)
    ax.set_ylim(0, 1)  # Setting y-axis limit to [0, 1] for better visualization
    ax.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

for name, metric_values in metrics.items():
    print(f"{name}: {metric_values}")

#### Confusion matrix for the best classifier - Random Forest

In [None]:
# Confusion matrix of the best classifier (Random Forest).
# The predictions are requested from the fitted Random Forest explicitly
# instead of relying on whatever `y_pred` / `name` the last executed loop left
# behind in the notebook namespace.
best_model_name = "Random Forest"
y_pred = models[best_model_name].predict(X_test)  # this model was fitted on the unscaled features

# Fix the class order once and use it for the matrix AND the tick labels.
# The matrix spans every label that occurs in the true or the predicted values;
# taking the tick labels from y_test alone mislabels the axes as soon as the
# model predicts a class that does not occur in the test set.
cm_labels = sorted(set(y_test) | set(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=cm_labels, yticklabels=cm_labels)
plt.title(f'Confusion Matrix for {best_model_name}')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

## 3 - RNN - Using boulder specific data splitting

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Bidirectional, Attention, Concatenate, Flatten

In [None]:
# Reload the raw CSV so this section starts from unencoded categorical columns
data = pd.read_csv('../../data/dataframes/labels_and_coordinates_preprocessed.csv')

In [None]:
# Remove missing values
data = data.dropna()

# Gather all keypoint columns (all columns with a "_x", "_y", "_z", "_v", "_p", "com" or "angle" in their name)
keypoint_markers = ('_x', '_y', '_z', '_v', '_p', 'com', 'angle')
keypoint_columns = [col for col in data.columns if any(marker in col for marker in keypoint_markers)]

# Normalize keypoint data
# Standardize features by removing the mean and scaling to unit variance.
# The statistics are estimated on the training boulders only (everything except
# W3, which is held out for testing further below) and then applied to all
# rows. Fitting on the full table would leak the test boulder's distribution
# into the training features.
scaler = StandardScaler()
is_train_row = data['boulder'] != 'W3'
scaler.fit(data.loc[is_train_row, keypoint_columns])
data[keypoint_columns] = scaler.transform(data[keypoint_columns])

# One-hot encode categorical variables
data = pd.get_dummies(data, columns=['boulder', 'camera', 'participant', 'repetition'])

# Encode labels as integer class ids; the encoder is kept to translate
# predictions back to label names later
if 'label' in data.columns:
    label_encoder = LabelEncoder()
    data['label'] = label_encoder.fit_transform(data['label'])

In [None]:
# Convert column names to a NumPy array
column_names = data.columns.to_numpy()

# Print all column names (including the one-hot columns created above)
print(column_names)

In [None]:
# Boulder-specific split: the one-hot column 'boulder_W3' flags the frames that
# belong to the held-out boulder W3
w3_flag = data['boulder_W3']
train_data = data[w3_flag == 0]  # frames of every boulder except W3
test_data = data[w3_flag == 1]   # frames of boulder W3 only

# Report the size of both partitions
print("Train Data Shape:", train_data.shape)
print("Test Data Shape:", test_data.shape)

In [None]:
# Parameters for reshaping
# NOTE(review): every table row is cut into `timesteps` equal slices, so the
# sequence axis is a partition of one row's feature vector rather than a run of
# consecutive rows. Confirm that this is the intended input layout.
timesteps = 2
total_features = len(data.columns.drop('label'))  # every column except the target

# The feature vector must split evenly into `timesteps` slices
if total_features % timesteps:
    raise ValueError(f"Number of total features ({total_features}) is not divisible by defined timesteps ({timesteps}).")
features_per_timestep = total_features // timesteps


def _as_sequences(frame):
    """Return the feature columns of `frame` as a float32 array of shape (rows, timesteps, features_per_timestep)."""
    feature_matrix = frame.drop('label', axis=1).values
    return feature_matrix.reshape(-1, timesteps, features_per_timestep).astype(np.float32)


def _as_targets(frame):
    """Return the encoded 'label' column of `frame` as an int32 array."""
    return frame['label'].values.astype(np.int32)


# Reshape data
X_train, y_train = _as_sequences(train_data), _as_targets(train_data)
X_test, y_test = _as_sequences(test_data), _as_targets(test_data)

# Wrap both partitions into batched TensorFlow datasets
batch_size = 32
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)

In [None]:
def create_model(timesteps, features_per_timestep, nr_classes):
    """
    Build and compile a classifier made of three stacked bidirectional LSTM
    stages, with a self-attention branch on the outputs of the first two.

    Args:
        timesteps: length of the sequence axis of the input.
        features_per_timestep: number of features per sequence step.
        nr_classes: number of units of the softmax output layer.

    Returns:
        A Keras Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    def recurrent_stage(tensor, units, keep_sequence):
        """Apply a bidirectional LSTM followed by dropout with rate 0.3."""
        tensor = Bidirectional(LSTM(units, return_sequences=keep_sequence))(tensor)
        return Dropout(0.3)(tensor)

    sequence_input = Input(shape=(timesteps, features_per_timestep))

    # Stage 1 and 2 keep the full sequence so attention can be applied to it
    stage_1 = recurrent_stage(sequence_input, 64, True)
    attended_1 = Attention()([stage_1, stage_1])

    stage_2 = recurrent_stage(stage_1, 128, True)
    attended_2 = Attention()([stage_2, stage_2])

    # Stage 3 returns only its final output, i.e. one vector per sample
    stage_3 = recurrent_stage(stage_2, 64, False)

    # Merge both (flattened) attention branches with the final LSTM output
    merge = Concatenate()
    merged = merge([Flatten()(attended_1), Flatten()(attended_2), stage_3])

    # Dense classification head
    hidden = Dense(128, activation='relu')(merged)
    hidden = Dropout(0.3)(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    class_probabilities = Dense(nr_classes, activation='softmax')(hidden)

    classifier = Model(inputs=sequence_input, outputs=class_probabilities)
    classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return classifier

In [None]:
# Size the output layer from the label encoder, not from the labels that happen
# to occur in the training split. The encoder was fitted on all rows, so its
# class ids range over len(label_encoder.classes_). If a class is missing from
# the training boulders, counting the unique training labels yields too few
# output units, and the sparse integer targets fall outside the softmax range.
nr_classes = len(label_encoder.classes_)
model = create_model(timesteps, features_per_timestep, nr_classes)
model.summary()

# Train the model
history = model.fit(train_dataset, epochs=10, verbose=1)

In [None]:
# Evaluate the model on the held-out test dataset.
# The model was compiled with a single metric (accuracy), so `evaluate`
# yields the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_accuracy}, Test Loss: {test_loss}")

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

# Predict class probabilities for the test dataset and pick the most likely class
y_pred = model.predict(test_dataset)
y_pred_classes = np.argmax(y_pred, axis=1)

# Map numeric labels to their corresponding names
y_test_labels = label_encoder.inverse_transform(y_test)
y_pred_labels = label_encoder.inverse_transform(y_pred_classes)

# Every label that occurs in the true or the predicted values (np.unique returns them sorted)
unique_labels = np.unique(np.concatenate((y_test_labels, y_pred_labels)))

# Compute the confusion matrix with that explicit label order, so rows and
# columns are guaranteed to line up with the tick labels below
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=unique_labels)

# Generate and display the confusion matrix.
# Exactly one figure is created; a second plt.figure call would leave an empty
# figure behind in the output.
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

## 4 - RNN - Using athlete specific data splitting

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Bidirectional, Attention, Concatenate, Flatten

In [None]:
# Load dataset (reloaded from CSV so this section starts from unencoded categorical columns)
data = pd.read_csv('../../data/dataframes/labels_and_coordinates_preprocessed.csv')

In [None]:
# Remove missing values
data = data.dropna()

# Gather all keypoint columns (all columns with a "_x", "_y", "_z", "_v", "_p", "com" or "angle" in their name)
keypoint_markers = ('_x', '_y', '_z', '_v', '_p', 'com', 'angle')
keypoint_columns = [col for col in data.columns if any(marker in col for marker in keypoint_markers)]

# Athletes held out for testing. Keep this list in sync with the
# 'participant_<name>' columns used for the train/test split further below.
held_out_athletes = ['Ai Mori', 'Brooke Raboutou', 'Oceania Mackenzie', 'Mia Krampl']

# Normalize keypoint data
# Standardize features by removing the mean and scaling to unit variance.
# The statistics are estimated on the training athletes only and then applied
# to all rows. Fitting on the full table would leak the held-out athletes'
# distribution into the training features.
scaler = StandardScaler()
is_train_row = ~data['participant'].isin(held_out_athletes)
scaler.fit(data.loc[is_train_row, keypoint_columns])
data[keypoint_columns] = scaler.transform(data[keypoint_columns])

# One-hot encode categorical variables
data = pd.get_dummies(data, columns=['boulder', 'camera', 'participant', 'repetition'])

# Encode labels as integer class ids; the encoder is kept to translate
# predictions back to label names later
if 'label' in data.columns:
    label_encoder = LabelEncoder()
    data['label'] = label_encoder.fit_transform(data['label'])

In [None]:
# Convert column names to a NumPy array
column_names = data.columns.to_numpy()

# Print all column names (including the one-hot columns created above)
print(column_names)

In [None]:
# One-hot columns of the athletes that form the test set
test_athletes = ["participant_Ai Mori", "participant_Brooke Raboutou", "participant_Oceania Mackenzie", "participant_Mia Krampl"]

# Every test-athlete column must exist. Checking for only *some* of them would
# let the column selection below fail with a KeyError instead of this
# descriptive error when a single athlete is missing.
missing_columns = [athlete for athlete in test_athletes if athlete not in data.columns]
if missing_columns:
    raise ValueError(f"Test athlete columns not found in the DataFrame: {missing_columns}")

# Split the dataset based on athlete-specific criteria.
# A row belongs to the test set when any of the test-athlete flags is set.
is_test_row = data[test_athletes].any(axis=1)
test_data = data[is_test_row]
train_data = data[~is_test_row]

# Print the shapes of the training and testing data
print("Train Data Shape:", train_data.shape)
print("Test Data Shape:", test_data.shape)

In [None]:
# Parameters for reshaping
# NOTE(review): every table row is cut into `timesteps` equal slices, so the
# sequence axis is a partition of one row's feature vector rather than a run of
# consecutive rows. Confirm that this is the intended input layout.
timesteps = 2
total_features = len(data.columns.drop('label'))  # every column except the target

# The feature vector must split evenly into `timesteps` slices
if total_features % timesteps:
    raise ValueError(f"Number of total features ({total_features}) is not divisible by defined timesteps ({timesteps}).")
features_per_timestep = total_features // timesteps


def _as_sequences(frame):
    """Return the feature columns of `frame` as a float32 array of shape (rows, timesteps, features_per_timestep)."""
    feature_matrix = frame.drop('label', axis=1).values
    return feature_matrix.reshape(-1, timesteps, features_per_timestep).astype(np.float32)


def _as_targets(frame):
    """Return the encoded 'label' column of `frame` as an int32 array."""
    return frame['label'].values.astype(np.int32)


# Reshape data
X_train, y_train = _as_sequences(train_data), _as_targets(train_data)
X_test, y_test = _as_sequences(test_data), _as_targets(test_data)

# Wrap both partitions into batched TensorFlow datasets
batch_size = 32
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)

In [None]:
def create_model(timesteps, features_per_timestep, nr_classes):
    """
    Build and compile a classifier made of three stacked bidirectional LSTM
    stages, with a self-attention branch on the outputs of the first two.

    Args:
        timesteps: length of the sequence axis of the input.
        features_per_timestep: number of features per sequence step.
        nr_classes: number of units of the softmax output layer.

    Returns:
        A Keras Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    def recurrent_stage(tensor, units, keep_sequence):
        """Apply a bidirectional LSTM followed by dropout with rate 0.3."""
        tensor = Bidirectional(LSTM(units, return_sequences=keep_sequence))(tensor)
        return Dropout(0.3)(tensor)

    sequence_input = Input(shape=(timesteps, features_per_timestep))

    # Stage 1 and 2 keep the full sequence so attention can be applied to it
    stage_1 = recurrent_stage(sequence_input, 64, True)
    attended_1 = Attention()([stage_1, stage_1])

    stage_2 = recurrent_stage(stage_1, 128, True)
    attended_2 = Attention()([stage_2, stage_2])

    # Stage 3 returns only its final output, i.e. one vector per sample
    stage_3 = recurrent_stage(stage_2, 64, False)

    # Merge both (flattened) attention branches with the final LSTM output
    merge = Concatenate()
    merged = merge([Flatten()(attended_1), Flatten()(attended_2), stage_3])

    # Dense classification head
    hidden = Dense(128, activation='relu')(merged)
    hidden = Dropout(0.3)(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    class_probabilities = Dense(nr_classes, activation='softmax')(hidden)

    classifier = Model(inputs=sequence_input, outputs=class_probabilities)
    classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return classifier

In [None]:
# Size the output layer from the label encoder, not from the labels that happen
# to occur in the training split. The encoder was fitted on all rows, so its
# class ids range over len(label_encoder.classes_). If a class is missing from
# the training athletes, counting the unique training labels yields too few
# output units, and the sparse integer targets fall outside the softmax range.
nr_classes = len(label_encoder.classes_)
model = create_model(timesteps, features_per_timestep, nr_classes)
model.summary()

# Train the model
history = model.fit(train_dataset, epochs=10, verbose=1)

In [None]:
# Evaluate the model on the held-out test dataset.
# The model was compiled with a single metric (accuracy), so `evaluate`
# yields the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_accuracy}, Test Loss: {test_loss}")

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

# Predict class probabilities for the test dataset and pick the most likely class
y_pred = model.predict(test_dataset)
y_pred_classes = np.argmax(y_pred, axis=1)

# Map numeric labels to their corresponding names
y_test_labels = label_encoder.inverse_transform(y_test)
y_pred_labels = label_encoder.inverse_transform(y_pred_classes)

# Every label that occurs in the true or the predicted values (np.unique returns them sorted)
unique_labels = np.unique(np.concatenate((y_test_labels, y_pred_labels)))

# Compute the confusion matrix with that explicit label order, so rows and
# columns are guaranteed to line up with the tick labels below
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=unique_labels)

# Generate and display the confusion matrix.
# Exactly one figure is created; a second plt.figure call would leave an empty
# figure behind in the output.
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_labels, yticklabels=unique_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Compute the confusion matrix over ALL encoded classes. Without an explicit
# label list the matrix only covers classes that occur in y_test or in the
# predictions, and then no longer matches the display labels taken from
# label_encoder.classes_.
class_ids = np.arange(len(label_encoder.classes_))
cm = confusion_matrix(y_test, y_pred_classes, labels=class_ids)

# Normalize the confusion matrix by row (true labels).
# Classes without any true sample have a row sum of 0; their rows are left at
# 0.0 instead of dividing by zero (which would produce NaN cells).
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
cm_normalized = np.round(cm_normalized, 2)  # Round to 2 decimal places

# Display the normalized confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm_normalized, display_labels=label_encoder.classes_)
fig, ax = plt.subplots(figsize=(14, 14))  # Increase figure size for better readability
disp.plot(xticks_rotation='vertical', ax=ax, cmap='viridis')  # Choose a colormap for better contrast

# Adjust font size
plt.xticks(fontsize=10, ha='center')
plt.yticks(fontsize=10, va='center')

# Update text properties in the matrix
for text in disp.text_.ravel():
    text.set_fontsize(10)  # Set font size for the numbers

# Set the title on the matrix axes explicitly, before the layout is computed,
# so the padding accounts for it
ax.set_title('Normalized Confusion Matrix')

plt.tight_layout(pad=3.0)  # Add padding to ensure elements are not overlapping

plt.show()