# **Before running the notebook, go to Session options in the right-hand side panel and set the accelerator to GPU T4 or P100; otherwise the notebook will take a long time to run.**

# Import Libraries
Purpose: Imports all necessary libraries for the project, including TensorFlow for deep learning, pandas for data manipulation, scikit-learn for machine learning models, XGBoost for gradient boosting, and Matplotlib for visualization. Suppresses warnings to keep the output clean. If you get any error with a library, install it by running `pip install <library name>` in your terminal.

In [None]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import RandomRotation, RandomTranslation, RandomZoom
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import to_categorical
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from xgboost import XGBClassifier
from tensorflow.keras.regularizers import l2
import numpy as np
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')


# Check TensorFlow and GPU Setup
Purpose: Prints the TensorFlow version and checks for GPU availability. If a GPU is detected, it enables memory growth to optimize GPU memory usage. If no GPU is found, it alerts the user to check their CUDA/cuDNN configuration. If you run this cell in Jupyter Notebook and it shows no GPU, the CNN model will take hours to train, so it is advisable to run the notebook from the Kaggle link available in the readme file instead.

In [None]:
# Report the TensorFlow version and every GPU TensorFlow can see.
print("TensorFlow version:", tf.__version__)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("GPU detected!")
    for gpu in gpus:
        # The arrow was previously stored as mis-decoded bytes ("â†’").
        print("→", gpu)
    try:
        # Ask TensorFlow to grow GPU memory on demand; a RuntimeError from
        # this call is reported below instead of aborting the notebook.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled for GPU.")
    except RuntimeError as e:
        print("Error setting memory growth:", e)
else:
    print("No GPU detected. Check your CUDA/cuDNN and environment config.")


# Load and Inspect Training Data
Purpose: Loads the EMNIST balanced training dataset into a pandas DataFrame. Displays the first few rows, statistical summary, and shape of the dataset to understand its structure and contents.

In [None]:
# Read the headerless training CSV, then print a preview, summary statistics
# and the overall shape.
TRAIN_CSV = "/kaggle/input/emnist/emnist-balanced-train.csv"
train_df = pd.read_csv(TRAIN_CSV, header=None)
for overview in (train_df.head(), train_df.describe(), train_df.shape):
    print(overview)


# Create Label Dictionary
Purpose: Loads the EMNIST label mapping file and creates a dictionary that maps the indices 0 to 25 to the characters A-Z, taken from mapping entries 10 to 35. Prints the dictionary for verification.

In [None]:
# Space-separated, headerless mapping file; its first column becomes the index.
label_map = pd.read_csv(
    "/kaggle/input/emnist/emnist-balanced-mapping.txt",
    sep=' ',
    header=None,
    index_col=0,
)

In [None]:
# Rows 10..35 of mapping column 1 hold character codes; number them 0..25.
letter_codes = label_map[1].iloc[10:36]
label_dictionary = dict(enumerate(chr(code) for code in letter_codes))
print(label_dictionary)

# Filter and Preprocess Training Data
Purpose: Filters the training dataset to include only samples with labels 10 to 35 (corresponding to letters A-Z). Resets the index and adjusts labels by subtracting 10 to map them to 0-25. Displays the filtered data's head, label statistics, and shape.

In [None]:
# Keep only rows whose label (column 0) lies in 10..35 and renumber the rows.
is_letter_row = train_df[0].isin(np.arange(10, 36))
train_df_new = train_df[is_letter_row].reset_index(drop=True)

In [None]:
# Shift the labels from 10..35 down to 0..25.
# NOTE(review): running this cell twice subtracts 10 twice.
train_df_new[0] = train_df_new[0].sub(10)
print(train_df_new.head())
print(train_df_new[0].describe())
print(train_df_new.shape)


# Split Features and Labels
Purpose: Separates the training data into features (x_train, pixel values) and labels (y_train). Prints their shapes to confirm the split.

In [None]:
# Column 0 is the label; columns 1 onward are the features.
y_train = train_df_new.loc[:, 0]
x_train = train_df_new.loc[:, 1:]
print(x_train.shape, y_train.shape)

# Inspect a Sample Image
Purpose: Selects the sample at position 1000 and its label from the training set. Prints the image's shape and the label for inspection.

In [None]:
# One example row (position 1000) and its label, reused by the plots below.
SAMPLE_POS = 1000
sample_image, sample_label = x_train.iloc[SAMPLE_POS], y_train.iloc[SAMPLE_POS]
print(sample_image.shape, sample_label)

# Image Preprocessing Functions
Purpose: Defines two functions: dimension reshapes a flattened image into a 28x28 matrix, and flip_and_rotate flips the image horizontally and rotates it 90 degrees to correct orientation for EMNIST images.

In [None]:
def dimension(image):
    """Return a copy of *image* reshaped into a 28x28 NumPy array.

    ``image`` may be any array-like with exactly 784 values; ``reshape``
    raises ``ValueError`` for any other size.
    """
    side = 28
    return np.reshape(np.array(image), (side, side))

def flip_and_rotate(image):
    """Mirror *image* left-to-right, then apply ``np.rot90`` to the result.

    For a 2-D array the two steps together are equivalent to a transpose.
    """
    return np.rot90(np.fliplr(image))

# Visualize Original Sample Image
Purpose: Displays the 1000th training image in its original form (after reshaping to 28x28) with its corresponding label from the dictionary, using a grayscale colormap.

In [None]:
# Show the sample as stored: only reshaped to 28x28, no orientation fix.
print("Label entry No. 1000:", label_dictionary[sample_label])
raw_view = dimension(sample_image)
plt.imshow(raw_view, cmap=plt.cm.gray)
plt.show()

# Visualize Preprocessed Sample Image
Purpose: Displays the 1000th training image after applying the flip_and_rotate preprocessing, along with its label, to verify the preprocessing steps.

In [None]:
# Show the same sample after the flip-and-rotate orientation fix.
print("Label entry 1000:", label_dictionary[sample_label])
upright_view = flip_and_rotate(dimension(sample_image))
plt.imshow(upright_view, cmap=plt.cm.gray)
plt.show()

# Reshape Training Images
Purpose: Reshapes all training images from flattened vectors to 28x28 matrices using the dimension function. Prints the new shape to confirm the transformation.

In [None]:
# Turn every 784-value row into a 28x28 image. One reshape over the whole
# array gives the same result as calling `dimension` on each row.
x_train = np.asarray(x_train).reshape(-1, 28, 28)
print(x_train.shape)


# Visualize Multiple Training Images (Before Flip and Rotate)
Purpose: Plots a grid of 18 training images (indices 100 to 117) with their corresponding labels to visualize the dataset after reshaping but before the orientation is corrected.

In [None]:
# 3x6 grid of training images 100..117 with their letters as titles.
plt.figure(figsize=(10, 8), dpi=80)
for slot, idx in enumerate(range(100, 118), start=1):
    plt.subplot(3, 6, slot)
    plt.imshow(x_train[idx], cmap=plt.cm.gray)
    plt.title(label_dictionary[y_train[idx]])
plt.show()

# Apply Flip and Rotate to Training Images and Visualize Preprocessed Training Images
Purpose: Applies the flip_and_rotate preprocessing to all training images to correct their orientation. Plots a grid of 18 preprocessed training images (indices 100 to 117) with their labels to verify the flip and rotate transformation.

In [None]:
# Fix the orientation of every training image.
x_train = np.stack([flip_and_rotate(img) for img in x_train])

# Same 3x6 grid as before (images 100..117), now with corrected orientation.
plt.figure(figsize=(10, 8), dpi=80)
for slot, idx in enumerate(range(100, 118), start=1):
    plt.subplot(3, 6, slot)
    plt.imshow(x_train[idx], cmap=plt.cm.gray)
    plt.title(label_dictionary[y_train[idx]])
plt.show()

# Load and Inspect Test Data
Purpose: Loads the EMNIST balanced test dataset into a DataFrame and displays its head and statistical summary to inspect its structure.

In [None]:
# Read the headerless test CSV and print a preview plus summary statistics.
TEST_CSV = "/kaggle/input/emnist/emnist-balanced-test.csv"
test_df = pd.read_csv(TEST_CSV, header=None)
for overview in (test_df.head(), test_df.describe()):
    print(overview)


# Filter and Preprocess Test Data
Purpose: Filters the test dataset to include only samples with labels 10 to 35 (A-Z), resets the index, and adjusts labels to 0-25. Displays the head of the filtered data.

In [None]:
# Keep only rows whose label (column 0) lies in 10..35 and renumber the rows.
is_letter_row = test_df[0].isin(np.arange(10, 36))
test_df_new = test_df[is_letter_row].reset_index(drop=True)

In [None]:
# Shift the labels from 10..35 down to 0..25.
# NOTE(review): running this cell twice subtracts 10 twice.
test_df_new[0] = test_df_new[0].sub(10)
print(test_df_new.head())

# Prepare Test Features and Labels
Purpose: Separates test data into features (x_test) and labels (y_test), reshapes images to 28x28, and applies flip and rotate preprocessing. Prints shapes to confirm.

In [None]:
# Column 0 is the label; columns 1 onward are the features.
y_test = test_df_new.loc[:, 0]
x_test = test_df_new.loc[:, 1:]

In [None]:
# Reshape each test row to 28x28 and fix its orientation in a single pass.
x_test = np.array([flip_and_rotate(dimension(row)) for row in x_test.to_numpy()])
print("x_test:", x_test.shape)
print("y_test:", y_test.shape)

# Visualize Test Images
Purpose: Plots a grid of 18 test images with their corresponding labels to visualize the preprocessed test dataset.

In [None]:
# 3x6 grid of the first 18 test images with their letters as titles.
plt.figure(figsize=(10, 8), dpi=80)
for slot, (img, lbl) in enumerate(zip(x_test[:18], y_test.iloc[:18]), start=1):
    plt.subplot(3, 6, slot)
    plt.imshow(img, cmap=plt.cm.gray)
    plt.title(label_dictionary[lbl])
plt.show()

# Compare Train and Test Sample Sizes
Purpose: Creates a bar chart comparing the number of samples in the training and test datasets.

In [None]:
# Bar chart of how many samples each split contains.
split_sizes = {'Train': y_train.shape[0], 'Test': y_test.shape[0]}
plt.figure(figsize=(6, 4))
plt.bar(list(split_sizes), list(split_sizes.values()), color=['skyblue', 'salmon'])
plt.title('Number of EMNIST Letters Samples (A-Z)')
plt.ylabel('Number of Samples')
plt.show()


# Normalize Data
Purpose: Normalizes pixel values of training and test images to the range [0, 1] by converting to float32 and dividing by 255. Models generally train better when the input values lie in a small, consistent range such as [0, 1].

In [None]:
# Cast to float32 first, then divide by 255.
# NOTE(review): running this cell twice divides by 255 twice.
x_train, x_test = (pixels.astype('float32') / 255 for pixels in (x_train, x_test))

# Check Number of Classes
Purpose: Calculates and prints the number of unique classes (26, corresponding to A-Z) in the training labels.

In [None]:
# Number of distinct label values in the training labels; later cells use it
# to size the model outputs and to build the plot tick labels.
number_of_classes = y_train.nunique()
print(number_of_classes)

# Reshape Data for Models
Purpose: Reshapes training and test data for CNN (28x28x1) and other models (flattened to 784 features). Prints shapes to confirm.

In [None]:
W = H = 28
# CNN input: add a trailing channel axis -> (n, 28, 28, 1).
x_train_cnn = x_train.reshape(-1, W, H, 1)
x_test_cnn = x_test.reshape(-1, W, H, 1)
# Dense / tree models: one flat row per image.
x_train_flat = x_train.reshape(len(x_train), -1)
x_test_flat = x_test.reshape(len(x_test), -1)
for reshaped in (x_train_cnn, x_test_cnn, x_train_flat, x_test_flat):
    print(reshaped.shape)

# Visualize Training Label Distribution
Purpose: Plots a bar chart showing the distribution of labels (A-Z) in the training dataset.

In [None]:
# Count how often each label occurs in the training labels.
unique_labels, counts = np.unique(y_train, return_counts=True)
letter_labels = [label_dictionary[label] for label in unique_labels]
# plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases
# no longer provide.
bar_colors = plt.get_cmap('viridis')(np.linspace(0, 1, len(unique_labels)))
plt.figure(figsize=(12, 6))
plt.bar(letter_labels, counts, color=bar_colors)
plt.xlabel("Label (A-Z)")
plt.ylabel("Count")
plt.title("Distribution of Labels in Training Data (A-Z)")
plt.show()

# Visualize Test Label Distribution
Purpose: Plots a bar chart showing the distribution of labels (A-Z) in the test dataset.

In [None]:
# Count how often each label occurs in the test labels.
unique_labels, counts = np.unique(y_test, return_counts=True)
letter_labels = [label_dictionary[label] for label in unique_labels]
# plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases
# no longer provide.
bar_colors = plt.get_cmap('viridis')(np.linspace(0, 1, len(unique_labels)))
plt.figure(figsize=(12, 6))
plt.bar(letter_labels, counts, color=bar_colors)
plt.xlabel("Label (A-Z)")
plt.ylabel("Count")
plt.title("Distribution of Labels in Testing Data (A-Z)")
plt.show()

# Define and Train Neural Network(estimated runtime is 2 mins)
THIS MODEL IS FOR EXPERIMENTATION ONLY THIS IS NOT THE ACTUAL MODEL THAT WILL BE USED IN GUI ITS JUST USED TO SHOW HOW CNN MODEL OUTPERFORMS ALL THE COMMON MODELS FOR IMAGE PROCESSING TASKS.
Purpose: Defines a simple feedforward neural network with two hidden layers (128 and 64 units) and a softmax output layer. Compiles the model with the Adam optimizer and sparse categorical crossentropy loss. Trains the model for 10 epochs with a batch size of 32, using the test set for validation.

In [None]:
# Dense baseline: 784 inputs -> 128 -> 64 -> one softmax unit per class.
# The input shape is declared with an explicit Input layer (as the CNN cell
# does) instead of the `input_shape=` argument on the first Dense layer.
nn_model = keras.Sequential([
    layers.Input(shape=(784,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(number_of_classes, activation='softmax')
])

In [None]:
# The sparse loss takes integer class ids, so the labels are not one-hot encoded.
nn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# NOTE(review): the test split is also used as validation data, so the
# per-epoch validation metrics are computed on the data used for the final
# test score.
history_nn = nn_model.fit(
    x_train_flat,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test_flat, y_test),
)

# Evaluate Neural Network
Purpose: Evaluates the neural network on the test set and prints the test accuracy.

In [None]:
# evaluate() returns the loss followed by the compiled metric (accuracy).
nn_scores = nn_model.evaluate(x_test_flat, y_test)
loss_nn, accuracy_nn = nn_scores
print(f"Neural Network Model Accuracy on Test Set: {accuracy_nn}")

# Plot Neural Network Accuracy
Purpose: Plots the training and validation accuracy of the neural network over epochs.

In [None]:
# Training vs. validation accuracy per epoch for the dense network.
plt.figure(figsize=(12, 6))
for history_key, legend_text in (('accuracy', 'Training Accuracy'),
                                 ('val_accuracy', 'Validation Accuracy')):
    plt.plot(history_nn.history[history_key], label=legend_text)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Neural Network Training and Validation Accuracy')
plt.legend()
plt.show()

# Plot Neural Network Loss
Purpose: Plots the training and validation loss of the neural network over epochs.

In [None]:
# Training vs. validation loss per epoch for the dense network.
plt.figure(figsize=(12, 6))
for history_key, legend_text in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    plt.plot(history_nn.history[history_key], label=legend_text)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Neural Network Training and Validation Loss')
plt.legend()
plt.show()

# Train and Evaluate Decision Tree(estimated runtime is 1 min)
THIS MODEL IS FOR EXPERIMENTATION ONLY THIS IS NOT THE ACTUAL MODEL THAT WILL BE USED IN GUI ITS JUST USED TO SHOW HOW CNN MODEL OUTPERFORMS ALL THE COMMON MODELS FOR IMAGE PROCESSING TASKS.
Purpose: Trains a Decision Tree classifier with entropy criterion on the flattened training data. Evaluates it on the test set and prints the accuracy.

In [None]:
# Entropy-criterion decision tree on the flattened pixels; fixed seed.
dt_model = DecisionTreeClassifier(criterion='entropy', random_state=42)
y_pred_dt = dt_model.fit(x_train_flat, y_train).predict(x_test_flat)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print(f"Decision Tree Model Accuracy on Test Set: {accuracy_dt}")


# Train and Evaluate Random Forest(estimated runtime is 2 mins)
THIS MODEL IS FOR EXPERIMENTATION ONLY THIS IS NOT THE ACTUAL MODEL THAT WILL BE USED IN GUI ITS JUST USED TO SHOW HOW CNN MODEL OUTPERFORMS ALL THE COMMON MODELS FOR IMAGE PROCESSING TASKS.
Purpose: Trains a Random Forest classifier with 30 trees on the flattened training data. Evaluates it on the test set and prints the accuracy.

In [None]:
# 30-tree random forest on the flattened pixels; n_jobs=-1 and a fixed seed.
rf_model = RandomForestClassifier(n_estimators=30, n_jobs=-1, random_state=42)
y_pred_rf = rf_model.fit(x_train_flat, y_train).predict(x_test_flat)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Model Accuracy on Test Set: {accuracy_rf}")

# Train and Evaluate XGBoost(estimated runtime is 8 mins)
THIS MODEL IS FOR EXPERIMENTATION ONLY THIS IS NOT THE ACTUAL MODEL THAT WILL BE USED IN GUI ITS JUST USED TO SHOW HOW CNN MODEL OUTPERFORMS ALL THE COMMON MODELS FOR IMAGE PROCESSING TASKS.
Purpose: Trains an XGBoost classifier with 30 estimators on the flattened training data. Evaluates it on the test set and prints the accuracy.

In [None]:
# 30-estimator XGBoost classifier on the flattened pixels; fixed seed.
xgb_model = XGBClassifier(n_estimators=30, random_state=42, n_jobs=-1)
y_pred_xgb = xgb_model.fit(x_train_flat, y_train).predict(x_test_flat)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Model Accuracy on Test Set: {accuracy_xgb}")

# Define CNN Model(estimated runtime is 15 mins)
Purpose: Defines a convolutional neural network (CNN) with three convolutional blocks, batch normalization, dropout, and a residual connection. Compiles the model with AdamW optimizer and sparse categorical crossentropy loss. Prints the model summary.

In [None]:
def _conv_block(x, filters, dropout_rate):
    """Apply two L2-regularised 3x3 Conv+BatchNorm pairs, 2x2 max pooling and dropout."""
    for _ in range(2):
        x = layers.Conv2D(filters, (3, 3), padding='same', activation='relu',
                          kernel_regularizer=l2(0.01))(x)
        x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    return layers.Dropout(dropout_rate)(x)


def cnn(input_shape=(28, 28, 1), num_classes=26):
    """Build and compile the CNN classifier.

    Architecture: three convolutional blocks (60, 100 and 200 filters), a
    shortcut branch from the raw input that is added to the last block's
    output, then a 400-unit dense layer and a softmax output.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``keras.Model`` compiled with AdamW, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    inputs = layers.Input(shape=input_shape)

    # Conv blocks 1-3: (filters, dropout rate) per block.
    x = inputs
    for filters, dropout_rate in ((60, 0.3), (100, 0.3), (200, 0.4)):
        x = _conv_block(x, filters, dropout_rate)

    # Residual connection: project the input to 200 channels and pool it by 8
    # so it can be added to the main path. For the default 28x28 input both
    # paths end at 3x3 (28 -> 14 -> 7 -> 3 and 28 // 8 = 3).
    # NOTE(review): other input sizes only work if both paths still produce
    # the same spatial shape.
    shortcut = layers.Conv2D(200, (1, 1), padding='same')(inputs)
    shortcut = layers.MaxPooling2D((8, 8))(shortcut)
    shortcut = layers.BatchNormalization()(shortcut)

    x = layers.Add()([x, shortcut])
    x = layers.Activation('relu')(x)

    # Fully connected head.
    x = layers.Flatten()(x)
    x = layers.Dense(400, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=AdamW(learning_rate=0.001, weight_decay=0.01),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [None]:
# Build the CNN with one output unit per class and print its layer table.
cnn_model = cnn(num_classes=number_of_classes, input_shape=(28, 28, 1))
cnn_model.summary()

# Train CNN Model
Purpose: Sets up callbacks for saving the best model based on validation accuracy and reducing the learning rate on plateau. Trains the CNN model for 50 epochs with a batch size of 512.

In [None]:
# Write the model to disk whenever validation accuracy reaches a new best.
model_checkpoint = ModelCheckpoint(
    'best_cnn_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
)
# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# NOTE(review): the test split is used as validation data, so checkpoint
# selection and learning-rate scheduling both see the data used for the
# final test score.
history = cnn_model.fit(
    x_train_cnn,
    y_train,
    epochs=50,
    batch_size=512,
    validation_data=(x_test_cnn, y_test),
    callbacks=[model_checkpoint, lr_scheduler],
)

# Evaluate CNN Model
THE CNN MODEL ACHIEVES 96 PERCENT ACCURACY IN VALIDATION DATA WHICH IS REALLY IMPRESSIVE
Purpose: Evaluates the CNN on the test set and prints the test accuracy.

In [None]:
# Scores the in-memory model as left by training; the checkpoint file saved
# during training is not reloaded here.
cnn_scores = cnn_model.evaluate(x_test_cnn, y_test)
loss_cnn, accuracy_cnn = cnn_scores
print(f"CNN Model Accuracy on Test Set: {accuracy_cnn}")

# Plot CNN Accuracy
Purpose: Plots the training and validation accuracy of the CNN over epochs.

In [None]:
# Training vs. validation accuracy per epoch for the CNN.
plt.figure(figsize=(12, 6))
for history_key, legend_text in (('accuracy', 'Training Accuracy'),
                                 ('val_accuracy', 'Validation Accuracy')):
    plt.plot(history.history[history_key], label=legend_text)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('CNN Training and Validation Accuracy')
plt.legend()
plt.show()

# Plot CNN Loss
Purpose: Plots the training and validation loss of the CNN over epochs.

In [None]:
# Training vs. validation loss per epoch for the CNN.
plt.figure(figsize=(12, 6))
for history_key, legend_text in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=legend_text)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('CNN Training and Validation Loss')
plt.legend()
plt.show()

# Compare Model Accuracies
Purpose: Plots a bar chart comparing the test accuracies of all models (Decision Tree, Random Forest, XGBoost, Neural Network, CNN).

In [None]:
# Test accuracy of every model, in the order the bars are drawn.
accuracy_by_model = {
    'Decision Tree': accuracy_dt,
    'Random Forest': accuracy_rf,
    'XGBoost': accuracy_xgb,
    'Neural Network': accuracy_nn,
    'CNN': accuracy_cnn,
}
model_names = list(accuracy_by_model)
accuracy_scores = list(accuracy_by_model.values())

plt.figure(figsize=(10, 6))
plt.bar(model_names, accuracy_scores, color=['blue', 'green', 'red', 'purple', 'orange'])
plt.xlabel("Model")
plt.ylabel("Accuracy")
plt.title("Comparison of Model Accuracies (A-Z)")
plt.ylim(0, 1)
plt.show()

# Define Confusion Matrix Plotting Function
Purpose: Defines a function to plot a confusion matrix for model predictions, with true and predicted labels displayed as letters (A-Z).

In [None]:
def plot_confusion_matrix(y_true, y_pred, title):
    """Plot the confusion matrix of *y_pred* against *y_true* with letter tick labels.

    Uses the notebook-level ``number_of_classes`` and ``label_dictionary``.

    Args:
        y_true: True class ids.
        y_pred: Predicted class ids, aligned with ``y_true``.
        title: Title shown above the plot.
    """
    class_ids = list(range(number_of_classes))
    # Pin the label set so the matrix always has one row and column per class,
    # even if a class is missing from both y_true and y_pred; otherwise the
    # matrix could be smaller than the list of display labels.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    disp = ConfusionMatrixDisplay(
        confusion_matrix=cm,
        display_labels=[label_dictionary[i] for i in class_ids],
    )
    disp.plot(cmap='Blues')
    # Style the matrix axes explicitly rather than relying on pyplot's
    # notion of the current axes.
    ax = disp.ax_
    ax.set_title(title, fontsize=14)
    ax.set_xlabel("Predicted Label", fontsize=12)
    ax.set_ylabel("True Label", fontsize=12)
    ax.tick_params(axis='both', labelsize=11)
    disp.figure_.set_size_inches(14, 14)
    disp.figure_.tight_layout()
    plt.show()

# Plot Decision Tree Confusion Matrix

In [None]:
plot_confusion_matrix(y_true=y_test, y_pred=y_pred_dt, title="Confusion Matrix - Decision Tree")


# Plot Random Forest Confusion Matrix

In [None]:
plot_confusion_matrix(y_true=y_test, y_pred=y_pred_rf, title="Confusion Matrix - Random Forest")


# Plot XGBoost Confusion Matrix

In [None]:
plot_confusion_matrix(y_true=y_test, y_pred=y_pred_xgb, title="Confusion Matrix - XGBoost")


# Plot Neural Network Confusion Matrix


In [None]:
# Turn the softmax output into class ids by taking the most probable class.
nn_probabilities = nn_model.predict(x_test_flat)
y_pred_nn = nn_probabilities.argmax(axis=1)
plot_confusion_matrix(y_true=y_test, y_pred=y_pred_nn, title="Confusion Matrix - Neural Network")

# Plot CNN Confusion Matrix
The CNN classifies all letters other than I and L almost perfectly; the misclassifications between I and L are due to their similar shapes.

In [None]:
# Turn the softmax output into class ids by taking the most probable class.
cnn_probabilities = cnn_model.predict(x_test_cnn)
y_pred_cnn = cnn_probabilities.argmax(axis=1)
plot_confusion_matrix(y_true=y_test, y_pred=y_pred_cnn, title="Confusion Matrix - CNN")

# Define Class-wise Accuracy Plotting Function
Purpose: Defines a function to plot class-wise accuracy for each model, showing per-letter (A-Z) accuracy.

In [None]:
def plot_class_wise_accuracy(y_true, y_pred, model_name):
    """Plot, for every class, the fraction of its true samples predicted correctly.

    Uses the notebook-level ``number_of_classes`` and ``label_dictionary``.

    Args:
        y_true: True class ids.
        y_pred: Predicted class ids, aligned with ``y_true``.
        model_name: Name inserted into the plot title.
    """
    class_ids = list(range(number_of_classes))
    # Pin the label set so there is exactly one row per class even if a class
    # never occurs; otherwise the bar heights and tick labels could differ in
    # length.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    correct_predictions = np.diag(cm)
    total_per_class = cm.sum(axis=1)
    # Classes without any true samples get accuracy 0 instead of NaN from 0/0.
    class_wise_accuracy = np.divide(
        correct_predictions,
        total_per_class,
        out=np.zeros(len(class_ids)),
        where=total_per_class > 0,
    )
    letter_labels = [label_dictionary[i] for i in class_ids]
    # plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases
    # no longer provide.
    bar_colors = plt.get_cmap('viridis')(np.linspace(0, 1, len(letter_labels)))
    plt.figure(figsize=(12, 6))
    plt.bar(letter_labels, class_wise_accuracy, color=bar_colors)
    plt.xlabel("Label (A-Z)", fontsize=12)
    plt.ylabel("Accuracy", fontsize=12)
    plt.title(f"Class-wise Accuracy - {model_name}", fontsize=14)
    plt.ylim(0, 1)
    plt.xticks(fontsize=11)
    plt.yticks(fontsize=11)
    plt.tight_layout()
    plt.show()

# Plot Decision Tree Class-wise Accuracy

In [None]:
plot_class_wise_accuracy(y_true=y_test, y_pred=y_pred_dt, model_name="Decision Tree")


# Plot Random Forest Class-wise Accuracy

In [None]:
plot_class_wise_accuracy(y_true=y_test, y_pred=y_pred_rf, model_name="Random Forest")


# Plot XGBoost Class-wise Accuracy

In [None]:
plot_class_wise_accuracy(y_true=y_test, y_pred=y_pred_xgb, model_name="XGBoost")


# Plot Neural Network Class-wise Accuracy

In [None]:
plot_class_wise_accuracy(y_true=y_test, y_pred=y_pred_nn, model_name="Neural Network")


# Plot CNN Class-wise Accuracy

In [None]:
plot_class_wise_accuracy(y_true=y_test, y_pred=y_pred_cnn, model_name="CNN")


# Visualize Misclassified Images
Purpose: Identifies images misclassified by both the CNN and Neural Network models, randomly selects 10, and displays them with their true and predicted labels for all models.

In [None]:
# Positions of the test images that both the CNN and the dense network got wrong.
y_true_arr = np.asarray(y_test)
misclassified_indices = np.flatnonzero((y_pred_cnn != y_true_arr) & (y_pred_nn != y_true_arr))
# Show at most 10: np.random.choice raises ValueError when asked for more
# samples than exist with replace=False.
n_shown = min(10, misclassified_indices.size)
selected_misclassified_indices = np.random.choice(misclassified_indices, n_shown, replace=False)

plt.figure(figsize=(20, 10))
for i, index in enumerate(selected_misclassified_indices):
    plt.subplot(2, 5, i + 1)
    image = x_test_flat[index].reshape(28, 28)
    plt.imshow(image, cmap='gray')
    # `index` is a position, so look the label up in the plain array.
    true_label = label_dictionary[y_true_arr[index]]
    pred_dt = label_dictionary[y_pred_dt[index]]
    pred_rf = label_dictionary[y_pred_rf[index]]
    pred_xgb = label_dictionary[y_pred_xgb[index]]
    pred_nn = label_dictionary[y_pred_nn[index]]
    pred_cnn = label_dictionary[y_pred_cnn[index]]
    plt.title(f"True: {true_label}\nDT: {pred_dt}, RF: {pred_rf}\nXGB: {pred_xgb}\nNN: {pred_nn}\nCNN: {pred_cnn}", fontsize=12)
    plt.axis('off')
plt.tight_layout()
plt.show()

# Visualize Correctly Classified Images
Purpose: Identifies images correctly classified by all models, randomly selects 10, and displays them with their true labels.

In [None]:
# Positions of the test images that every model classified correctly.
y_true_arr = np.asarray(y_test)
all_models_correct = np.ones(y_true_arr.shape, dtype=bool)
for predictions in (y_pred_dt, y_pred_rf, y_pred_xgb, y_pred_nn, y_pred_cnn):
    all_models_correct &= (predictions == y_true_arr)
correctly_classified_indices = np.flatnonzero(all_models_correct)
# Show at most 10: np.random.choice raises ValueError when asked for more
# samples than exist with replace=False.
n_shown = min(10, correctly_classified_indices.size)
selected_correctly_classified_indices = np.random.choice(correctly_classified_indices, n_shown, replace=False)

plt.figure(figsize=(20, 10))
for i, index in enumerate(selected_correctly_classified_indices):
    plt.subplot(2, 5, i + 1)
    image = x_test_flat[index].reshape(28, 28)
    plt.imshow(image, cmap='gray')
    # `index` is a position, so look the label up in the plain array.
    true_label = label_dictionary[y_true_arr[index]]
    plt.title(f"True: {true_label}")
    plt.axis('off')
plt.tight_layout()
plt.show()

# Visualize Decision Tree Feature Importance
Purpose: Visualizes the feature importance of the Decision Tree model as a 28x28 heatmap, highlighting important pixels.

In [None]:
# One importance value per pixel, laid out on the 28x28 image grid.
dt_feature_importance = dt_model.feature_importances_.reshape(28, 28)
plt.figure(figsize=(8, 8))
plt.imshow(dt_feature_importance, interpolation='nearest', cmap='hot')
plt.title("Decision Tree Feature Importance")
plt.colorbar()
plt.axis('off')
plt.show()

# Visualize Random Forest Feature Importance

In [None]:
# One importance value per pixel, laid out on the 28x28 image grid.
rf_feature_importance = rf_model.feature_importances_.reshape(28, 28)
plt.figure(figsize=(8, 8))
plt.imshow(rf_feature_importance, interpolation='nearest', cmap='hot')
plt.title("Random Forest Feature Importance")
plt.colorbar()
plt.axis('off')
plt.show()

# Visualize XGBoost Feature Importance
Purpose: Visualizes the feature importance of the XGBoost model as a 28x28 heatmap.

In [None]:
# One importance value per pixel, laid out on the 28x28 image grid.
xgb_feature_importance = xgb_model.feature_importances_.reshape(28, 28)
plt.figure(figsize=(8, 8))
plt.imshow(xgb_feature_importance, interpolation='nearest', cmap='hot')
plt.title("XGBoost Feature Importance")
plt.colorbar()
plt.axis('off')
plt.show()

**After you have run the notebook fully and reached this cell, you will find the best_cnn_model.h5 file in the output directory of this notebook (kaggle/working). Download that file if it is not already in the project you downloaded from GitHub; if you already have it, no download is needed. Then run eminst_letter_gui and test the model for yourself.**