In [1]:
# Mount Google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# Import necessary libraries and packages
import os
import cv2
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

In [13]:
# Set directory path
data_dir= '/content/drive/My Drive/CNN-MultiClass-Classification/Data/training_data'

In [14]:
# Define a function to get the data from the directory
def get_data(data_dir, img_size=224):
    '''
    Loads images from the directory and assigns labels according to their folder name.
    Args:
    - data_dir: Directory where all classes will have folders with images.
    - img_size: Size to which images will be resized

    Returns:
    - images: A list of the image arrays.
    - labels: A list of corresponding labels.
    '''

    # List of class labels
    labels= ['driving_license', 'social_security', 'others']

    # Lists to store the image data and labels separately
    images= []
    image_labels= []

    # Loop through each class label folder and load the images
    for label in labels:
        path= os.path.join(data_dir, label)
        class_num= labels.index(label)

        for img in os.listdir(path):
            try:

                # Convert BGR to RGB
                img_arr= cv2.imread(os.path.join(path, img))[..., ::-1]

                # Resize the image
                resized_arr= cv2.resize(img_arr, (img_size, img_size))

                # Append the image to the list
                images.append(resized_arr)

                # Append the label (0, 1, or 2)
                image_labels.append(class_num)
            except Exception as e:
                print(f'Error loading image {img}: {e}')

    return np.array(images), np.array(image_labels)

In [15]:
# Define the ImageDataGenerator for data augmentation and split the data
datagen= ImageDataGenerator(
    rotation_range= 45,
    zoom_range= 0.3,
    width_shift_range= 0.2,
    height_shift_range= 0.2,
    horizontal_flip= True,
    validation_split= 0.2
)

In [16]:
# Load the dataset. X contains the images, y contains the labels
X, y= get_data(data_dir)

# Normalize the image data to range [0,1]
X = X / 255.0

# Split the data into training and validation sets
X_train, X_val, y_train, y_val= train_test_split(X, y, test_size= 0.2, stratify= y, random_state= 42)

# Verify the shapes of the datasets
print(f'X_train Shape: {X_train.shape}, y_train Shape: {y_train.shape}')
print(f'X_val Shape: {X_val.shape}, y_val Shape: {y_val.shape}')

X_train Shape: (480, 224, 224, 3), y_train Shape: (480,)
X_val Shape: (120, 224, 224, 3), y_val Shape: (120,)


### Data Summary:

- **X_train Shape**: (480, 224, 224, 3) — 480 training images (224x224, RGB), where `3` is the number of color channels.
- **y_train Shape**: (480,) — 480 corresponding labels for training images (3 classes: driving_license, social_security, others).
- **X_val Shape**: (120, 224, 224, 3) — 120 validation images (224x224, RGB), where `3` is the number of color channels.
- **y_val Shape**: (120,) — 120 corresponding labels for validation images (3 classes: driving_license, social_security, others).


In [25]:
# Build the CNN model
model= Sequential()

# Input layer
model.add(Input(shape= (224,224,3)))

# First convolutional layer
model.add(Conv2D(32, 3, padding= 'same', activation= 'relu'))

# Batch normalization to normalize the inputs to the next layer
model.add(BatchNormalization())

# Max pooling layer to reduce spacial dimensions of the feature map
model.add(MaxPool2D())

# Dropout layer to prevent overfitting
model.add(Dropout(0.4))

# Flatten the feature map into a 1D vector for the fully connected layer
model.add(Flatten())

# Fully connected layer
model.add(Dense(128, activation= 'relu'))

# Output layer with 3 units (one for each class) and softmax activation for multi-class classification
model.add(Dense(3, activation= 'softmax'))

# Compile the model
model.compile(optimizer= Adam(learning_rate= 0.00001), loss= tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True), metrics= ['accuracy'])

In [26]:
# Calculate the class weights
class_weights= compute_class_weight('balanced', classes= np.unique(y_train), y= y_train)

# Convert to a dictionary format as required by Keras
class_weights_dict= {i: class_weights[i] for i in range(len(class_weights))}

print(f'Class Weights; {class_weights_dict}')

# Use the ImageDataGenerator to fit the model
train_gen= datagen.flow(X_train, y_train, batch_size= 32, subset= 'training')
val_gen= datagen.flow(X_val, y_val, batch_size= 32, subset= 'validation')

# Early stopping
early_stop= EarlyStopping(monitor= 'val_loss', patience= 10, restore_best_weights= True)

# Learning rate scheduler
lr_scheduler= ReduceLROnPlateau(monitor= 'val_loss', factor= 0.1, patience= 5)

# Train the model
history= model.fit(train_gen, epochs= 200, validation_data= val_gen, callbacks= [early_stop, lr_scheduler], class_weight= class_weights_dict)

Class Weights; {0: 1.0, 1: 1.0, 2: 1.0}
Epoch 1/200


  output, from_logits = _get_logits(
  self._warn_if_super_not_called()


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 251ms/step - accuracy: 0.4542 - loss: 1.4056 - val_accuracy: 0.2917 - val_loss: 1.9146 - learning_rate: 1.0000e-05
Epoch 2/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 172ms/step - accuracy: 0.5574 - loss: 1.0808 - val_accuracy: 0.2917 - val_loss: 1.5614 - learning_rate: 1.0000e-05
Epoch 3/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 153ms/step - accuracy: 0.6495 - loss: 0.8894 - val_accuracy: 0.2917 - val_loss: 1.6398 - learning_rate: 1.0000e-05
Epoch 4/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 179ms/step - accuracy: 0.6821 - loss: 0.8143 - val_accuracy: 0.2917 - val_loss: 1.5118 - learning_rate: 1.0000e-05
Epoch 5/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 149ms/step - accuracy: 0.7053 - loss: 0.6833 - val_accuracy: 0.2917 - val_loss: 1.7403 - learning_rate: 1.0000e-05
Epoch 6/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [27]:
# Evaluate the model on the validation data
val_preds= np.argmax(model.predict(X_val), axis= -1)

# Print classification report and confusion matrix
print(classification_report(y_val, val_preds, target_names= ['driving_license', 'social_security', 'others']))
print(confusion_matrix(y_val, val_preds))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
                 precision    recall  f1-score   support

driving_license       0.89      0.85      0.87        40
social_security       0.76      0.95      0.84        40
         others       0.97      0.78      0.86        40

       accuracy                           0.86       120
      macro avg       0.87      0.86      0.86       120
   weighted avg       0.87      0.86      0.86       120

[[34  5  1]
 [ 2 38  0]
 [ 2  7 31]]


### Final Model Performance Analysis:

The model's performance on the validation set is summarized below:

- **Overall Accuracy**: The model achieved an accuracy of **86%** on the validation set. This indicates that 86% of the predictions made by the model were correct.

#### Class-wise Performance:
1. **Driving License**:
   - **Precision**: 0.89 — The model correctly identified 89% of the `driving_license` cases it predicted. A few misclassifications occurred, but the precision is strong.
   - **Recall**: 0.85 — The model was able to correctly detect 85% of all actual `driving_license` cases. This indicates a slight miss in identifying some of the true positive cases.
   - **F1-Score**: 0.87 — The balance between precision and recall for this class is very good, reflecting a solid overall performance.

2. **Social Security**:
   - **Precision**: 0.76 — The model is less confident in predicting `social_security`, as 24% of the cases it predicted were incorrect (false positives).
   - **Recall**: 0.95 — The recall is very high, meaning the model successfully identified 95% of the actual `social_security` cases. This shows a strong ability to detect most true positives, though the lower precision suggests it is still misclassifying some other classes as `social_security`.
   - **F1-Score**: 0.84 — Despite the lower precision, the F1-score remains strong due to the high recall.

3. **Others**:
   - **Precision**: 0.97 — The model was highly accurate in predicting `others` when it made a prediction, with only a few false positives.
   - **Recall**: 0.78 — The model correctly identified 78% of all actual `others` cases. However, it missed some true cases, as indicated by the lower recall.
   - **F1-Score**: 0.86 — A balanced performance overall for the `others` class, but there is still room for improvement in recall.

#### Confusion Matrix:
The confusion matrix provides further insight into the model’s performance:

\[
\begin{bmatrix}
  34 & 5 & 1 \\
  2 & 38 & 0 \\
  2 & 7 & 31 \\
\end{bmatrix}
\]

- The diagonal values (34, 38, and 31) represent the number of correct predictions for each class.
- The model misclassified 5 instances of `driving_license` as `social_security` and 1 as `others`.
- Similarly, 7 instances of `others` were misclassified as `social_security`.

### Conclusion:
The final model performs well overall, with strong accuracy across all classes. The model has a high recall for the `social_security` class but slightly lower precision, indicating room for improvement in minimizing false positives for this class. The `others` class also shows strong precision, but its recall could be further improved.



In [29]:
# Save the model
model.save('model.keras')