In [None]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)


Mounted at /content/gdrive


**Step 1: Read the CSV files for train, validation, and test sets**
*   Read the CSV files containing the class labels for the train, validation, and test sets.
*   Define the class labels for the classification task.
*   Retrieve the image file paths and labels from the CSV files for the train, validation, and test sets.







In [None]:
import cv2
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer

# Step 1: Read the CSV files for train, validation, and test sets
train_class_file = pd.read_csv('/content/gdrive/My Drive/TensorFlow/Data/classification/train/_classes.csv')
val_class_file = pd.read_csv('/content/gdrive/My Drive/TensorFlow/Data/classification/valid/_classes.csv')
test_class_file = pd.read_csv('/content/gdrive/My Drive/TensorFlow/Data/classification/test/_classes.csv')

class_labels = ['Abrasione', 'Ammaccatura', 'Crepa']

train_image_files = train_class_file['filename'].values
train_labels = train_class_file[['Abrasione', 'Ammaccatura', 'Crepa']].values

val_image_files = val_class_file['filename'].values
val_labels = val_class_file[['Abrasione', 'Ammaccatura', 'Crepa']].values

test_image_files = test_class_file['filename'].values
test_labels = test_class_file[['Abrasione', 'Ammaccatura', 'Crepa']].values






**Step 2: Preprocess the images**


*  Define a function preprocess_image to preprocess the images.

  *   Check if the image file exists.
  *   Read the image using OpenCV.
  *   Convert the image to grayscale.
  *   Resize the image to a target size.
  *   Normalize the pixel values.
  *   Add a single channel dimension to the image.
  *   Return the preprocessed image.
*   Specify the image directory paths for the train, validation, and test sets.
*   Load and preprocess the images for the train, validation, and test sets using the preprocess_image function.
* Dataset augmentation












In [None]:
def preprocess_image(file_path, target_size):
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"Image file not found: {file_path}")

    image = cv2.imread(file_path)
    if image is None:
        raise ValueError(f"Invalid image file: {file_path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    image = cv2.resize(image, target_size)
    image = image.astype(np.float32) / 255.0  # Normalize pixel values
    image = np.expand_dims(image, axis=-1)  # Add a single channel dimension
    return image


# Specify the image directory paths
train_image_dir = '/content/gdrive/My Drive/TensorFlow/Data/classification/train/'
val_image_dir = '/content/gdrive/My Drive/TensorFlow/Data/classification/valid/'
test_image_dir = '/content/gdrive/My Drive/TensorFlow/Data/classification/test/'

# Load and preprocess images for train set
train_preprocessed_images = []
for file_name in train_image_files:
    file_path = os.path.join(train_image_dir, file_name)
    try:
        image = preprocess_image(file_path, (416, 416))
        train_preprocessed_images.append(image)
    except (FileNotFoundError, ValueError) as e:
        print(f"Error processing image {file_path}: {str(e)}")

train_preprocessed_images = np.array(train_preprocessed_images)

# Load and preprocess images for validation set
val_preprocessed_images = []
for file_name in val_image_files:
    file_path = os.path.join(val_image_dir, file_name)
    try:
        image = preprocess_image(file_path, (416, 416))
        val_preprocessed_images.append(image)
    except (FileNotFoundError, ValueError) as e:
        print(f"Error processing image {file_path}: {str(e)}")

val_preprocessed_images = np.array(val_preprocessed_images)

# Load and preprocess images for test set
test_preprocessed_images = []
for file_name in test_image_files:
    file_path = os.path.join(test_image_dir, file_name)
    try:
        image = preprocess_image(file_path, (416, 416))
        test_preprocessed_images.append(image)
    except (FileNotFoundError, ValueError) as e:
        print(f"Error processing image {file_path}: {str(e)}")

test_preprocessed_images = np.array(test_preprocessed_images)


In [None]:
from imgaug import augmenters as iaa

def augment_image(image):
    # Define the image augmentations
    seq = iaa.Sequential([
        iaa.Fliplr(0.5),  # Horizontal flip with 50% probability
        iaa.Affine(
            rotate=(-20, 20),  # Rotate by -20 to 20 degrees
            scale=(0.8, 1.2),  # Scale by 0.8 to 1.2
            shear=(-0.2, 0.2),  # Shear by -0.2 to 0.2
        ),
    ])

    # Apply the image augmentations to the image
    augmented_image = seq.augment_image(image)

    return augmented_image


# Augment the train images
augmented_train_images = []
for image in train_preprocessed_images:
    augmented_image = augment_image(image)
    augmented_train_images.append(augmented_image)

train_preprocessed_images = np.array(augmented_train_images)


**Step 3: Encode the labels**


*   Use LabelBinarizer from scikit-learn to encode the class labels into binary vectors.

*   Encode the labels for the train, validation, and test sets.




In [None]:
lb = LabelBinarizer()

encoded_train_labels = lb.fit_transform(train_labels)
encoded_val_labels = lb.transform(val_labels)
encoded_test_labels = lb.transform(test_labels)



# Making sure our dataset has been preproccessed correctly.



In [None]:
# Print the encoded labels for train, validation, and test sets
print("Encoded Train Labels:")
print(encoded_train_labels)
print("Encoded Validation Labels:")
print(encoded_val_labels)
print("Encoded Test Labels:")
print(encoded_test_labels)


Encoded Train Labels:
[[0 0 1]
 [0 1 0]
 [0 0 1]
 [0 0 1]
 [1 0 0]
 [0 0 1]
 [1 0 0]
 [1 0 1]
 [0 1 1]
 [1 0 1]
 [0 0 1]
 [1 0 1]
 [0 0 1]
 [1 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 1 0]
 [1 0 1]
 [0 0 1]
 [1 0 1]
 [0 0 1]
 [0 1 1]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [1 0 1]]
Encoded Validation Labels:
[[1 0 0]
 [1 0 0]
 [0 1 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 1 0]
 [0 0 1]]
Encoded Test Labels:
[[0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 1]]


**We print the image name and the corresponding encoded labels**

In [None]:
for filename, encoded_label in zip(train_preprocessed_images, encoded_train_labels):
    print(f"Image: {filename}, Encoded Label: {encoded_label}")


Image: [[[0.66899514]
  [0.6672871 ]
  [0.66506594]
  ...
  [0.28104323]
  [0.28936505]
  [0.2952359 ]]

 [[0.6683326 ]
  [0.6668582 ]
  [0.6671952 ]
  ...
  [0.26953125]
  [0.28278953]
  [0.2920956 ]]

 [[0.670324  ]
  [0.6675016 ]
  [0.67001384]
  ...
  [0.26678923]
  [0.28280485]
  [0.2916667 ]]

 ...

 [[0.6556449 ]
  [0.6474265 ]
  [0.64395684]
  ...
  [0.35179228]
  [0.3466146 ]
  [0.349303  ]]

 [[0.65621936]
  [0.65159315]
  [0.6473575 ]
  ...
  [0.36577052]
  [0.35588238]
  [0.3540441 ]]

 [[0.6593597 ]
  [0.6613971 ]
  [0.654722  ]
  ...
  [0.36456805]
  [0.36202514]
  [0.3629098 ]]], Encoded Label: [0 0 1]
Image: [[[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 ...

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]], Encoded Label: [0 1 0]
Image: [[[0.]
  [0.]
  [0.]
  ...
  [0


**Step 4: Commencing the training!**

My idea is to do a comparison between a pretrained VGG16, LeNet-5 model architecture and a custom CNN architectured that I configuered.




**VGG16 Model Training**

*   Convert the preprocessed train images to RGB format for compatibility with the VGG16 model.
*   Load the pre-trained VGG16 model from TensorFlow's keras.applications module.
*   Freeze the pre-trained layers of the VGG16 model.
*   Add custom classification layers on top of the VGG16 model.
*   Compile the model with the Adam optimizer and categorical cross-entropy loss.
*   Train the model using the preprocessed train images and encoded train labels.







In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers

#Converting the images to rgb for the vgg16 model
train_preprocessed_images_rgb = np.repeat(train_preprocessed_images, 3, axis=-1)


# Load the pre-trained VGG16 model
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(416, 416, 3))

# Freeze the pre-trained layers
for layer in vgg16.layers:
    layer.trainable = False

# Add custom classification layers
modelVGG = tf.keras.Sequential()
modelVGG.add(vgg16)
modelVGG.add(layers.Flatten())
modelVGG.add(layers.Dense(256, activation='relu'))
modelVGG.add(layers.Dense(3, activation='softmax'))

# Compile the model
modelVGG.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
num_epochs = 10
batch_size = 32

modelVGG.fit(train_preprocessed_images_rgb, encoded_train_labels, epochs=num_epochs, batch_size=batch_size)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9feaa65f60>

**LeNet-5 Model Training**
*   Define the LeNet-5 model architecture using Conv2D, MaxPooling2D, Flatten, and Dense layers.
*   Compile the model with the Adam optimizer and categorical cross-entropy loss.
*   Train the model using the preprocessed train images and encoded train labels.





In [None]:
import tensorflow as tf
from tensorflow.keras import layers

# LeNet-5 model architecture
model_LeNet = tf.keras.Sequential()
model_LeNet.add(layers.Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(416, 416, 1)))
model_LeNet.add(layers.MaxPooling2D(pool_size=(2, 2)))
model_LeNet.add(layers.Conv2D(16, kernel_size=(5, 5), activation='relu'))
model_LeNet.add(layers.MaxPooling2D(pool_size=(2, 2)))
model_LeNet.add(layers.Flatten())
model_LeNet.add(layers.Dense(120, activation='relu'))
model_LeNet.add(layers.Dense(84, activation='relu'))
model_LeNet.add(layers.Dense(3, activation='softmax'))  # Modify the number of classes here

# Compile the model
model_LeNet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
num_epochs = 10
batch_size = 32

model_LeNet.fit(train_preprocessed_images, encoded_train_labels, epochs=num_epochs, batch_size=batch_size)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9fd4400310>

**Custom CNN Architecture Model Training**

*   Define a custom model architecture using Conv2D, MaxPooling2D, Flatten, and Dense layers.
*   Compile the model with the Adam optimizer and binary cross-entropy loss.
*   Train the model using the preprocessed train images and encoded train labels.



In [None]:
image_height = 416
image_width = 416
num_labels = 3
channels = 1


model = tf.keras.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(image_height, image_width, channels)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_labels, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
batch_size = 32
num_epochs = 10


model.fit(train_preprocessed_images, encoded_train_labels, epochs=num_epochs, batch_size=batch_size)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9fe882f010>

**Step 5: Model Evaluation and Metrics**
*   Evaluate all three models on the preprocessed validation images and encoded validation labels.
*   Make predictions on the preprocessed test images using the trained models.
*   Decode the model's output to obtain predicted labels.
*   Calculate accuracy, precision, recall, and F1-score for each class.
*   Print the classification report containing the evaluation metrics.


In [None]:
afrom sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

val_preproccessed_images_rgb = np.repeat(val_preprocessed_images, 3, axis=-1)
# Evaluate the model
val_loss_VGG, val_accuracy_VGG = modelVGG.evaluate(val_preproccessed_images_rgb, encoded_val_labels)
print("Validation Loss:", val_loss_VGG)
print("Validation Accuracy:", val_accuracy_VGG)

# Make predictions on test data
test_predictions = model.predict(test_preprocessed_images)
# Decode the model's output to obtain predicted labels
predicted_labels = lb.inverse_transform(test_predictions)

import numpy as np

# Convert encoded_test_labels to multiclass format
test_labels = np.argmax(encoded_test_labels, axis=1)

# Convert test_predictions to multiclass format
predicted_classes = np.argmax(test_predictions, axis=1)

# Calculate accuracy
accuracy_VGG = accuracy_score(test_labels, predicted_classes)
print("Accuracy:", accuracy_VGG)


# Calculate precision, recall, and F1-score for each class
precision_VGG = precision_score(test_labels, predicted_classes, average=None, zero_division=1)
recall_VGG = recall_score(test_labels, predicted_classes, average=None)
f1_VGG = f1_score(test_labels, predicted_classes, average=None)

# Print classification report
for i, class_label in enumerate(class_labels):
    print("Class:", class_label)
    print("Precision:", precision_VGG[i])
    print("Recall:", recall_VGG[i])
    print("F1-score:", f1_VGG[i])
    print()


Validation Loss: 49.077659606933594
Validation Accuracy: 0.5
Accuracy: 0.5
Class: Abrasione
Precision: 1.0
Recall: 1.0
F1-score: 1.0

Class: Ammaccatura
Precision: 1.0
Recall: 0.0
F1-score: 0.0

Class: Crepa
Precision: 0.3333333333333333
Recall: 1.0
F1-score: 0.5



**LeNet_5 Model Evaluation**


In [None]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the model
val_loss_LeNet, val_accuracy_LeNet = model_LeNet.evaluate(val_preprocessed_images, encoded_val_labels)
print("Validation Loss:", val_loss_LeNet)
print("Validation Accuracy:", val_accuracy_LeNet)

# Make predictions on test data
test_predictions = model.predict(test_preprocessed_images)
# Decode the model's output to obtain predicted labels
predicted_labels = lb.inverse_transform(test_predictions)

import numpy as np

# Convert encoded_test_labels to multiclass format
test_labels = np.argmax(encoded_test_labels, axis=1)

# Convert test_predictions to multiclass format
predicted_classes = np.argmax(test_predictions, axis=1)

# Calculate accuracy
accuracy_LeNet = accuracy_score(test_labels, predicted_classes)
print("Accuracy:", accuracy_LeNet)

# Calculate precision, recall, and F1-score for each class
precision_LeNet= precision_score(test_labels, predicted_classes, average=None, zero_division=1)
recall_LeNet = recall_score(test_labels, predicted_classes, average=None)
f1_LeNet = f1_score(test_labels, predicted_classes, average=None)

# Print classification report
for i, class_label in enumerate(class_labels):
    print("Class:", class_label)
    print("Precision:", precision_LeNet[i])
    print("Recall:", recall_LeNet[i])
    print("F1-score:", f1_LeNet[i])
    print()


Validation Loss: 38.67838668823242
Validation Accuracy: 0.5
Accuracy: 0.5
Class: Abrasione
Precision: 1.0
Recall: 1.0
F1-score: 1.0

Class: Ammaccatura
Precision: 1.0
Recall: 0.0
F1-score: 0.0

Class: Crepa
Precision: 0.3333333333333333
Recall: 1.0
F1-score: 0.5



**Custom CNN Architecture Evaluation**


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the model
val_loss, val_accuracy = model.evaluate(val_preprocessed_images, encoded_val_labels)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_accuracy)

# Make predictions on test data
test_predictions = model.predict(test_preprocessed_images)
# Decode the model's output to obtain predicted labels
predicted_labels = lb.inverse_transform(test_predictions)

import numpy as np

# Convert encoded_test_labels to multiclass format
test_labels = np.argmax(encoded_test_labels, axis=1)

# Convert test_predictions to multiclass format
predicted_classes = np.argmax(test_predictions, axis=1)

# Calculate accuracy
accuracy = accuracy_score(test_labels, predicted_classes)
print("Accuracy:", accuracy)

# Calculate precision, recall, and F1-score for each class
precision = precision_score(test_labels, predicted_classes, average=None, zero_division=1)
recall = recall_score(test_labels, predicted_classes, average=None)
f1 = f1_score(test_labels, predicted_classes, average=None)

# Print classification report
for i, class_label in enumerate(class_labels):
    print("Class:", class_label)
    print("Precision:", precision[i])
    print("Recall:", recall[i])
    print("F1-score:", f1[i])
    print()


Validation Loss: 0.47248631715774536
Validation Accuracy: 0.5
Accuracy: 0.5
Class: Abrasione
Precision: 1.0
Recall: 1.0
F1-score: 1.0

Class: Ammaccatura
Precision: 1.0
Recall: 0.0
F1-score: 0.0

Class: Crepa
Precision: 0.3333333333333333
Recall: 1.0
F1-score: 0.5



**Step 6: Model Comparison**



In [None]:
import pandas as pd

# Create an empty dataframe
df = pd.DataFrame(columns=['Model', 'Validation Loss', 'Validation Accuracy', 'Accuracy', 'Precision', 'Recall', 'F1-score'])

# Model VGG16
model1_name = 'Model VGG16'
model1_val_loss = val_loss_VGG
model1_val_accuracy = val_accuracy_VGG
model1_accuracy = accuracy_VGG
model1_precision = precision_VGG
model1_recall = recall_VGG
model1_f1 = f1_VGG

# Add Model VGG16 results to the dataframe
df = df.append({'Model': model1_name,
                'Validation Loss': model1_val_loss,
                'Validation Accuracy': model1_val_accuracy,
                'Accuracy': model1_accuracy,
                'Precision': model1_precision,
                'Recall': model1_recall,
                'F1-score': model1_f1}, ignore_index=True)

# Model LeNet
model2_name = 'Model LeNet'
model2_val_loss = val_loss_LeNet
model2_val_accuracy = val_accuracy_LeNet
model2_accuracy = accuracy_LeNet
model2_precision = precision_LeNet
model2_recall = recall_LeNet
model2_f1 = f1_LeNet

# Add Model LeNet results to the dataframe
df = df.append({'Model': model2_name,
                'Validation Loss': model2_val_loss,
                'Validation Accuracy': model2_val_accuracy,
                'Accuracy': model2_accuracy,
                'Precision': model2_precision,
                'Recall': model2_recall,
                'F1-score': model2_f1}, ignore_index=True)

# Model Custom CNN
model3_name = 'Custom CNN'
model3_val_loss = val_loss
model3_val_accuracy = val_accuracy
model3_accuracy = accuracy
model3_precision = precision
model3_recall = recall
model3_f1 = f1

# Add Model Custom CNN results to the dataframe
df = df.append({'Model': model3_name,
                'Validation Loss': model3_val_loss,
                'Validation Accuracy': model3_val_accuracy,
                'Accuracy': model3_accuracy,
                'Precision': model3_precision,
                'Recall': model3_recall,
                'F1-score': model3_f1}, ignore_index=True)

# Display the dataframe
print(df)


         Model  Validation Loss  Validation Accuracy  Accuracy  \
0  Model VGG16        49.077660                  0.5       0.5   
1  Model LeNet        38.678387                  0.5       0.5   
2   Custom CNN         0.472486                  0.5       0.5   

                        Precision           Recall         F1-score  
0  [1.0, 1.0, 0.3333333333333333]  [1.0, 0.0, 1.0]  [1.0, 0.0, 0.5]  
1  [1.0, 1.0, 0.3333333333333333]  [1.0, 0.0, 1.0]  [1.0, 0.0, 0.5]  
2  [1.0, 1.0, 0.3333333333333333]  [1.0, 0.0, 1.0]  [1.0, 0.0, 0.5]  


  df = df.append({'Model': model1_name,
  df = df.append({'Model': model2_name,
  df = df.append({'Model': model3_name,


### **Results**

Given the dataset is too small, with a test set of less than 5 images. It is clear that the custom CNN architecture I defined has performed best, the reason it is because I had to experiment with a lot of different configurations and number of layers, and it has performed best on the training metrics. but since the test set is very very small, percision, recall and f1 scores are the same. However, after adding image augmentation, the results have even improved drastically.

