### Downloading Zipped Dataset

In [None]:
# Download the zipped dataset from Dropbox. `dl=1` forces a direct file download (`dl=0` requests the preview page).
!wget -O Indian-Bird-Species.zip "https://www.dropbox.com/scl/fi/2054v5dycvdjy37gmt9ex/Indian-Bird-Species.zip?rlkey=q1siybqz3tzp4xtvyj35vkum4&dl=1"

### Creating Directory to Move Unzipped Data to this Directory

In [None]:
# `-p` keeps the cell re-runnable: no error if the directory already exists
!mkdir -p Indian-Bird-Species

### Unzipping Dataset

In [None]:
# `-o` overwrites existing files without prompting (a re-run cannot stall waiting for input); `-q` suppresses the per-file listing
!unzip -o -q Indian-Bird-Species.zip -d Indian-Bird-Species/

# VGG-19 Implementation

## Importing Dependencies

In [None]:
# Import OS to interact with the operating system
import os

# Import OpenCV, which is a Computer Vision library, and here we use it to deal with our image dataset
import cv2

# Import NumPy as our model trains on arrays which will be handled by NumPy
import numpy as np

# Import matplotlib and seaborn to visualize the metrics of our model
import matplotlib.pyplot as plt
import seaborn as sns

# Import Shutil to move, copy and delete directories and subdirectories
import shutil

# Import Math to use some mathematical computations
import math

# Import Keras ImageDataGenerator which is used for getting the input of the original data and further, it makes the transformation of this data on a random basis and gives the output resultant containing only the data that is newly transformed.
from keras.preprocessing.image import ImageDataGenerator

from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

from keras.models import Model

# Import EarlyStopping and ModelCheckpoint
# EarlyStopping is used to halt the training of the model when its accuracy does not improve by any significant amount
# ModelCheckpoint is used to save the model or weights in a checkpoint file at some time interval
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Import categorical_crossentropy
# categorical cross entropy compares each of the predicted probabilities to actual class output. It then calculates the score that penalizes the probabilities based on the distance from the expected value. That means how close or far from the actual value.
from keras.losses import categorical_crossentropy

# Import scikit-learn metrics which will be used to display the metrics of the model
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, precision_recall_curve, average_precision_score, roc_curve, auc

## Splitting Dataset into Training, Validation and Testing

In [None]:
# Root folder of the unzipped dataset (the `-d` target of the unzip step); each class is read as a sub-directory of it
DATASET_PATH = "Indian-Bird-Species"

In [None]:
def count_files_in_each_directory(directory_path: str) -> dict:
    """
    Count the entries (images or other files) inside each class sub-directory of a dataset folder.

    Only the immediate sub-directories of `directory_path` are treated as classes.
    Stray files at the top level (for example `.DS_Store` or a README) are skipped
    instead of raising `NotADirectoryError`.

    Arguments:
        directory_path: The path of the folder or directory that contains the required classes

    Returns:
        number_of_images: Dictionary mapping each class (sub-directory name) to the number of
                          entries it contains, with the classes in alphabetical order
    """
    number_of_images = {}

    # sorted() makes the class order deterministic; os.listdir() returns entries in arbitrary order
    for sub_directory in sorted(os.listdir(directory_path)):
        class_path = os.path.join(directory_path, sub_directory)

        # a plain file cannot be a class folder, so skip it
        if not os.path.isdir(class_path):
            continue

        number_of_images[sub_directory] = len(os.listdir(class_path))

    return number_of_images

In [None]:
# Count the entries in every class folder of the dataset and show the resulting {class: count} mapping.
# `dataset_images` is read again later by data_folder() to size each split.
dataset_images = count_files_in_each_directory(DATASET_PATH)
print(dataset_images)

Randomly display any image from each class of the dataset

In [None]:
# listing all the subdirectories in our main dataset directory
for bird_directory in os.listdir(DATASET_PATH):
    # picking two random images from each class of the dataset
    # (np.random.choice samples with replacement by default, so the same image may appear twice)
    for img in np.random.choice(os.listdir(os.path.join(DATASET_PATH, bird_directory)), size = 2):
        # read each image with the help of OpenCV
        image = cv2.imread(os.path.join(DATASET_PATH, bird_directory, img))
        # cv2.imread returns None (instead of raising) when a file cannot be decoded
        if image is None:
            print(f"Skipping unreadable file: {os.path.join(DATASET_PATH, bird_directory, img)}")
            continue
        # OpenCV loads images in BGR channel order, but matplotlib expects RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # plot the image using matplotlib
        plt.imshow(image); plt.axis("off")
        # give image a title using matplotlib
        plt.title(bird_directory)
        # show or display the image using matplotlib
        plt.show()

***Listing the number of images in each class of our dataset***

In [None]:
def list_images_in_each_directory(number_of_images: dict, folder_name: str) -> None:
    """
    Print a numbered, one-line-per-class summary of how many images a folder holds.

    Arguments:
        number_of_images: Dictionary mapping each class name to its image count.
        folder_name: Name of the folder the classes belong to (only used in the heading line).
    """
    print(f"The {folder_name} folder contains the following:")

    # number the classes from 1 in the dictionary's own iteration order
    for position, (class_name, image_count) in enumerate(number_of_images.items(), start=1):
        bullet = f"{position})"
        print(f"{bullet:<2} {image_count} images of {class_name}")

In [None]:
# Print the per-class image counts of the original (un-split) dataset
list_images_in_each_directory(dataset_images, DATASET_PATH)

### Declaring and Defining our Custom Function for Splitting the Dataset

In [None]:
def data_folder(folder_name: str, split_ratio: float, dataset_path: str = None, exclude_folders: tuple = ("Training", "Validation", "Testing")) -> None:
    """
    Create one split folder (e.g. Training, Validation or Testing) and copy a random
    share of every class of the dataset into it.

    Calling this function once per split yields Training:Validation:Testing = x:y:z, where
    each split receives floor(split_ratio * number of images in the class) images per class.
    For example: if a class contains 1000 images and the ratio train:validate:test = 70:15:15,
    then the Training folder will contain 700 images, the Validation folder 150 images and the
    Testing folder 150 images of that class.

    Images that were already copied into one of the `exclude_folders` are never picked again,
    so the splits do not share images. Sharing would leak training images into the
    validation/testing data and inflate the measured accuracy.

    If `./folder_name` already exists, nothing is copied and a message is printed.

    Arguments:
        folder_name: Name of the folder created for splitting the dataset
        split_ratio: Fraction (0 to 1) of the images of every class to copy into `folder_name`
        dataset_path: Folder that holds one sub-directory per class; defaults to the
                      notebook-level DATASET_PATH when None
        exclude_folders: Names of sibling split folders (relative to the working directory)
                         whose images must not be reused in this split
    """
    # fall back to the dataset location configured for the notebook
    if dataset_path is None:
        dataset_path = DATASET_PATH

    split_root = "./" + folder_name

    # never touch a split that was already created
    if os.path.exists(split_root):
        print(f"Can't create {folder_name} folder as it already exists.")
        return

    print(f"Creating {folder_name} folder...")
    os.mkdir(split_root)

    # listing all the subdirectories (classes) in our main dataset directory
    for sub_directory in os.listdir(dataset_path):
        source_directory = os.path.join(dataset_path, sub_directory)

        # stray files next to the class folders are not classes
        if not os.path.isdir(source_directory):
            continue

        target_directory = split_root + "/" + sub_directory
        if os.path.exists(target_directory):
            print(f"Can't create {sub_directory} as it already exists in the {folder_name} folder.")
            continue

        print(f"Creating {sub_directory} directory for {folder_name} folder...")
        os.makedirs(target_directory)

        all_files = os.listdir(source_directory)

        # collect the file names already handed out to the other splits for this class
        already_used = set()
        for other_folder in exclude_folders:
            if other_folder == folder_name:
                continue
            other_directory = os.path.join("./" + other_folder, sub_directory)
            if os.path.isdir(other_directory):
                already_used.update(os.listdir(other_directory))

        candidates = [file for file in all_files if file not in already_used]

        # the size is relative to the WHOLE class, but can never exceed what is still unused
        sample_size = min(math.floor(split_ratio * len(all_files)), len(candidates))
        if sample_size <= 0:
            continue

        # copy each randomly chosen image (without repetition) from the dataset to the split folder
        for file in np.random.choice(a = candidates, size = sample_size, replace = False):
            shutil.copy(os.path.join(source_directory, file), target_directory)

***Creating Training Folder***

In [None]:
# Copy floor(70% of each class) randomly chosen images into ./Training
data_folder("Training", 0.7)

***Creating Validation Folder***

In [None]:
# Copy floor(15% of each class) randomly chosen images into ./Validation
data_folder("Validation", 0.15)

***Creating Testing Folder***

In [None]:
# Copy floor(15% of each class) randomly chosen images into ./Testing
data_folder("Testing", 0.15)

In [None]:
# Folder created by data_folder("Training", ...); read by the training generator
TRAIN_PATH = "Training"

In [None]:
# Folder created by data_folder("Testing", ...); read by the testing generator
TEST_PATH = "Testing"

In [None]:
# Folder created by data_folder("Validation", ...); read by the validation generator
VALIDATE_PATH = "Validation"

***Listing the number of images in each class of our Training Folder***

In [None]:
# Count, then list, the images that were copied into each class folder of the Training split
train_images = count_files_in_each_directory(TRAIN_PATH)
list_images_in_each_directory(train_images, TRAIN_PATH)

***Listing the number of images in each class of our Testing Folder***

In [None]:
# Count, then list, the images that were copied into each class folder of the Testing split
test_images = count_files_in_each_directory(TEST_PATH)
list_images_in_each_directory(test_images, TEST_PATH)

***Listing the number of images in each class of our Validation Folder***

In [None]:
# Count, then list, the images that were copied into each class folder of the Validation split
validate_images = count_files_in_each_directory(VALIDATE_PATH)
list_images_in_each_directory(validate_images, VALIDATE_PATH)

## Preprocessing Images

**ImageDataGenerator()** is used to perform augmentation on a given image. Augmentation means that the images are duplicated with some kind of variations that increase the size of the training set without acquiring new images.
Augmentation (or variations) that we performed here are the following:
- *zoom_range:* zoom in or zoom out images in a given range
- *width_shift_range:* shift the images horizontally in a given range
- *height_shift_range:* shift the images vertically in a given range
- *shear_range:* compress vertically or horizontally in a given range, the original image is somewhat distorted.

In [None]:
# Training generator: applies random zoom, horizontal/vertical shifts and shear to the training images
train_data_generator = ImageDataGenerator(
    zoom_range = 0.15,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.15
)

# Validation and testing generators are created without any augmentation arguments,
# so the model is evaluated on un-augmented images.
# NOTE(review): no `rescale` is configured on any generator, so pixel values are not normalised — confirm this is intended
validate_data_generator = ImageDataGenerator()

test_data_generator = ImageDataGenerator()

***flow_from_directory()*** takes the path to a directory & generates batches of augmented data. <br>
**Arguments:**
- *directory:* string, or path to the directory.
- *target_size:* Tuple of integers (height, width), defaults to (256,256). The dimensions to which all images found will be resized.
- *batch_size:* Size of the batches of data (default: 32).
- *shuffle:* Whether to shuffle the data (default: True) If set to False, sorts the data in alphanumeric order.
- *class_mode:* One of "categorical", "binary", "sparse", "input", or None. Default: "categorical". Determines the type of label arrays that are returned:
    - "categorical" will be 2D one-hot encoded labels,
    - "binary" will be 1D binary labels, "sparse" will be 1D integer labels

In [None]:
# TODO: play around with 'batch_size': try with different batch sizes of 16, 64, etc.

# Stream augmented training batches from TRAIN_PATH: images resized to 224x224 (the input size the model is built with),
# 32 images per batch, shuffled, with one-hot encoded labels
train_generator = train_data_generator.flow_from_directory(
    directory = TRAIN_PATH,
    target_size = (224, 224),
    batch_size = 32,
    shuffle = True,
    class_mode = 'categorical'
)

In [None]:
# Stream validation batches from VALIDATE_PATH; shuffle is off, so images are yielded in sorted order
validate_generator = validate_data_generator.flow_from_directory(
    directory = VALIDATE_PATH,
    target_size = (224, 224),
    batch_size = 32,
    shuffle = False,
    class_mode='categorical'
)

In [None]:
# Stream testing batches from TEST_PATH.
# shuffle must stay False: the predictions are later compared position-by-position with `test_generator.classes`
test_generator = test_data_generator.flow_from_directory(
    directory = TEST_PATH,
    target_size = (224, 224),
    batch_size = 32,
    shuffle = False,
    class_mode='categorical'
)

In [None]:
# Class names as discovered by the training generator; a prediction index i is later mapped to labels[i]
labels = list(train_generator.class_indices.keys())
# Number of output units of the final softmax layer
NUMBER_OF_CLASSES = len(labels)

print(f"The dataset contains {NUMBER_OF_CLASSES} labels and these are:")
print(labels)

## Building VGG Model (19 Layers)

In [None]:
def VGG19(input_shape: tuple = (224, 224, 3)):
    """
    Build the convolutional part (16 convolution layers in 5 blocks) of the popular VGG-19:
        CONV2D * 2 -> MAX-POOL -> CONV2D * 2 -> MAX-POOL -> CONV2D * 4 -> MAX-POOL -> CONV2D * 4 -> MAX-POOL -> CONV2D * 4 -> MAX-POOL

    Every convolution uses 3x3 kernels, 'same' padding and ReLU activation; every pooling
    layer is a 2x2 max-pool with stride 2. The three fully connected layers that complete
    the 19 weight layers are NOT part of this model and have to be added on top of its output.

    Arguments:
        input_shape: shape of the dataset images as (height, width, channels)

    Returns:
        model: a Model() instance in Keras whose output is the feature map of the last max-pool layer
    """

    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # BLOCK 1
    X = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(X_input)     # layer 1
    X = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(X)           # layer 2
    X = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(X)

    # BLOCK 2
    X = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(X)          # layer 3
    X = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(X)          # layer 4
    X = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(X)

    # BLOCK 3
    X = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(X)          # layer 5
    X = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(X)          # layer 6
    X = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(X)          # layer 7
    X = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4')(X)          # layer 8
    X = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(X)

    # BLOCK 4
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(X)          # layer 9
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(X)          # layer 10
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(X)          # layer 11
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4')(X)          # layer 12
    X = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(X)

    # BLOCK 5
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(X)          # layer 13
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(X)          # layer 14
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(X)          # layer 15
    X = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv4')(X)          # layer 16
    X = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(X)

    # Create Model
    vgg_model = Model(inputs=X_input, outputs=X, name="VGG-19")

    # return VGG-19 as a model
    return vgg_model

In [None]:
# Build the convolutional base for 224x224 three-channel images (the target_size used by the data generators)
base_model = VGG19(input_shape=(224, 224, 3))

In [None]:
# Start the classification head from the output tensor of the convolutional base
head_model = base_model.output

# Flatten the output of our model
head_model = Flatten()(head_model)

# Constructing fully connected layer
# Two hidden layers of 4096 units followed by a softmax layer with one unit per class
head_model = Dense(4096, activation="relu", name="fc1")(head_model)                         # layer 17
head_model = Dense(4096, activation="relu", name="fc2")(head_model)                         # layer 18
head_model = Dense(NUMBER_OF_CLASSES, activation="softmax", name="fc3")(head_model)         # layer 19

In [None]:
# Combine the convolutional base and the fully connected head into the final trainable model
model = Model(
    inputs = base_model.input,
    outputs = head_model
)

In [None]:
# Print the model summary
model.summary()

### `compile` method
Configure the model for training.
#### *Arguments*
- **optimizer:** String (name of optimizer) or optimizer instance.
- **loss:** Loss function. Maybe a string (name of loss function), or a `keras.losses` instance.
- **metrics:** List of metrics to be evaluated by the model during training and testing. Each of this can be a string (name of a built-in function), function or a keras.metrics instance.

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (matches the one-hot labels
# produced by class_mode='categorical') and accuracy as the reported metric
model.compile(
    optimizer = 'adam',
    loss = categorical_crossentropy,
    metrics=['accuracy']
)

## Saving our Model as a JSON File

In [None]:
# Creating a directory to save our models (`-p`: no error if it already exists, so the cell can be re-run)
!mkdir -p model

In [None]:
# Saving our model as json file
# to_json() returns the model description as a JSON string, which is written to ./model/VGG-19.json;
# the trained weights are stored separately by the ModelCheckpoint callback
model_json = model.to_json()
with open("./model/VGG-19.json", 'w') as json_file:
    json_file.write(model_json)

## Defining and Initializing Callbacks for our Model

### Early Stopping
Stop training when a monitored metric has stopped improving.

Here the goal of training is to maximize the validation accuracy, so the metric to be monitored is 'val_accuracy' and the mode is 'max'.

#### *Arguments*
- **monitor:** Quantity to be monitored.
- **min_delta:** Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.
- **mode:** One of {"auto", "min", "max"}. In min mode, training will stop when the quantity monitored has stopped decreasing; in "max" mode it will stop when the quantity monitored has stopped increasing; in "auto" mode, the direction is automatically inferred from the name of the monitored quantity.
- **verbose:** Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 displays messages when the callback takes an action.
- **patience:** Number of epochs with no improvement after which training will be stopped.

In [None]:
# Stop training once 'val_accuracy' has not improved by at least 0.01 for 20 consecutive epochs
early_stopping = EarlyStopping(
    monitor = 'val_accuracy',
    min_delta = 0.01,
    mode = 'max',
    verbose = 1,
    patience = 20
)

### Model Checkpoint
ModelCheckpoint() callback is used in conjunction with training using model.fit() or model.fit_generator to save a model or weights (in a checkpoint file) at some interval, so the model or weights can be loaded later to continue the training from the state saved.

A few options this callback provides include:
- Whether to only keep the model that has achieved the "best performance" so far, or whether to save the model at the end of every epoch regardless of performance.
- Definition of 'best'; which quantity to monitor and whether it should be maximized or minimized.
- The frequency it should save at. Currently, the callback supports saving at the end of every epoch, or after a fixed number of training batches.
- Whether only weights are saved, or the whole model is saved.

#### *Arguments*
- **filepath:** string or path to save the model file.
- **monitor:** The metric name to monitor.
    - Prefix the name with "val_" to monitor validation metrics.
    - Use "loss" or "val_loss" to monitor the model's total loss.
    - If you specify metrics as strings, like "accuracy", pass the same string (with or without the "val_" prefix).
- **save_best_only:** if save_best_only=True, it only saves when the model is considered the "best" and the latest best model according to the quantity monitored will not be overwritten.
- **mode:** one of {'auto', 'min', 'max'}. If save_best_only=True, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For val_accuracy, this should be max, for val_loss this should be min, etc. In auto mode, the mode is set to max if the quantities monitored are 'accuracy' and are set to min for the rest of the quantities.

In [None]:
# Keep only the best model seen so far (highest 'val_accuracy') in ./model/best_model.h5;
# this file is loaded again after training, before the model is evaluated
model_checkpoint = ModelCheckpoint(
    filepath = './model/best_model.h5',
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only = True
)

### model.fit()
*Trains the model for a fixed number of epochs (dataset iterations).*

#### *Arguments*
- **x:** Input data. It could be:
    - A Numpy array (or array-like), or a list of arrays (in case, the model has multiple inputs).
    - A TensorFlow tensor, or a list of tensors (in case, the model has multiple inputs).
    - A dict mapping input names to the corresponding array/tensors, if the model has named inputs.
    - A `tf.data` dataset. Should return a tuple of either `(inputs, targets)` or `(inputs, targets, sample_weights)`.
    - A generator or keras.utils.Sequence returning `(inputs, targets)` or `(inputs, targets, sample_weights)`.
- **validation_data:** this can be either:
    - a generator for the validation data
    - a list (inputs, targets)
    - a list (inputs, targets, sample_weights). on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data.
- **steps_per_epoch:** Total number of steps (batches of samples) to yield from `generator` before declaring one epoch finished and starting the next epoch. It should typically be equal to the number of samples in your dataset divided by the batch size.
- **epochs:** Integer. Number of epochs to train the model. An epoch is an iteration over the entire data provided, as defined by `steps_per_epoch`.
- **verbose:** Verbosity mode (0 = silent, 1 = progress bar, 2 = one line per epoch).
- **callbacks:** List of callbacks to apply during training.

In [None]:
# TODO: Play around with 'epochs'. Change the number of epochs and note the metrics of the model to see how the number of epochs can affect the model accuracy.

# Train for at most 100 epochs; EarlyStopping may end training sooner and ModelCheckpoint saves the best model.
# The returned History object holds the per-epoch metrics that are plotted below.
model_history = model.fit(
    train_generator,
    validation_data = validate_generator,
    epochs = 100,
    verbose = 1,
    callbacks = [early_stopping, model_checkpoint]
)

#### Plotting Accuracy vs Validation Accuracy

In [None]:
# Apply the default seaborn theme to the plot
sns.set()

# One point per epoch: training accuracy (default colour) against validation accuracy (red)
plt.plot(model_history.history['accuracy'], label="Accuracy")
plt.plot(model_history.history['val_accuracy'], c='red', label="Validation Accuracy")

plt.title("Accuracy vs Validation Accuracy")
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.show()

#### Plotting Loss vs Validation Loss

In [None]:
# Apply the default seaborn theme to the plot
sns.set()

# One point per epoch: training loss (default colour) against validation loss (red)
plt.plot(model_history.history['loss'], label="Loss")
plt.plot(model_history.history['val_loss'], c='red', label="Validation Loss")

plt.title("Loss vs Validation Loss")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.legend(loc="best")

# Set the y-axis range to [0, 10]; loss values above 10 are cut off
plt.ylim(0, 10)

plt.show()

### model.load_weights()
Loads all layer weights from a saved file.
#### *Arguments*
- **filepath:** String, path to the weight file to load.

In [None]:
# Restore the weights of the best checkpoint (highest validation accuracy) written by ModelCheckpoint during training
model.load_weights("./model/best_model.h5")

### model.evaluate()
Evaluates the model on a data generator.

In [None]:
# Evaluate on the held-out testing data; with the compile() settings used above the result is [loss, accuracy]
evaluation = model.evaluate(test_generator)

print(f"The accuracy of our model on Testing Data is {(evaluation[1] * 100):.3f}%")

### model.predict()
Generates output predictions for the input samples from a data generator.
#### *Arguments*
- **x:** Input samples (here, generator yielding batches of input samples).
- **steps:** Total number of steps (batches of samples) to yield from `generator` before stopping.
- **verbose:** verbosity mode, 0 or 1.
- **workers:** Maximum number of threads to use for parallel processing.

In [None]:
# Predict class probabilities for every testing image (one row per image, one column per class).
# `steps` is a number of batches and must be an integer; np.ceil returns a float, so it is cast explicitly.
predictions = model.predict(
    test_generator,
    steps = int(np.ceil(test_generator.samples / test_generator.batch_size)),
    verbose = 0,
    workers = 0
)

In [None]:
print(predictions)

In [None]:
# number of images in our testing dataset
test_generator.samples

In [None]:
# batch size for testing data generator
test_generator.batch_size

In [None]:
# Store the predicted outcomes of our model
# Get the class with the highest probability for each sample
predicted_classes = np.argmax(predictions, axis=1)

print(predicted_classes)

In [None]:
# classes found in our testing data generator
print(test_generator.classes)

## Metrics for our VGG-19 Convolutional Neural Network

### Confusion Matrix

In [None]:
# Confusion matrix of true classes (rows) against predicted classes (columns) on the testing data
c_m = confusion_matrix(test_generator.classes, predicted_classes)

In [None]:
# Draw the confusion matrix with class names on both axes and save it as 'confusion_matrix.png'
sns.set_style("ticks")

# Setting default size of the plot
# Setting default fontsize used in the plot
# (rcParams are global: these two settings also affect every figure created afterwards)
plt.rcParams['figure.figsize'] = (10.0, 9.0)
plt.rcParams['font.size'] = 15


# Implementing visualization of Confusion Matrix
display_c_m = ConfusionMatrixDisplay(c_m, display_labels=labels)


# Plotting Confusion Matrix
# Setting a color map to be used
display_c_m.plot(cmap='OrRd', xticks_rotation=25)
# Other possible options for a color map are:
# "autumn_r", "Blues", "cool", "Greens", "Greys", "PuRd", "copper_r"


# Setting fontsize for xticks and yticks
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)


# Giving name to the plot
plt.title('Confusion Matrix', fontsize=24)


# Saving plot
# (must happen before plt.show(), which closes the current figure)
plt.savefig('confusion_matrix.png', transparent=True, dpi=500)

# Showing the plot
plt.show()

### Classification Report

In [None]:
# Per-class precision, recall, F1-score and support on the testing data, with rows named after `labels`
print("Classification Report contains the following metrics:\n")
print(classification_report(test_generator.classes, predicted_classes, target_names = labels))

In [None]:
# Aliases used by the curve plots below: the integer true class of every testing image and the predicted probability matrix
test_true_labels = test_generator.classes
predictions_matrix = predictions

### Precision-Recall Curve

In [None]:
# Plot one one-vs-rest precision-recall curve per class and annotate the mean average precision
sns.set()

# Initialize empty lists to store precision, recall, and average precision for each class
precision_list = []
recall_list = []
average_precision_list = []

# Iterate over each class
for class_idx in range(predictions_matrix.shape[1]):
    # Get the predicted probabilities for the current class
    predictions_class = predictions_matrix[:, class_idx]

    # Convert the one-vs-all labels for the current class
    class_labels = (test_true_labels == class_idx).astype(int)

    # Calculate precision, recall, and average precision for the current class
    precision, recall, threshold = precision_recall_curve(class_labels, predictions_class)
    avg_precision = average_precision_score(class_labels, predictions_class)

    # Append the results to the respective lists
    precision_list.append(precision)
    recall_list.append(recall)
    average_precision_list.append(avg_precision)

    # Plot the precision-recall curve for the current class
    plt.plot(recall, precision, marker='.')

# Set the plot labels and title
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")

# Show the legend with class names or indices
# (curves were plotted in class-index order, so they line up with `labels`)
plt.legend(labels)

# Calculate the mean Precision-Recall score for multi-class classification
mean_average_precision = np.mean(average_precision_list)

# Add the mean average precision as a text annotation on the plot
plt.annotate(f"Mean Average Precision: {mean_average_precision:.4f}", xy=(0.5, 0.1), xycoords='axes fraction', fontsize=12, color='black')

# Show the plot
plt.show()

### ROC-AUC Curve

In [None]:
# Plot one one-vs-rest ROC curve per class and annotate the mean of the per-class AUC values
sns.set()

# Initialize empty lists to store false positive rate, true positive rate, and AUC for each class
fpr_list = []
tpr_list = []
auc_list = []

# Iterate over each class
for class_idx in range(predictions_matrix.shape[1]):
    # Get the predicted probabilities for the current class
    predictions_class = predictions_matrix[:, class_idx]

    # Convert the one-vs-all labels for the current class
    class_labels = (test_true_labels == class_idx).astype(int)

    # Calculate false positive rate, true positive rate, and AUC for the current class
    fpr, tpr, _ = roc_curve(class_labels, predictions_class)
    roc_auc = auc(fpr, tpr)

    # Append the results to the respective lists
    fpr_list.append(fpr)
    tpr_list.append(tpr)
    auc_list.append(roc_auc)

    # Plot the ROC curve for the current class
    plt.plot(fpr, tpr, marker='.')

# Set the plot labels and title
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")

# Show the legend with class names or indices
# (curves were plotted in class-index order, so they line up with `labels`)
plt.legend(labels)

# Calculate the mean ROC-AUC score for multi-class classification
mean_roc_auc_score = np.mean(auc_list)

# Add mean ROC-AUC score as a text annotation on the plot
plt.annotate(f"Mean ROC-AUC: {mean_roc_auc_score:.4f}", xy=(0.7, 0.2), xycoords='axes fraction', fontsize=12, color='black')

# Show the plot
plt.show()

## Perform Prediction on Some Random Images

### model.predict()
Generates output predictions for the input samples.
#### *Arguments*
- **x:** Input samples. It could be:
    - A Numpy array (or array-like), or a list of arrays (in case, the model has multiple inputs).
    - A TensorFlow tensor, or a list of tensors (in case, the model has multiple inputs).
    - A generator

### numpy.argmax()
Returns the indices of the maximum values along an axis.
#### *Arguments*
- **a:** *array_like* <br> Input array.
- **axis:** *int, optional* <br> By default, the index is into the flattened array, otherwise along the specified axis.
- **out:** *array, optional* <br> If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype.
- **keepdims:** *bool, optional* <br> If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the result will broadcast correctly against the array.
#### *Returns*
- **index_array:** *ndarray of ints* <br> Array of indices into the array. It has the same shape as a.shape with the dimension along axis removed. If keepdims is set to True, then the size of axis will be 1 with the resulting array having same shape as a.shape.

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def predict_image(image_path, nn_model, classes):
    """
    Predict the class of a single image file, print the predicted and actual labels and display the image.

    The actual label is taken from the name of the folder that directly contains the image,
    so the image is expected to live in a folder named after its class.

    Arguments:
        image_path: string, the path of the image.
        nn_model: the trained model used for prediction; it must accept inputs of shape (1, 224, 224, 3).
        classes: list of strings, containing the names of the classes in the order of the model's outputs.

    Returns:
        predicted_label: string, the predicted class label.

    Raises:
        ValueError: if the file at image_path cannot be read as an image.
    """

    # read the image from image_path with the help of OpenCV
    image_file = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising
    if image_file is None:
        raise ValueError(f"Could not read image: {image_path}")

    # OpenCV loads images in BGR channel order, while the model was trained on RGB images
    # (Keras data generators load RGB) and matplotlib also displays RGB
    rgb_image = cv2.cvtColor(image_file, cv2.COLOR_BGR2RGB)

    # resize the image to (224, 224) as our model accepts the input of size (224, 224)
    resized_image = cv2.resize(rgb_image, (224, 224))

    # No pixel rescaling here: the data generators used for training do not rescale either

    # Expand the dimensions to match the model's input shape (add the batch dimension)
    input_image = np.expand_dims(resized_image, axis=0)

    # Make the prediction using the model
    prediction_probabilities = nn_model.predict(input_image, verbose=0)

    # Get the index of the predicted class with the highest probability
    predicted_class_index = np.argmax(prediction_probabilities, axis=1)[0]

    # Get the corresponding class label
    predicted_label = classes[predicted_class_index]

    print("Predicted Label: ", predicted_label)
    # the parent folder name is the class; works for any path depth and path separator
    print("Actual Label: ", os.path.basename(os.path.dirname(image_path)))

    plt.imshow(rgb_image)
    plt.axis("off")
    plt.show()

    return predicted_label

In [None]:
# list all the directories in our main dataset path
for directory in os.listdir(DATASET_PATH):
    # pick three random images from each subdirectory of the main dataset path
    # (np.random.choice samples with replacement by default, so the same image may be picked more than once)
    for img in np.random.choice(os.listdir(os.path.join(DATASET_PATH, directory)), size = 3):
        # perform prediction on each image that is being chosen
        predict_image(os.path.join(DATASET_PATH, directory, img), model, labels)
        print("\n\n")