## The Testing phase

### Goal: Using Keras, train a CNN model to correctly identify celebrity images. Utilize 100 training epochs. See Keras documentation in the ReadMe file citations.


### Install the following libraries:

pip install numpy

pip install pandas

pip install scikit-learn

pip install tensorflow



In [1]:
!pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable


In [None]:
# import libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split # for splitting data into training / test sets
from tensorflow.keras.preprocessing.image import ImageDataGenerator # 
from tensorflow.keras import layers, models, optimizers

# ---- Run configuration ----
data_dir = " "  # base directory holding one sub-folder of images per celebrity (e.g. "celebrities_all"); set before running
image_size = (150, 150)  # every image is resized to 150 x 150
# Number of images the generators yield per batch. The model's weights are
# updated after each batch. 32 was chosen because it is a common default.
batch_size = 32
epochs = 100  # we settled on 100 epochs for the testing phase

# ---- Image loading / preprocessing ----
# rescale multiplies every pixel value by 1/255 (for 8-bit images this maps 0-255 into the 0-1 range).
# validation_split=0.2 reserves 20% of the images for validation; the remaining 80% are used for training.
datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

# Options shared by both generators, so the training and validation pipelines cannot drift apart.
_flow_options = dict(
    directory=data_dir,        # path to the target directory
    target_size=image_size,    # standardizes image size to 150 x 150
    batch_size=batch_size,     # 32 images per batch
    class_mode='categorical',  # each celebrity is its own class; labels are categorical
)

# Training data generator: draws from the 80% training subset.
train_generator = datagen.flow_from_directory(subset='training', **_flow_options)

# Validation data generator: same options, but draws from the 20% validation subset.
# shuffle=False keeps the image order fixed so predictions stay aligned with
# the generator's `filenames` and `classes` attributes.
validation_generator = datagen.flow_from_directory(
    subset='validation',
    shuffle=False,
    **_flow_options,
)

# Now, we will build a simple CNN model using Keras.
# First, using the models.Sequential class, a sequential model will be created to produce a stack of layers. Each layer will have one input
# tensor and one output tensor. This will create a "feed-forward" neural network wherein each layer is directly connected to the one before it.


def _conv_block(filters, **conv_kwargs):
    """Return the layers of one convolutional block, in stacking order.

    A block is: 3x3 Conv2D with ReLU -> BatchNormalization -> 2x2 MaxPooling2D
    -> Dropout(0.25). Extra keyword arguments are forwarded to Conv2D (used to
    give the first block its input shape).
    """
    return [
        layers.Conv2D(filters, (3, 3), activation='relu', **conv_kwargs),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),  # Dropout for regularization
    ]


model = models.Sequential(
    # Four convolutional blocks with increasing filter depth: 32, 64, 128, 256.
    # Only the first block declares the input shape (height, width, 3 channels).
    _conv_block(32, input_shape=(image_size[0], image_size[1], 3))
    + _conv_block(64)
    + _conv_block(128)
    + _conv_block(256)
    # Classifier head: flatten, one hidden dense layer, then one softmax
    # output unit per class found by the training generator.
    + [
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),  # Higher dropout for dense layers
        layers.Dense(train_generator.num_classes, activation='softmax'),  # Output layer
    ]
)

# Configure training: Adam optimizer with its default settings, categorical
# cross-entropy loss (matches class_mode='categorical'), accuracy as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(),
    metrics=['accuracy'],
)

# Per-image prediction log: one list per column. LoggingCallback appends one
# row per validation image after every epoch; the lists are written to CSV at
# the end of the run.
results = {
    column: []
    for column in ("epoch", "filename", "true_class", "predicted_class", "confidence")
}


class LoggingCallback(tf.keras.callbacks.Callback):
    """Record per-image validation predictions at the end of every epoch.

    After each epoch the model is run over the validation generator and one
    row per validation image is appended to a column-oriented log with the
    keys "epoch", "filename", "true_class", "predicted_class" and "confidence".

    Args:
        validation_generator: Generator to predict on. It must expose
            ``filenames`` and ``classes`` and must not shuffle, otherwise the
            predictions will not line up with those attributes.
        results_store: Optional dict of lists to append to. Defaults to the
            module-level ``results`` dict.
    """

    def __init__(self, validation_generator, results_store=None):
        # Initialize the Keras Callback base class before adding our own state.
        super().__init__()
        self.validation_generator = validation_generator
        self.results_store = results if results_store is None else results_store

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation generator and append one row per image.

        Args:
            epoch: Zero-based epoch index supplied by Keras; logged as epoch + 1.
            logs: Metric dict supplied by Keras (unused).

        Raises:
            ValueError: If the number of predictions differs from the number
                of validation filenames.
        """
        predictions = self.model.predict(self.validation_generator)
        filenames = self.validation_generator.filenames
        if len(predictions) != len(filenames):
            # Fail loudly rather than write misaligned rows to the log.
            raise ValueError(
                f"Got {len(predictions)} predictions for {len(filenames)} validation files."
            )

        predicted_classes = predictions.argmax(axis=1)
        # Probability of the winning class, i.e. predictions[i, predicted_classes[i]].
        confidences = predictions.max(axis=1)

        store = self.results_store
        store["epoch"].extend([epoch + 1] * len(filenames))
        store["filename"].extend(filenames)
        store["true_class"].extend(self.validation_generator.classes)
        store["predicted_class"].extend(predicted_classes)
        store["confidence"].extend(confidences)

# Create the per-epoch prediction logger, bound to the validation generator
logger_callback = LoggingCallback(validation_generator)

# Train the model for `epochs` epochs. The validation generator is used as validation data,
# and the logger callback records validation predictions after each epoch.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    callbacks=[logger_callback]
)

# Evaluate the trained model on the validation generator.
# The model was compiled with metrics=['accuracy'], so index 1 of the returned values is the accuracy.
accuracy_results = model.evaluate(validation_generator)
print(f"Validation Accuracy: {accuracy_results[1]}")

# Save the per-image, per-epoch prediction log to CSV (index=False omits the DataFrame row index)
results_df = pd.DataFrame(results)
results_df.to_csv("epoch_results.csv", index=False)
print("Results saved to epoch_results.csv.")