## The Testing phase

### Goal: Using Keras, implement a convolutional neural network (CNN) that classifies celebrity images


### Install the following libraries:

pip install numpy

pip install pandas

pip install scikit-learn

pip install tensorflow



In [1]:
# import libraries
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split # for splitting data into training / test sets
from tensorflow.keras.preprocessing.image import ImageDataGenerator # 
from tensorflow.keras import layers, models, optimizers

# --- Run configuration -------------------------------------------------------

# Base directory of the image dataset (handed to flow_from_directory below).
data_dir = "/workspace/DS4002Project3/DATA/celebrities_all"

# Number of full passes over the training data; 100 was chosen for the testing phase.
epochs = 100

# Number of images the model processes before its weights are updated once.
# 32 is a common default, which is why it was chosen here.
batch_size = 32

# Every image is resized to 150 x 150 (height, width) before entering the network.
image_size = (150, 150)

# Load and preprocess the images with ImageDataGenerator.
#   rescale=1./255       -> multiplies every pixel value by 1/255, so 0-255 inputs land in the 0-1 range
#   validation_split=0.2 -> reserves 20% of the images for validation; the other 80% are used for training
datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

# Options shared by the training and validation generators, so the two can only differ in `subset`.
flow_options = dict(
    target_size=image_size,    # resize every image to 150 x 150, as configured earlier
    batch_size=batch_size,     # 32 images per batch, as configured earlier
    class_mode='categorical',  # each celebrity is one class, so labels are categorical (one-hot)
)

# Training data generator -- yields batches drawn from the 80% training share of data_dir.
train_generator = datagen.flow_from_directory(data_dir, subset='training', **flow_options)

# Validation data generator -- identical settings, but drawn from the 20% held-out share.
validation_generator = datagen.flow_from_directory(data_dir, subset='validation', **flow_options)

# Now, we will build a simple CNN model using Keras.
# models.Sequential creates a plain stack of layers: each layer receives the output
# tensor of the layer before it as its input tensor, which makes this a
# "feed-forward" neural network.
model = models.Sequential([
    # Declare the input shape with an explicit Input layer instead of passing
    # `input_shape=` to the first Conv2D; Keras 3 warns about the latter in
    # Sequential models. Shape is (height, width, channels); 3 channels matches
    # flow_from_directory's default color_mode of 'rgb'.
    layers.Input(shape=(image_size[0], image_size[1], 3)),
    # Four convolution + max-pooling stages with 32, 64, 128 and 128 filters.
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Flatten the feature maps into a vector for the dense classifier head.
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    # One output unit per class found by the generator; softmax turns the
    # outputs into a probability distribution over the classes.
    layers.Dense(train_generator.num_classes, activation='softmax')
])

# Compile the model
# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical'; 'accuracy' is reported alongside the loss.
model.compile(optimizer=optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 3. Train the model
# Fit on the training generator for `epochs` passes, passing the validation
# generator as validation_data. The object returned by fit() is kept in `history`.
history = model.fit(x=train_generator, validation_data=validation_generator, epochs=epochs)

# 4. Evaluate the model and store accuracies
# evaluate() returns [loss, accuracy] because 'accuracy' is the only metric
# passed to compile(), so index 1 is the overall validation accuracy.
accuracy_results = model.evaluate(validation_generator)
print(f"Validation Accuracy: {accuracy_results[1]}")

# Per-image results need predictions in the same order as the file list.
# flow_from_directory shuffles batches by default, so validation_generator
# cannot be used for this; build an unshuffled generator over the same
# validation subset instead.
ordered_validation_generator = datagen.flow_from_directory(
    data_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
)

# One row of class probabilities per image; the most probable class is the prediction.
predicted_probabilities = model.predict(ordered_validation_generator)
predicted_classes = np.argmax(predicted_probabilities, axis=1)
true_classes = np.asarray(ordered_validation_generator.classes)

# class_indices maps class name -> integer index; invert it into an array so
# integer class ids can be translated back to names by indexing.
class_indices = ordered_validation_generator.class_indices
class_labels = np.array(sorted(class_indices, key=class_indices.get))

# Create a DataFrame to store the accuracy of each image:
# "accuracy" is 1.0 when the image was classified correctly and 0.0 otherwise,
# so its mean over all rows is the overall validation accuracy.
celebs_accuracy = pd.DataFrame({
    "image_path": ordered_validation_generator.filenames,
    "accuracy": (predicted_classes == true_classes).astype(float),
    "true_label": class_labels[true_classes],
    "predicted_label": class_labels[predicted_classes],
})

# Save the DataFrame to a CSV file (optional)
celebs_accuracy.to_csv("celebs_accuracy.csv", index=False)

print("Accuracy data saved to celebs_accuracy.csv.")


2024-11-20 00:30:10.849395: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732062610.866449    3431 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732062610.879650    3431 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-20 00:30:10.901899: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 8000 images belonging to 25 classes.
Found 2000 images belonging to 25 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-11-20 00:30:13.624705: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  self._warn_if_super_not_called()


Epoch 1/100
[1m 28/250[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2:19[0m 630ms/step - accuracy: 0.0434 - loss: 3.2294

KeyboardInterrupt: 