In [47]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator, 
    load_img,
    img_to_array,
    DirectoryIterator
)
from keras.applications.resnet50 import ResNet50, preprocess_input
import PIL

import numpy as np
import pathlib

from typing import Dict

First, load the ResNet50 model pre-trained on ImageNet from Keras Applications, without its original classification head (`include_top=False`).

In [31]:
# ResNet50 backbone with ImageNet weights. The original classification head is
# left out (include_top=False) so a new head can be stacked on top of it.
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

Then, figure out how many celebrities we have (just the number of directories inside the `Celebrity Faces Dataset`)

In [32]:
# Root folder of the dataset (relative path); one sub-directory per celebrity.
path_to_celebrity_dataset = pathlib.Path("ml_project_4_face_detection/backend/Celebrity Faces Dataset/")
assert path_to_celebrity_dataset.is_dir(), f"Could not find {path_to_celebrity_dataset}."

# Count sub-directories only. Stray top-level files (e.g. ".DS_Store") are not
# celebrities; counting them would make the softmax layer wider than the number
# of classes the directory iterator discovers.
number_celebrities = sum(1 for entry in path_to_celebrity_dataset.iterdir() if entry.is_dir())
print(f"There are {number_celebrities} celebrities in the dataset.")

There are 17 celebrities in the dataset.


Now, we'll instantiate a model on top of our pre-trained ResNet 50 model.

In [33]:
# Assemble the classifier layer by layer: the ResNet50 backbone, a global
# average pooling step that flattens its feature maps into one vector per
# image, and a softmax layer with one output per celebrity.
model = tf.keras.Sequential()
model.add(base_model)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(number_celebrities, activation="softmax"))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d_2  (None, 2048)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_2 (Dense)             (None, 17)                34833     
                                                                 
Total params: 23622545 (90.11 MB)
Trainable params: 23569425 (89.91 MB)
Non-trainable params: 53120 (207.50 KB)
_________________________________________________________________


Now we'll use TensorFlow utilities (`ImageDataGenerator`) to load, augment (shear, zoom, horizontal flip), and preprocess the celebrity images.

In [34]:
# Spatial size every image is resized to; matches the backbone's input_shape.
target_image_size = (224, 224)

# Random augmentation (flip / zoom / shear) plus the ResNet50-specific
# preprocessing function applied to every image.
training_data_generator = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    preprocessing_function=preprocess_input,
)

# Stream batches from disk; labels are one-hot encoded ("categorical").
training_data_streamer: DirectoryIterator = training_data_generator.flow_from_directory(
    directory=path_to_celebrity_dataset,
    class_mode="categorical",
    batch_size=32,
    target_size=target_image_size,
)


Found 1800 images belonging to 17 classes.


Now we'll compile our model with the Adam optimizer and the categorical cross-entropy loss function.

In [35]:
# Categorical cross-entropy pairs with the one-hot labels produced by the data
# streamer; Adam is run with a learning rate of 1e-4.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["accuracy"],
)



In [36]:
# Number of passes over the training images.
training_epochs = 10

# Keep the object returned by fit so the training run can be inspected later.
history = model.fit(x=training_data_streamer, epochs=training_epochs)

Epoch 1/10


2023-07-22 14:42:37.837185: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Now, we use this fine-tuned model to make predictions:

In [49]:
def load_image_from_file(image_path: pathlib.Path, target_image_size=target_image_size) -> PIL.Image.Image:
    """
    Load an image from disk, resized to `target_image_size`.

    Args:
        image_path: Location of the image file.
        target_image_size: Size passed to `load_img` as `target_size`;
            defaults to the module-level `target_image_size`.

    Returns:
        The image object returned by `load_img`.
    """
    return load_img(image_path, target_size=target_image_size)

def get_predicted_celebrity_name_from_idx(data_streamer: DirectoryIterator, predicted_idx: int) -> str:
    """
    Translate a predicted class index back into a celebrity name.

    `data_streamer.class_indices` maps name -> index, so it is flipped into an
    index -> name lookup first. An index that is not present in the mapping
    yields `None`.
    """
    name_to_index = data_streamer.class_indices
    index_to_name = dict(zip(name_to_index.values(), name_to_index.keys()))
    return index_to_name.get(predicted_idx)

def make_prediction(image: PIL.Image.Image, model: tf.keras.Model, data_streamer: DirectoryIterator) -> str:
    """
    Predict which celebrity is shown in a single image.

    Args:
        image: The image to classify.
        model: The model whose `predict` method is used.
        data_streamer: Iterator whose `class_indices` maps celebrity names to
            class indices.

    Returns:
        The name resolved by `get_predicted_celebrity_name_from_idx` for the
        highest-probability class.
    """
    # Convert to an array and add a leading batch axis so the model receives a
    # batch containing exactly one image.
    image_batch: np.ndarray = np.expand_dims(img_to_array(image), axis=0)
    image_batch = preprocess_input(image_batch)

    prediction_probabilities = model.predict(image_batch)

    # Only one image was submitted, so read the first row; cast the NumPy
    # integer to a plain int to match the helper's declared parameter type.
    predicted_celebrity_index = int(np.argmax(prediction_probabilities[0]))

    return get_predicted_celebrity_name_from_idx(data_streamer, predicted_celebrity_index)


# Quick end-to-end check on one image. NOTE: this file sits inside the
# directory the model was trained on, so it is not a held-out example.
example_image = load_image_from_file(path_to_celebrity_dataset / "Will Smith/001_beebcee2.jpg")

predicted_celebrity = make_prediction(
    image=example_image,
    model=model,
    data_streamer=training_data_streamer,
)
print(f"Predicted celebrity: {predicted_celebrity}")

Predicted celebrity: Will Smith
