# Notebook 2

Importing the needed libraries

In [1]:
import os
import cv2
import pickle
import numpy as np
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import RMSprop
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import img_to_array





Reading the cropped images

In [2]:
# Step 1: Load Cropped Faces
# Walk the dataset directory; every '.jpg' file is labelled with the name of
# the folder that directly contains it.
cropped_faces_dir = 'cropped_faces'
images, labels = [], []
for root, _, files in os.walk(cropped_faces_dir):
    for file in files:
        if not file.endswith('.jpg'):
            continue
        img_path = os.path.join(root, file)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; cv2.resize would then fail, so skip it.
            print(f"Skipping unreadable image: {img_path}")
            continue
        # Resize to the 224x224 input size the model is built with
        img = cv2.resize(img, (224, 224))
        images.append(img)
        # The label is the last path component, i.e. the class folder name
        label = os.path.basename(root)
        labels.append(label)

Showing the number of the classes (labels)

In [3]:
# Count the distinct celebrity names collected while loading the images
unique_labels = set(labels)
len(unique_labels)

105

- Convert the images to float and normalize their values

- Convert the labels names to one-hot-encoding

In [4]:
# Scale pixel values to [0, 1] as float32.
# Note: this rebinds `images`, so running the cell twice divides by 255 twice.
images = np.array(images, dtype='float32') / 255.0

# Keep the fitted encoder instead of discarding it: label_encoder.classes_[i]
# is the class name behind integer label i (and thus behind model output i),
# which is needed to decode predictions later.
label_encoder = LabelEncoder().fit(labels)
labels_encoded = label_encoder.transform(labels)
labels_one_hot = to_categorical(labels_encoded)

See a sample of the labels as one-hot-encoding

In [5]:
labels_one_hot[0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.], dtype=float32)

- Define an InceptionV3 model and load its pre-trained weights

- Freeze all the layers up to 'mixed6'

- Convert the output of 'mixed6' to a 1D vector using Flatten

- Apply 2 hidden layers, each followed by BatchNormalization and Dropout

- Apply one output layer with activation function 'softmax'

In [6]:
# Build the InceptionV3 graph with no weights, then load pre-trained weights
# from a local file.
local_weights_file = 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5'
base_model = InceptionV3(input_shape=(224, 224, 3), include_top=True, weights=None)
base_model.load_weights(local_weights_file)

# Freeze every pre-trained layer so that only the new classification head
# below is trained. (One pass is enough; the flag stays set on the layers.)
for layer in base_model.layers:
    layer.trainable = False

# Take the output of the intermediate 'mixed6' block as the feature map; the
# InceptionV3 layers after it are not part of the final model.
last_layer = base_model.get_layer('mixed6').output

# Size the output layer from the one-hot labels rather than hard-coding it
num_classes = labels_one_hot.shape[1]

# Step 4: Add Custom Layers for Classification
x = Flatten()(last_layer)
x = Dense(1024, activation='leaky_relu', activity_regularizer=l2(0.0001))(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(256, activation='relu', activity_regularizer=l2(0.0001))(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

# One softmax unit per class
output_layer = Dense(num_classes, activation='softmax')(x)

# Create the complete model
model = Model(inputs=base_model.input, outputs=output_layer)







- Compile the model using:
    - optimizer: RMSprop 
    - loss: categorical_crossentropy
    - metrics: accuracy
    - learning_rate: 0.0001

In [7]:
# Configure training: RMSprop with a small learning rate, cross-entropy over
# the one-hot targets, and accuracy reported as the metric.
optimizer = RMSprop(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Show the summary of the model with total number of parameters 

In [8]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 111, 111, 32)         864       ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 111, 111, 32)         96        ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 activation (Activation)     (None, 111, 111, 32)         0         ['batch_normalization[0][0

Split the data into training and validation sets with ratios of 80% and 20% respectively

In [9]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels_one_hot,
    test_size=0.2,
    random_state=42,
)

Set up an early-stopping callback

In [10]:
# Stop training once val_loss has failed to improve for 3 epochs in a row,
# and roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

Fit the model with early stopping, training until val_loss stops improving

In [11]:
# Train for at most 50 epochs in mini-batches of 32; the early_stop callback
# may end training before the epoch limit is reached.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

Epoch 1/50

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


Evaluate the model on the validation set

In [19]:
# Score the trained model on the held-out validation split; evaluate() returns
# the loss followed by the compiled metric (accuracy).
val_metrics = model.evaluate(X_val, y_val)
loss, accuracy = val_metrics

# Report the accuracy as a percentage
print(f"Validation accuracy: {accuracy*100:.2f}%")

Validation accuracy: 80.22%


Saving the trained model to be used later 

In [13]:
# Save the trained model to a file in the working directory so it can be
# reloaded later without retraining
model.save('celebrity_face_recognition_model.h5')

  saving_api.save_model(


Count the number of images in each class

In [14]:
# Load every readable image of each class folder into class_images, a dict
# mapping class (folder) name -> list of image arrays.
class_images = {}
extract_dir = 'cropped_faces'
# Iterate over the folders (classes) in extract_dir
for class_name in os.listdir(extract_dir):
    class_dir = os.path.join(extract_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    # Deliberately not named `images`: that global holds the training array
    # built earlier in the notebook and must not be overwritten here.
    class_image_list = []
    # Iterate over the image files in the class directory
    for image_file in os.listdir(class_dir):
        image_path = os.path.join(class_dir, image_file)
        # Read the image using OpenCV; None means the file could not be decoded
        image = cv2.imread(image_path)
        if image is not None:
            class_image_list.append(img_to_array(image))
    class_images[class_name] = class_image_list

# Print the class names and the number of images for each class
for class_name, class_image_list in class_images.items():
    print(f"Class: {class_name}, Number of Images: {len(class_image_list)}")

Class: pins_Adriana Lima, Number of Images: 213
Class: pins_Alex Lawther, Number of Images: 152
Class: pins_Alexandra Daddario, Number of Images: 225
Class: pins_Alvaro Morte, Number of Images: 141
Class: pins_alycia dabnem carey, Number of Images: 212
Class: pins_Amanda Crew, Number of Images: 117
Class: pins_amber heard, Number of Images: 218
Class: pins_Andy Samberg, Number of Images: 199
Class: pins_Anne Hathaway, Number of Images: 203
Class: pins_Anthony Mackie, Number of Images: 126
Class: pins_Avril Lavigne, Number of Images: 162
Class: pins_barack obama, Number of Images: 119
Class: pins_barbara palvin, Number of Images: 199
Class: pins_Ben Affleck, Number of Images: 126
Class: pins_Bill Gates, Number of Images: 123
Class: pins_Bobby Morley, Number of Images: 139
Class: pins_Brenton Thwaites, Number of Images: 209
Class: pins_Brian J. Smith, Number of Images: 1
Class: pins_Brie Larson, Number of Images: 170
Class: pins_camila mendes, Number of Images: 164
Class: pins_Chris Evan

Preprocess an image by resizing it, normalizing its pixel values, and expanding its dimensions

In [15]:
def preprocess_image(image, target_size=(224, 224)):
    """Turn a single image into a one-item batch for the model.

    Args:
        image: Image array accepted by ``cv2.resize`` (in this notebook, the
            arrays produced by ``cv2.imread`` / ``img_to_array``).
        target_size: Size passed to ``cv2.resize``. Defaults to 224x224, the
            size the training images were resized to.

    Returns:
        float32 array holding the resized image divided by 255, with a
        leading batch axis of length 1.
    """
    resized_image = cv2.resize(image, target_size)
    # Cast explicitly so the dtype matches the float32 training data no matter
    # what dtype the caller passes in (uint8 input would otherwise give float64).
    normalized_image = resized_image.astype('float32') / 255.0
    # Add the batch dimension expected by model.predict
    preprocessed_image = np.expand_dims(normalized_image, axis=0)
    return preprocessed_image


Define a function that extracts the embedding (the model's output vector) of each image

In [16]:
# Define the class labels.
# Sorted so that class_labels[i] lines up with output index i of the model:
# LabelEncoder numbers classes in sorted order of the label strings, whereas
# class_images follows os.listdir order, which is filesystem-dependent.
# NOTE(review): assumes class_images holds the same set of class folders that
# the training labels were built from - confirm.
class_labels = sorted(class_images.keys())

# Define a function to extract embeddings from images
def extract_embeddings(model, images):
    """Run ``model`` on every image and return its outputs, one per image.

    All images are preprocessed and stacked into one batch, so ``predict`` is
    called once instead of once per image.

    Args:
        model: Object with a Keras-style ``predict`` method.
            NOTE(review): assumes a single-output model whose ``predict``
            returns one array with a row per input - confirm for other models.
        images: Iterable of image arrays accepted by ``preprocess_image``.

    Returns:
        List with one array per input image, each keeping a leading batch
        axis of length 1 (the shape a per-image ``predict`` call would give).
        Empty list when ``images`` is empty.
    """
    # Preprocess image (resize, normalize, etc.); each item has shape (1, ...)
    preprocessed = [preprocess_image(image) for image in images]
    if not preprocessed:
        return []
    batch = np.concatenate(preprocessed, axis=0)
    # Get embeddings from model for the whole batch at once
    predictions = model.predict(batch)
    # Slice with i:i+1 (not i) to preserve the per-image batch axis
    return [predictions[i:i + 1] for i in range(len(predictions))]

Save the class labels

In [17]:
# Persist the list of class names next to the notebook so that later code can
# load it back with pickle
with open('class_labels.pkl', 'wb') as labels_file:
    pickle.dump(class_labels, labels_file)

For each image, compute its embedding with the trained model, and at the end save the embeddings grouped by class

In [18]:
# Extract embeddings for each class.
# class_embeddings is a dict: class name -> list of per-image model outputs.
# A comprehension keeps its loop variables local, so the global `images`
# training array is not overwritten by this cell.
# Note: `model` ends in a softmax layer, so each "embedding" stored here is
# the model's class-probability vector for that image.
class_embeddings = {
    class_name: extract_embeddings(model, class_image_list)
    for class_name, class_image_list in class_images.items()
}

# Save class embeddings
with open('class_embeddings.pkl', 'wb') as f:
    pickle.dump(class_embeddings, f)

