Dataset link : https://www.kaggle.com/datasets/vasukipatel/face-recognition-dataset

In [3]:
!pip install opencv-python
!pip install numpy
!pip install pandas



In [4]:
import numpy as np
import cv2
import os
import pandas as pd

# Creating new dataset by cropping only faces

https://www.tutorialspoint.com/how-to-crop-and-save-the-detected-faces-in-opencv-python

In [8]:
def crop_face(img_path):
    #take imagepath, crops the face, and returns it
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)        #convert image to grayscale
    face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')        #read haarcascade file

    faces = face_cascade.detectMultiScale(gray, 1.3, 4)

    # loop over all detected faces
    if(len(faces) > 0):
        #we are assuming that only one face in each image
        (x, y, w, h) = faces[0]
        face = img[y:y+h, x:x+w]
        return face

    return None         #if no face is found simply return None

In [5]:

import glob
import os
import uuid

In [9]:
#use glob to go through all the images
#read image and crop faces using opencv and haar-cascade
#save inside cropped > akshay Kumar > new image


for img_path in glob.glob("data/celeb_images/*/*.jpg"):
    temp = crop_face(img_path)

    if(temp is not None):
        folder_name = img_path.split('/')[-2]
        #chck if this folder exists
        #if yes, save in it, if no, create then save in it

        if(os.path.exists("data/celeb_images_cropped/" + folder_name)):
            cv2.imwrite("data/celeb_images_cropped/" + folder_name + "/" + str(uuid.uuid4()) + ".jpg", temp)
        else:
            os.mkdir("data/celeb_images_cropped/" + folder_name)
            cv2.imwrite("data/celeb_images_cropped/" + folder_name + "/" + str(uuid.uuid4()) + ".jpg", temp)


# shuffling and creating test, train, val data

In [6]:
#create a list of dicts {'image' : imagepath, 'label' : celeb_name}
#then shuffle them
#then partition and convert to dataframes

parent = []

for img_path in glob.glob("data/celeb_images_cropped/*/*.jpg"):
    temp = img_path.split("/") 
    parent.append({'image' : img_path, "label" : temp[-2]})

In [7]:
import random

random.shuffle(parent)      #shuffle the list

In [8]:
#now divide into train, test, val
train_list = parent[0 : int(len(parent) * 0.8)]
test_list = parent[int(len(parent) * 0.8) : int(len(parent) * 0.9)]
val_list = parent[int(len(parent) * 0.9) : ]

print(len(parent), len(train_list), len(test_list), len(val_list))

2356 1884 236 236


In [9]:
import pandas as pd

train_df = pd.DataFrame(train_list)
test_df = pd.DataFrame(test_list)
val_df = pd.DataFrame(val_list)

In [10]:
!pip install tensorflow==2.15.0

#pip install --force-reinstall "tensorflow==2.15.0" 



In [11]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-07-19 16:20:00.814565: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-19 16:20:00.816321: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-19 16:20:00.840917: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-19 16:20:00.840962: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-19 16:20:00.841861: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [12]:
test_gen = ImageDataGenerator(rescale= 1./255)
train_gen = ImageDataGenerator(rescale= 1./255)
val_gen = ImageDataGenerator(rescale= 1./255)

#extract class names
class_names = set()
for img in glob.glob("data/celeb_images/*/*.jpg"):
    temp = img.split("/")
    class_names.add(temp[-2])
class_names = list(class_names)


In [13]:
class_names

['Kashyap',
 'Ellen Degeneres',
 'Virat Kohli',
 'Anushka Sharma',
 'Vijay Deverakonda',
 'Billie Eilish',
 'Claire Holt',
 'Dwayne Johnson',
 'Alia Bhatt',
 'Brad Pitt',
 'Marmik',
 'Amitabh Bachchan',
 'Tom Cruise',
 'Elizabeth Olsen',
 'Akshay Kumar',
 'Margot Robbie',
 'Natalie Portman',
 'Henry Cavill',
 'Hrithik Roshan',
 'Priyanka Chopra',
 'Robert Downey Jr',
 'Camila Cabello',
 'Jessica Alba',
 'Zac Efron',
 'Lisa Kudrow',
 'Courtney Cox',
 'Alexandra Daddario',
 'Charlize Theron',
 'Andy Samberg',
 'Hugh Jackman',
 'Roger Federer']

In [36]:
#create datasets
test_dataset = test_gen.flow_from_dataframe(
    dataframe = test_df,
    x_col = 'image',
    y_col = 'label',
    target_size = (224, 224),
    classes = class_names,
    class_mode = "sparse",
    batch_size = 32
)

train_dataset = train_gen.flow_from_dataframe(
    dataframe = train_df,
    x_col = 'image',
    y_col = 'label',
    target_size = (224, 224),
    classes = class_names,
    class_mode = "sparse",
    batch_size = 32
)

val_dataset = val_gen.flow_from_dataframe(
    dataframe = val_df,
    x_col = 'image',
    y_col = 'label',
    target_size = (224, 224),
    classes = class_names,
    class_mode = "sparse",
    batch_size = 32
)

Found 236 validated image filenames belonging to 31 classes.
Found 1884 validated image filenames belonging to 31 classes.
Found 236 validated image filenames belonging to 31 classes.


# Create model

In [37]:
conv_base = tf.keras.applications.MobileNetV2(
    weights = 'imagenet',
    include_top = False,
    input_shape = (224, 224, 3)
)

conv_base.trainable = False

model = tf.keras.Sequential([
    conv_base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(31, activation="softmax")
])

model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer = 'adam',
    metrics = ["accuracy"]
)

In [38]:
!pip install pillow
!pip install scipy



In [39]:
model.fit(
    train_dataset,
    epochs = 15, 
    validation_data = (val_dataset)
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x7fd1dea163b0>

In [40]:
model.save("celeb_face_recog.keras")

In [41]:
model.evaluate(
    test_dataset, verbose = 2
)

8/8 - 4s - loss: 1.2965 - accuracy: 0.6441 - 4s/epoch - 500ms/step


[1.2965295314788818, 0.6440678238868713]