In [2]:
!pip install labelme tensorflow opencv-python matplotlib albumentations

Collecting labelme
  Using cached labelme-5.4.1-py3-none-any.whl
Collecting opencv-python
  Using cached opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting albumentations
  Downloading albumentations-1.4.4-py3-none-any.whl.metadata (37 kB)
Collecting gdown (from labelme)
  Using cached gdown-5.1.0-py3-none-any.whl.metadata (5.7 kB)
Collecting imgviz>=1.7.5 (from labelme)
  Using cached imgviz-1.7.5-py3-none-any.whl
Collecting natsort>=7.1.0 (from labelme)
  Using cached natsort-8.4.0-py3-none-any.whl.metadata (21 kB)
Collecting onnxruntime!=1.16.0,>=1.14.1 (from labelme)
  Downloading onnxruntime-1.17.3-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Collecting PyYAML (from labelme)
  Using cached PyYAML-6.0.1-cp312-cp312-win_amd64.whl.metadata (2.1 kB)
Collecting qtpy!=1.11.2 (from labelme)
  Using cached QtPy-2.4.1-py3-none-any.whl.metadata (12 kB)
Collecting scikit-image (from labelme)
  Downloading scikit_image-0.23.2-cp312-cp312-win_amd64.whl.metadata (14 kB)
C

In [3]:
import os
import time
import uuid
import cv2

import tensorflow as tf
import json
import numpy as np
from matplotlib import pyplot as plt

import albumentations as alb

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import VGG16

from tensorflow.keras.models import save_model
from tensorflow.keras.models import load_model

Start taking images and label them

In [None]:

# Directory (relative to the notebook) where the capture cell below writes its frames.
IMAGES_PATH = os.path.join('data','images')
number_images = 30 # Number of frames grabbed per run of the capture cell; raise it to collect more images.

In [None]:
# This cell opens the default camera, captures `number_images` frames (one every
# ~0.5 s) and saves them as JPEG files in IMAGES_PATH.
# To expand the dataset, simply re-run this cell (about 100 captures should suffice).
# Move across the frame, cover your face sometimes, make unusual expressions and
# leave the frame occasionally; this variety is needed for a good dataset.
# Press 'q' in the preview window to stop early.

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(IMAGES_PATH, exist_ok=True)

cap = cv2.VideoCapture(0)
try:
    for imgnum in range(number_images):
        print('Collecting image {}'.format(imgnum))
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera busy, unplugged, ...): stop instead of
            # passing an empty frame to imwrite/imshow.
            print('Could not read a frame from the camera; stopping capture.')
            break
        imgname = os.path.join(IMAGES_PATH, f'{uuid.uuid1()}.jpg')
        cv2.imwrite(imgname, frame)
        cv2.imshow('frame', frame)
        time.sleep(0.5)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

Draw label rectangles in Images with LabelMe

In [None]:
# This cell will open LabelMe, a software used to draw rectangles around your own face for labeling.
# Once opened, click on "Open Directory" and select the directory containing the images (.\data\images).
# Additionally, enable the "Save Automatically" option in the file panel.
# In the same panel, click on "Change Output Dir" and set it to .\data\labels.
# In the "Edit" menu, select "Draw Rectangles".
# Now, simply draw a rectangle around the face, label it as 'face', and proceed to the next image.

!labelme

Load Image into TF Data Pipeline

In [None]:
# List every captured JPEG; os.path.join keeps the glob valid on non-Windows systems too.
images = tf.data.Dataset.list_files(os.path.join('data', 'images', '*.jpg'))

In [None]:
images.as_numpy_iterator().next()

In [None]:
def load_image(x):
    """Read the file at path ``x`` and return its contents decoded as a JPEG image tensor."""
    return tf.io.decode_jpeg(tf.io.read_file(x))

In [None]:
# Replace the dataset of file paths with a dataset of decoded images.
# NOTE: `images` is rebound from itself, so this cell is not idempotent; re-run the
# list_files cell above before running it again.
images = images.map(load_image)

In [None]:
images.as_numpy_iterator().next()

In [None]:
type(images)

View Raw Images with Matplotlib

In [None]:
image_generator = images.batch(4).as_numpy_iterator()

In [None]:
plot_images = image_generator.next()

In [None]:
# Show the batch of raw images side by side, one subplot per image.
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
idx = 0
for image in plot_images:
    ax[idx].imshow(image)
    idx += 1
plt.show()

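### Manually split the data into train, test and val: move the captured images into data/train/images, data/test/images and data/val/images before running the next cell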

Move the Matching Labels

In [None]:
# For every image that was manually placed in data/<split>/images, move its
# LabelMe annotation (same base name, .json extension) from data/labels into
# data/<split>/labels. Images without an annotation file are left as they are.
for folder in ['train','test','val']:
    split_labels_dir = os.path.join('data', folder, 'labels')
    # os.replace fails if the destination directory is missing.
    os.makedirs(split_labels_dir, exist_ok=True)

    for file in os.listdir(os.path.join('data', folder, 'images')):
        # splitext only strips the final extension, so names containing extra dots survive.
        filename = os.path.splitext(file)[0] + '.json'
        existing_filepath = os.path.join('data', 'labels', filename)
        if os.path.exists(existing_filepath):
            new_filepath = os.path.join(split_labels_dir, filename)
            os.replace(existing_filepath, new_filepath)

Apply Image Augmentation on Images and Labels using Albumentations

In [None]:
# Augmentation pipeline: a 450x450 random crop followed by random flips and
# colour/brightness jitter. Bounding boxes use the 'albumentations' format and
# their class names are passed through the `class_labels` field.
bbox_settings = alb.BboxParams(format='albumentations',
                               label_fields=['class_labels'])
augmentation_steps = [
    alb.RandomCrop(width=450, height=450),
    alb.HorizontalFlip(p=0.5),
    alb.RandomBrightnessContrast(p=0.2),
    alb.RandomGamma(p=0.2),
    alb.RGBShift(p=0.2),
    alb.VerticalFlip(p=0.5),
]
augmentor = alb.Compose(augmentation_steps, bbox_params=bbox_settings)

Test Image with CV and JSON

In [None]:
img = cv2.imread(os.path.join('data','train', 'images','0aa64205-f2fb-11ee-951d-f9da159ac108.jpg'))

In [None]:
with open(os.path.join('data', 'train', 'labels', '0aa64205-f2fb-11ee-951d-f9da159ac108.json'), 'r') as f:
    label = json.load(f)

In [None]:
label['shapes'][0]['points']

Extract Coordinates and Rescale to Match Image Resolution

In [None]:
# Flatten the first shape's two corner points into [x1, y1, x2, y2] (pixel units).
first_corner = label['shapes'][0]['points'][0]
second_corner = label['shapes'][0]['points'][1]
coords = [first_corner[0], first_corner[1], second_corner[0], second_corner[1]]

In [None]:
coords

In [None]:
# Normalise pixel coordinates to [0, 1] using the size of the image actually loaded
# (instead of assuming a 640x480 camera).
# NOTE: `coords` is rebound from itself; re-run the extraction cell before re-running this one.
img_height, img_width = img.shape[:2]
coords = list(np.divide(coords, [img_width, img_height, img_width, img_height]))

In [None]:
coords

Apply Augmentations and View Results

In [None]:
augmented = augmentor(image=img, bboxes=[coords], class_labels=['face'])

In [None]:
augmented['bboxes'][0][2:]

In [None]:
augmented['bboxes']

In [None]:
# Scale the normalised box back to the 450x450 crop and draw it on the augmented image.
box_top_left = tuple(np.multiply(augmented['bboxes'][0][:2], [450,450]).astype(int))
box_bottom_right = tuple(np.multiply(augmented['bboxes'][0][2:], [450,450]).astype(int))
cv2.rectangle(augmented['image'], box_top_left, box_bottom_right, (255,0,0), 2)

plt.imshow(augmented['image'])

Augmentation Pipeline

In [None]:
# Build the augmented dataset. For every image in data/<partition>/images, write
# 120 augmented copies to aug_data/<partition>/images and a matching JSON
# annotation ({'image', 'bbox', 'class'}) to aug_data/<partition>/labels.
# Images without a LabelMe file, or whose box is lost by the augmentation, get
# class 0 and bbox [0,0,0,0].
for partition in ['train','test','val']:
    aug_images_dir = os.path.join('aug_data', partition, 'images')
    aug_labels_dir = os.path.join('aug_data', partition, 'labels')
    # cv2.imwrite / open(..., 'w') do not create missing directories.
    os.makedirs(aug_images_dir, exist_ok=True)
    os.makedirs(aug_labels_dir, exist_ok=True)

    for image in os.listdir(os.path.join('data', partition, 'images')):
        img = cv2.imread(os.path.join('data', partition, 'images', image))
        if img is None:
            # cv2.imread returns None for unreadable / non-image files.
            print(f'Skipping {image}: could not be read as an image')
            continue
        img_height, img_width = img.shape[:2]
        stem = os.path.splitext(image)[0]

        # Tiny placeholder box used when the image has no annotation.
        coords = [0,0,0.00001,0.00001]
        label_path = os.path.join('data', partition, 'labels', f'{stem}.json')
        has_label = os.path.exists(label_path)
        if has_label:
            with open(label_path, 'r') as f:
                label = json.load(f)

            corner_a = label['shapes'][0]['points'][0]
            corner_b = label['shapes'][0]['points'][1]
            # Order the corners as (x_min, y_min, x_max, y_max); presumably LabelMe keeps
            # them in drawing order, which albumentations would reject if reversed.
            coords = [min(corner_a[0], corner_b[0]), min(corner_a[1], corner_b[1]),
                      max(corner_a[0], corner_b[0]), max(corner_a[1], corner_b[1])]
            # Normalise with the real image size rather than assuming 640x480.
            coords = list(np.divide(coords, [img_width, img_height, img_width, img_height]))

        try:
            for x in range(120):
                augmented = augmentor(image=img, bboxes=[coords], class_labels=['face'])
                cv2.imwrite(os.path.join(aug_images_dir, f'{stem}.{x}.jpg'), augmented['image'])

                annotation = {}
                annotation['image'] = image

                if has_label and len(augmented['bboxes']) > 0:
                    # Plain floats keep the annotation JSON-serialisable.
                    annotation['bbox'] = [float(v) for v in augmented['bboxes'][0][:4]]
                    annotation['class'] = 1
                else:
                    annotation['bbox'] = [0,0,0,0]
                    annotation['class'] = 0

                with open(os.path.join(aug_labels_dir, f'{stem}.{x}.json'), 'w') as f:
                    json.dump(annotation, f)

        except Exception as e:
            # Best effort: report which image failed and continue with the next one.
            print(f'Augmentation failed for {image}: {e}')

Load Augmented Images to Tensorflow Dataset

In [None]:
# Augmented training images: sorted file order (shuffle=False) so they line up with
# the label dataset, resized to 120x120 and scaled to [0, 1].
# os.path.join keeps the glob valid on non-Windows systems too.
train_images = tf.data.Dataset.list_files(os.path.join('aug_data', 'train', 'images', '*.jpg'), shuffle=False)
train_images = train_images.map(load_image)
train_images = train_images.map(lambda x: tf.image.resize(x, (120,120)))
train_images = train_images.map(lambda x: x/255)

In [None]:
# Augmented test images: sorted file order (shuffle=False) so they line up with
# the label dataset, resized to 120x120 and scaled to [0, 1].
# os.path.join keeps the glob valid on non-Windows systems too.
test_images = tf.data.Dataset.list_files(os.path.join('aug_data', 'test', 'images', '*.jpg'), shuffle=False)
test_images = test_images.map(load_image)
test_images = test_images.map(lambda x: tf.image.resize(x, (120,120)))
test_images = test_images.map(lambda x: x/255)

In [None]:
# Augmented validation images: sorted file order (shuffle=False) so they line up with
# the label dataset, resized to 120x120 and scaled to [0, 1].
# os.path.join keeps the glob valid on non-Windows systems too.
val_images = tf.data.Dataset.list_files(os.path.join('aug_data', 'val', 'images', '*.jpg'), shuffle=False)
val_images = val_images.map(load_image)
val_images = val_images.map(lambda x: tf.image.resize(x, (120,120)))
val_images = val_images.map(lambda x: x/255)

In [None]:
train_images.as_numpy_iterator().next()

Prepare Labels

In [None]:
def load_labels(label_path):
    """Load one annotation JSON file and return ``([class], bbox)``.

    ``label_path`` must expose ``.numpy()`` returning the file path (as the
    eager tensor handed over by ``tf.py_function`` does). The file is read as
    UTF-8 and must contain the keys ``'class'`` and ``'bbox'``.
    """
    json_path = label_path.numpy()
    with open(json_path, 'r', encoding = "utf-8") as handle:
        record = json.loads(handle.read())

    class_id, bbox = record['class'], record['bbox']
    return [class_id], bbox

Load Labels to Tensorflow Dataset

In [None]:
# Training annotations in sorted file order (shuffle=False) to stay aligned with the images.
# load_labels runs in Python via tf.py_function and yields (class as uint8, bbox as float16).
# os.path.join keeps the glob valid on non-Windows systems too.
train_labels = tf.data.Dataset.list_files(os.path.join('aug_data', 'train', 'labels', '*.json'), shuffle=False)
train_labels = train_labels.map(lambda x: tf.py_function(load_labels, [x], [tf.uint8, tf.float16]))

In [None]:
# Test annotations in sorted file order (shuffle=False) to stay aligned with the images.
# load_labels runs in Python via tf.py_function and yields (class as uint8, bbox as float16).
# os.path.join keeps the glob valid on non-Windows systems too.
test_labels = tf.data.Dataset.list_files(os.path.join('aug_data', 'test', 'labels', '*.json'), shuffle=False)
test_labels = test_labels.map(lambda x: tf.py_function(load_labels, [x], [tf.uint8, tf.float16]))

In [None]:
# Validation annotations in sorted file order (shuffle=False) to stay aligned with the images.
# load_labels runs in Python via tf.py_function and yields (class as uint8, bbox as float16).
# os.path.join keeps the glob valid on non-Windows systems too.
val_labels = tf.data.Dataset.list_files(os.path.join('aug_data', 'val', 'labels', '*.json'), shuffle=False)
val_labels = val_labels.map(lambda x: tf.py_function(load_labels, [x], [tf.uint8, tf.float16]))

In [None]:
train_labels.as_numpy_iterator().next()

Combine Label and Image Samples

Check Partition Lengths

In [None]:
len(train_images), len(train_labels), len(test_images), len(test_labels), len(val_images), len(val_labels)

Create Final Datasets

In [None]:
# Pair each training image with its labels, then shuffle (buffer 5000),
# batch by 8 and prefetch 4 batches.
train = (
    tf.data.Dataset.zip((train_images, train_labels))
    .shuffle(5000)
    .batch(8)
    .prefetch(4)
)

In [None]:
# Pair each test image with its labels, then shuffle (buffer 1300),
# batch by 8 and prefetch 4 batches.
test = (
    tf.data.Dataset.zip((test_images, test_labels))
    .shuffle(1300)
    .batch(8)
    .prefetch(4)
)

In [None]:
# Pair each validation image with its labels, then shuffle (buffer 1000),
# batch by 8 and prefetch 4 batches.
val = (
    tf.data.Dataset.zip((val_images, val_labels))
    .shuffle(1000)
    .batch(8)
    .prefetch(4)
)

In [None]:
train.as_numpy_iterator().next()[1]

View Images and Annotations

In [None]:
data_samples = train.as_numpy_iterator()

In [None]:
res = data_samples.next()

In [None]:
# Draw the ground-truth box on the first four images of the sampled batch.
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
batch_images, batch_boxes = res[0], res[1][1]
for idx, axis in enumerate(ax):
    sample_image = batch_images[idx]
    sample_coords = batch_boxes[idx]

    # Normalised corners -> pixel positions in the 120x120 image.
    top_left = tuple(np.multiply(sample_coords[:2], [120, 120]).astype(int))
    bottom_right = tuple(np.multiply(sample_coords[2:], [120, 120]).astype(int))

    # Draw on a UMat wrapper, then fetch the result back for matplotlib.
    sample_image = cv2.UMat(sample_image)
    cv2.rectangle(sample_image, top_left, bottom_right, (255, 0, 0), 1)
    sample_image = sample_image.get()

    axis.imshow(sample_image)

Build the Deep Learning Model using the Functional API

Download VGG16

In [None]:
vgg = VGG16(include_top=False) # Instantiate VGG16 without its top (fully-connected) classification layers; inspected with summary() in the next cell.

In [None]:
vgg.summary()

Build Network

In [None]:
def build_model():
    """Build the two-headed face tracker on top of a VGG16 backbone.

    The network takes 120x120x3 images and returns two outputs:
    a single sigmoid unit (classification head) and four sigmoid units
    (bounding-box head). Each head is global max pooling -> Dense(2048, relu)
    -> sigmoid output, fed by the VGG16 feature map (``include_top=False``).
    """
    def _sigmoid_head(feature_map, n_outputs):
        # Layers are created in the order pooling -> hidden -> output.
        pooled = GlobalMaxPooling2D()(feature_map)
        hidden = Dense(2048, activation='relu')(pooled)
        return Dense(n_outputs, activation='sigmoid')(hidden)

    image_input = Input(shape=(120,120,3))
    backbone_features = VGG16(include_top=False)(image_input)

    # Classification head first, then the bounding-box head.
    class_output = _sigmoid_head(backbone_features, 1)
    bbox_output = _sigmoid_head(backbone_features, 4)

    return Model(inputs=image_input, outputs=[class_output, bbox_output])

Test the Neural Network

In [None]:
facetracker = build_model()

In [None]:
facetracker.summary()

In [None]:
X, y = train.as_numpy_iterator().next()

In [None]:
X.shape

In [None]:
classes, coords = facetracker.predict(X)

In [None]:
classes, coords

Define Optimizer and Learning Rate

In [None]:
# Number of batches in one pass over the training dataset.
batches_per_epoch = len(train)
# Per-step decay factor. Assuming the optimizer applies inverse-time decay,
# lr = lr0 / (1 + lr_decay * step), this makes the learning rate 75% of its
# initial value after one epoch (batches_per_epoch steps) — TODO confirm against the optimizer used.
lr_decay = (1./0.75 -1)/batches_per_epoch

In [None]:
# Current Keras optimizers no longer accept the legacy `decay=` keyword, so the same
# inverse-time decay, lr = 0.0001 / (1 + lr_decay * step), is expressed as a schedule.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.0001,
    decay_steps=1,
    decay_rate=lr_decay,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

Create Localization Loss and Classification Loss

In [None]:
def localization_loss(y_true, yhat):
    """Sum of squared errors on the first corner plus squared errors on box width/height.

    Both arguments are indexed as ``[:, 0..3]`` = (x1, y1, x2, y2); the result
    is summed over the whole batch (no averaging).
    """
    # Squared error of the first corner (columns 0 and 1).
    corner_error = tf.reduce_sum(tf.square(y_true[:,:2] - yhat[:,:2]))

    true_height, true_width = y_true[:,3] - y_true[:,1], y_true[:,2] - y_true[:,0]
    pred_height, pred_width = yhat[:,3] - yhat[:,1], yhat[:,2] - yhat[:,0]

    # Squared error of the box size.
    size_error = tf.reduce_sum(
        tf.square(true_width - pred_width) + tf.square(true_height - pred_height)
    )

    return corner_error + size_error

In [None]:
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss

Test out Loss Metrics

In [None]:
localization_loss(y[1], coords)

In [None]:
classloss(y[0], classes)

In [None]:
regressloss(y[1], coords)

Create Custom Model Class

In [None]:
class FaceTracker(Model):
    """Training wrapper around a two-output network (class score, box coordinates).

    The wrapped network is called as ``classes, coords = network(X)``. Training
    minimises ``localization_loss + 0.5 * classification_loss`` with a custom
    ``train_step``. The three losses are tracked with ``Mean`` metrics exposed
    through ``metrics``, so Keras resets them every epoch and the values under
    ``total_loss`` / ``class_loss`` / ``regress_loss`` (and their ``val_``
    counterparts) are running epoch averages. Depending on the Keras version,
    ``History`` is built from the model's metrics rather than from the dict
    returned by the last step, which is why the losses are tracked this way.
    """

    def __init__(self, eyetracker, **kwargs):
        """Store the network to train; remaining kwargs go to ``keras.Model``."""
        super().__init__(**kwargs)
        self.model = eyetracker
        # Running means reported by train_step / test_step.
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.class_loss_tracker = tf.keras.metrics.Mean(name="class_loss")
        self.regress_loss_tracker = tf.keras.metrics.Mean(name="regress_loss")

    @property
    def metrics(self):
        """Metrics Keras resets at the start of each epoch / evaluation run."""
        return [
            self.total_loss_tracker,
            self.class_loss_tracker,
            self.regress_loss_tracker,
        ]

    def compile(self, opt, classloss, localizationloss, **kwargs):
        """Register the optimizer and the two loss callables used by the custom steps."""
        super().compile(**kwargs)
        self.closs = classloss
        self.lloss = localizationloss
        self.opt = opt

    def _compute_losses(self, batch, training):
        """Run the network on one batch and return (total, class, localization) losses."""
        X, y = batch

        # Labels may arrive without a static shape (they are produced through
        # tf.py_function elsewhere in this notebook), so force (batch, 1) / (batch, 4).
        y_class = tf.reshape(y[0], (-1, 1))
        y_coords = tf.reshape(y[1], (-1, 4))

        classes, coords = self.model(X, training=training)

        batch_classloss = self.closs(y_class, classes)
        batch_localizationloss = self.lloss(tf.cast(y_coords, tf.float32), coords)
        total_loss = batch_localizationloss+0.5*batch_classloss
        return total_loss, batch_classloss, batch_localizationloss

    def _report(self, total_loss, batch_classloss, batch_localizationloss):
        """Update the running means and return them under the original log keys."""
        self.total_loss_tracker.update_state(total_loss)
        self.class_loss_tracker.update_state(batch_classloss)
        self.regress_loss_tracker.update_state(batch_localizationloss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "class_loss": self.class_loss_tracker.result(),
            "regress_loss": self.regress_loss_tracker.result(),
        }

    def train_step(self, batch, **kwargs):
        """One optimisation step on ``batch = (X, (y_class, y_coords))``."""
        with tf.GradientTape() as tape:
            total_loss, batch_classloss, batch_localizationloss = self._compute_losses(
                batch, training=True
            )

        # Gradients are taken after leaving the tape context so the backward pass
        # itself is not recorded.
        grad = tape.gradient(total_loss, self.model.trainable_variables)

        # Use the optimizer given to compile(), not a notebook-level global.
        self.opt.apply_gradients(zip(grad, self.model.trainable_variables))

        return self._report(total_loss, batch_classloss, batch_localizationloss)

    def test_step(self, batch, **kwargs):
        """Evaluate the losses on one validation/test batch without updating weights."""
        total_loss, batch_classloss, batch_localizationloss = self._compute_losses(
            batch, training=False
        )
        return self._report(total_loss, batch_classloss, batch_localizationloss)

    def call(self, X, **kwargs):
        """Forward pass: delegate to the wrapped network."""
        return self.model(X, **kwargs)

In [None]:
model = FaceTracker(facetracker)
model.compile(opt, classloss, regressloss)

Train

In [None]:
logdir='logs'

In [None]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
hist = model.fit(train, epochs=40, validation_data=val, callbacks=[tensorboard_callback])

Plot Performance

In [None]:
hist.history

In [None]:
# Plot training vs. validation curves for the total, classification and regression losses.
# hist.history only contains the values Keras recorded at the end of each epoch; if an
# expected key is missing, that curve is skipped and the available keys are printed
# instead of raising a KeyError.

fig, ax = plt.subplots(ncols=3, figsize=(20,5))

panels = [
    ('total_loss', 'Loss', 'loss'),
    ('class_loss', 'Classification Loss', 'class loss'),
    ('regress_loss', 'Regression Loss', 'regress loss'),
]

for axis, (key, title, label) in zip(ax, panels):
    series = [(key, 'teal', label), (f'val_{key}', 'orange', f'val {label}')]
    for history_key, color, series_label in series:
        if history_key in hist.history:
            axis.plot(hist.history[history_key], color=color, label=series_label)
        else:
            print(f"'{history_key}' not found in hist.history; available keys: {sorted(hist.history)}")
    axis.title.set_text(title)
    # Only add a legend when at least one curve was drawn.
    if axis.get_legend_handles_labels()[0]:
        axis.legend()

plt.show()

Predictions on Test Set (to verify accuracy)

In [None]:
test_data = test.as_numpy_iterator()

In [None]:
test_sample = test_data.next()

In [None]:
yhat = facetracker.predict(test_sample[0])

In [None]:
# Show the first four test images; draw the predicted box only when the predicted
# class score exceeds 0.9.
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
for idx, axis in enumerate(ax):
    sample_image = test_sample[0][idx]
    sample_coords = yhat[1][idx]

    # Draw on a UMat wrapper, then fetch the result back for matplotlib.
    sample_image = cv2.UMat(sample_image)

    if yhat[0][idx] > 0.9:
        # Normalised corners -> pixel positions in the 120x120 image.
        top_left = tuple(np.multiply(sample_coords[:2], [120,120]).astype(int))
        bottom_right = tuple(np.multiply(sample_coords[2:], [120,120]).astype(int))
        cv2.rectangle(sample_image, top_left, bottom_right, (255,0,0), 1)

    sample_image = sample_image.get()

    axis.imshow(sample_image)

Save the Model

In [None]:
facetracker.save('faceTrackerModel.keras')

In [None]:
# Reload the saved model. The file name must match the one used when saving
# ('faceTrackerModel.keras') exactly, because many filesystems are case-sensitive.
facetracker = load_model('faceTrackerModel.keras')

Real Time Detection

In [None]:
# Real-time detection: read webcam frames, crop them, run the model on a 120x120
# RGB copy and draw the predicted box plus a 'face' tag when the class score
# exceeds 0.5. Press 'q' in the preview window to stop.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = frame[50:500, 50:500, :]
        # The crop is smaller than 450x450 when the camera frame is (e.g. 480 rows
        # leave only 430), so scale the normalised box by the actual crop size.
        crop_height, crop_width = frame.shape[:2]
        box_scale = [crop_width, crop_height]

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized = tf.image.resize(rgb, (120,120))

        # verbose=0 suppresses the per-call progress output in this tight loop.
        yhat = facetracker.predict(np.expand_dims(resized/255,0), verbose=0)
        sample_coords = yhat[1][0]

        if yhat[0][0][0] > 0.5:
            top_left = np.multiply(sample_coords[:2], box_scale).astype(int)
            bottom_right = np.multiply(sample_coords[2:], box_scale).astype(int)

            def as_point(xy):
                # OpenCV expects plain Python ints for point coordinates.
                return tuple(int(v) for v in xy)

            # Controls the main rectangle
            cv2.rectangle(frame, as_point(top_left), as_point(bottom_right), (255,0,0), 2)
            # Controls the label rectangle (filled, sitting on top of the box corner)
            cv2.rectangle(frame,
                          as_point(np.add(top_left, [0,-30])),
                          as_point(np.add(top_left, [80,0])),
                          (255,0,0), -1)
            # Controls the text rendered
            cv2.putText(frame, 'face', as_point(np.add(top_left, [0,-5])),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

        cv2.imshow('faceTrack', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even after an error.
    cap.release()
    cv2.destroyAllWindows()