In [26]:
import os
import sys
import json
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.layers import (
    Dense,
    Dropout,
    Conv2D,
    MaxPooling2D,
    Flatten,
    GlobalMaxPooling2D,
    Input,
)
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.losses import (
    categorical_crossentropy,
    binary_crossentropy,
)
from tensorflow.keras.applications import VGG19, VGG16, ResNet50, InceptionV3

import tensorflow as tf

In [27]:
print(tf.__version__)

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices("GPU")))
print(tf.config.experimental.list_physical_devices("GPU"))

2.17.0
Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [28]:
train_image = os.listdir(
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/train_augm/images"
)
test_image = os.listdir(
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/test_augm/images"
)
valid_image = os.listdir(
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/valid_augm/images"
)
train_image = sorted(train_image)
test_image = sorted(test_image)
valid_image = sorted(valid_image)

In [29]:
train_label = os.listdir(
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/train_augm/labels"
)
test_label = os.listdir(
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/test_augm/labels"
)
valid_label = os.listdir(
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/valid_augm/labels"
)

train_label = sorted(train_label)
test_label = sorted(test_label)
valid_label = sorted(valid_label)

## convert them to dataframe 

In [30]:
def load_json_data(file_path):
    with open(file_path, "r") as f:
        data = json.load(f)

    face = data["shapes"][0]["label"]
    if face == "face":
        box = data["shapes"][0]["points"]
        result = [1, box[0][0], box[0][1], box[1][0], box[1][1]]
    else:
        result = [0, 0, 0, 0, 0]

    return result


face = []
x1 = []
y1 = []
x2 = []
y2 = []
for i in range(len(train_label)):
    train_label[i] = load_json_data(
        "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/train_augm/labels/"
        + train_label[i]
    )
    face.append(train_label[i][0])
    x1.append(train_label[i][1])
    y1.append(train_label[i][2])
    x2.append(train_label[i][3])
    y2.append(train_label[i][4])

train_dataFrame = pd.DataFrame(
    {
        "image": train_image,
        "face": face,
        "x1": x1,
        "y1": y1,
        "x2": x2,
        "y2": y2,
    }
)

face = []
x1 = []
y1 = []
x2 = []
y2 = []

for i in range(len(test_label)):
    test_label[i] = load_json_data(
        "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/test_augm/labels/"
        + test_label[i]
    )
    face.append(test_label[i][0])
    x1.append(test_label[i][1])
    y1.append(test_label[i][2])
    x2.append(test_label[i][3])
    y2.append(test_label[i][4])

test_dataFrame = pd.DataFrame(
    {
        "image": test_image,
        "face": face,
        "x1": x1,
        "y1": y1,
        "x2": x2,
        "y2": y2,
    }
)


face = []
x1 = []
y1 = []
x2 = []
y2 = []

for i in range(len(valid_label)):
    valid_label[i] = load_json_data(
        "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/valid_augm/labels/"
        + valid_label[i]
    )
    face.append(valid_label[i][0])
    x1.append(valid_label[i][1])
    y1.append(valid_label[i][2])
    x2.append(valid_label[i][3])
    y2.append(valid_label[i][4])

valid_dataFrame = pd.DataFrame(
    {
        "image": valid_image,
        "face": face,
        "x1": x1,
        "y1": y1,
        "x2": x2,
        "y2": y2,
    }
)

In [31]:
face.clear()
x1.clear()
y1.clear()
x2.clear()
y2.clear()
train_image.clear()
test_image.clear()
valid_image.clear()
train_label.clear()
test_label.clear()
valid_label.clear()

In [32]:
train_dataFrame.head()

Unnamed: 0,image,face,x1,y1,x2,y2
0,00a6fab8-7396-11ef-a98c-9c443d1ab0ba0.jpg,0,0.0,0.0,0.0,0.0
1,00a6fab8-7396-11ef-a98c-9c443d1ab0ba1.jpg,1,434.0,157.0,268.0,0.0
2,00a6fab8-7396-11ef-a98c-9c443d1ab0ba10.jpg,1,355.0,249.999985,189.000015,76.000008
3,00a6fab8-7396-11ef-a98c-9c443d1ab0ba11.jpg,1,217.0,256.0,51.000015,118.000008
4,00a6fab8-7396-11ef-a98c-9c443d1ab0ba12.jpg,0,0.0,0.0,0.0,0.0


In [33]:
test_dataFrame.head()

Unnamed: 0,image,face,x1,y1,x2,y2
0,e4af90cc-7395-11ef-a98c-9c443d1ab0ba0.jpg,0,0.0,0.0,0.0,0.0
1,e4af90cc-7395-11ef-a98c-9c443d1ab0ba1.jpg,1,236.999985,256.0,44.000031,49.999989
2,e4af90cc-7395-11ef-a98c-9c443d1ab0ba10.jpg,1,512.0,256.0,452.0,49.999989
3,e4af90cc-7395-11ef-a98c-9c443d1ab0ba11.jpg,0,0.0,0.0,0.0,0.0
4,e4af90cc-7395-11ef-a98c-9c443d1ab0ba12.jpg,0,0.0,0.0,0.0,0.0


In [34]:
valid_dataFrame.head()

Unnamed: 0,image,face,x1,y1,x2,y2
0,db991c9c-7395-11ef-a98c-9c443d1ab0ba0.jpg,1,512.0,79.000008,283.999969,0.0
1,db991c9c-7395-11ef-a98c-9c443d1ab0ba1.jpg,1,512.0,256.0,306.0,97.0
2,db991c9c-7395-11ef-a98c-9c443d1ab0ba10.jpg,1,442.0,256.0,208.999969,0.0
3,db991c9c-7395-11ef-a98c-9c443d1ab0ba11.jpg,0,0.0,0.0,0.0,0.0
4,db991c9c-7395-11ef-a98c-9c443d1ab0ba12.jpg,1,512.0,238.0,375.0,0.0


## [Prepare data format]()

In [35]:
train_images = (
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/train_augm/images/"
    + train_dataFrame.iloc[:, 0]
)
test_images = (
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/test_augm/images/"
    + test_dataFrame.iloc[:, 0]
)
valid_images = (
    "/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/valid_augm/images/"
    + valid_dataFrame.iloc[:, 0]
)

train_labels = train_dataFrame.iloc[:, 1:]
test_labels = test_dataFrame.iloc[:, 1:]
valid_labels = valid_dataFrame.iloc[:, 1:]

In [36]:
valid_images[0]

'/home/hany_jr/Ai/FaceDetectionDeepLearning/Dataset/valid_augm/images/db991c9c-7395-11ef-a98c-9c443d1ab0ba0.jpg'

In [37]:
def read_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image.set_shape((256, 512, 3))

    # normalize image

    image = tf.cast(image, tf.float32) / 255.0

    label[0].set_shape([])
    label[1].set_shape([])
    label[2].set_shape([])
    label[3].set_shape([])
    label[4].set_shape([])

    face = label[0]
    box = [label[1], label[2], label[3], label[4]]
    return image, [face], box

In [38]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = (
    train_dataset.shuffle(buffer_size=len(train_labels)).map(read_image).batch(1)
)


test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = (
    test_dataset.shuffle(buffer_size=len(test_labels)).map(read_image).batch(1)
)

valid_dataset = tf.data.Dataset.from_tensor_slices((valid_images, valid_labels))
valid_dataset = (
    valid_dataset.shuffle(buffer_size=len(valid_labels)).map(read_image).batch(1)
)

In [39]:
train_dataset.as_numpy_iterator().next()[0].shape

(1, 256, 512, 3)

In [40]:
def build_model():
    input_layer = Input(shape=(256, 512, 3))

    vgg = VGG19(include_top=False)(input_layer)

    # Classification Model
    f1 = GlobalMaxPooling2D()(vgg)
    class1 = Dense(2048, activation="relu")(f1)
    class2 = Dense(1, activation="sigmoid")(class1)

    # Bounding box model
    f2 = GlobalMaxPooling2D()(vgg)
    regress1 = Dense(2048, activation="relu")(f2)
    regress2 = Dense(4, activation="linear")(regress1)

    facetracker = Model(inputs=input_layer, outputs=[class2, regress2])
    return facetracker


model = build_model()
model.summary()

## [train]()

In [41]:
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss=["binary_crossentropy", "mse"],
    metrics=["accuracy", "accuracy"],
)
model.fit(train_dataset, epochs=10, validation_data=valid_dataset)

Epoch 1/10




[1m4019/4020[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 43ms/step - dense_5_accuracy: 0.7265 - loss: 0.2239

2024-10-01 04:59:52.506234: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 16.20GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-10-01 04:59:52.641516: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 32.39GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-10-01 04:59:52.776579: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 16.30GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-10-01 04:59:52.907928: W external/local_tsl/tsl/framework/b

[1m4020/4020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 46ms/step - dense_5_accuracy: 0.7265 - loss: 0.2238 - val_dense_5_accuracy: 0.7167 - val_loss: 0.0000e+00
Epoch 2/10
[1m4020/4020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 44ms/step - dense_5_accuracy: 0.7307 - loss: 0.0000e+00 - val_dense_5_accuracy: 0.7167 - val_loss: 0.0000e+00
Epoch 3/10
[1m4020/4020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 44ms/step - dense_5_accuracy: 0.7279 - loss: 0.0000e+00 - val_dense_5_accuracy: 0.7167 - val_loss: 0.0000e+00
Epoch 4/10
[1m4020/4020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 45ms/step - dense_5_accuracy: 0.7272 - loss: 0.0000e+00 - val_dense_5_accuracy: 0.7167 - val_loss: 0.0000e+00
Epoch 5/10
[1m4020/4020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 44ms/step - dense_5_accuracy: 0.7380 - loss: 0.0000e+00 - val_dense_5_accuracy: 0.7167 - val_loss: 0.0000e+00
Epoch 6/10
[1m4020/4020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7af4bf585cc0>