## Train

## Importing Libraries

In [8]:
import pkg_resources

REQUIRED_PACKAGES = [
    'mtcnn','tensorflow', 'keras-facenet', 'sklearn', 'scikit-learn'
]

for package in REQUIRED_PACKAGES:
    try:
        dist = pkg_resources.get_distribution(package)
        print('{} ({}) is already installed'.format(dist.key, dist.version))
    except pkg_resources.DistributionNotFound:
        !pip install {package}

mtcnn (0.1.1) is already installed
tensorflow (2.12.0) is already installed
keras-facenet (0.3.2) is already installed
sklearn (0.0.post5) is already installed
Collecting scikit-learn
  Downloading scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Collecting joblib>=1.1.1
  Downloading joblib-1.2.0-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.2.2 threadpoolctl-3.1.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;4

In [2]:
from os import listdir
from os.path import isdir
from PIL import Image
from matplotlib import pyplot
from numpy import savez_compressed
from numpy import asarray
from mtcnn.mtcnn import MTCNN

2023-06-03 20:42:10.351235: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Detector shared by extract_face() calls that do not pass their own; it is
# created lazily on first use so the MTCNN weights are loaded only once.
_FACE_DETECTOR = None


# extract a single face from a given photograph
def extract_face(filename, required_size=(224, 224), detector=None):
    """Return the first detected face of an image file as a resized RGB array.

    Parameters
    ----------
    filename : str
        Path of the image, opened with PIL.
    required_size : tuple
        Size passed to ``Image.resize`` for the cropped face.
    detector : object, optional
        Anything exposing ``detect_faces(pixels)`` that returns a list of dicts
        with a ``'box'`` entry.  When omitted, one shared ``MTCNN()`` instance
        is created on first use and reused afterwards.

    Returns
    -------
    numpy.ndarray
        Pixel array of the cropped and resized face.

    Raises
    ------
    IndexError
        If the detector reports no face for the image.
    """
    global _FACE_DETECTOR
    if detector is None:
        if _FACE_DETECTOR is None:
            _FACE_DETECTOR = MTCNN()
        detector = _FACE_DETECTOR

    # load the image, force RGB, and close the underlying file promptly
    with Image.open(filename) as source:
        image = source.convert('RGB')
    pixels = asarray(image)

    # detect faces in the image
    results = detector.detect_faces(pixels)
    if not results:
        raise IndexError('no face detected in {}'.format(filename))

    # bounding box of the first face
    x1, y1, width, height = results[0]['box']
    # compute the far corner from the reported origin first, then clip the
    # origin to the image: a negative origin means the box starts outside the
    # picture, so it must be cut off at 0 rather than mirrored with abs()
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)

    # extract the face and resize it to the model size
    face = pixels[y1:y2, x1:x2]
    image = Image.fromarray(face).resize(required_size)
    return asarray(image)

In [4]:
# load images and extract faces for all images in a directory
def load_faces(directory):
    """Run ``extract_face`` on every entry of ``directory``.

    Parameters
    ----------
    directory : str
        Folder whose entries are all treated as image files.  A trailing path
        separator is accepted but no longer required.

    Returns
    -------
    list
        One face array per directory entry, in sorted file-name order.
    """
    from os.path import join

    faces = list()
    # sorted() makes the result independent of the file-system listing order
    for filename in sorted(listdir(directory)):
        # join() inserts the separator only when it is missing
        path = join(directory, filename)
        faces.append(extract_face(path))
    return faces

In [5]:
# load a dataset that contains one subdir for each class that in turn contains images
def load_dataset(directory):
    """Load every class sub-folder of ``directory`` into face and label arrays.

    Parameters
    ----------
    directory : str
        Root folder with one sub-folder per class.  A trailing path separator
        is accepted but no longer required.

    Returns
    -------
    tuple
        ``(X, y)`` as numpy arrays: the faces returned by ``load_faces`` and,
        aligned with them, the name of the sub-folder each face came from.
    """
    from os.path import join

    X, y = list(), list()
    # enumerate folders, one per class, in a reproducible (sorted) order
    for subdir in sorted(listdir(directory)):
        # the empty last component makes join() end the path with a separator,
        # which load_faces() may rely on when it builds the file paths
        path = join(directory, subdir, '')
        # skip any files that might be in the dir
        if not isdir(path):
            continue
        # load all faces in the subdirectory
        faces = load_faces(path)
        # one label (the folder name) per loaded face
        labels = [subdir] * len(faces)
        # summarize progress
        print('>loaded %d examples for class: %s' % (len(faces), subdir))
        # store
        X.extend(faces)
        y.extend(labels)
    return asarray(X), asarray(y)

In [6]:
# load train dataset (one sub-folder per class under images/train/)
trainX, trainy = load_dataset('images/train/')
print(trainX.shape, trainy.shape)

# load test dataset (read from the images/val/ folder)
testX, testy = load_dataset('images/val/')

# save arrays to one file in compressed format; the arrays are passed
# positionally, so later cells read them back as arr_0 .. arr_3
savez_compressed('ss.npz', trainX, trainy, testX, testy)

2023-06-03 20:42:13.818771: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 20:42:13.837917: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 20:42:13.838253: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 20:42:13.840612: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 20:42:13.840884: I tensorflow/compile



2023-06-03 20:42:17.293871: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


>loaded 3 examples for class: 1685_Anjana_Om_Kashyap
>loaded 3 examples for class: 1686_Barkha_Dutt
>loaded 3 examples for class: 1687_Ravish_Kumar
>loaded 4 examples for class: 1688_Arnab_Goswami
(13, 224, 224, 3) (13,)
>loaded 1 examples for class: 1685_Anjana_Om_Kashyap
>loaded 2 examples for class: 1686_Barkha_Dutt
>loaded 2 examples for class: 1687_Ravish_Kumar
>loaded 2 examples for class: 1688_Arnab_Goswami


## Embeddings

In [9]:
# calculate a face embedding for each face in the dataset using facenet
from numpy import load
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
from keras.models import load_model
from keras_facenet import FaceNet
from tensorflow import keras
from sklearn.svm import SVC
from tensorflow import keras
import tensorflow as tf

In [10]:
# FaceNet wrapper from keras_facenet; later cells use it as `model` and call
# its embeddings() method to turn face crops into embedding vectors.
embedder = FaceNet()

In [11]:
# get the face embedding for one face
def get_embedding(model, face_pixels):
    """Return the embedding ``model`` computes for a single face image.

    Parameters
    ----------
    model : object
        Anything exposing ``embeddings(batch)`` that returns one embedding per
        batch item (the notebook passes a keras_facenet ``FaceNet`` instance).
    face_pixels : numpy.ndarray
        Pixel array of one already-cropped face.

    Returns
    -------
    The first (and only) row of ``model.embeddings`` for this face.
    """
    # the model is fed float32 pixels; no scaling or standardisation is applied
    # here, and no face detection is run because the input is already a crop
    face_pixels = face_pixels.astype('float32')
    # transform face into a batch of one sample
    samples = expand_dims(face_pixels, axis=0)
    # make prediction to get embedding
    yhat = model.embeddings(samples)
    return yhat[0]

In [12]:
# Re-open the archive of cropped faces; the four arrays were stored
# positionally, hence the arr_0 .. arr_3 keys.
data = load('ss.npz')
trainX, trainy, testX, testy = (
    data[key] for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3')
)
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)

# The embedding model is the FaceNet wrapper created in an earlier cell.
# Alternatives that were tried and left disabled:
#   load_model('facenet_keras.h5')  (+ model.summary())
#   SVC(kernel='linear', probability=True)
#   load_model('face-rec_Google.npz')
#   load('test.npz')
#   tf.keras.applications.ResNet50(weights='imagenet')
model = embedder

print('Loaded Model')
# convert each face in the train set to an embedding

Loaded:  (13, 224, 224, 3) (13,) (7, 224, 224, 3) (7,)
Loaded Model


In [13]:
# Turn every training face into its embedding vector.
newTrainX = asarray([get_embedding(model, face) for face in trainX])
print(newTrainX.shape)

# Do the same for the faces of the test set.
newTestX = asarray([get_embedding(model, face) for face in testX])
print(newTestX.shape)

# Store embeddings and labels together in one compressed archive.
savez_compressed('ss-embd.npz', newTrainX, trainy, newTestX, testy)

(13, 512)
(7, 512)


## Webcam

In [1]:
import cv2
import numpy as np
from mtcnn import MTCNN
from keras.models import load_model
from scipy.spatial.distance import cosine
import tensorflow as tf
from keras_facenet import FaceNet
from time import time

2023-06-03 22:54:25.583214: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# FaceNet wrapper that produces the embedding of each detected face
facenet_model = FaceNet()

# Reference embeddings (arr_0) and their labels (arr_1) from the .npz archive
data = np.load('ss-embd.npz')
embeddings_array, names = data['arr_0'], data['arr_1']

# MTCNN detector used to locate faces in the video frames
mtcnn_detector = MTCNN()

2023-06-03 22:54:31.119738: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 22:54:31.149880: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 22:54:31.150439: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 22:54:31.153313: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-06-03 22:54:31.153909: I tensorflow/compile

In [5]:
# Open the video capture
video_capture = cv2.VideoCapture('WIN_20230604_02_22_12_Pro.mp4')  # Use 0 for webcam or provide the path to a video file
if not video_capture.isOpened():
    # fail loudly instead of silently producing an empty output file
    raise IOError('Could not open the video source')

# Define the codec and create VideoWriter object; the writer reuses the frame
# rate and frame size reported by the source.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
frame_size = (int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter('output.mp4', fourcc, video_capture.get(cv2.CAP_PROP_FPS), frame_size)

# Cosine-distance threshold for face recognition (loop-invariant, so set once)
threshold = 0.75

try:
    while True:
        # Start timer
        start_time = time()

        # Read a frame from the video
        ret, frame = video_capture.read()

        if not ret:
            break

        frame_h, frame_w = frame.shape[:2]

        # The enrolment cells run MTCNN on RGB images, so detect on an RGB copy
        # of the BGR frame here as well; annotations are still drawn on `frame`.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = mtcnn_detector.detect_faces(rgb_frame)

        # Iterate over detected faces
        for face in faces:
            # Extract the face coordinates
            x, y, w, h = face['box']

            # Clip the box to the frame: a negative start index would make the
            # slice wrap around, and an empty crop cannot be resized.
            x0, y0 = max(x, 0), max(y, 0)
            x1, y1 = min(x + w, frame_w), min(y + h, frame_h)
            if x1 <= x0 or y1 <= y0:
                continue

            # Extract and preprocess the (already RGB) face ROI
            face_roi = rgb_frame[y0:y1, x0:x1]
            face_roi = cv2.resize(face_roi, (224, 224))
            face_roi = face_roi.astype('float32')

            # Convert the preprocessed face ROI to a 4D tensor
            face_tensor = np.expand_dims(face_roi, axis=0)

            # Generate embeddings using the FaceNet model
            face_embedding = facenet_model.embeddings(face_tensor)[0]

            # Compare the face embedding with stored embeddings
            distances = [cosine(face_embedding, emb) for emb in embeddings_array]
            min_distance_idx = int(np.argmin(distances))
            min_distance = distances[min_distance_idx]

            # Check if the minimum distance is below the threshold
            if min_distance < threshold:
                # Identify the recognized face
                recognized_name = names[min_distance_idx]

                # Calculate the confidence level
                confidence = 100 - min_distance*100

                # Draw bounding box and label for the recognized face with confidence level
                label = f"{recognized_name} ({confidence:.2f})"
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            else:
                # Draw bounding box and label for unknown face
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
                cv2.putText(frame, 'Unknown', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

        # end timer (guard against a zero reading from a coarse clock)
        time_taken = time() - start_time
        fps = 1/time_taken if time_taken > 0 else float('inf')
        print(f'Fps:{fps}')

        # Display the frame
        # cv2.imshow('Video', frame)
        out.write(frame)
        print(frame.shape)

        # Exit the loop if 'q' is pressed
        # NOTE(review): with imshow disabled there is presumably no window to
        # receive the key press, so this check may never fire - confirm.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and the writer even if a frame fails
    video_capture.release()
    out.release()

Fps:0.46356544629805607
(1080, 1920, 3)
Fps:0.6087234163598598
(1080, 1920, 3)
Fps:0.9708032525338235
(1080, 1920, 3)
Fps:0.7628206970228142
(1080, 1920, 3)
Fps:0.962312613296985
(1080, 1920, 3)
Fps:0.2524401531792605
(1080, 1920, 3)
Fps:0.9728049519002068
(1080, 1920, 3)
Fps:0.9898275101601002
(1080, 1920, 3)
Fps:0.7520655199903318
(1080, 1920, 3)
Fps:0.9439113354769715
(1080, 1920, 3)
Fps:0.765378494569108
(1080, 1920, 3)
Fps:0.932974821737761
(1080, 1920, 3)
Fps:0.9746031347694327
(1080, 1920, 3)
Fps:0.7804133062546691
(1080, 1920, 3)
Fps:0.9383652374525119
(1080, 1920, 3)
Fps:0.7582424873178945
(1080, 1920, 3)
Fps:0.9353379126037572
(1080, 1920, 3)
Fps:0.6931593894888362
(1080, 1920, 3)
Fps:0.9202262881740624
(1080, 1920, 3)
Fps:0.9509707406105814
(1080, 1920, 3)
Fps:0.9266689879581531
(1080, 1920, 3)
Fps:0.9444862909667132
(1080, 1920, 3)
Fps:0.8865311771515582
(1080, 1920, 3)
Fps:0.9649458332844454
(1080, 1920, 3)
Fps:0.901893990626057
(1080, 1920, 3)
Fps:0.9590014303953525
(1080

In [None]:
# Open the video capture
video_capture = cv2.VideoCapture('WIN_20230604_02_22_12_Pro.mp4')  # Use 0 for webcam or provide the path to a video file
if not video_capture.isOpened():
    # fail loudly instead of silently producing an empty output file
    raise IOError('Could not open the video source')

# Define the codec and create VideoWriter object.  The frame size is taken from
# the source rather than hard-coded, so the writer matches the frames it gets
# whatever the resolution of the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
frame_size = (int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter('output.mp4', fourcc, video_capture.get(cv2.CAP_PROP_FPS), frame_size)

# Cosine-distance threshold for face recognition (loop-invariant, so set once)
threshold = 0.75

try:
    while True:
        # Start timer
        start_time = time()

        # Read a frame from the video
        ret, frame = video_capture.read()

        if not ret:
            break

        frame_h, frame_w = frame.shape[:2]

        # The enrolment cells run MTCNN on RGB images, so detect on an RGB copy
        # of the BGR frame here as well; annotations are still drawn on `frame`.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = mtcnn_detector.detect_faces(rgb_frame)

        # Iterate over detected faces
        for face in faces:
            # Extract the face coordinates
            x, y, w, h = face['box']

            # Clip the box to the frame: a negative start index would make the
            # slice wrap around, and an empty crop cannot be resized.
            x0, y0 = max(x, 0), max(y, 0)
            x1, y1 = min(x + w, frame_w), min(y + h, frame_h)
            if x1 <= x0 or y1 <= y0:
                continue

            # Extract and preprocess the (already RGB) face ROI
            face_roi = rgb_frame[y0:y1, x0:x1]
            face_roi = cv2.resize(face_roi, (224, 224))
            face_roi = face_roi.astype('float32')

            # Convert the preprocessed face ROI to a 4D tensor
            face_tensor = np.expand_dims(face_roi, axis=0)

            # Generate embeddings using the FaceNet model
            face_embedding = facenet_model.embeddings(face_tensor)[0]

            # Compare the face embedding with stored embeddings
            distances = [cosine(face_embedding, emb) for emb in embeddings_array]
            min_distance_idx = int(np.argmin(distances))
            min_distance = distances[min_distance_idx]

            # Check if the minimum distance is below the threshold
            if min_distance < threshold:
                # Identify the recognized face
                recognized_name = names[min_distance_idx]

                # Calculate the confidence level
                confidence = 100 - min_distance*100

                # Draw bounding box and label for the recognized face with confidence level
                label = f"{recognized_name} ({confidence:.2f})"
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            else:
                # Draw bounding box and label for unknown face
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
                cv2.putText(frame, 'Unknown', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

        # end timer (guard against a zero reading from a coarse clock) and
        # stamp the per-frame processing rate onto the frame itself
        time_taken = time() - start_time
        fps = 1/time_taken if time_taken > 0 else float('inf')
        cv2.putText(frame, f'Fps: {round(fps, 3)}', (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the frame
        # cv2.imshow('Video', frame)
        out.write(frame)

        # Exit the loop if 'q' is pressed
        # NOTE(review): with imshow disabled there is presumably no window to
        # receive the key press, so this check may never fire - confirm.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and the writer even if a frame fails
    video_capture.release()
    out.release()