In [29]:
# import the necessary packages
from imutils.video import VideoStream
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import argparse
import imutils
import pickle
import time
import cv2
import os
from imutils.video import FPS
from imutils import paths
import shutil
import face_recognition

In [2]:
import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all
# up front. The list is empty on a CPU-only machine, so iterate over it
# rather than indexing element 0 (which raised IndexError without a GPU).
physical_devices = tf.config.experimental.list_physical_devices("GPU")
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
if not physical_devices:
    print("[INFO] no GPU detected, running on CPU")

In [20]:
# ---- Object detection setup (MobileNet SSD, Caffe format) ----
# Paths to the network description and its trained weights, plus the minimum
# detection score that the capture loop accepts.
prototxt = 'Object_Detection/caffe_models/MobileNetSSD_deploy.prototxt.txt'
model = 'Object_Detection/caffe_models/MobileNetSSD_deploy.caffemodel'
confidence = 0.2

# Class labels MobileNet SSD was trained to detect; the capture loop looks a
# label up by the class index reported in each detection.
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair",
    "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train",
    "tvmonitor",
]

# One random colour triple per class, used when drawing boxes and labels.
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))

# Deserialize the Caffe network from disk:
#   first argument  -> .prototxt text description of the architecture
#   second argument -> .caffemodel file holding the learned weights
print("[INFO] loading model...")
object_net = cv2.dnn.readNetFromCaffe(prototxt, model)

[INFO] loading model...


In [21]:
# Face recognition configuration
encodings = "Face_Recognition/encodings.pickle"
# NOTE(review): `display` is not referenced in the visible cells — confirm it is still needed.
display = 1
# passed as the `model` argument of face_recognition.face_locations in the capture loop
detection_method = 'hog'

# load the known faces and embeddings; the capture loop reads the
# "encodings" and "names" entries of the loaded object
print("[INFO] loading encodings...")
# NOTE(security): unpickling can execute arbitrary code — only load an
# encodings file that you created yourself or otherwise trust.
# The context manager closes the file handle, which the previous
# open(...).read() one-liner left to the garbage collector.
with open(encodings, "rb") as encodings_file:
    data = pickle.load(encodings_file)

[INFO] loading encodings...


In [22]:
# Liveness detection configuration: trained classifier, pickled label
# encoder, and the minimum face-detection score used by fake_or_real()
pre_trained_model = 'Face_Liveness/liveness.model'
le1 = 'Face_Liveness/le.pickle'
conf = 0.5

# load our serialized face detector from disk
print("[INFO] loading face detector...")
protoPath = 'Face_Liveness/caffe_models/deploy.prototxt.txt'
modelPath = 'Face_Liveness/caffe_models/res10_300x300_ssd_iter_140000.caffemodel'
liveness_net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load the liveness detector model and label encoder from disk
print("[INFO] loading liveness detector...")
# NOTE: this rebinds `model`, which the object-detection cell used for the
# .caffemodel path; from here on `model` is the liveness classifier.
model = load_model(pre_trained_model)

# NOTE(security): unpickling can execute arbitrary code — only load a
# label-encoder file from a trusted source.
# The context manager closes the file handle deterministically.
with open(le1, "rb") as le_file:
    le = pickle.load(le_file)

[INFO] loading face detector...
[INFO] loading liveness detector...


In [33]:
def fake_or_real(frame):
    """Run the liveness classifier on the first confidently detected face.

    The frame is passed through ``liveness_net`` (face detector). The first
    detection whose score exceeds ``conf`` and whose clamped bounding box is
    non-empty is cropped, resized to 32x32, scaled to [0, 1] and fed to the
    liveness ``model``.

    Args:
        frame: image array as returned by ``cv2.VideoCapture.read`` (only
            ``frame.shape[:2]`` and slicing are used here).

    Returns:
        A string ``"<label>: <probability>"`` where the label comes from
        ``le.classes_`` and the probability is formatted with 4 decimals,
        or ``None`` when no usable face was detected.
    """
    # Take the dimensions from the frame itself; previously this relied on
    # module-level `h`/`w` that only exist once the capture loop has run.
    (h, w) = frame.shape[:2]

    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
        (300, 300), (104.0, 177.0, 123.0))

    # pass the blob through the network and obtain the detections and
    # predictions
    liveness_net.setInput(blob)
    detections = liveness_net.forward()

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the
        # prediction and filter out weak detections
        confidence = detections[0, 0, i, 2]
        if confidence <= conf:
            continue

        # compute the (x, y)-coordinates of the bounding box for the face
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # ensure the detected bounding box does not fall outside the
        # dimensions of the frame
        startX = max(0, startX)
        startY = max(0, startY)
        endX = min(w, endX)
        endY = min(h, endY)

        # a box that collapses after clamping yields an empty ROI, which
        # cv2.resize rejects with an error; skip it and try the next one
        if endX <= startX or endY <= startY:
            continue

        # extract the face ROI and then preprocess it in the exact
        # same manner as our training data
        face = frame[startY:endY, startX:endX]
        face = cv2.resize(face, (32, 32))
        face = face.astype("float") / 255.0
        face = img_to_array(face)
        face = np.expand_dims(face, axis=0)

        # pass the face ROI through the trained liveness detector
        # model to determine if the face is "real" or "fake"
        preds = model.predict(face)[0]
        j = np.argmax(preds)
        label = le.classes_[j]

        # only the first usable face is classified
        return "{}: {:.4f}".format(label, preds[j])

    # no detection passed the confidence / geometry checks
    return None
    

In [39]:
# initialize the video stream, allow the camera sensor to warm up,
# and initialize the FPS counter
print("[INFO] starting video stream...")
cap = cv2.VideoCapture(0)
# Fail here with a clear message instead of letting the capture loop crash
# later on a None frame when the camera could not be opened.
if not cap.isOpened():
    raise RuntimeError("[ERROR] could not open video capture device 0")
time.sleep(2.0)
fps = FPS().start()

[INFO] starting video stream...


In [40]:
def _majority_name(matches, known_names):
    """Return the known name with the most matches, or "Unknown" if none matched.

    Args:
        matches: sequence of booleans, one per known encoding.
        known_names: names indexed in parallel with ``matches``.

    On a tie, the name that was counted first wins (dict insertion order).
    """
    counts = {}
    for known_idx, matched in enumerate(matches):
        if matched:
            known = known_names[known_idx]
            counts[known] = counts.get(known, 0) + 1
    if not counts:
        return "Unknown"
    return max(counts, key=counts.get)


# loop over the frames from the video stream; the try/finally guarantees
# the window and camera are released even if a frame raises an exception
try:
    while True:
        ret, frame = cap.read()
        # cap.read() reports failure via `ret`; stop instead of crashing on
        # a None frame
        if not ret or frame is None:
            print("[WARN] failed to read a frame from the camera, stopping")
            break

        # resize the frame to a width of 1000 pixels (aspect ratio preserved)
        frame = imutils.resize(frame, width=1000)

        # grab the frame dimensions and convert it to a blob
        # Binary Large Object = BLOB
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)

        # pass the blob through the network and get the detections
        object_net.setInput(blob)
        detections = object_net.forward()

        objects_detected = set()
        # Face recognition works on the whole frame, so it only needs to run
        # once per frame no matter how many "person" detections there are.
        faces_annotated = False

        # loop over the detections
        for i in np.arange(0, detections.shape[2]):
            # extract the probability of the prediction and filter out weak
            # detections
            probability = detections[0, 0, i, 2]
            if probability <= confidence:
                continue

            # extract the index of the class label from the 'detections',
            # then compute the (x, y)-coordinates of the bounding box
            idx = int(detections[0, 0, i, 1])
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            class_name = CLASSES[idx]
            label = "{}: {:.2f}%".format(class_name, probability * 100)
            objects_detected.add(class_name)

            if class_name != 'person':
                # draw the prediction on the frame
                cv2.rectangle(frame, (startX, startY), (endX, endY), COLORS[idx], 2)
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
                continue

            if faces_annotated:
                continue
            faces_annotated = True

            live_label = fake_or_real(frame)
            if live_label is None:
                live_label = ''

            # Convert BGR -> RGB and resize THAT image. Previously the
            # resize was applied to `frame` again, discarding the colour
            # conversion and feeding BGR data to face_recognition.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rgb = imutils.resize(rgb, width=750)
            r = frame.shape[1] / float(rgb.shape[1])

            # detect the (x, y)-coordinates of the bounding boxes
            # corresponding to each face in the input frame, then compute
            # the facial embeddings for each face
            boxes = face_recognition.face_locations(rgb,
                                                    model=detection_method)
            # named `face_encodings` so the module-level `encodings` path
            # is not overwritten
            face_encodings = face_recognition.face_encodings(rgb, boxes)

            # match each face against the known encodings and vote on a name
            names = [
                _majority_name(
                    face_recognition.compare_faces(data["encodings"], encoding),
                    data["names"])
                for encoding in face_encodings
            ]

            # loop over the recognized faces
            for ((top, right, bottom, left), name) in zip(boxes, names):
                # rescale the face coordinates back to the display frame
                top = int(top * r)
                right = int(right * r)
                bottom = int(bottom * r)
                left = int(left * r)

                # draw the predicted face name on the image
                cv2.rectangle(frame, (left, top), (right, bottom),
                              (0, 255, 0), 1)
                y = top - 15 if top - 15 > 15 else top + 15
                cv2.putText(frame, name+' '+live_label, (left, y), cv2.FONT_HERSHEY_SIMPLEX,
                            0.75, (0, 255, 0), 2)

        if 'person' not in objects_detected:
            cv2.putText(frame, 'No Person Detected', (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 255), 2)

        # show the output frame
        cv2.imshow("Press q to quit", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the 'q' key was pressed, break from the loop
        if key == ord("q"):
            break

        # update the FPS counter
        fps.update()

    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
finally:
    # cleanup
    cv2.destroyAllWindows()
    cap.release()

[INFO] elapsed time: 75.21
[INFO] approx. FPS: 2.94


In [38]:
# Manual cleanup cell: close all OpenCV windows and release the camera handle.
# NOTE(review): the capture-loop cell makes the same two calls when it ends, so
# this cell is presumably for when that cell was interrupted first — confirm.
cv2.destroyAllWindows()
cap.release()