In [1]:
import os, sys
import cv2
import time
import imutils
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Ask TensorFlow to allocate GPU memory on demand. The setting is applied to
# every visible GPU; on a CPU-only machine the list is empty and nothing happens.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be changed before the GPUs are initialised,
        # e.g. this fires when the cell is re-run in an already-used kernel.
        print(f'Could not enable memory growth on {gpu.name}: {err}')
print(f'Running on Python {sys.version}, Tensorflow {tf.__version__}, OpenCV {cv2.__version__}')

Running on Python 3.9.10 (tags/v3.9.10:f2f3f53, Jan 17 2022, 15:14:21) [MSC v.1929 64 bit (AMD64)], Tensorflow 2.8.0-rc1, OpenCV 4.5.5


In [2]:
print('Loading models...')
# Face-detector backend to use:
#   "CAFFE" - Single Shot Multi-box Detector with ResNet-10 (SSD)
#   "TF"    - TF for 8 bit mobile
#   "YOLO"  - YOLO V3 (does not work yet)
DNN = "CAFFE"
assert DNN in ['TF', 'CAFFE', 'YOLO'], f'invalid DNN! Got: {DNN}'

# Each branch sets the weight/config paths and builds the matching cv2.dnn net.
# Note the loaders differ in argument order (Tensorflow takes the model first).
if DNN == 'TF':
    configFile, modelFile = (
        "face_detector/opencv_face_detector.pbtxt",
        "face_detector/opencv_face_detector_uint8.pb",
    )
    face_detector = cv2.dnn.readNetFromTensorflow(modelFile, configFile)
elif DNN == "CAFFE":
    configFile, modelFile = (
        "face_detector/deploy.prototxt",
        "face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel",
    )
    face_detector = cv2.dnn.readNetFromCaffe(configFile, modelFile)
else:
    # YOLOv3 does not work yet
    configFile, modelFile = (
        "face_detector/yolov3.cfg",
        "face_detector/yolov3-wider_16000.weights",
    )
    face_detector = cv2.dnn.readNetFromDarknet(configFile, modelFile)

print(f'Face detection model ({DNN}) loaded. Loading face mask model...')

Loading models...
Face detection model (CAFFE) loaded. Loading face mask model...


In [3]:
# Load the saved mask classifier from disk.
model = load_model('MobileNetV2 Full mask')

# Entries 1 and 2 of the input shape are the image dimensions the model was
# trained on; they are unpacked here as (height, width).
_mask_input_dims = model.input_shape[1:3]
model_img_height, model_img_width = _mask_input_dims

model.summary()
print('All models loaded.')

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 average_pooling2d (AverageP  (None, 1, 1, 1280)       0         
 ooling2D)                                                       
                                                                 
 flatten (Flatten)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 128)               163968    
                                                                 
 activation (Activation)     (None, 128)               0     

In [4]:
# TODO: NMS
def detect_and_predict_mask(frame, faceNet, maskNet, min_confidence=0.5):
    """Detect faces in a frame and run the mask classifier on each of them.

    Args:
        frame: Image array (BGR channel order, as it is converted with
            COLOR_BGR2RGB before classification). Its first two shape entries
            are used as (height, width) to scale the detected boxes.
        faceNet: cv2.dnn network whose forward() output is read as
            ``detections[0, 0, i, 2]`` (confidence) and
            ``detections[0, 0, i, 3:7]`` (box corners relative to frame size).
        maskNet: Keras model used to classify every face crop. Crops are
            resized to ``maskNet.input_shape[1:3]`` (height, width).
        min_confidence: Detections whose confidence is not strictly greater
            than this value are ignored. Defaults to 0.5.

    Returns:
        A ``(locs, preds)`` tuple of equal-length lists. ``locs`` holds
        ``(startX, startY, endX, endY)`` pixel boxes and ``preds`` holds the
        softmax output of ``maskNet`` for the corresponding face.
    """
    (img_height, img_width) = frame.shape[:2]
    # Size the crops for the classifier that is actually passed in rather than
    # relying on notebook-level globals.
    target_height, target_width = maskNet.input_shape[1:3]

    # no cropping as face detector sucks at cropped pics
    blob = cv2.dnn.blobFromImage(frame, 1.0)
    faceNet.setInput(blob)
    detections = faceNet.forward()

    scale = np.array([img_width, img_height, img_width, img_height])
    locs, faces = [], []
    for i in range(detections.shape[2]):  # loop through each face detected
        confidence = detections[0, 0, i, 2]
        if confidence <= min_confidence:
            continue

        # compute the coordinates of the bounding box
        (startX, startY, endX, endY) = (detections[0, 0, i, 3:7] * scale).astype("int")

        # ensure the bounding boxes fall within the dimensions of the frame
        (startX, startY) = (max(0, startX), max(0, startY))
        (endX, endY) = (min(img_width - 1, endX), min(img_height - 1, endY))

        # A box that is empty after clamping would give an empty ROI, which
        # cv2.cvtColor / cv2.resize reject - skip it instead of crashing.
        if endX <= startX or endY <= startY:
            continue

        # extract the face ROI, convert it from BGR to RGB, resize it to the
        # model dimensions and preprocess it.
        # cv2.resize expects dsize as (width, height).
        face = cv2.cvtColor(frame[startY:endY, startX:endX], cv2.COLOR_BGR2RGB)
        face = cv2.resize(face, (target_width, target_height))
        faces.append(preprocess_input(img_to_array(face)))
        locs.append((startX, startY, endX, endY))

    if not faces:
        return locs, []

    # Classify all faces of the frame in one batched call instead of one
    # predict() call per face. Softmax is applied here because the model
    # itself outputs logits (it was trained with a logit-based loss).
    logits = maskNet.predict(np.stack(faces, axis=0))
    preds = tf.unstack(tf.nn.softmax(logits))  # one tensor per face
    return locs, preds

In [5]:
stream = cv2.VideoCapture('test/Free Stock Footage (People wearing face mask, Empty Street, Covid19).mp4')  # src=0 is webcam
# stream = cv2.VideoCapture(0)  # src=0 is webcam
# fourcc = cv2.VideoWriter_fourcc(*'XVID')  # output dont work yet
# out = cv2.VideoWriter('output.avi', fourcc, 20.0, (650, 1155))  # output dont work yet

# for FPS calculation; perf_counter is monotonic and high resolution
prev_frame_time = new_frame_time = time.perf_counter()
try:
    while stream.isOpened():
        has_frame, frame = stream.read()
        if not has_frame:
            # frame is None here, so this check must come before any processing
            print('Reached end of stream')
            cv2.waitKey(0)  # keep the last frame on screen until a key is pressed
            break

        frame = imutils.resize(frame, width=650)
        locs, preds = detect_and_predict_mask(frame=frame, faceNet=face_detector, maskNet=model)

        # loop over the detected face locations and their corresponding predictions
        for (box, pred) in zip(locs, preds):
            # unpack the bounding box and predictions
            (startX, startY, endX, endY) = box
            (noMask, Mask) = (float(p) for p in pred)
            confidence = max(Mask, noMask)

            # Colours are (B, G, R): the more confident the prediction, the
            # greener a "Mask On" box and the redder a "No Mask" box.
            if Mask > noMask:
                label = "Mask On"
                color = (0, 255 * confidence, 255 * (1 - confidence))
            else:
                label = "No Mask"
                color = (0, 255 * (1 - confidence), 255 * confidence)
            label = f"{label}: {confidence * 100:.3f}%"  # confidence for mask detector

            # display the label and bounding box rectangle on the output frame;
            # put the label below the box's top edge when there is no room above it
            text_y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.putText(frame, label, (startX, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 1)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

        new_frame_time = time.perf_counter()
        elapsed = new_frame_time - prev_frame_time
        fps = str(int(1 / elapsed)) if elapsed > 0 else "0"  # guard against a zero interval
        prev_frame_time = new_frame_time
        cv2.putText(frame, fps, (7, 70), cv2.FONT_HERSHEY_SIMPLEX, 3, (100, 255, 0), 3, cv2.LINE_AA)
        # out.write(frame)  # output dont work yet
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture device and close the windows, even on errors
    stream.release()
    cv2.destroyAllWindows()