In [33]:
%load_ext autoreload
%autoreload 2
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#from google.colab import drive
#drive.mount('/content/gdrive/')
#sys.path.append('/content/gdrive/My Drive/')

#tf.debugging.set_log_device_placement(True)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

%matplotlib inline
# edit this line to change the figure size
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.rcParams['font.size'] = 16
# may be needed to avoid mulitply defined openmp libs
import os
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'

sys.path.append("..") # Adds higher directory to python modules path.
weights_folder = '../weights'
weights_file_name = 'model-best.h5'
weights_path = os.path.join(weights_folder, weights_file_name)

## Video settings
web_cam = True
video_path = '../data/videos/wear_masks_720p.mp4'

def get_video_capture():
    """Open the video source selected by the notebook-level settings.

    Returns:
        A ``cv2.VideoCapture`` bound to camera index 0 when ``web_cam`` is
        truthy, otherwise one bound to the file at ``video_path``.
    """
    if web_cam:
        return cv2.VideoCapture(0)
    return cv2.VideoCapture(video_path)

def resize_frame(frame):
    """Scale a frame for display according to the active video source.

    Args:
        frame: Image as returned by ``cv2.VideoCapture.read``.

    Returns:
        ``frame`` untouched when ``web_cam`` is truthy; otherwise a copy
        scaled by 0.5 along both axes using bilinear interpolation.
    """
    if web_cam:
        return frame
    return cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Num GPUs Available:  0


### OpenCV - haarcascade_fullbody

In [35]:
# Live demo: full-body detection with OpenCV's Haar cascade.
# Author's observation: this detector does not seem to work well.

import cv2
# NOTE: despite the variable names, this cascade file is the *full body*
# detector, so `faces` below holds body boxes.
face_cascade = cv2.CascadeClassifier('../data/cascades/haarcascade_fullbody.xml')

# Open the configured source (webcam or video file, see get_video_capture).
cap = get_video_capture()

try:
    # Loop for as long as the capture stays open.
    while cap.isOpened():

        # Read the next frame; `ret` is False once the stream is exhausted
        # or the read fails, in which case `frame` is not usable.
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame
        frame = resize_frame(frame)

        # The cascade runs on a grayscale copy of the frame.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=1)

        # Draw a rectangle around every detection.
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)

        # Show every frame, including those without detections, so the
        # window keeps updating.
        cv2.imshow('haarcascade_fullbody', frame)

        # Wait for Esc key to stop
        if cv2.waitKey(33) == 27:
            break
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

In [25]:
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression for axis-aligned bounding boxes.

    Boxes are visited in descending order of their bottom edge (``y2``).
    Each visited box is kept, and every remaining box whose intersection
    with it covers more than ``overlapThresh`` of that remaining box's own
    area is discarded.

    Args:
        boxes: Array-like of shape (N, 4) with rows ``[x1, y1, x2, y2]``
            (top-left and bottom-right corners). Lists and both signed and
            unsigned integer arrays are accepted.
        overlapThresh: Overlap ratio above which a box is suppressed.

    Returns:
        An empty list when ``boxes`` is empty; otherwise an integer array
        holding the kept boxes in the order they were picked.
    """
    # Accept plain Python lists as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Work in floating point: the overlap ratio needs true division, and
    # unsigned integers would wrap around in `xx2 - xx1 + 1` for boxes that
    # do not intersect, producing a huge bogus overlap.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # indexes of the boxes that survive suppression
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    # keep looping while some indexes still remain in the index list
    while len(idxs) > 0:
        # take the last index (largest y2) and keep that box
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # corners of the intersection between the picked box and each of
        # the remaining boxes
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height of the intersection (zero when disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # fraction of each remaining box covered by the picked box
        overlap = (w * h) / area[rest]

        # drop the picked index and every index whose overlap exceeds
        # the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked, as integers
    return boxes[pick].astype("int")

### OpenCV - Histogram of Oriented Gradients (HOG) descriptors

In [36]:
# Live demo: people detection with Histogram of Oriented Gradients (HOG)
# descriptors and OpenCV's default people detector.

import cv2
# Alternative lower-resolution clip: '../samples/wear_masks_240p.mp4'
video_path = '../data/videos/wear_masks_720p.mp4'

# Open the configured source (webcam or video file, see get_video_capture).
cap = get_video_capture()

# Initialize the HOG descriptor with OpenCV's default people detector.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

try:
    # Loop for as long as the capture stays open.
    while cap.isOpened():

        # Read the next frame; `ret` is False once the stream is exhausted
        # or the read fails, in which case `frame` is not usable.
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame
        frame = resize_frame(frame)

        # Detect people in the frame (named `faces` for historical reasons).
        (faces, weights) = hog.detectMultiScale(frame, winStride=(4, 4), padding=(8, 8), scale=1.05)

        # draw the original bounding boxes
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)

        # apply non-maxima suppression to the bounding boxes using a
        # fairly large overlap threshold to try to maintain overlapping
        # boxes that are still people; boxes are converted from
        # (x, y, w, h) to (x1, y1, x2, y2) first
        faces = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
        pick = non_max_suppression_fast(faces, overlapThresh=0.65)

        # draw the final bounding boxes
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

        cv2.imshow('HOG', frame)

        # Wait for Esc key to stop
        if cv2.waitKey(33) == 27:
            break
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

### OpenCV - Cascade classifier


In [28]:
# Live demo: person detection with an LBP cascade classifier.
import cv2
# Alternative lower-resolution clip: '../samples/wear_masks_240p.mp4'
video_path = '../data/videos/wear_masks_720p.mp4'
# NOTE(review): head_cascade is loaded but not used in this cell.
head_cascade = cv2.CascadeClassifier('../data/cascades/lbp_cascade_head.xml')
person_cascade = cv2.CascadeClassifier('../data/cascades/lbp_cascade_person.xml')

# Open the configured source (webcam or video file, see get_video_capture).
cap = get_video_capture()

try:
    # Loop for as long as the capture stays open.
    while cap.isOpened():

        # Read the next frame; `ret` is False once the stream is exhausted
        # or the read fails, in which case `frame` is not usable.
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame
        frame = resize_frame(frame)

        # The cascade runs on a grayscale copy of the frame; `faces` holds
        # the person detections.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = person_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=1)

        # draw the original bounding boxes
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)

        # apply non-maxima suppression to the bounding boxes using a
        # fairly large overlap threshold to try to maintain overlapping
        # boxes that are still people; boxes are converted from
        # (x, y, w, h) to (x1, y1, x2, y2) first
        faces = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
        pick = non_max_suppression_fast(faces, overlapThresh=0.65)

        # draw the final bounding boxes
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

        cv2.imshow('Cascade Classifier', frame)

        # Wait for Esc key to stop
        if cv2.waitKey(33) == 27:
            break
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

### YoloV2 tiny

In [32]:
# Live demo: YoloV2-tiny face detector combined with the mask classifier.
from detect.detectors import yolov2_simple_cnn

# NOTE(review): absolute, machine-specific path; it must be adjusted on any
# other machine. Consider moving it next to the other notebook settings.
yolo_weights = 'C:/Users/bones/uw/dl/mask-detector/YoloKerasFaceDetection/pretrain/yolov2_tiny-face.h5'
masks_detector = yolov2_simple_cnn(weights_path, yolo_weights, object_detector_confidence=0.8)


# Open the configured source (webcam or video file, see get_video_capture).
cap = get_video_capture()

# Label rendering settings for cv2.putText.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
fontColor = (255,255,255)
lineType = 2
# The detector only runs on every `frame_rate`-th frame; frames in between
# are annotated with the most recent detection results.
frame_n = 0
frame_rate = 12

try:
    # Loop for as long as the capture stays open.
    while cap.isOpened():

        # Read the next frame; `ret` is False once the stream is exhausted
        # or the read fails, in which case `frame` is not usable.
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame
        frame = resize_frame(frame)

        if frame_n % frame_rate == 0:
            # This detector is given the frame as read (BGR); no RGB
            # conversion is applied here.
            frame_to_detect = frame
            results, faces_detector_output = masks_detector.detect(frame_to_detect, verbose=0)

        for p, data in zip(results, faces_detector_output):
            # data['box'] is unpacked as (x, y, width, height).
            x, y, w, h = data['box']
            x1 = x + w
            y1 = y + h

            # A prediction of 0 is treated as "wearing a mask".
            mask = p == 0
            # NOTE(review): cv2.imshow interprets colours as BGR, so
            # (255, 0, 0) renders blue - confirm whether red was intended.
            color = (0, 255, 0) if mask else (255, 0, 0)
            cv2.rectangle(img=frame, pt1=(x, y), pt2=(x1, y1), color=color)

            # Mark facial landmarks when the detector provides them.
            if 'keypoints' in data:
                r = 5
                for name in ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right'):
                    cv2.circle(frame, data['keypoints'][name], r, (0, 0, 255))

            cv2.putText(frame, 'Mask' if mask else 'Not mask',
                (x, y),
                font,
                fontScale,
                fontColor,
                lineType)

        cv2.imshow('Yolov2', frame)
        frame_n += 1

        # Wait for Esc key to stop
        if cv2.waitKey(33) == 27:
            break
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()



### Yolov3 tiny - darknet

In [31]:
# Live demo: Yolov3-tiny (darknet) mask detector.
import cv2
from detect.yolo.darknet_model import DarknetModel

# Darknet network definition, trained weights and data (meta) file.
configPath = "../../darknet-ab/cfg/yolov3-tiny-masks-small.cfg"
weightPath = "../../darknet-ab/backup-20000/yolov3-tiny-masks-small_1000.weights"
metaPath = "faces-20000.data"

masks_detector = DarknetModel(configPath, weightPath, metaPath)

# Open the configured source (webcam or video file, see get_video_capture).
cap = get_video_capture()

# Label rendering settings for cv2.putText.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
fontColor = (255,255,255)
lineType = 2
# The detector only runs on every `frame_rate`-th frame; frames in between
# are annotated with the most recent detection results.
frame_n = 0
frame_rate = 12

try:
    # Loop for as long as the capture stays open.
    while cap.isOpened():

        # Read the next frame; `ret` is False once the stream is exhausted
        # or the read fails, in which case `frame` is not usable.
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame
        frame = resize_frame(frame)

        if frame_n % frame_rate == 0:
            # The detector receives an RGB copy; drawing and display keep
            # using the original BGR frame.
            frame_to_detect = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results, faces_detector_output = masks_detector.detect(frame_to_detect, verbose=0)

        for p, data in zip(results, faces_detector_output):
            # data['box'] is unpacked as (x, y, width, height).
            x, y, w, h = data['box']
            x1 = x + w
            y1 = y + h

            # A prediction of 0 is treated as "wearing a mask".
            mask = p == 0
            # NOTE(review): cv2.imshow interprets colours as BGR, so
            # (255, 0, 0) renders blue - confirm whether red was intended.
            color = (0, 255, 0) if mask else (255, 0, 0)
            cv2.rectangle(img=frame, pt1=(x, y), pt2=(x1, y1), color=color)

            # Mark facial landmarks when the detector provides them.
            if 'keypoints' in data:
                r = 5
                for name in ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right'):
                    cv2.circle(frame, data['keypoints'][name], r, (0, 0, 255))

            cv2.putText(frame, 'Mask' if mask else 'Not mask',
                (x, y),
                font,
                fontScale,
                fontColor,
                lineType)

        cv2.imshow('Yolov3', frame)
        frame_n += 1

        # Wait for Esc key to stop
        if cv2.waitKey(33) == 27:
            break
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

### Using deep learning for Face Detection

Pretrained model: https://github.com/ipazc/mtcnn

`pip install mtcnn`

In [37]:
# Live demo: MTCNN face detector combined with the mask classifier.
from detect.detectors import mtcnn_simple_cnn

# Alternative weights file: 'model-best.h5'
# NOTE(review): this overwrites the notebook-wide weights_file_name /
# weights_path, which the YoloV2 cell also reads - results of that cell
# therefore depend on whether this cell was executed before it.
weights_file_name = 'prev-model.h5'
weights_path = os.path.join(weights_folder, weights_file_name)
masks_detector = mtcnn_simple_cnn(weights_path, object_detector_confidence=0.8)


# Open the configured source (webcam or video file, see get_video_capture).
cap = get_video_capture()

# Label rendering settings for cv2.putText.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
fontColor = (255,255,255)
lineType = 2
# The detector only runs on every `frame_rate`-th frame; frames in between
# are annotated with the most recent detection results.
frame_n = 0
frame_rate = 12

try:
    # Loop for as long as the capture stays open.
    while cap.isOpened():

        # Read the next frame; `ret` is False once the stream is exhausted
        # or the read fails, in which case `frame` is not usable.
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame
        frame = resize_frame(frame)

        if frame_n % frame_rate == 0:
            # The detector receives an RGB copy; drawing and display keep
            # using the original BGR frame.
            frame_to_detect = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results, faces_detector_output = masks_detector.detect(frame_to_detect, verbose=0)

        for p, data in zip(results, faces_detector_output):
            # data['box'] is unpacked as (x, y, width, height).
            x, y, w, h = data['box']
            x1 = x + w
            y1 = y + h

            # A prediction of 0 is treated as "wearing a mask".
            mask = p == 0
            # NOTE(review): cv2.imshow interprets colours as BGR, so
            # (255, 0, 0) renders blue - confirm whether red was intended.
            color = (0, 255, 0) if mask else (255, 0, 0)
            cv2.rectangle(img=frame, pt1=(x, y), pt2=(x1, y1), color=color)

            # Mark facial landmarks when the detector provides them.
            if 'keypoints' in data:
                r = 5
                for name in ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right'):
                    cv2.circle(frame, data['keypoints'][name], r, (0, 0, 255))

            cv2.putText(frame, 'Mask' if mask else 'Not mask',
                (x, y),
                font,
                fontScale,
                fontColor,
                lineType)

        cv2.imshow('MTCNN + CNN', frame)
        frame_n += 1

        # Wait for Esc key to stop
        if cv2.waitKey(33) == 27:
            break
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()