# COVID-19: Face Mask Detector with OpenCV, Pytorch and Deep Learning

In order to train a custom face mask detector, we need to break our project into two distinct phases, each with its own respective sub-steps:
1. **Training**: Here we'll focus on loading our face mask detection dataset from disk, training a model (using PyTorch) on this dataset, and then serializing the face mask detector to disk.
2. **Deployment**: Once the face mask detector is trained, we can then move on to loading the mask detector, performing face detection, and then classifying each face as `with_mask` or `without_mask`.

## Import all the necessary libraries

In [1]:
import os
import time
import imutils
import cv2
import torch
import numpy as np
import torch.nn.functional as F

from torch import nn, optim
from tqdm.notebook import tqdm
from torchvision import models, datasets, transforms

## Loading the dataset from the dataset folder

In [2]:
# Root folder of the dataset; it is expected to contain `train/` and `test/`
data_dir = 'dataset/'

# Resolve the folder of each split relative to the dataset root
train_dir, test_dir = (os.path.join(data_dir, split)
                       for split in ('train/', 'test/'))

# Pre-processing applied to every image: resize the short side to 256,
# take the central 224x224 crop, convert to a tensor and normalise each
# channel with the mean/std values used by torchvision's pretrained models
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# One ImageFolder per split; class labels come from the sub-folder names
train_data = datasets.ImageFolder(root=train_dir, transform=transform)
test_data = datasets.ImageFolder(root=test_dir, transform=transform)

# Mini-batch iterators (10 images per batch, reshuffled on every pass)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=10,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=10,
                                          shuffle=True)

# Report how many images each split contains
print(f"No of Training Images: {len(train_data)},",
      f"No of Testing Images: {len(test_data)}")


No of Training Images: 1176, No of Testing Images: 200


# Load the vgg16 model

In [3]:
# Find out once whether a CUDA device can be used; later cells reuse this flag
use_cuda = torch.cuda.is_available()

# Start from VGG16 with its pretrained weights
model = models.vgg16(pretrained=True)

# Keep every pretrained weight fixed so that only the layer added below
# receives gradient updates
for weight in model.parameters():
    weight.requires_grad = False

# Width of the features entering the last classifier layer
n_inputs = model.classifier[6].in_features

# Swap that last layer for a fresh linear layer with 2 outputs
# (one per class: with_mask / without_mask)
model.classifier[6] = nn.Linear(n_inputs, 2)

# Move the whole network to the GPU when one is present
model = model.cuda() if use_cuda else model

# Print the architecture to confirm the replaced head
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [4]:
# Cross-entropy over the two output logits is the training objective
criterion = nn.CrossEntropyLoss()

# Adam with its default hyper-parameters, built over the model's parameters
optimizer = optim.Adam(params=model.parameters())

# Training the model

In [None]:
n_epochs = 15

def train(model):
    """Fit `model` on `train_loader` for `n_epochs` epochs.

    Uses the notebook-level `train_loader`, `criterion`, `optimizer`,
    `use_cuda` and `n_epochs`. After every epoch the current weights are
    written to 'model-1-acc-99.pt' and the mean batch loss is printed.

    Args:
        model: the network to optimise; it is updated in place.
    """
    # Make sure dropout is active while fitting. Without this, re-running the
    # cell after the evaluation cell (which calls model.eval()) would silently
    # train with dropout disabled.
    model.train()

    for e in range(n_epochs):
        train_loss = 0
        for images, labels in tqdm(train_loader):
            if use_cuda:
                images, labels = images.cuda(), labels.cuda()

            # Clear previous accumulated gradients
            optimizer.zero_grad()

            # forward pass
            output = model(images)

            # calculate loss
            loss = criterion(output, labels)

            # backward pass (backpropagation)
            loss.backward()

            # Update weights
            optimizer.step()

            # Update train_loss
            train_loss += loss.item()

        # checkpoint after every epoch so an interrupted run keeps its progress
        torch.save(model.state_dict(), 'model-1-acc-99.pt')
        print(f" Epochs: {e+1}/{n_epochs}, Train_loss : {train_loss / len(train_loader)}")
train(model)

# Testing the model

In [6]:
test_loss = 0.
correct = 0.
total = 0.

# put dropout layers into inference behaviour
model.eval()

# gradients are not needed for evaluation; skipping them saves memory and time
with torch.no_grad():
    # wrap the loader (not the enumerate object) so tqdm knows the total
    # number of batches and can draw a real progress bar
    for batch_idx, (data, target) in enumerate(tqdm(test_loader)):

        if use_cuda:
            data, target = data.cuda(), target.cuda()

        # forward pass
        output = model(data)

        # calculate the loss
        loss = criterion(output, target)

        # update the running average of the per-batch test loss
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.item() - test_loss))

        # index of the largest logit is the predicted class
        pred = output.max(1)[1]

        # count the predictions that match the true labels
        correct += pred.eq(target).sum().item()
        total += data.size(0)

# avoid a ZeroDivisionError when the test folder contains no images
accuracy = 100. * correct / total if total else 0.

print('Test Loss: {:.6f}\n'.format(test_loss))
print('\n Test Accuracy: %2d%% (%2d/%2d)' % (accuracy, correct, total))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Test Loss: 0.021583


 Test Accuracy: 99% (199/200)


In [5]:
# Function to test custom images
from PIL import Image

def with_or_without_mask(image, model):
    """Classify one face crop with the mask classifier.

    Args:
        image: H x W x 3 uint8 array in OpenCV's BGR channel order, i.e. a
            crop of a frame returned by `cv2.imread` / `VideoCapture.read`.
        model: the trained classifier; it is switched to eval mode.

    Returns:
        (prob, index): two 0-dim tensors holding the softmax probability of
        the winning class and that class's index (see
        `train_data.class_to_idx` for the index -> name mapping).
    """
    # OpenCV stores channels as BGR, whereas the training images were loaded
    # through PIL as RGB. Reverse the channel axis so the network sees the
    # same channel order it was trained on.
    rgb_image = np.ascontiguousarray(image[:, :, ::-1])
    input_img = Image.fromarray(rgb_image)

    # same pre-processing as used for the training / test images
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    input_tensor = preprocess(input_img)
    # add the batch dimension expected by the model
    input_batch = input_tensor.unsqueeze(0)

    # Move the input to gpu if available
    if use_cuda:
        input_batch = input_batch.cuda()

    # make sure dropout is disabled even if no other cell called eval()
    model.eval()
    with torch.no_grad():
        output = model(input_batch)

    # output[0] is the 1-D logit vector of the single image; state the softmax
    # axis explicitly (implicit `dim` is deprecated)
    prob, index = torch.max(F.softmax(output[0], dim=0), 0)

    # predicted class index and probability
    return prob, index

In [6]:
# Display the class-name -> label-index mapping of the training set; the
# detection cells below treat index 0 as "Mask"
train_data.class_to_idx

{'with_mask': 0, 'without_mask': 1}

# Face Detector from Image

In [8]:
# loading our serialized face detector model from disk
print("Loading face detector model...")
prototxtPath = 'face_detector/deploy.prototxt'
weightsPath = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the face mask detector model from disk; map the checkpoint to the CPU
# first so a file saved on a GPU machine also loads on a CPU-only one
# (load_state_dict then copies the values onto the model's own device)
print("Loading face mask detector model")
model.load_state_dict(torch.load('model-1-acc-99.pt', map_location='cpu'))
model.eval()

# load the input image from disk, clone it, and grab the image dimensions
image_path = 'examples/multiple_face.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals a missing / unreadable file by returning None
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = imutils.resize(image, width=400)
orig = image.copy()
(h, w) = image.shape[:2]

# construct a blob from the image
blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))

# pass the blob through the network and obtain the face detections
print("Computing face detections...")
net.setInput(blob)
detections = net.forward()

# loop over the detections
for i in range(0, detections.shape[2]):
    # extract the confidence (i.e, probability) associated with the detection
    confidence = detections[0, 0, i, 2]

    # filter out weak detections by ensuring the confidence is greater
    # than the minimum confidence
    if confidence > 0.5:
        # compute the (x, y)-coordinates of the bounding box for the object
        # (plain Python ints are what the cv2 drawing calls expect)
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int").tolist()

        # ensure the bounding boxes fall within the dimensions of the frame
        (startX, startY) = (max(0, startX), max(0, startY))
        (endX, endY) = (min(w-1, endX), min(h-1, endY))

        # a box that is empty after clamping has no pixels to classify
        if endX <= startX or endY <= startY:
            continue

        # preprocess the image and get the class and proba
        prob, class_ = with_or_without_mask(image[startY:endY, startX:endX], model)

        # determine the class label and color we'll use to draw the bounding
        # box and text
        label = "Mask" if class_ == 0 else "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

        # include the probability in the label
        label = "{}: {:.2f}%".format(label, prob*100)

        # keep the text inside the image when the face touches the top edge
        textY = startY - 10 if startY - 10 > 10 else startY + 10

        # display the label and bounding box rectangle on the output frame
        cv2.putText(image, label, (startX, textY),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(image, (startX, startY), (endX, endY), color, 2)

# show the output image
cv2.imshow("output", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Loading face detector model...
Loading face mask detector model
Computing face detections...




# Face Detector in a real time Video

In [9]:
def detect_and_predict(frame, faceNet, maskNet, min_confidence=0.7):
    """Detect faces in `frame` and classify each one as mask / no mask.

    Args:
        frame: image array as returned by OpenCV (H x W x channels).
        faceNet: OpenCV DNN face detector (exposes `setInput` / `forward`).
        maskNet: mask classifier handed to `with_or_without_mask`.
        min_confidence: detections whose confidence is not above this value
            are ignored (defaults to the previously hard-coded 0.7).

    Returns:
        (locs, preds): two lists of equal length. `locs` holds one
        `(startX, startY, endX, endY)` box per accepted face and `preds`
        holds the matching result of `with_or_without_mask`.
    """
    # grab the dimension of the frame and then construct a blob from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
                                 (104.0, 177.0, 123.0))

    # pass the blob through the network and obtain the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()

    # face locations and the corresponding predictions of the mask network
    locs = []
    preds = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence associated with the detection
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is greater
        # than the minimum confidence
        if confidence > min_confidence:
            # compute the (x, y)-coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype('int').tolist()

            # ensure the bounding boxes fall within the dimension of the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            # a box that is empty after clamping has no pixels to classify
            # and would make the pre-processing fail, so skip it
            if endX <= startX or endY <= startY:
                continue

            # extract the face ROI and classify it (one face at a time)
            face = frame[startY:endY, startX:endX]
            locs.append((startX, startY, endX, endY))
            preds.append(with_or_without_mask(face, maskNet))

    return (locs, preds)

In [None]:
# loading our serialized face detector model from disk
print("Loading face detector model...")
prototxtPath = 'face_detector/deploy.prototxt'
weightsPath = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the face mask detector model from disk; map the checkpoint to the CPU
# first so a file saved on a GPU machine also loads on a CPU-only one
model.load_state_dict(torch.load('model-1-acc-99.pt', map_location='cpu'))
model.eval()

# Starting video stream (device 0 is the default camera)
vs = cv2.VideoCapture(0)

try:
    if not vs.isOpened():
        raise RuntimeError("Could not open the video source")

    # loop over the frames from the video stream
    while True:

        # grab the next frame; stop when the stream ends or the read fails
        # instead of crashing on a None frame
        grabbed, frame = vs.read()
        if not grabbed or frame is None:
            break

        # detect faces in the frame and determine if they are wearing
        # a face mask or not
        (locs, preds) = detect_and_predict(frame, faceNet, model)

        # loop over the detected face locations and their predictions
        for (box, pred) in zip(locs, preds):
            (startX, startY, endX, endY) = box
            (prob, clas) = pred

            # determine the class label and color we'll use to draw
            # the bounding box and text
            label = "Mask" if clas == 0 else "No Mask"
            color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

            # include the probability in the label
            label = "{}: {:.2f}%".format(label, prob * 100)

            # display the label and bounding box rectangle on the original frame
            cv2.putText(frame, label, (startX, startY - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

        # show the output frame
        cv2.imshow('Frame', frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key is pressed, break from the loop
        if key == ord("q"):
            break
finally:
    # do a bit of cleanup, also when an error interrupts the loop, so the
    # camera is not left locked and no window is left hanging
    cv2.destroyAllWindows()
    vs.release()


Loading face detector model...
