In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim

import torchvision
from torchvision import models
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder, DatasetFolder

import os
import numpy as np
import cv2
from tqdm.notebook import tqdm

In [24]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [25]:
# Preprocessing shared by every dataset split: shrink to 28x28, convert to a
# tensor, then normalise each of the three channels with mean 0.5 / std 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
preprocessing_steps = [
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
image_transforms = transforms.Compose(preprocessing_steps)

In [26]:
# One ImageFolder dataset per split; every split goes through the same
# `image_transforms` preprocessing.
split_roots = {'train': './Train',
               'test': './Test',
               'validation': './Validation'}
data = {split: ImageFolder(root=root, transform=image_transforms)
        for split, root in split_roots.items()}

In [27]:
# Show the class-name -> index mapping of each split so they can be compared.
tuple(data[split].class_to_idx for split in ('train', 'test', 'validation'))

({'WithMask': 0, 'WithoutMask': 1},
 {'WithMask': 0, 'WithoutMask': 1},
 {'WithMask': 0, 'WithoutMask': 1})

In [29]:
# Batched, shuffled loaders for each split; only the batch size differs.
batch_sizes = {'train': 100, 'test': 20, 'validation': 40}
dataloaders = {split: DataLoader(data[split], batch_size=size, shuffle=True)
               for split, size in batch_sizes.items()}

## ResNet

In [32]:
# Load ResNet-50 with its pretrained weights.
pretrained_resnet = models.resnet50(pretrained=True)
# Replace the final fully connected layer with a 2-output head, one output per
# dataset class (WithMask / WithoutMask).
pretrained_resnet.fc = nn.Linear(in_features=pretrained_resnet.fc.in_features,
                                 out_features=2)
# Move the model's parameters to the selected device.
pretrained_resnet.to(device)

In [35]:
# Loss function and optimiser: cross-entropy on the logits, Adam over every
# parameter of the network (the whole model is fine-tuned, not only the head).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=pretrained_resnet.parameters(), lr=1e-3)

# Loss histories filled in by later cells.
train_loss, validation_loss = [], []

In [None]:
# Training loop: fine-tune the network on the training split.
epochs = 1

# Explicitly enter training mode. A later cell in this notebook calls
# `.eval()`, so without this, re-running the cell would silently train with
# batch-norm layers frozen in inference mode.
pretrained_resnet.train()

for epoch in tqdm(range(1, epochs + 1)):
    running_loss = 0.0   # sum of per-sample losses seen this epoch
    samples_seen = 0

    for image, label in dataloaders['train']:
        image, label = image.to(device), label.to(device)

        optimizer.zero_grad()
        prediction = pretrained_resnet(image)
        loss = criterion(prediction, label)
        loss.backward()
        optimizer.step()

        # `.item()` extracts a plain float, so no tensor (and no device
        # memory) is kept alive by the bookkeeping below.
        batch_size = label.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Mean loss over the whole epoch rather than the (noisy) last batch;
    # `max(..., 1)` keeps an empty loader from raising.
    epoch_loss = running_loss / max(samples_seen, 1)
    print("EPOCH", epoch)
    print('training_loss: ', epoch_loss)
    train_loss.append(epoch_loss)

In [37]:
# Measure classification accuracy on the test split.
correct, total = 0, 0

# Evaluate in inference mode: batch-norm must use its learned running
# statistics (and must not update them from test data). The previous mode is
# restored afterwards so this cell has no lasting effect on the model.
was_training = pretrained_resnet.training
pretrained_resnet.eval()

with torch.no_grad():  # no autograd graph is needed for evaluation
    for image, label in dataloaders['test']:
        image, label = image.to(device), label.to(device)

        prediction = pretrained_resnet(image)
        predicted_class = prediction.argmax(dim=1)  # highest logit per sample
        correct += (predicted_class == label).sum().item()
        total += label.size(0)

pretrained_resnet.train(was_training)

# Guard against an empty test loader instead of dividing by zero.
accuracy = correct / total if total else float('nan')
print('accuracy: ', round(accuracy, 3))
print('correct ', correct)
print('total', total)

accuracy:  0.983
correct  975
total 992


In [40]:
# Inference-time preprocessing for raw image arrays (e.g. crops taken from a
# video frame): convert the array to a PIL image, then reuse the exact
# `image_transforms` pipeline the datasets were loaded with, so training and
# inference preprocessing cannot drift apart.
# NOTE: ToPILImage interprets a 3-channel array as RGB, whereas OpenCV frames
# are BGR, so arrays should be converted to RGB before being passed in.
transformations = transforms.Compose([transforms.ToPILImage(), image_transforms])

# Display captions indexed by predicted class id (0 = WithMask, 1 = WithoutMask).
labels = ['Mask', 'No Mask']

# Switch to inference mode; the trailing `;` keeps the notebook from dumping
# the full model repr as the cell output.
pretrained_resnet.eval();

### OpenCV DNN Method

In [48]:
import imutils
import cv2
# Captions drawn on the video, indexed by the classifier's predicted class id.
labels = [
    'Mask',
    'No Mask',
]

In [None]:
# Live mask detection: faces are located with OpenCV's DNN module (Caffe SSD
# model), each face crop is classified by `pretrained_resnet`, and the
# annotated frame is shown until `q` is pressed.
modelFile = "./cv2dnn/res10_300x300_ssd_iter_140000.caffemodel"
configFile = "./cv2dnn/deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
video_capture = cv2.VideoCapture(0)

# Make sure the classifier is in inference mode regardless of cell run order.
pretrained_resnet.eval()

try:
    while True:
        # Capture frame-by-frame; stop cleanly if the camera yields nothing
        # (otherwise `frame` is None and the resize below would crash).
        ret, frame = video_capture.read()
        if not ret:
            break
        frame = imutils.resize(frame, width=750)

        # grab the frame dimensions and convert it to a blob
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
                                     (300, 300), (104.0, 177.0, 123.0))

        net.setInput(blob)
        detections = net.forward()

        # loop over the detections
        for i in range(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]

            # filter out weak detections by ensuring the `confidence` is
            # at least 0.5
            if confidence < 0.5:
                continue

            # The detector returns box corners as fractions of the frame size;
            # scale them to pixel coordinates.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            startX, startY, endX, endY = (int(v) for v in box.astype("int"))

            # Clip to the frame and skip degenerate boxes: an empty crop
            # cannot be resized by the preprocessing pipeline.
            startX, startY = max(0, startX), max(0, startY)
            endX, endY = min(w, endX), min(h, endY)
            if endX <= startX or endY <= startY:
                continue

            # (endX, endY) is the opposite *corner*, not a width/height, so
            # the crop is [startY:endY, startX:endX]. OpenCV frames are BGR,
            # while the classifier's preprocessing expects RGB.
            face_img = cv2.cvtColor(frame[startY:endY, startX:endX],
                                    cv2.COLOR_BGR2RGB)

            with torch.no_grad():  # inference only, no autograd graph needed
                output = pretrained_resnet(
                    transformations(face_img).unsqueeze(0).to(device))
            predicted = torch.argmax(output).item()

            # draw the bounding box of the face along with the predicted
            # label; keep the text inside the frame for boxes near the top
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                          (0, 0, 255), 2)
            cv2.putText(frame, labels[predicted], (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

        # show the output frame
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cv2.destroyAllWindows()
    video_capture.release()

### OpenCV Haar Cascade Method

In [45]:
import cv2
import sys
# Live mask detection: faces are located with OpenCV's frontal-face Haar
# cascade, each face crop is classified by `pretrained_resnet`, and the
# annotated frame is shown until `q` is pressed.
labels = ['Mask', 'No Mask']
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
video_capture = cv2.VideoCapture(0)

# Make sure the classifier is in inference mode regardless of cell run order.
pretrained_resnet.eval()

try:
    while True:
        # Capture frame-by-frame; stop cleanly if the camera yields nothing
        # (otherwise `frame` is None and the colour conversion would crash).
        ret, frame = video_capture.read()
        if not ret:
            break

        # Haar cascades operate on intensity images, so detect on grayscale.
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(
            gray_frame,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        for (x, y, w, h) in faces:
            x, y, w, h = int(x), int(y), int(w), int(h)

            # Crop *before* drawing so the green rectangle does not end up
            # inside the classifier input. OpenCV frames are BGR, while the
            # classifier's preprocessing expects RGB.
            face_img = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2RGB)

            # Predicting mask label
            with torch.no_grad():  # inference only, no autograd graph needed
                output = pretrained_resnet(
                    transformations(face_img).unsqueeze(0).to(device))
            predicted = torch.argmax(output).item()

            # Draw a rectangle around the face and caption it
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, labels[predicted], (x, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the resulting frame
        cv2.imshow('Video', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    video_capture.release()
    cv2.destroyAllWindows()