In [1]:
import numpy as np
import cv2
import os
from imutils.video import VideoStream
import imutils
import torch
from PIL import Image
import torchvision.transforms as T

In [2]:
# Files for the OpenCV DNN face detector: network definition and trained weights.
prototxtPath = 'deploy.prototxt.txt'
weightsPath = 'res10_300x300_ssd_iter_140000.caffemodel'

# Face detector used by predict_mask to locate faces in each frame.
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

# Mask classifier, loaded onto the CPU. torch.load unpickles the file, so only
# load checkpoints that come from a trusted source.
maskNet = torch.load('oct.pt', map_location='cpu')
# `model` is kept as a second name for the same classifier object.
model = maskNet

In [3]:
# Preprocessing applied to every face crop before it is fed to the classifier:
# resize to 224x224, then convert the PIL image to a tensor.
test_transforms = T.Compose(
    [
        T.Resize((224, 224)),
        T.ToTensor(),
    ]
)

In [4]:
# Pick the first CUDA GPU when PyTorch reports one is available, else the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [5]:
def predict_mask(frame, min_confidence=0.5):
    """Detect faces in a frame and label each one 'Mask' or 'No Mask'.

    Runs the module-level ``faceNet`` detector on ``frame``, classifies every
    detection whose confidence exceeds ``min_confidence`` with the module-level
    ``maskNet`` model, and draws a coloured bounding box and text label for
    each of them on a copy of the frame.

    Parameters
    ----------
    frame : numpy.ndarray
        Image whose first two dimensions are (height, width). Face crops are
        converted with ``cv2.COLOR_BGR2RGB``, so BGR channel order is expected.
    min_confidence : float, optional
        Detections with a confidence at or below this value are ignored.
        Defaults to 0.5.

    Returns
    -------
    numpy.ndarray
        A copy of ``frame`` with every accepted face annotated. When no
        detection is accepted the copy is returned without annotations.
    """
    (h, w) = frame.shape[0:2]

    blob = cv2.dnn.blobFromImage(image=frame,
                                 scalefactor=1.0,
                                 size=(300, 300),
                                 mean=(104.0, 177.0, 123.0))

    faceNet.setInput(blob)
    detections = faceNet.forward()

    # Draw on a copy so the caller's frame (and the pixels we crop faces from)
    # stay free of earlier annotations.
    image = frame.copy()

    # Inference mode only needs to be set once per call, not once per face.
    maskNet.eval()

    # Loop over ALL detections; returning inside the loop would stop after the
    # first one and leave any other face unlabelled.
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence <= min_confidence:
            continue

        # The detector reports box corners as fractions of the frame size;
        # scale them to pixel coordinates and convert to integers.
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype('int')

        # Clamp the box so it falls within the dimensions of the frame.
        (startX, startY) = (int(max(0, startX)), int(max(0, startY)))
        (endX, endY) = (int(min(w - 1, endX)), int(min(h - 1, endY)))

        # A box that collapses after clamping yields an empty crop, which
        # cv2.cvtColor cannot handle; skip it.
        if endX <= startX or endY <= startY:
            continue

        # Extract the face ROI from the unannotated frame and convert BGR -> RGB.
        face = cv2.cvtColor(frame[startY:endY, startX:endX], cv2.COLOR_BGR2RGB)

        # Resize / tensorise the crop and add the batch dimension.
        face = test_transforms(Image.fromarray(face)).float().unsqueeze(0)

        # No gradients are needed for prediction.
        with torch.no_grad():
            output = maskNet(face)
        _, predicted = torch.max(output, 1)
        mask = predicted.item()

        # Class index 0 is treated as 'Mask'; anything else as 'No Mask'.
        label = 'Mask' if mask == 0 else 'No Mask'
        color = (0, 255, 0) if label == 'Mask' else (0, 0, 255)

        # Keep the label visible when the face touches the top of the frame.
        textY = startY - 10 if startY - 10 > 10 else startY + 20

        # Display the label and bounding box.
        cv2.putText(image, label, (startX, textY),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
        cv2.rectangle(image, (startX, startY), (endX, endY), color, 2)

    return image
    
        
        

In [6]:
# Start the threaded video stream on camera index 0.
vs = VideoStream(src=0).start()

# try/finally guarantees the window is closed and the capture thread is stopped
# even if a frame fails to process or the kernel is interrupted.
try:
    while True:
        # Grab the latest frame from the threaded video stream.
        frame = vs.read()

        # The stream hands back None when the camera delivered no frame;
        # resizing None would raise, so stop cleanly instead.
        if frame is None:
            print("No frame received from the video stream; stopping.")
            break

        # Resize the frame to a width of 400 pixels before detection.
        frame = imutils.resize(frame, width=400)

        frame = predict_mask(frame)

        # Show the output frame.
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # Press 'q' to quit.
        if key == ord('q'):
            break
finally:
    cv2.destroyAllWindows()
    vs.stop()