In [2]:
import cv2
import numpy as np
import albumentations as A
import torch
import torchvision

# Restore the complete detector object from disk.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
MODEL_PATH = 'modelpycovid.pth'
model = torch.load(MODEL_PATH)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (0): ConvBNActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(16, eps=1e-05)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): ConvBNActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): ReLU(inplace=True)
          )
          (1): ConvBNActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): Identity()
          )
        )
      )
      (2): InvertedResidual(


In [3]:
# Pre-processing applied to every frame before inference: a plain resize to
# 300x300. No crop or mean/std normalisation is applied here.
# NOTE(review): the loaded model's printed GeneralizedRCNNTransform already
# lists a Normalize step, which is presumably why it is not repeated here --
# confirm before re-enabling normalisation in this pipeline.
transform = A.Compose([A.Resize(300, 300)])

In [4]:
from torchvision.ops import nms

def decode_output(output, iou_threshold=0.05, label_map=None):
    """Turn one detection dict from the model into plain Python lists.

    The boxes are truncated to integer pixel coordinates, class ids are
    translated to label strings, and overlapping detections are suppressed
    with NMS before the survivors are returned.

    Args:
        output: Mapping with the tensors ``'boxes'``, ``'labels'`` and
            ``'scores'`` for a single image.
        iou_threshold: IoU threshold handed to ``nms``. Defaults to 0.05,
            the value this notebook has always used.
        label_map: Mapping from class id to label string. When ``None`` the
            module-level ``target2label`` is used.

    Returns:
        A ``(bbs, confs, labels)`` tuple of lists holding only the detections
        kept by NMS: integer box coordinates, float scores and label strings.
        All three lists are empty when nothing was detected.
    """
    if label_map is None:
        # Looked up at call time, so the mapping may be defined in a later cell.
        label_map = target2label

    # Signed integers: an unsigned cast would wrap a negative coordinate
    # around to a huge positive value.
    bbs = output['boxes'].cpu().detach().numpy().astype(np.int32)
    labels = np.array([label_map[i] for i in output['labels'].cpu().detach().numpy()])
    confs = output['scores'].cpu().detach().numpy()

    # Convert the kept indices to a NumPy index array. Indexing a NumPy array
    # with a one-element torch tensor is treated as a scalar index and drops a
    # dimension; an ndarray index keeps the leading axis for 0, 1 or many hits.
    keep = nms(torch.tensor(bbs.astype(np.float32)), torch.tensor(confs), iou_threshold).numpy()

    return bbs[keep].tolist(), confs[keep].tolist(), labels[keep].tolist()

In [5]:
# Class id -> human-readable label, built from the label names in id order
# (ids 0..3). Id 3 maps to an empty string.
target2label = dict(enumerate(['background', 'With Mask', 'Without Mask', '']))

# Display the mapping as the cell output.
target2label

{0: 'background', 1: 'With Mask', 2: 'Without Mask', 3: ''}

In [6]:
# Live mask detection on the default webcam (device 0). Press 'q' in the
# preview window to stop.
cap = cv2.VideoCapture(0)

# Evaluation mode only needs to be set once, not on every frame.
model.eval()
# Send inputs to wherever the model's weights live instead of assuming CUDA.
device = next(model.parameters()).device

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera missing, busy or disconnected):
            # stop cleanly instead of crashing in cv2.resize on a None frame.
            break

        # `res` is the BGR frame that gets annotated and shown; `image` is
        # the RGB copy that goes through the pipeline into the model.
        res = cv2.resize(frame, dsize=(300, 300), interpolation=cv2.INTER_CUBIC)
        image = cv2.cvtColor(res, cv2.COLOR_BGR2RGB)
        augmented_image = transform(image=image)['image']

        # HWC uint8 -> NCHW float32 scaled to [0, 1].
        expand_image = np.expand_dims(augmented_image, axis=0)
        torch_image = torch.tensor(expand_image/255).permute(0,3,1,2).float().to(device)

        # Pure inference: skip autograd bookkeeping to save time and memory.
        with torch.no_grad():
            outputs = model(torch_image)

        for output in outputs:
            bbs, confs, labels = decode_output(output)
            for box, conf, label in zip(bbs, confs, labels):
                # Only draw detections with a score of at least 0.3.
                if conf >= 0.3:
                    cv2.rectangle(res, (box[0], box[1]), (box[2], box[3]), (0, 225, 255), 2)
                    cv2.putText(res, label, (box[0], box[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,255), 2)
        cv2.imshow('video face detect', res)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()