In [1]:
import cv2 as cv
import numpy as np
import torch
import os
import torchvision
import torchvision.transforms as transforms

In [2]:
net = cv.dnn.readNet("./config/yolov3.weights", "./config/yolov3.cfg")
classes = []
with open("./data/coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

In [3]:
labels_dict={0:'MASK',1:'NO MASK'}
color_dict={0:(0,255,0),1:(0,0,255)}
label_test = np.array([0])

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=200, kernel_size=(3,3))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=200, out_channels=100, kernel_size=(2,2))
        self.Dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(in_features=24*24*100, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=2)

    def forward(self, x):
        x = F.relu(self.conv1(x)) #98*98*200
        x = self.pool(x) #49*49*200
        x = F.relu(self.conv2(x)) #48*48*100
        x = self.pool(x) #24*24*100
        x = self.Dropout(x)
        x = x.view(-1, 24 * 24 * 100)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x))
        return x

convNet = ConvNet()

In [5]:
model = torch.load('./config/model95.pth', map_location='cpu')
model.eval()

ConvNet(
  (conv1): Conv2d(3, 200, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(200, 100, kernel_size=(2, 2), stride=(1, 1))
  (Dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=57600, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=2, bias=True)
)

In [27]:
# Loading image
cap = cv.VideoCapture('./Videos/Kanchan_WithoutMask.mp4')
print('Video Capturing flag =',cap.isOpened())
while True :
    _, frame = cap.read()
    height, width, channels = frame.shape
    
    # Detecting objects
    blob = cv.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5 and class_id == 0:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = (255, 0, 0)
            cv.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv.putText(frame, label, (x, y + 30), font, 3, color, 3)
            faceCascade = cv.CascadeClassifier('./config/haarcascade_frontalface_default.xml')
            gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            faces = faceCascade.detectMultiScale(gray,
                                         scaleFactor=1.1,
                                         minNeighbors=5,
                                         minSize=(60, 60),
                                         flags=cv.CASCADE_SCALE_IMAGE)
            for (x, y ,w, h) in faces:
                
                face_img = frame[y:y+w, x:x+w]
                resized = cv.resize(face_img,(100,100))
                filename = './Test/testing/test_image.jpg'
                cv.imwrite(filename, resized)
                transform = transforms.Compose([
                        transforms.Resize(256),
                        transforms.CenterCrop(100),
                        transforms.ToTensor(),
                        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                    ])
                test_dataset = torchvision.datasets.ImageFolder('./Test', transform=transform)
                testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=0)
                with torch.no_grad():
                    for i, data in enumerate(testloader, 0):
                        images = data[0]
                        outputs = model(images)
                        _, predicted = torch.max(outputs.data, 1)
                        predicted = predicted.numpy()
                        if np.sum(predicted == label_test) == 1:
                            cv.rectangle(frame, (x, y), (x + w, y + h),(0, 255, 0), 2)
                            cv.putText(frame, "Face:With Mask", (x, y + 2), font, 3, (0, 255, 0), 2)
                        else :
                            print('here')
                            cv.rectangle(frame, (x, y), (x + w, y + h),(0, 0, 255), 2)
                            cv.putText(frame, "Face:Without Mask", (x, y + 2), font, 3, (0, 0, 255), 2)
                        break
                
    cv.imshow("Image", frame)
    key = cv.waitKey(1)
    if key == 27:
        break
        
cap.release()
cv.destroyAllWindows()

Video Capturing flag = True




here
here
here


In [None]:
cap.release()

In [None]:
type(label)