In [34]:
# import all the libraries we need
import urllib
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader, Dataset
import os 
from PIL import Image
import random
import xml.etree.ElementTree as ET
import time
import requests
from functions import read_annot,single_img_predict,get_labels_from_dataset,get_labels_and_predictions, compute_mape

In [35]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [36]:
# Load the serialized model onto the selected device and switch it to inference mode.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): newer PyTorch releases default torch.load to weights_only=True, which rejects
# fully pickled models -- confirm against the installed version.
model = torch.load('torch_model.pth', map_location=device).eval()
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [39]:
def count_masks(dataset):
    """Count predicted mask classes per image and score them against the annotations.

    Every image in ``dataset`` (``.jpg``, ``.jpeg`` or ``.png``) that has an
    annotation file ``<stem>.xml`` next to it is run through the notebook-level
    ``model`` on ``device``. Ground truth and predictions are built from the
    same sorted list of images, so row ``i`` of both arrays always refers to
    the same file.

    Args:
        dataset: Directory holding both the images and their XML annotations.

    Returns:
        tuple: ``(predictions, mape)``. ``predictions`` is an ``(n_images, 3)``
        integer array of per-image counts ordered
        ``[with_mask, mask_weared_incorrect, other]`` with rows following the
        sorted image file names; ``mape`` is the percentage error computed by
        ``compute_mape`` below.

    Raises:
        ValueError: If ``dataset`` contains no image with a matching annotation.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    def list_annotated_images(dataset_dir):
        """Return the sorted image file names that have a sibling ``<stem>.xml``."""
        annotated = []
        for file_name in sorted(os.listdir(dataset_dir)):
            stem, ext = os.path.splitext(file_name)
            if ext.lower() not in image_extensions:
                continue
            # Images without an annotation cannot be scored, so they are skipped
            # for both the ground truth and the predictions.
            if os.path.isfile(os.path.join(dataset_dir, stem + '.xml')):
                annotated.append(file_name)
        return annotated

    def read_annot(file_name, xml_dir):
        """Parse the annotation belonging to ``file_name``.

        Returns ``(bbox, labels)``: boxes as ``[xmin, ymin, xmax, ymax]`` and the
        label index of each object (2 = 'with_mask', 0 = 'mask_weared_incorrect',
        1 = any other name).
        """
        bbox = []
        labels = []

        # splitext copes with extensions of any length ('.jpeg', '.xml', ...),
        # unlike slicing off a fixed three characters.
        annot_path = os.path.join(xml_dir, os.path.splitext(file_name)[0] + '.xml')
        root = ET.parse(annot_path).getroot()
        for obj in root.iter('object'):
            ymin = int(obj.find("bndbox/ymin").text)
            xmin = int(obj.find("bndbox/xmin").text)
            ymax = int(obj.find("bndbox/ymax").text)
            xmax = int(obj.find("bndbox/xmax").text)
            bbox.append([xmin, ymin, xmax, ymax])

            label = obj.find('name').text
            if label == 'with_mask':
                label_idx = 2
            elif label == 'mask_weared_incorrect':
                # NOTE(review): torchvision detection models reserve label 0 for
                # the background class, so a model trained with this mapping may
                # never predict 0 -- confirm against the training code.
                label_idx = 0
            else:
                label_idx = 1
            labels.append(label_idx)

        return bbox, labels

    def single_img_predict(img, model, device, nm_thrs=0.3, score_thrs=0.8):
        """Return ``[with_mask, mask_weared_incorrect, other]`` counts for one image.

        Detections are reduced with non-maximum suppression at IoU ``nm_thrs``
        (applied across all classes together) and then filtered to scores
        strictly above ``score_thrs``.
        """
        test_img = transforms.ToTensor()(img).to(device)
        model.eval()

        with torch.no_grad():
            predictions = model(test_img.unsqueeze(0))

        detections = predictions[0]

        # Applying non-maximum suppression
        keep_boxes = torchvision.ops.nms(detections['boxes'], detections['scores'], nm_thrs)

        # Filtering out bounding boxes with scores less than threshold
        score_filter = detections['scores'][keep_boxes] > score_thrs
        test_labels = detections['labels'][keep_boxes][score_filter]

        correct_count = (test_labels == 2).sum().item()
        not_wearing_count = (test_labels == 1).sum().item()
        incorrect_count = (test_labels == 0).sum().item()

        return [correct_count, incorrect_count, not_wearing_count]

    def get_labels_from_dataset(dataset_dir):
        """Return the ``(n_images, 3)`` array of annotated counts per image."""
        rows = []
        for image_file in list_annotated_images(dataset_dir):
            _, labels = read_annot(image_file, dataset_dir)
            # Same column order as single_img_predict.
            rows.append([labels.count(2), labels.count(0), labels.count(1)])
        # reshape keeps a well-formed (0, 3) array when nothing was found.
        return np.array(rows, dtype=int).reshape(-1, 3)

    def get_labels_and_predictions(dataset_dir, model, device, nm_thrs=0.3, score_thrs=0.8):
        """Return the ``(n_images, 3)`` array of predicted counts per image."""
        rows = []
        for image_file in list_annotated_images(dataset_dir):
            # convert() loads the pixel data, so the file handle can be closed
            # right away instead of being left open for every image.
            with Image.open(os.path.join(dataset_dir, image_file)) as raw_img:
                img = raw_img.convert('RGB')
            rows.append(single_img_predict(img, model, device, nm_thrs, score_thrs))
        return np.array(rows, dtype=int).reshape(-1, 3)

    def compute_mape(actual, forecast):
        """Return the percentage error between two count arrays.

        Each absolute error is divided by ``max(actual, 1)`` so zero counts do
        not divide by zero. The errors of all three classes are summed and the
        total is divided by the number of rows (images) only.
        """
        if actual.shape != forecast.shape:
            raise ValueError(
                f"Shape mismatch between labels {actual.shape} and predictions {forecast.shape}"
            )
        n = actual.shape[0]
        if n == 0:
            raise ValueError("Cannot compute MAPE without any annotated images")
        absolute_errors = np.abs((actual - forecast) / np.maximum(actual, 1))
        mape = (1 / n) * np.sum(absolute_errors) * 100
        return mape

    # `model` and `device` come from the notebook's earlier cells.
    predictions = get_labels_and_predictions(dataset, model, device)
    if predictions.shape[0] == 0:
        raise ValueError(f"No annotated images found in {dataset!r}")
    true = get_labels_from_dataset(dataset)
    mape = compute_mape(true, predictions)

    return predictions, mape

In [40]:
# Evaluate the model on the images in 'MaskedFace/val/' (presumably the validation split);
# the cell displays the (predictions, mape) tuple returned by count_masks.
count_masks('MaskedFace/val/')

(array([[16,  0,  0],
        [ 3,  0,  3],
        [ 1,  0,  0],
        [ 2,  0,  0],
        [ 9,  0,  0],
        [ 9,  0,  0],
        [ 2,  0,  0],
        [13,  0,  0],
        [ 1,  0,  0],
        [ 2,  0,  2],
        [ 5,  0,  1],
        [ 0,  0,  1],
        [ 4,  0,  0],
        [ 2,  0,  0],
        [ 0,  0,  2],
        [ 4,  0,  4],
        [17,  0,  1],
        [ 1,  0,  0],
        [ 3,  0,  3],
        [ 6,  0,  0],
        [ 7,  0,  0],
        [ 1,  0,  0],
        [ 1,  0,  0],
        [ 1,  0,  1],
        [10,  0,  0],
        [ 1,  0,  0],
        [11,  0,  0],
        [ 2,  0, 12],
        [ 1,  0,  0],
        [12,  0,  0],
        [ 0,  0,  1],
        [13,  0,  0],
        [52,  0,  0],
        [ 2,  0,  0],
        [ 1,  0,  0],
        [15,  0,  5],
        [ 2,  0,  0],
        [ 2,  0,  0],
        [20,  0, 16],
        [ 4,  0,  7],
        [ 1,  0,  1],
        [ 1,  0,  0],
        [ 1,  0,  0],
        [ 9,  0,  0],
        [ 0,  0,  1],
        [ 