<a href="https://colab.research.google.com/github/ayushs0911/Object-Detection/blob/main/Mask_Detection_%7C_Detectron2_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import os
import json
import xml.etree.ElementTree as ET

In [None]:
!pip install -q kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download -d andrewmvd/face-mask-detection

In [None]:
!unzip /content/face-mask-detection.zip

In [None]:
# Install detectron2 from a source checkout: pin PyYAML, clone the repository,
# install the requirements listed in its setup.py, and add the checkout to sys.path.
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
# Run setup.py through distutils so its install_requires list can be read below.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Each requirement is wrapped in single quotes before being handed to the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Put the cloned repository first on the import path.
sys.path.insert(0, os.path.abspath('./detectron2'))


In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
# Setup detectron2 logger (called with its default arguments)
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Data Visualisation

In [None]:
# Class labels (list position is the class index) and notebook-wide constants.
classes = [
    "with_mask",
    "without_mask",
    "mask_weared_incorrect",
]
N_Classes = len(classes)
H = W = 224
SPLIT_SIZE = H // 32
N_EPOCHS = 135
BATCH_SIZE = 32

In [None]:
def preprocess_xml(filename, class_names=None):
    """
    Parses a VOC-style XML annotation into normalised bounding boxes.

    Parameters:
    filename (str or file object): Annotation to read; handed to ``ET.parse``.
    class_names (list, optional): Ordered class labels; a label's position is its
        class index. Defaults to the notebook-level ``classes`` list.

    Returns:
    list: One ``[x_center, y_center, box_width, box_height, class_index]`` entry per
        ``<object>`` that contains a ``<bndbox>``. The first four values are divided
        by the image width/height read from the ``<size>`` element.

    Raises:
    KeyError: If an object's ``<name>`` is not one of ``class_names``.
    """
    if class_names is None:
        class_names = classes
    # Built once per call instead of once per object.
    class_to_index = {name: index for index, name in enumerate(class_names)}

    root = ET.parse(filename).getroot()
    size_tree = root.find('size')
    height = float(size_tree.find("height").text)
    width = float(size_tree.find("width").text)

    bounding_boxes = []

    for object_tree in root.findall('object'):
        # Only the first <bndbox> under an object is used. An object without one
        # is skipped rather than silently reusing the previous object's corners.
        box_tree = next(object_tree.iter('bndbox'), None)
        if box_tree is None:
            continue

        xmin = float(box_tree.find('xmin').text)
        ymin = float(box_tree.find('ymin').text)
        xmax = float(box_tree.find('xmax').text)
        ymax = float(box_tree.find('ymax').text)

        class_name = object_tree.find('name').text
        bounding_boxes.append([
            (xmin + xmax) / (2 * width),   # x-center
            (ymin + ymax) / (2 * height),  # y-center
            (xmax - xmin) / width,         # box width
            (ymax - ymin) / height,        # box height
            class_to_index[class_name],
        ])
    return bounding_boxes
        
    

In [None]:
# Directory prefixes used to build image and XML annotation paths.
train_images = "/content/images/"
train_maps = "/content/annotations/"

In [None]:
# Pair every XML annotation with the PNG image of the same base name.
# Entries are sorted because os.listdir returns them in arbitrary order, and
# anything that is not an .xml file is skipped instead of having its last three
# characters blindly replaced.
im_paths = []
xml_paths = []

for xml_name in sorted(os.listdir(train_maps)):
    stem, extension = os.path.splitext(xml_name)
    if extension.lower() != ".xml":
        continue
    im_paths.append(train_images + stem + ".png")
    xml_paths.append(train_maps + xml_name)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plot_bbox(image, bounding_box, class_names):
    """
    Plots the given bounding boxes and their class labels on the image.

    Parameters:
    image (numpy.ndarray): The image to draw on; its shape must unpack as (height, width, channels).
    bounding_box (list): Boxes in the format (x_center, y_center, box_width, box_height, class_index),
        with the first four values expressed as fractions of the image width/height.
    class_names (list): A list of strings representing the names of the classes.

    Returns:
    None
    """
    # Get the image dimensions
    height, width, _ = image.shape

    # Create a figure and axes object
    fig, ax = plt.subplots(1)

    # Draw the image once, before the boxes, so it is shown even when the
    # annotation contains no boxes at all.
    ax.imshow(image)

    # Label colour per class index; any other index falls back to yellow.
    label_colors = {0: 'green', 1: 'red'}

    for x_center, y_center, box_width, box_height, class_index in bounding_box:
        # Convert the normalised centre/size box back to pixel corners
        xmin = int((x_center - box_width / 2) * width)
        xmax = int((x_center + box_width / 2) * width)
        ymin = int((y_center - box_height / 2) * height)
        ymax = int((y_center + box_height / 2) * height)

        # Add the rectangle patch for the bounding box to the axes
        rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                 linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

        # Add the class name at the top-left corner of the box
        ax.text(xmin, ymin, class_names[class_index], fontsize=10,
                color=label_colors.get(class_index, 'yellow'))

    # Show the plot
    plt.show()


In [None]:
# Preview the annotated samples at list positions 10 through 20.
for sample_index in range(10, 21):
    preview_image = plt.imread(im_paths[sample_index])
    preview_boxes = preprocess_xml(xml_paths[sample_index])
    plot_bbox(image=preview_image, bounding_box=preview_boxes, class_names=classes)

# Training on Custom Dataset 

In [None]:
import os
import json
import xml.etree.ElementTree as ET
from detectron2.structures import BoxMode


def convert_xml_to_coco(xml_dir, images_dir="/content/images/"):
    """Build one dataset record per XML annotation file found in ``xml_dir``.

    Parameters:
    xml_dir (str): Directory containing the ``.xml`` annotation files.
    images_dir (str): Directory containing the images named by each annotation's
        ``<filename>`` element. Defaults to ``/content/images/``.

    Returns:
    list: One dict per annotation file with keys ``file_name``, ``image_id``,
        ``height``, ``width`` and ``annotations``. Each annotation carries an
        absolute ``[xmin, ymin, xmax, ymax]`` box tagged ``BoxMode.XYXY_ABS``.

    Raises:
    ValueError: If an ``<object>`` names a category that is not listed below.
    """
    # Set categories (modify as per your dataset)
    category_ids = {
        "with_mask": 0,
        "without_mask": 1,
        "mask_weared_incorrect": 2,
    }

    # Sorted so that image_id values do not depend on os.listdir's arbitrary order.
    xml_names = sorted(name for name in os.listdir(xml_dir) if name.endswith(".xml"))

    dataset_dicts = []
    for image_id, filename in enumerate(xml_names):
        root = ET.parse(os.path.join(xml_dir, filename)).getroot()
        image_name = root.find("filename").text.replace(".xml", ".png")

        # Get image information
        width = int(root.find("size/width").text)
        height = int(root.find("size/height").text)

        record = {
            "file_name": os.path.join(images_dir, image_name),
            "image_id": image_id,
            "height": height,
            "width": width,
            "annotations": []
        }

        # Process object annotations
        for obj in root.findall("object"):
            category_name = obj.find("name").text
            if category_name not in category_ids:
                raise ValueError(
                    f"Unknown category {category_name!r} in annotation file {filename!r}"
                )

            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text)
            ymin = float(bbox.find("ymin").text)
            xmax = float(bbox.find("xmax").text)
            ymax = float(bbox.find("ymax").text)

            record["annotations"].append({
                "bbox": [xmin, ymin, xmax, ymax],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [],
                "category_id": category_ids[category_name],
                "category_name": category_name,
                "image_id": image_id,
                "iscrowd": 0,  # Modify as needed
                "area": (xmax - xmin) * (ymax - ymin),
            })

        dataset_dicts.append(record)

    return dataset_dicts


In [None]:
# from detectron2.data import DatasetCatalog, MetadataCatalog
# DatasetCatalog.register("MASK_detec", lambda: convert_xml_to_coco(xml_dir = "/content/annotations/",
#                                                                       images_dir = "/content/images/"))
# MetadataCatalog.get("MASK_detec").set(things_cluster = ["with_mask", "without_mask", "mask_weared_incorrect" ])

In [None]:
# Register the annotation folder as a detectron2 dataset and attach its class names.
# An existing registration under the same name is removed first, because
# DatasetCatalog.register refuses to register a name twice and would otherwise
# make this cell fail when it is re-run.
for d in ["annotations"]:
    dataset_name = "5" + d
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    # d=d binds the current value so the lazy loader does not see a later one.
    DatasetCatalog.register(dataset_name, lambda d=d: convert_xml_to_coco("/content/" + d + "/"))
    MetadataCatalog.get(dataset_name).set(thing_classes=["with_mask", "without_mask", "mask_weared_incorrect"])
mask_metadata = MetadataCatalog.get("5annotations")

In [None]:
# Display the metadata object fetched for "5annotations".
mask_metadata

To verify that the dataset is in the correct form, let's visualize the annotations of randomly selected samples from the training set.

In [None]:
# Build the dataset records and display one picked at random.
dataset_dicts = convert_xml_to_coco("/content/annotations/")
random.sample(dataset_dicts, k=1)

In [None]:
# Draw the annotations of three randomly chosen records.
for record in random.sample(dataset_dicts, k=3):
    loaded = cv2.imread(record["file_name"])
    # The channel axis is reversed going into the Visualizer and again coming out.
    drawer = Visualizer(loaded[:, :, ::-1], metadata=mask_metadata, scale=1)
    drawn = drawer.draw_dataset_dict(record)
    cv2_imshow(drawn.get_image()[:, :, ::-1])

# Train

Now, let's fine-tune a COCO-pretrained R50-FPN Faster R-CNN model on the mask dataset.

In [None]:
from detectron2.engine import DefaultTrainer

# Start from the COCO Faster R-CNN R50-FPN config and adapt it to the mask dataset.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
# Must be exactly the name given to DatasetCatalog.register ("5" + "annotations");
# the previous value "4annotations" was never registered.
cfg.DATASETS.TRAIN = ("5annotations",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 8  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 2000   # number of training iterations
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size" (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  # with_mask, without_mask, mask_weared_incorrect (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # resume=False: do not resume from a checkpoint in OUTPUT_DIR
trainer.train()

In [None]:
 # # Look at training curves in tensorboard:
# %load_ext tensorboard
# %tensorboard --logdir output

# Inference & evaluation using the trained model




In [None]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
# Build the path from cfg.OUTPUT_DIR (the directory created before training)
# instead of assuming the notebook runs from /content.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.65 # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
# Run the predictor on one image from /content/images and display its raw output.
single_image = cv2.imread("/content/images/maksssksksss100.png")
predictor(single_image)

In [None]:
# from detectron2.utils.visualizer import ColorMode
# dataset_dicts = get_balloon_dicts("balloon/val")
# for d in random.sample(dataset_dicts, 3):    
#     im = cv2.imread(d["file_name"])
#     outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
#     v = Visualizer(im[:, :, ::-1],
#                    metadata=balloon_metadata, 
#                    scale=0.5, 
#                    instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
#     )
#     out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#     cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
import locale

# Force UTF-8 as the reported preferred encoding.
# NOTE(review): presumably a workaround for shell magics failing under a
# non-UTF-8 locale in Colab; confirm.
# The replacement keeps the optional do_setlocale parameter of the real function
# so that callers using locale.getpreferredencoding(False) keep working.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
# Download a second Kaggle dataset archive.
# NOTE(review): presumably the source of the /content/test/ images used below; confirm.
! kaggle datasets download -d itsezsid/facemask-darknet

In [None]:
!unzip /content/facemask-darknet.zip

In [None]:
# Names of the .jpg files found in the test folder, in directory-listing order.
test_files = [entry for entry in os.listdir("/content/test/") if entry.endswith(".jpg")]

In [None]:
import random
from detectron2.utils.visualizer import Visualizer

# Show predictions for up to nine random test images in a 3x3 grid.
fig, axs = plt.subplots(3, 3, figsize=(30, 30))
grid_axes = axs.ravel()

# Hide the axes of every panel up front so unused panels stay blank.
for panel in grid_axes:
    panel.axis("off")

# Never ask random.sample for more images than exist.
sample_count = min(len(grid_axes), len(test_files))

for panel, image_path in zip(grid_axes, random.sample(test_files, sample_count)):
    # cv2.imread returns BGR, the channel order DefaultPredictor expects with the
    # default cfg.INPUT.FORMAT. (plt.imread would hand it RGB.)
    image = cv2.imread("/content/test/" + image_path)
    if image is None:
        # Unreadable file: leave the panel empty instead of crashing the predictor.
        continue

    # Perform object detection on the image
    outputs = predictor(image)

    # Visualizer expects RGB, so reverse the channel axis of the BGR image.
    # mask_metadata carries the thing_classes registered for the dataset.
    visualizer = Visualizer(image[:, :, ::-1], metadata=mask_metadata)

    # Draw the predicted bounding boxes on the image
    vis_output = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))

    # get_image() is already RGB, which is what imshow expects.
    panel.imshow(vis_output.get_image())

plt.show()
