# Simple interface for helmet on head detection using trained model

## Initialization

#### Import block

In [6]:
# Предварительная установка пакетов (для Colab)
#!pip install torchinfo ultralytics

In [1]:
import numpy as np
import torch
import matplotlib.image as img
import matplotlib.pyplot as plt
import matplotlib
#import cv2
import os
import json
import torchvision.transforms.functional as F

#import typing
#import xml.etree.ElementTree as ET
#import random

from tqdm import tqdm
from PIL import Image
from torchvision.utils import draw_bounding_boxes
from torchvision.io import read_image
from torchvision.ops import box_convert
from torchinfo import summary
from torchvision.io.image import read_image
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights, fasterrcnn_resnet50_fpn
from torchvision.models.mobilenetv3 import mobilenet_v3_large, MobileNet_V3_Large_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image, pil_to_tensor
from torchvision import transforms
from torchvision.transforms._presets import ObjectDetection
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.nn.functional import smooth_l1_loss, cross_entropy
#?
from torchvision.models.detection.ssdlite import SSDLiteClassificationHead
#from ultralytics import YOLO

%matplotlib inline

#### Choose place and set default path to work

In [2]:
# Устанавливаем путь до датасета

# Локально
#ds_path = '/home/vovk/SberUniversity/DS_from_03_10_2022/PM_Group_AN/FinalProj/HelmetDetection/VOC2028/'
# relative path
ds_path = 'VOC2028/'

# Colab
# from google.colab import drive
# drive.mount('/content/drive')

# ds_path = '/content/drive/MyDrive/SberUniversity/PM_DS14AN/Fin/HelmetDetection/VOC2028/'

## Prepairing **SSDLite** model for prediction.

#### Create template for loading model.

In [3]:
model_loaded = ssdlite320_mobilenet_v3_large( weights='DEFAULT',
                                      weights_backbone=MobileNet_V3_Large_Weights.IMAGENET1K_V1,
                                      score_tresh=0.25
                                     )

m_3class = ssdlite320_mobilenet_v3_large( num_classes=3,
                                          weights_backbone=None,
                                          score_tresh=0.25,
                                        )

model_loaded.head = m_3class.head

#### Load model

In [44]:
#best_path = ds_path + 'tmp/models/' + 'SSDLiteMobNetFreezBackbone_3class_best.pt'
#best_path = ds_path + 'tmp/models/' + 'SSDLiteMobNetFreezBackbone_3class_01_best(34ep).pt'
best_path = ds_path + 'tmp/models/' + 'SSDLiteMobNetFreezBackbone_3class_best(49ep).pt'
checkpoint = torch.load(best_path, map_location=torch.device('cpu'))
model_loaded.load_state_dict(checkpoint['model_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

print('Model trained', epoch, 'epoches')
print(f'Boxes loss: {round(loss["bbox_loss"].item(),4)}, class loss: {round(loss["cls_loss"].item(),4)}')

Model trained 49 epoches
Boxes loss: 1.5395, class loss: 1.9102


# Prediction using trained **SSDLite** model.

In [None]:
#for Colab
#from google.colab import files
#img1 = files.upload()

In [35]:
img1 = Image.open(ds_path + 'short_test/'+ "hard_hat_workers966.png")
convert_to_tensor = ObjectDetection() # Универсальное преобразование (используется в исходниках ssdlite)
tensor_img1 = convert_to_tensor(img1)
model_loaded.eval()
model_loaded.score_thresh = 0.5
prediction = model_loaded([tensor_img1])[0]

labels_dict={'1': 'head',
             '2': 'helmet'}

labels = [labels_dict[str(label.item())] + ': ' + \
          str(round(prediction["scores"][idx].item(), 2)) \
          for idx, label in enumerate(prediction["labels"])]

box = draw_bounding_boxes(pil_to_tensor(img1), # for original image case
                          #(tensor_img1*256).to(dtype=torch.uint8), # for normalized image case (not fully identical)
                          boxes=prediction['boxes'],
                          labels=labels,
                          colors='red',
                          width=3)
im = to_pil_image(box.detach())
im.show()

print(prediction)

{'boxes': tensor([[364.4760, 115.8751, 416.0000, 191.6312]], grad_fn=<StackBackward0>), 'scores': tensor([0.7539], grad_fn=<IndexBackward0>), 'labels': tensor([2])}


In [49]:
convert_to_tensor = ObjectDetection() # Универсальное преобразование (используется в исходниках ssdlite)
model_loaded.eval()
model_loaded.score_thresh = 0.4

labels_dict={'1': 'head',
             '2': 'helmet'}
colors_dict={'1': 'red',
             '2': 'green'}

test_path = ds_path + 'short_test/'
img_lst = os.listdir(test_path)

for img_name in img_lst:
    img1 = Image.open(test_path + img_name)
    
    tensor_img1 = convert_to_tensor(img1)
    prediction = model_loaded([tensor_img1])[0]

    

    labels = [labels_dict[str(label.item())] + ': ' + \
            str(round(prediction["scores"][idx].item(), 2)) \
            for idx, label in enumerate(prediction["labels"])]

    colors = [colors_dict[str(label.item())] \
              for idx, label in enumerate(prediction["labels"])]

    box = draw_bounding_boxes(pil_to_tensor(img1), # for original image case
                            #(tensor_img1*256).to(dtype=torch.uint8), # for normalized image case (not fully identical)
                            boxes=prediction['boxes'],
                            labels=labels,
                            colors=colors,
                            width=3)
    im = to_pil_image(box.detach())
    im.show()

    print(img_name, prediction)

002.jpg {'boxes': tensor([[361.3578,  68.3813, 450.0000, 184.5416],
        [243.9342,  44.5466, 341.5432, 167.8883]], grad_fn=<StackBackward0>), 'scores': tensor([0.9392, 0.8468], grad_fn=<IndexBackward0>), 'labels': tensor([2, 2])}
000009.jpg {'boxes': tensor([[243.1186,  56.5447, 358.3921, 199.8705],
        [ 94.2763, 315.8340, 231.3785, 433.4378],
        [367.1082,  30.9294, 465.0062, 146.2465],
        [ 60.1443, 121.0476, 142.0041, 219.4489],
        [381.2317,  36.9458, 470.8235, 149.4275]], grad_fn=<StackBackward0>), 'scores': tensor([0.9970, 0.7657, 0.7337, 0.6585, 0.5363], grad_fn=<IndexBackward0>), 'labels': tensor([2, 2, 2, 1, 1])}
001_crop.jpg {'boxes': tensor([[176.0350, 405.4665, 333.5681, 546.3819]], grad_fn=<StackBackward0>), 'scores': tensor([0.5629], grad_fn=<IndexBackward0>), 'labels': tensor([2])}




am3_9_frame109.jpg {'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}
hard_hat_workers846.png {'boxes': tensor([[177.2856, 142.2804, 249.9699, 211.1718]], grad_fn=<StackBackward0>), 'scores': tensor([0.4275], grad_fn=<IndexBackward0>), 'labels': tensor([1])}




hard_hat_workers15.png {'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}
hard_hat_workers966.png {'boxes': tensor([[360.8781, 115.7856, 416.0000, 199.3054]], grad_fn=<StackBackward0>), 'scores': tensor([0.4306], grad_fn=<IndexBackward0>), 'labels': tensor([2])}




am3_9_frame111.jpg {'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}




001.jpg {'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}
000012.jpg {'boxes': tensor([[0.0000e+00, 1.6722e+02, 2.8219e+02, 4.5515e+02],
        [3.3546e+02, 1.2223e+02, 5.0028e+02, 3.4359e+02],
        [5.2386e+02, 5.7271e-01, 7.9222e+02, 3.1288e+02]],
       grad_fn=<StackBackward0>), 'scores': tensor([0.9823, 0.9237, 0.9002], grad_fn=<IndexBackward0>), 'labels': tensor([2, 2, 2])}




hard_hat_workers564.png {'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}




am3_9_frame111_crop.jpg {'boxes': tensor([], size=(0, 4), grad_fn=<StackBackward0>), 'scores': tensor([], grad_fn=<IndexBackward0>), 'labels': tensor([], dtype=torch.int64)}
am3_9_frame109_crop.jpg {'boxes': tensor([[181.8925, 198.8029, 240.8736, 290.4316]], grad_fn=<StackBackward0>), 'scores': tensor([0.4727], grad_fn=<IndexBackward0>), 'labels': tensor([2])}
