# Python и машинное обучение: нейронные сети и компьютерное зрение

## Модуль 5. Распознавание объектов на изображениях



### YOLOv5

По состоянию на декабрь 2023 года — «базовая» и самая простая модель для поиска объектов на фотографиях. Загружаем её через `torch.hub`.


In [None]:
# Install (or upgrade to the latest release of) the `ultralytics` package
# in the notebook environment.
!pip install -U ultralytics

In [None]:
import torch
import cv2
from PIL import Image

from torchinfo import summary

import matplotlib.pyplot as plt

# Pick the compute device: CUDA GPU first, then Apple MPS, otherwise CPU.
# MPS is probed with `is_available()`: `is_built()` only reports that this
# PyTorch build was compiled with MPS support, not that an MPS device can
# actually be used on the current machine.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
# A bare `device` expression is echoed only when it is the last statement of
# a cell, so print the choice explicitly.
print(device)

%matplotlib inline

In [None]:
# Fetch the pretrained 'yolov5n' entry point from the 'ultralytics/yolov5'
# hub repository and dump the resulting module structure.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5n',
    pretrained=True,
)
print(model)

In [None]:
# Quietly (-q) download the sample picture and store it as input.jpg (-O).
!wget https://ultralytics.com/images/zidane.jpg -q -O input.jpg

In [None]:
# Read the downloaded file with OpenCV and reorder its channels from BGR to
# RGB in one step, then preview the result as a PIL image.
img = cv2.cvtColor(cv2.imread('input.jpg'), cv2.COLOR_BGR2RGB)
display(Image.fromarray(img))

In [None]:
# Run the detector; the model is called directly with the image file path.
results = model('input.jpg')

# Pandas view of the detections in xyxy (corner) form. Index 0 selects the
# first entry - presumably the single input image; TODO confirm.
df = results.pandas().xyxy[0]
df

In [None]:
# Print the detection summary, then echo the raw predictions of the first
# image (last expression of the cell, so the notebook displays it).
results.print()
results.xyxy[0]  # img1 predictions (tensor)

In [None]:
# Line thickness for the boxes (`tl`) and the derived font thickness (`tf`).
# Alternative kept for reference - thickness scaled with the image size:
#   tl = round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
tl = 2
tf = max(tl - 1, 1)

box_color = (0, 255, 0)

# Draw every detection from `df` onto `img` (the array is modified in place).
for _, row in df.iterrows():
    # Read the corners by column name so the code does not depend on the
    # order of the DataFrame columns.
    xA, yA, xB, yB = (int(row[col]) for col in ('xmin', 'ymin', 'xmax', 'ymax'))
    cv2.rectangle(img, (xA, yA), (xB, yB), box_color, tl)
    # Class label just above the top-left corner of the box.
    cv2.putText(
        img,
        row['name'],
        (xA, yA - 2),
        cv2.FONT_HERSHEY_SIMPLEX,  # same font the literal 0 selected
        tl / 3,
        box_color,
        thickness=tf,
        lineType=cv2.LINE_AA,
    )

display(Image.fromarray(img))

## Detectron2

In [None]:
# Install detectron2 straight from its GitHub repository using the pip that
# belongs to the current Python interpreter.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
# Import detectron2 and call its logger setup helper.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

from matplotlib import pyplot as plt

# Default figure size (in inches) for all following matplotlib plots.
plt.rcParams['figure.figsize'] = (8.0, 8.0)

# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O input.jpg
im = cv2.imread("./input.jpg")
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)
plt.show()

In [None]:
import torch

# Build the Mask R-CNN (R50-FPN, 3x schedule) configuration from the model zoo.
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
# detectron2 defaults to running on CUDA; fall back to the CPU when no GPU is
# present so the cell also works on CPU-only machines.
if not torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)
# `im` was converted to RGB for plotting, but DefaultPredictor takes images in
# BGR channel order, so convert back before inference.
outputs = predictor(cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

In [None]:
# Outline the mask of the first predicted instance on a copy of the image.
instances = outputs['instances']
image_with_overlaid_predictions = im.copy()

if len(instances) == 0:
    # Nothing passed the score threshold: indexing pred_masks[0] would fail.
    print("no instances detected - showing the image without an overlay")
else:
    # First entry of the predicted masks, moved to the CPU and converted to
    # the uint8 array type that cv2.findContours requires.
    mask = instances.pred_masks.cpu().numpy().astype('uint8')[0]
    # [-2] selects the contour list for both the 2-value (OpenCV 4) and the
    # 3-value (OpenCV 3) return signature of findContours.
    contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]
    if len(contours) > 0:
        # contourIdx=-1 draws every contour, so a mask that consists of
        # several separate pieces is outlined completely.
        cv2.drawContours(image_with_overlaid_predictions, contours, -1, (0, 255, 0), 1)

plt.figure(figsize=(14, 14))
plt.imshow(image_with_overlaid_predictions)
plt.show()

## Дообучение модели YOLO



### **Форматы аннотаций**
Аннотации описывают координаты объектов на изображении и их классы.

#### a) **COCO (Common Objects in Context)**  
- **Формат**: JSON  
- Описание: Аннотации включают координаты объектов в формате прямоугольников (bounding boxes), а также информацию о сегментации (масках).
- Пример:
```json
{
  "images": [{"id": 1, "file_name": "image1.jpg"}],
  "annotations": [
    {
      "id": 1,
      "image_id": 1,
      "bbox": [x, y, width, height],
      "category_id": 1,
      "segmentation": [[...]]
    }
  ],
  "categories": [{"id": 1, "name": "cat"}]
}
```

---

#### b) **Pascal VOC**  
- **Формат**: XML  
- Описание: Каждый объект на изображении представлен прямоугольником (bounding box) с координатами и классом.  
- Пример:
```xml
<annotation>
  <folder>images</folder>
  <filename>image1.jpg</filename>
  <object>
    <name>cat</name>
    <bndbox>
      <xmin>50</xmin>
      <ymin>50</ymin>
      <xmax>150</xmax>
      <ymax>150</ymax>
    </bndbox>
  </object>
</annotation>
```

---

#### c) **YOLO (You Only Look Once)**  
- **Формат**: TXT  
- Описание: Каждый объект записывается в виде строки: `<class> <x_center> <y_center> <width> <height>`. Координаты центра и размеры рамки нормализуются относительно ширины и высоты изображения и лежат в диапазоне от 0 до 1.  
- Пример:  
```txt
0 0.5 0.5 0.25 0.25
```

https://colab.research.google.com/drive/1Plz91PHWwf04bYt21mnWp7qBcDIvJ6J6

https://medium.com/mlearning-ai/training-yolov5-custom-dataset-with-ease-e4f6272148ad


In [None]:
# Get the YOLOv5 sources, enter the checkout and install its requirements
# quietly (-q) from requirements.txt (-r).
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install

import torch
# NOTE(review): this import runs after `%cd yolov5`, i.e. from inside the
# cloned directory; whether the `yolov5` package resolves there depends on
# sys.path - confirm. `utils` is not used by the visible cells.
from yolov5 import utils

In [None]:
# Step back out of the yolov5 checkout to its parent directory.
%cd ..

In [None]:
# Mount Google Drive at /content/drive (Google Colab only).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cp /content/drive/MyDrive/mask_archive.zip ./

In [None]:
import shutil
import os, sys

# Unpack the dataset archive into ./data, or report that it is missing.
zip_file = "mask_archive.zip"

if not os.path.isfile(zip_file):
    print(zip_file + " not found")
else:
    shutil.unpack_archive(zip_file, "data")

In [None]:
import os, shutil, random

# Build the images/ and labels/ trees (each with training/ and validation/
# subfolders) and distribute the samples from `full_data_path` between them.

full_data_path = 'data/obj/'
extension_allowed = '.jpg'
split_percentage = 90  # share of samples (in percent) that goes to training

images_path = 'data/images/'
labels_path = 'data/labels/'

training_images_path = images_path + 'training/'
validation_images_path = images_path + 'validation/'
training_labels_path = labels_path + 'training/'
validation_labels_path = labels_path + 'validation/'


def reset_dir(path):
    """Remove ``path`` if it exists and recreate it empty, parents included."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


def collect_samples(source_dir, image_ext):
    """Find the image/label pairs stored directly in ``source_dir``.

    Only the top level is scanned, because the copy step addresses every file
    as ``source_dir + name``. A missing ``source_dir`` is treated as empty.

    Returns:
        ``(stems, skipped)``: ``stems`` are the file names (without
        ``image_ext``) of images that have a sibling ``.txt`` label;
        ``skipped`` are the names of images that have none.
    """
    _, _, names = next(os.walk(source_dir), (None, [], []))
    stems, skipped = [], []
    for name in names:
        if not name.endswith(image_ext):
            continue
        stem = name[:len(name) - len(image_ext)]
        if os.path.isfile(os.path.join(source_dir, stem + '.txt')):
            stems.append(stem)
        else:
            skipped.append(name)
    return stems, skipped


def copy_samples(stems, source_dir, image_ext, images_dst, labels_dst):
    """Copy the image and the ``.txt`` label of every stem to the given folders."""
    for stem in stems:
        shutil.copy(os.path.join(source_dir, stem + image_ext), images_dst)
        shutil.copy(os.path.join(source_dir, stem + '.txt'), labels_dst)


# Start from clean output trees so files of a previous run cannot leak in.
for root_dir in (images_path, labels_path):
    reset_dir(root_dir)
for leaf_dir in (training_images_path, validation_images_path,
                 training_labels_path, validation_labels_path):
    os.makedirs(leaf_dir)

files, skipped_images = collect_samples(full_data_path, extension_allowed)
if skipped_images:
    # Report unlabeled images up front instead of failing half-way through
    # the copy with a partially filled dataset.
    print(f"skipping {len(skipped_images)} image(s) without a .txt label: {skipped_images}")
if not files:
    print(f"no labeled '{extension_allowed}' images found in {full_data_path}")

random.shuffle(files)

size = len(files)

# The first `split` shuffled samples form the training set, the rest validation.
split = int(split_percentage * size / 100)

print("copying training data")
copy_samples(files[:split], full_data_path, extension_allowed,
             training_images_path, training_labels_path)

print("copying validation data")
copy_samples(files[split:], full_data_path, extension_allowed,
             validation_images_path, validation_labels_path)

print("finished")

In [None]:
# Write the dataset description that is later passed to train.py: the
# training / validation image folders, the class count and the class names.
# NOTE(review): the image paths are relative ("../data/...") - confirm they
# resolve as intended from where train.py is run.
dataset_yaml_lines = [
    "train: ../data/images/training/\n",
    "val: ../data/images/validation/\n",
    "nc: 2\n",
    "names: ['with mask', 'without mask']\n",
]

# Mode "w" (not "a"): re-running the cell rewrites the file instead of
# appending a second copy of every key. `with` closes the file even when a
# write fails.
with open("dataset.yaml", "w", encoding="utf-8") as f:
    f.writelines(dataset_yaml_lines)

In [None]:
# Enter the yolov5 checkout and run its training script: image size 640,
# batch size 16, 5 epochs, the dataset description from the parent directory,
# starting from the yolov5s.pt weights.
%cd yolov5
!python train.py --img 640 --batch 16 --epochs 5 --data ../dataset.yaml --weights yolov5s.pt

In [None]:
# Run detection on ../test.jpg with the best.pt weights from runs/train/exp
# (image size 640, confidence threshold 0.4).
# NOTE(review): the weights path assumes the training run was saved to `exp`;
# repeated training runs presumably land in a different directory - confirm.
!python detect.py --weights runs/train/exp/weights/best.pt --img 640 --conf 0.4 --source ../test.jpg

In [None]:
from IPython.display import display
import PIL

image_path = "runs/detect/exp2/test.jpg"
# display(PIL.Image.open(image_path))

import matplotlib.pyplot as plt

im = plt.imread(image_path)
plt.imshow(im)
%matplotlib inline