In [None]:

# Mount Google Drive at /content/drive; later cells read their images from
# folders under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Загрузка модели SSD, предобученной на датасете COCO.

In [None]:
import torch
import os
import random
# Precision string handed to the hub entry point as `model_math`; the inference
# cells also compare it against 'fp16' when preparing the input tensor.
precision = 'fp32'
# Fetch the `nvidia_ssd` entry point from the `torchhub` branch of
# NVIDIA/DeepLearningExamples.
ssd_model = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_ssd',
    model_math=precision,
)

In [None]:
%%bash
pip install numpy scipy scikit-image matplotlib

Загрузка необходимых компонентов для анализа и визуализации результатов.

In [None]:
# Helper object from the same hub repository; later cells call its
# prepare_input, prepare_tensor, decode_results, pick_best and
# get_coco_object_dictionary functions.
utils = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_ssd_processing_utils',
)

Перенос модели на GPU и переключение её в режим инференса (`eval`).

In [None]:
# Move the model to the GPU and switch it to evaluation mode in one chained
# expression. Both nn.Module.to() and nn.Module.eval() return the module
# itself, so the value displayed by the cell is still the model.
ssd_model.to('cuda').eval()

Загрузка тестовых данных.

In [None]:
# Pick one random file from the Drive test folder for the single-image demo.
IMAGE_DIR = os.path.abspath('/content/drive/MyDrive/test/')
# os.walk yields nothing for a missing or unreadable directory; the default
# tuple keeps next() from raising a bare StopIteration in that case.
file_names = next(os.walk(IMAGE_DIR), (IMAGE_DIR, [], []))[2]
if not file_names:
    # Without this guard random.choice([]) fails with an unhelpful IndexError.
    raise FileNotFoundError(
        "No files found in {!r}; check that Google Drive is mounted and the "
        "folder contains images".format(IMAGE_DIR)
    )
img1 = os.path.join(IMAGE_DIR, random.choice(file_names))

In [None]:
# One-element batch holding the randomly chosen image path; the bare name on
# the last line makes the notebook display it.
uris = [img1]
uris

Предобработка изображений.

In [None]:
# Run every URI through utils.prepare_input, keeping the results in a list in
# the same order as `uris`.
inputs = list(map(utils.prepare_input, uris))

Запускаем SSD-сеть.

In [None]:
%%time
# Build the batch tensor and run the forward pass with gradient tracking
# disabled. The boolean tells prepare_tensor whether 'fp16' precision was
# requested.
with torch.no_grad():
    tensor = utils.prepare_tensor(inputs, precision == 'fp16')
    detections_batch = ssd_model(tensor)

Сеть выдаёт координаты рамок и вероятности классов объектов в этих рамках. Отфильтруем результаты и оставим только те рамки, для которых вероятность выше 40%.

In [None]:
# Decode the raw network output into one result per input image, then keep
# only what utils.pick_best selects with a 0.40 threshold.
results_per_input = utils.decode_results(detections_batch)
best_results_per_input = [
    utils.pick_best(decoded, 0.40)
    for decoded in results_per_input
]

Загрузка классов объектов датасета COCO.

In [None]:
# Label lookup used by the plotting cells, which index it as
# classes_to_labels[class_id - 1] to turn a predicted class id into a name.
classes_to_labels = utils.get_coco_object_dictionary()

Визуализация результатов.

In [None]:
from matplotlib import pyplot as plt
import matplotlib.patches as patches

# Draw every kept detection on top of its input image, one figure per image.
for image_idx, (bboxes, classes, confidences) in enumerate(best_results_per_input):
    fig, ax = plt.subplots(1)
    # Maps pixel values from [-1, 1] to [0, 1] for display (presumably the
    # range produced by utils.prepare_input -- TODO confirm).
    image = inputs[image_idx] / 2 + 0.5
    ax.imshow(image)
    # Scale the boxes by the actual image size instead of a hard-coded 300,
    # so the overlay stays correct if the input resolution ever changes.
    height, width = image.shape[:2]
    for bbox, class_id, confidence in zip(bboxes, classes, confidences):
        # Box corners are fractions of the image size; with imshow's default
        # origin the smaller y value is the upper edge of the box.
        x_min, y_min, x_max, y_max = bbox
        x, y = x_min * width, y_min * height
        w, h = (x_max - x_min) * width, (y_max - y_min) * height
        rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        # Class ids are offset by one relative to the label list.
        label = "{} {:.0f}%".format(classes_to_labels[class_id - 1], confidence * 100)
        ax.text(x, y, label, bbox=dict(facecolor='white', alpha=0.5))
    # Show and release each figure as soon as it is complete so that figures
    # do not accumulate in memory.
    plt.show()
    plt.close(fig)

In [None]:
# Collect every file in the batch folder for a multi-image run.
directory = '/content/drive/MyDrive/test_1/'
# os.listdir returns entries in arbitrary order; sorting makes the image (and
# figure) order reproducible between runs.
# NOTE: the name `files` is rebound later by `from google.colab import files`.
files = sorted(os.listdir(directory))
# os.path.join is correct whether or not `directory` ends with a separator.
# Sub-directories are skipped because they presumably cannot be loaded as
# images by utils.prepare_input.
uris = [
    path
    for path in (os.path.join(directory, name) for name in files)
    if os.path.isfile(path)
]
if not uris:
    # Fail here with a clear message rather than later inside tensor
    # preparation with an obscure one.
    raise FileNotFoundError(
        "No files found in {!r}; check that Google Drive is mounted and the "
        "folder contains images".format(directory)
    )


In [None]:
%%time
# Prepare every image of the batch, stack them into one tensor and run a
# single forward pass with gradient tracking disabled. The boolean tells
# prepare_tensor whether 'fp16' precision was requested.
inputs = list(map(utils.prepare_input, uris))
with torch.no_grad():
    tensor = utils.prepare_tensor(inputs, precision == 'fp16')
    detections_batch = ssd_model(tensor)

In [None]:
# Decode the raw network output into one result per input image and keep only
# what utils.pick_best selects with a 0.40 threshold.
results_per_input = utils.decode_results(detections_batch)
best_results_per_input = [utils.pick_best(results, 0.40) for results in results_per_input]

# Draw every kept detection on top of its input image, one figure per image.
for image_idx, (bboxes, classes, confidences) in enumerate(best_results_per_input):
    fig, ax = plt.subplots(1)
    # Maps pixel values from [-1, 1] to [0, 1] for display (presumably the
    # range produced by utils.prepare_input -- TODO confirm).
    image = inputs[image_idx] / 2 + 0.5
    ax.imshow(image)
    # Scale the boxes by the actual image size instead of a hard-coded 300,
    # so the overlay stays correct if the input resolution ever changes.
    height, width = image.shape[:2]
    for bbox, class_id, confidence in zip(bboxes, classes, confidences):
        # Box corners are fractions of the image size; with imshow's default
        # origin the smaller y value is the upper edge of the box.
        x_min, y_min, x_max, y_max = bbox
        x, y = x_min * width, y_min * height
        w, h = (x_max - x_min) * width, (y_max - y_min) * height
        rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        # Class ids are offset by one relative to the label list.
        label = "{} {:.0f}%".format(classes_to_labels[class_id - 1], confidence * 100)
        ax.text(x, y, label, bbox=dict(facecolor='white', alpha=0.5))
    # This loop runs over a whole directory: show and release each figure
    # immediately so open figures do not pile up in memory.
    plt.show()
    plt.close(fig)

Тест на самостоятельно загруженной фотографии.

In [None]:
from google.colab import files

# Ask the user for a file. The loop below treats `uploaded` as a mapping from
# file name to content and reports len(content) as the size in bytes.
uploaded = files.upload()

if not uploaded:
    # Without this guard `up_f` would never be bound and the next cell would
    # fail with a confusing NameError.
    raise ValueError("No file was uploaded; re-run this cell and choose an image")

for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))
    # If several files are uploaded, only the last name is kept for inference.
    up_f = fn

In [None]:
# One-element batch holding the name of the uploaded file.
uris = [up_f]

In [None]:
%%time
# Prepare the uploaded image, turn it into a batch tensor and run a forward
# pass with gradient tracking disabled. The boolean tells prepare_tensor
# whether 'fp16' precision was requested.
inputs = list(map(utils.prepare_input, uris))
with torch.no_grad():
    tensor = utils.prepare_tensor(inputs, precision == 'fp16')
    detections_batch = ssd_model(tensor)

In [None]:
# Decode the raw network output into one result per input image and keep only
# what utils.pick_best selects with a 0.40 threshold.
results_per_input = utils.decode_results(detections_batch)
best_results_per_input = [utils.pick_best(results, 0.40) for results in results_per_input]

# Draw every kept detection on top of its input image, one figure per image.
for image_idx, (bboxes, classes, confidences) in enumerate(best_results_per_input):
    fig, ax = plt.subplots(1)
    # Maps pixel values from [-1, 1] to [0, 1] for display (presumably the
    # range produced by utils.prepare_input -- TODO confirm).
    image = inputs[image_idx] / 2 + 0.5
    ax.imshow(image)
    # Scale the boxes by the actual image size instead of a hard-coded 300,
    # so the overlay stays correct if the input resolution ever changes.
    height, width = image.shape[:2]
    for bbox, class_id, confidence in zip(bboxes, classes, confidences):
        # Box corners are fractions of the image size; with imshow's default
        # origin the smaller y value is the upper edge of the box.
        x_min, y_min, x_max, y_max = bbox
        x, y = x_min * width, y_min * height
        w, h = (x_max - x_min) * width, (y_max - y_min) * height
        rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        # Class ids are offset by one relative to the label list.
        label = "{} {:.0f}%".format(classes_to_labels[class_id - 1], confidence * 100)
        ax.text(x, y, label, bbox=dict(facecolor='white', alpha=0.5))
    # Show and release each figure as soon as it is complete so that figures
    # do not accumulate in memory.
    plt.show()
    plt.close(fig)