### Toy-пример для прогона изображения через классификатор

In [2]:
from torchvision import models
from torchvision import transforms
from PIL import Image
import torch

In [3]:
# Build a ResNet-101 and load pretrained weights (may download them on first use).
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favor of
# a `weights=` argument - confirm against the installed version before changing.
resnet = models.resnet101(pretrained=True)

In [4]:
# Basic preprocessing pipeline; the steps run in the order listed.
preprocess = transforms.Compose([
    # Rescale so that the shorter side is 256 px (aspect ratio is preserved).
    transforms.Resize(256),
    # Cut out the central 224x224 region.
    transforms.CenterCrop(224),
    # Convert the PIL image to a tensor laid out as (color, height, width).
    transforms.ToTensor(),
    # Standardize each RGB channel with the given per-channel mean and std.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [5]:
# Load the sample photo and force 3-channel RGB. convert() returns a separate
# in-memory image, so the file handle can be closed as soon as it is done.
with Image.open("preprocessing/bobby.jpg") as src:
    img = src.convert('RGB')

# A bare trailing expression renders the picture inline in the notebook;
# img.show() would instead try to launch an external image viewer.
img

In [6]:
# Run the loaded image through the preprocessing pipeline defined above.
img_t = preprocess(img)

In [7]:
# Insert a new leading axis so the single image becomes a batch of one.
batch_t = torch.unsqueeze(img_t, dim=0)

In [8]:
# Switch the network to evaluation mode before inference, so layers such as
# BatchNorm use their stored running statistics instead of batch statistics.
resnet.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
# Forward pass. `out` holds one raw score (logit) per ImageNet class (1000 in
# total); these are NOT probabilities until softmax is applied further below.
# no_grad() skips autograd bookkeeping, which is not needed for inference.
with torch.no_grad():
    out = resnet(batch_t)

In [10]:
# Show the raw network output: unnormalized per-class scores, not probabilities.
out

tensor([[-3.5598e+00, -1.5349e+00, -2.3650e+00, -3.1609e+00, -3.5623e+00,
         -1.4726e+00, -1.8638e+00, -2.7809e+00, -1.4540e+00, -2.5612e+00,
         -1.3668e+00, -1.2080e+00, -2.2317e+00, -2.9458e+00, -2.4995e+00,
         -2.7210e+00, -3.5842e+00, -7.4206e-01, -7.0536e-01, -6.0140e-01,
         -2.8701e+00, -3.9526e+00, -2.1620e+00, -1.1822e+00, -7.2278e-01,
         -9.5239e-01, -3.2510e+00, -2.5772e+00, -2.1381e+00, -3.1418e+00,
         -3.5953e+00, -1.9633e+00, -1.9265e+00, -2.2787e+00, -1.7937e+00,
         -3.2149e+00, -1.2809e+00, -1.4755e+00, -1.4550e+00, -1.4939e+00,
         -1.0700e+00, -1.7304e+00,  1.1573e+00, -1.8638e-01, -2.5022e+00,
         -1.5027e+00,  4.8939e-01, -1.2614e+00, -3.0993e+00, -2.9462e+00,
         -2.4259e+00, -1.8352e+00, -2.1268e+00, -2.1180e+00, -1.9262e+00,
         -1.6787e+00, -4.8157e-01, -2.0333e+00, -3.4429e+00, -7.3943e-01,
         -4.2138e-01, -1.1788e+00, -8.5109e-01, -1.4479e+00, -1.9015e+00,
         -2.0515e+00, -2.0243e+00, -6.

To see the list of predicted labels, we will load a text file listing the labels in the
same order they were presented to the network during training, and then we will pick
out the label at the index that produced the highest score from the network

In [20]:
# Read the class labels, one per line, stripping surrounding whitespace and
# the trailing newline from each entry.
with open('preprocessing/imagenet_classes.txt') as f:
    labels = [raw_line.strip() for raw_line in f]

At this point, we need to determine the index corresponding to the maximum score
in the out tensor we obtained previously. We can do that using the max function in
PyTorch, which outputs the maximum value in a tensor as well as the indices where
that maximum value occurred:

In [21]:
# Reduce over the class dimension: returns the top score and its class index.
maximum_value, index = torch.max(out, dim=1)

In [22]:
index   # index of the class with the highest score (not the score itself)

tensor([207])

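Но сейчас index -- тензор из одного элемента. Выражение index[0] достаёт этот элемент, однако результат по-прежнему остаётся тензором (нулевой размерности); чтобы получить обычное число Python, нужно вызвать index[0].item()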

In [23]:
index[0]   # still a 0-dim tensor, not a Python int; .item() returns the plain number

tensor(207)

In [27]:
# Softmax turns the raw scores of the single batch item into probabilities
# that sum to 1; multiplying by 100 expresses them as percentages.
probabilities = torch.nn.functional.softmax(out, dim=1)[0]
percentage = 100 * probabilities

# Report the label and the confidence (in percent) of the top prediction.
best = index[0]
print(labels[best], percentage[best].item())


207: 'golden retriever', 97.1847152709961


Since the model produced scores, we can also find out what the second best, third
best, and so on were.

In [28]:
# Order all class indices by raw score, best first; the sorted values
# themselves are not needed, only the resulting index order.
_, indices = torch.sort(out, descending=True)

# Pair each of the five strongest predictions with its confidence in percent.
top_five = [(labels[i], percentage[i].item()) for i in indices[0, :5]]
print(top_five)

[("207: 'golden retriever',", 97.1847152709961), ("208: 'Labrador retriever',", 2.2225759029388428), ("219: 'cocker spaniel, English cocker spaniel, cocker',", 0.3677794337272644), ("168: 'redbone',", 0.04937955364584923), ("852: 'tennis ball',", 0.046210650354623795)]


### Тензоры в PyTorch

![title](preprocessing/images/tensors.png)

- torch.float32 or torch.float: 32-bit floating-point
- torch.float64 or torch.double: 64-bit, double-precision floating-point
- torch.float16 or torch.half: 16-bit, half-precision floating-point
- torch.int8: signed 8-bit integers
- torch.uint8: unsigned 8-bit integers
- torch.int16 or torch.short: signed 16-bit integers
- torch.int32 or torch.int: signed 32-bit integers
- torch.int64 or torch.long: signed 64-bit integers
- torch.bool: Boolean