# Chapter 2 - Trying out torchvision

### Importing PIL library to load the image 

In [1]:
from PIL import Image
# Open the sample photo and print its summary line (image class, mode, size).
# NOTE(review): this is an absolute, machine-specific path — point it at a
# local copy of the image before re-running the notebook elsewhere.
image_path = "C:/Users/avkothap/Documents/personal/pyTorchBook/IMG_8841.jpeg"
img = Image.open(image_path)
print(img)

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3024x4032 at 0x22F1697FD60>


### Importing torchvision and its models 
- ResNet-101 is one of the available models; it has 101 layers (layers are modules in PyTorch lingo)
- we are using a pretrained model by setting (pretrained=True)

In [2]:
from torchvision import models

# Build a ResNet-101 with pretrained weights; the checkpoint is downloaded
# into the local torch hub cache when it is not already present.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of a `weights=` argument — confirm against the installed version.
resnet = models.resnet101(
    pretrained=True,
)



Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to C:\Users\avkothap/.cache\torch\hub\checkpoints\resnet101-63fe2227.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

### We need to preprocess the image to something similar to what the model was trained with

In [3]:
from torchvision import transforms
# Preprocessing pipeline applied to the PIL image, in order:
#   1. resize to 256,
#   2. take a 224 centre crop,
#   3. convert to a tensor,
#   4. normalise each of the three channels with the given mean / std.
# The mean/std values presumably match the statistics the pretrained model
# was trained with — TODO confirm against the model's documentation.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Run the loaded image through the pipeline to get the input tensor.
img_t = preprocess(img)

In [4]:
import torch

# Insert a new leading dimension (position 0) so the single image tensor is
# presented to the network as a batch containing one element.
batch_t = img_t.unsqueeze(0)

- The process of running a trained model on new data is called inference in deep learning circles. In order to do inference, we need to put the network in eval mode
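- If this is not done, some models will not produce proper results, because layers such as batch normalization and dropout behave differently in training mode than in eval mode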

In [5]:
# Put the network into evaluation mode before running inference. Being the
# last expression of the cell, the returned module is also displayed, which
# prints the layer-by-layer structure of the model.
resnet.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
# Forward pass for inference: run the batch through the network to obtain
# the raw per-class scores. Gradients are never used in this notebook, so
# autograd tracking is switched off for the call; this avoids building the
# computation graph and keeps `out` detached, without changing its values.
with torch.no_grad():
    out = resnet(batch_t)

In [8]:
# Read the class-label file into a list, one entry per line, with surrounding
# whitespace (including the trailing newline) removed. Judging by the results
# printed further down, entries look like '457, bow_tie'.
with open('imagenet_classes.txt') as labels_file:
    labels = [raw_line.strip() for raw_line in labels_file]


In [9]:
# Position of the highest score along dimension 1 for each row of `out`;
# the maximum values themselves are discarded.
_, index = out.max(dim=1)

In [10]:
# Softmax over dimension 1 turns the raw scores into values that sum to 1;
# take the first (only) row and scale by 100 to express them as percentages.
percentage = torch.softmax(out, dim=1)[0] * 100

# Show the top label together with its percentage as a plain Python float.
(labels[index[0]], percentage[index[0]].item())

('457, bow_tie', 88.15957641601562)

In [11]:
# Order the class positions from highest to lowest score (the sorted values
# are discarded), then list the label and percentage of the first five
# entries of the first row.
_, indices = out.sort(descending=True)
[
    (labels[idx], percentage[idx].item())
    for idx in indices[0, :5]
]


[('457, bow_tie', 88.15957641601562),
 ('834, suit', 5.002092361450195),
 ('652, military_uniform', 1.345510721206665),
 ('906, Windsor_tie', 1.1857060194015503),
 ('841, sweatshirt', 0.6531615257263184)]