# Exercise 3: Play with GoogLeNet Implementation

Next, let's load a pre-trained GoogLeNet model and run it on an image, for example our dog image from Exercise 1.

In [1]:
import torch
import torchvision
import os
from PIL import Image
import ast

# Load pretrained AlexNet model

os.environ['http_proxy'] = 'http://192.41.170.23:3128'
os.environ['https_proxy'] = 'http://192.41.170.23:3128'

model = torchvision.models.googlenet(pretrained=True)
model.eval()

# Load image and convert to appropriate tensor

# The AlexNet network expects an input as a 4D tensor.
# Dim 0: batch index
# Dim 1: channel index
# Dim 2: row index
# Dim 3: col index

# Need to read image as PIL object, convert to tensor, normalize the image intensities according ImageNet's means and stdevs

im = Image.open("dog.jpg")

transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

img_tensor = transforms(im).unsqueeze(0)

# Run input through the model and get the top 5 labels

output = model(img_tensor)
values, indices = torch.topk(output, 5, dim=1)
probs = torch.nn.Softmax(1)(output)
print(probs[0][indices[0]])

# Get ImageNet class labels

with open('imagenet1000_clsidx_to_labels.txt') as f:
    label_data = f.read()
class_labels = ast.literal_eval(label_data)

# Print out top 5 labels for this image

for i in range(5):
    class_index = indices[0][i].item()
    print('Label %d (%d): %s' % (i, class_index, class_labels[class_index]))

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /home/mdailey/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


  0%|          | 0.00/49.7M [00:00<?, ?B/s]

tensor([0.7478, 0.1441, 0.0612, 0.0051, 0.0030], grad_fn=<IndexBackward0>)
Label 0 (230): Shetland sheepdog, Shetland sheep dog, Shetland
Label 1 (231): collie
Label 2 (157): papillon
Label 3 (232): Border collie
Label 4 (259): Pomeranian


In [2]:
model

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track