In [3]:
import requests
from io import BytesIO
from PIL import Image

import numpy as np
from matplotlib import pyplot as plt

import torch
import torchvision.transforms as transforms
from torchvision import models

In [5]:
# Load the ResNet50 model (pretrained on ImageNet)
# ~25.6 million parameters
# He et al. (2015), "Deep Residual Learning for Image Recognition", https://arxiv.org/abs/1512.03385
model = models.resnet50(weights="IMAGENET1K_V2")
model.eval()  # Evaluation mode: use inference behaviour for layers such as batch norm

# Get an image from the internet.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# turns an HTTP error into a clear exception instead of a confusing PIL decode
# failure on an error page.
response = requests.get(
    "https://www.airlinequality.com/wp-content/uploads/2024/03/IMG_1475-500x500.jpeg",
    timeout=30,
)
response.raise_for_status()

# Force three colour channels: the Normalize step below is defined for exactly
# three channels, so grayscale / RGBA / CMYK inputs would otherwise fail.
img = Image.open(BytesIO(response.content)).convert("RGB")

# Define transformation to match ResNet50's expected input:
# 224x224 pixels, float tensor, normalised with the ImageNet channel statistics.
# NOTE(review): the preprocessing bundled with these weights
# (models.ResNet50_Weights.IMAGENET1K_V2.transforms()) resizes to 232 and
# center-crops to 224; the direct 224x224 resize is kept here so the recorded
# outputs further down remain valid.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Apply transformation
x = transform(img).unsqueeze(0)  # Add batch dimension

# Run the image through the model.
# Inference only, so skip autograd bookkeeping (less memory, no graph attached
# to the result).
with torch.no_grad():
    y_hat = model(x)


In [6]:
# Convert the model output to a flat NumPy array of per-class scores.
# Guarded so this cell can be re-run safely: after the first run `y_hat` is
# already an ndarray, which has no `.detach()`.
if torch.is_tensor(y_hat):
    y_hat = y_hat.detach().cpu().numpy().squeeze()

# Class indices ordered from highest to lowest score. These are the model's
# raw outputs; no softmax is applied here.
I = y_hat.argsort()[::-1]
print(I[:5])
print(y_hat[I[:5]])

[404 908 368 895 745]
[7.0693884 4.7016892 2.1308782 1.6017052 1.5412313]


In [9]:
# Download human-readable class names as a JSON list and store them as a NumPy
# array so they can be indexed with an array of class indices.
# NOTE(review): assumes position i in the list is the name for model output
# index i; confirm against the label file's documentation.
labels_response = requests.get(
    "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json",
    timeout=30,  # do not hang the notebook if the host is unreachable
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
labels_response.raise_for_status()
classes = np.array(labels_response.json())

In [11]:
# Names of the five highest-scoring classes, best first.
np.take(classes, I[:5])

array(['airliner', 'wing', 'gibbon', 'military aircraft', 'projector'],
      dtype='<U32')