In [1]:
import torch
import torchvision
from torchvision import datasets, transforms, models

In [2]:
# Build AlexNet and initialise it with the ImageNet-1k (V1) checkpoint,
# i.e. what the legacy `pretrained=True` flag used to select.
alexnet_weights = models.AlexNet_Weights.IMAGENET1K_V1
alexnet = models.alexnet(weights=alexnet_weights)
print(alexnet)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /Users/joe/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:12<00:00, 19.3MB/s] 


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [3]:
# Pre-processing pipeline applied to every input image before it is fed to the model
transform = transforms.Compose([ # Chain the steps below; they run in order
    transforms.Resize(256), # Scale the image so its SHORTER side is 256 px (aspect ratio is kept, result is not necessarily square)
    transforms.CenterCrop(224), # Cut out the central 224×224 patch
    transforms.ToTensor(), # PIL image -> float tensor laid out (channels, height, width) with values scaled to [0, 1]
    transforms.Normalize( # Per channel: (value - mean) / std, using the statistics listed below
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])
])

In [27]:
# Load the input image and pre-process it
from PIL import Image

# NOTE(review): absolute, machine-specific path — adjust it to a file that exists locally.
# convert("RGB") guarantees exactly three channels: grayscale, palette, CMYK or
# RGBA files would otherwise fail in the 3-channel Normalize step / first conv layer.
img = Image.open(r"C:\Users\3bood\Pictures\OIP (3).jpg").convert("RGB")

In [28]:
# Run the pre-processing pipeline on the loaded image
img_t = transform(img)
# Prepend a batch axis: (channels, height, width) -> (1, channels, height, width)
batch_t = img_t.unsqueeze(0)

In [29]:
# Model Inference
alexnet.eval()  # evaluation mode: the Dropout layers in the classifier become no-ops
with torch.no_grad():  # no backward pass is needed, so skip building the autograd graph
    out = alexnet(batch_t)
print(out.shape)  # one row of raw class scores (logits) per image in the batch

torch.Size([1, 1000])


In [30]:
# Read the class-label file: one entry per line, surrounding whitespace removed.
# The list position of an entry is used later as the model's class index.
with open(r"C:\Users\3bood\Downloads\imagenet_classes.txt") as f:
    classes = [raw_line.strip() for raw_line in f]

print(classes[0])

0, tench


In [31]:
# Softmax turns the raw scores of the single batch item into percentages
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ordered from highest to lowest score
indices = torch.sort(out, descending=True).indices
# Five best classes with their confidence, shown as the cell's output
[(classes[idx], f"{percentage[idx].item():.2f}%") for idx in indices[0, :5]]

[('207, golden_retriever', '59.76%'),
 ('208, Labrador_retriever', '34.08%'),
 ('852, tennis_ball', '1.37%'),
 ('219, cocker_spaniel', '0.71%'),
 ('159, Rhodesian_ridgeback', '0.71%')]

In [46]:
import torch
import torchvision
from torchvision import datasets, transforms, models

# Build ResNet-50 and initialise it with the ImageNet-1k (V1) checkpoint,
# i.e. what the legacy `pretrained=True` flag used to select.
resnet_weights = models.ResNet50_Weights.IMAGENET1K_V1
resnet = models.resnet50(weights=resnet_weights)
print(resnet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [47]:
# Pre-processing pipeline applied to every input image before it is fed to the model
transform = transforms.Compose([ # Chain the steps below; they run in order
    transforms.Resize(256), # Scale the image so its SHORTER side is 256 px (aspect ratio is kept, result is not necessarily square)
    transforms.CenterCrop(224), # Cut out the central 224×224 patch
    transforms.ToTensor(), # PIL image -> float tensor laid out (channels, height, width) with values scaled to [0, 1]
    transforms.Normalize( # Per channel: (value - mean) / std, using the statistics listed below
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])
])

In [48]:
# Load the input image and pre-process it
from PIL import Image

# NOTE(review): absolute, machine-specific path — adjust it to a file that exists locally.
# convert("RGB") guarantees exactly three channels: grayscale, palette, CMYK or
# RGBA files would otherwise fail in the 3-channel Normalize step / first conv layer.
img = Image.open(r"C:\Users\3bood\Pictures\OIP (3).jpg").convert("RGB")

In [49]:
# Run the pre-processing pipeline on the loaded image
img_t = transform(img)
# Prepend a batch axis: (channels, height, width) -> (1, channels, height, width)
batch_t = img_t.unsqueeze(0)

In [50]:
# Model Inference
resnet.eval()  # evaluation mode: BatchNorm layers use their stored running statistics
with torch.no_grad():  # no backward pass is needed, so skip building the autograd graph
    out = resnet(batch_t)
print(out.shape)  # one row of raw class scores (logits) per image in the batch

torch.Size([1, 1000])


In [51]:
# Read the class-label file: one entry per line, surrounding whitespace removed.
# The list position of an entry is used later as the model's class index.
with open(r"C:\Users\3bood\Downloads\imagenet_classes.txt") as f:
    classes = [raw_line.strip() for raw_line in f]

print(classes[0])

0, tench


In [52]:
# Softmax turns the raw scores of the single batch item into percentages
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
# Class indices ordered from highest to lowest score
indices = torch.sort(out, descending=True).indices
# Five best classes with their confidence, shown as the cell's output
[(classes[idx], f"{percentage[idx].item():.2f}%") for idx in indices[0, :5]]

[('208, Labrador_retriever', '60.81%'),
 ('207, golden_retriever', '30.63%'),
 ('211, vizsla', '1.48%'),
 ('216, clumber', '0.83%'),
 ('215, Brittany_spaniel', '0.61%')]