In [5]:
import kagglehub

# Fetch the latest published version of the dataset through kagglehub.
# `path` is the local directory holding the downloaded files; later cells
# build their file paths from it.
dataset_handle = "s076923/pytorch-transformer"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/s076923/pytorch-transformer?dataset_version_number=4...


100%|██████████| 916M/916M [02:21<00:00, 6.78MB/s] 

Extracting model files...





Path to dataset files: /home/juhwan/.cache/kagglehub/datasets/s076923/pytorch-transformer/versions/4


In [2]:
from torchvision import models
from torchinfo import summary

# Build AlexNet with the ImageNet-1k (V1) pretrained weights and display a
# per-layer table of output shapes and parameter counts for one
# 3-channel 224x224 input, evaluated on the CPU.
input_shape = (1, 3, 224, 224)
model = models.alexnet(weights="AlexNet_Weights.IMAGENET1K_V1")
summary(model, input_size=input_shape, device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
AlexNet                                  [1, 1000]                 --
├─Sequential: 1-1                        [1, 256, 6, 6]            --
│    └─Conv2d: 2-1                       [1, 64, 55, 55]           23,296
│    └─ReLU: 2-2                         [1, 64, 55, 55]           --
│    └─MaxPool2d: 2-3                    [1, 64, 27, 27]           --
│    └─Conv2d: 2-4                       [1, 192, 27, 27]          307,392
│    └─ReLU: 2-5                         [1, 192, 27, 27]          --
│    └─MaxPool2d: 2-6                    [1, 192, 13, 13]          --
│    └─Conv2d: 2-7                       [1, 384, 13, 13]          663,936
│    └─ReLU: 2-8                         [1, 384, 13, 13]          --
│    └─Conv2d: 2-9                       [1, 256, 13, 13]          884,992
│    └─ReLU: 2-10                        [1, 256, 13, 13]          --
│    └─Conv2d: 2-11                      [1, 256, 13, 13]         

In [6]:
# Read the ImageNet class labels bundled with the dataset: one label per line,
# so the line index is the class index used to decode model outputs later.
# The encoding is given explicitly because the default used by open() depends
# on the platform/locale and could otherwise decode the labels differently
# from machine to machine.
with open(path + "/datasets/imagenet_classes.txt", "r", encoding="utf-8") as file:
    classes = file.read().splitlines()

# Fail with a clear message instead of an IndexError on classes[0] below.
assert classes, "imagenet_classes.txt contains no class labels"

print(f"class num: {len(classes)}")
print(f"first class label: {classes[0]}")

class num: 1000
first class label: tench


Data preprocessing

In [8]:
import torch
from PIL import Image
from torchvision import models, transforms

# Preprocessing pipeline: resize to the 224x224 input size, convert to a float
# tensor, then normalize each channel. The mean/std values are the per-channel
# statistics conventionally used with torchvision's ImageNet-pretrained models.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Use the GPU when one is available; eval() switches the model to inference mode.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = models.alexnet(weights="AlexNet_Weights.IMAGENET1K_V1").to(device).eval()

# `files` is also used by the inference cell to label each result.
files = [path + "/datasets/images/airplane.jpg", path + "/datasets/images/bus.jpg"]

image_tensors = []
for image_path in files:
    # The context manager closes the underlying file handle once the pixels
    # have been read, instead of leaving it open until garbage collection.
    with Image.open(image_path) as image:
        # Force three channels: grayscale, RGBA or CMYK inputs would otherwise
        # break the 3-channel Normalize step or the stack below.
        image_tensors.append(transform(image.convert("RGB")))

# Combine the per-image tensors into a single batch along a new first dimension.
tensors = torch.stack(image_tensors)
print(f"Input tensor shape: {tensors.shape}")

Input tensor shape: torch.Size([2, 3, 224, 224])


Inference

In [9]:
import numpy as np
from torch.nn import functional as F

# Run the batch through the model without tracking gradients, turn the raw
# outputs into probabilities over the last dimension, and keep the five most
# probable classes for each image.
with torch.no_grad():
    outputs = model(tensors.to(device))
    probs = F.softmax(outputs, dim=-1)
    top_probs, top_idxs = probs.topk(5)

# Move the results to host memory as NumPy arrays so the index array can be
# used to look up the label strings in one step.
top_probs = top_probs.cpu().numpy()
top_idxs = top_idxs.cpu().numpy()
top_classes = np.array(classes)[top_idxs]

# One report per input image: the file path, then each label with its
# probability shown as a percentage.
for image_file, labels, scores in zip(files, top_classes, top_probs):
    print(f"{image_file} Inference Result")
    for label, score in zip(labels, scores):
        print(f" - {label:<30} : {score * 100:>5.2f}%")

/home/juhwan/.cache/kagglehub/datasets/s076923/pytorch-transformer/versions/4/datasets/images/airplane.jpg Inference Result
 - airliner                       : 66.83%
 - warplane                       : 20.12%
 - wing                           :  9.29%
 - space shuttle                  :  2.89%
 - missile                        :  0.38%
/home/juhwan/.cache/kagglehub/datasets/s076923/pytorch-transformer/versions/4/datasets/images/bus.jpg Inference Result
 - streetcar                      : 60.25%
 - trolleybus                     : 37.99%
 - minibus                        :  1.54%
 - passenger car                  :  0.17%
 - recreational vehicle           :  0.03%
