In [1]:
import torch
import torch.nn as nn

In [None]:
from datasets import load_dataset

# Load the validation split of the tiny-imagenet dataset from the Hugging Face Hub.
imagenet = load_dataset(
    'Maysee/tiny-imagenet',
    split='valid',
    # `ignore_verifications=True` is deprecated in recent `datasets` releases in
    # favour of `verification_mode`; 'no_checks' skips the split/checksum
    # verification that otherwise raises the "splits" error for this dataset.
    verification_mode='no_checks',
)

In [4]:
from torchvision import transforms
from tqdm.auto import tqdm

In [5]:
# Display the dataset summary (feature names and number of rows).
imagenet

Dataset({
    features: ['image', 'label'],
    num_rows: 10000
})

In [14]:
# Preprocessing pipeline: resize, crop the central 227x227 patch (the spatial
# size the AlexNet defined below expects), convert to a tensor and normalise
# each channel with the usual ImageNet mean / std values.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(227),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Preprocess the first 50 images; anything that is not RGB is converted first
# so every tensor ends up with three channels.
inputs = []
for image in tqdm(imagenet[:50]['image']):
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    inputs.append(preprocess(rgb_image))

# Stack the per-image tensors into one batch and show its shape.
inputs = torch.stack(inputs)
inputs.shape

  0%|          | 0/50 [00:00<?, ?it/s]

torch.Size([50, 3, 227, 227])

In [15]:
class AlexNet(nn.Module):
    """AlexNet-style image classifier: five convolutions, then three linear layers.

    The first linear layer takes 9216 flattened features (256 channels x 6 x 6).
    With the unpadded 11x11 / stride-4 first convolution and the three 3x3 /
    stride-2 max-pools used here, that corresponds to 227x227 input images.

    Args:
        in_channels: number of channels in the input images.
        num_classes: number of output logits per image.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Stage 1: strided large-kernel convolution, then pool and normalise.
        self.conv1 = nn.Conv2d(in_channels, 96, kernel_size=11, stride=4)
        self.relu1 = nn.ReLU()
        self.pooling1 = nn.MaxPool2d(3, stride=2)
        self.norm1 = nn.LocalResponseNorm(5)

        # Stage 2: 5x5 convolution (padding keeps the spatial size), pool, normalise.
        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.pooling2 = nn.MaxPool2d(3, stride=2)
        self.norm2 = nn.LocalResponseNorm(5)

        # Stages 3-5: three size-preserving 3x3 convolutions, pooled once at the end.
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.pooling3 = nn.MaxPool2d(3, stride=2)

        # Classifier head: flatten, two dropout-regularised hidden layers, logits.
        self.flatten = nn.Flatten()
        self.FC1 = nn.Linear(9216, 4096)
        self.relu6 = nn.ReLU()
        self.drop1 = nn.Dropout(p=0.5)
        self.FC2 = nn.Linear(4096, 4096)
        self.relu7 = nn.ReLU()
        self.drop2 = nn.Dropout(p=0.5)
        self.FC3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Compute class logits of shape (batch, num_classes) for image batch ``x``."""
        # Convolutional stages 1 and 2: conv -> ReLU -> max-pool -> response norm.
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pooling1(x)
        x = self.norm1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pooling2(x)
        x = self.norm2(x)

        # Convolutional stages 3-5, with a single max-pool after the last one.
        x = self.relu3(self.conv3(x))
        x = self.relu4(self.conv4(x))
        x = self.relu5(self.conv5(x))
        x = self.pooling3(x)

        # Fully connected head.
        x = self.flatten(x)
        x = self.drop1(self.relu6(self.FC1(x)))
        x = self.drop2(self.relu7(self.FC2(x)))
        return self.FC3(x)

In [16]:
# Choose the compute device: CUDA if present, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [17]:
# Instantiate the network for 3-channel images and 1000 output classes, then
# put it in evaluation mode so the dropout layers are disabled.
model = AlexNet(in_channels=3, num_classes=1000)
model.eval()

AlexNet(
  (conv1): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
  (relu1): ReLU()
  (pooling1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm1): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
  (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU()
  (pooling2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
  (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU()
  (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU()
  (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU()
  (pooling3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (FC1): Linear(in_features=9216, out_features=4096, bias=True)
  (relu6): ReLU()
 

In [18]:
# Put the network and the image batch on the selected device.
model.to(device)
inputs = inputs.to(device)

# Forward pass with gradient tracking switched off.
with torch.no_grad():
    output = model(inputs).detach()
print(output.size())
output

torch.Size([50, 1000])


tensor([[-0.0034,  0.0107,  0.0072,  ..., -0.0091,  0.0029, -0.0018],
        [-0.0036,  0.0108,  0.0059,  ..., -0.0087,  0.0031, -0.0003],
        [-0.0038,  0.0103,  0.0062,  ..., -0.0086,  0.0026, -0.0017],
        ...,
        [-0.0033,  0.0107,  0.0054,  ..., -0.0092,  0.0035, -0.0010],
        [-0.0031,  0.0104,  0.0066,  ..., -0.0090,  0.0017, -0.0019],
        [-0.0032,  0.0097,  0.0060,  ..., -0.0081,  0.0034, -0.0013]],
       device='cuda:0')

In [19]:
# Per-image predicted class: index of the largest logit along the class axis,
# moved to host memory and converted to a NumPy array.
preds = output.argmax(dim=1).cpu().numpy()
print(preds.shape)
preds

(50,)


array([536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536,
       536, 536, 536, 536, 536, 536, 536, 536, 401, 536, 536, 536, 180,
       536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536,
       536, 536, 536, 536, 536, 536, 536, 536, 536, 536, 536])

In [20]:
import requests

# Download the class-name list used to turn predicted indices into labels.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() fails here on an HTTP error instead of letting an error
# page be parsed as labels further down.
res = requests.get(
    "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
    timeout=30,
)
res.raise_for_status()

In [24]:
# One label per line. splitlines() is used instead of split('\n') so that a
# trailing newline does not add an empty last entry and CRLF line endings do
# not leave a '\r' on every label.
pred_labels = res.text.splitlines()
print(f"{len(pred_labels)}\n{pred_labels[536]}")

1000
dock


In [22]:
# Fraction of the batch whose predicted class index equals 1.
# NOTE(review): the comparison target is the literal 1, not the dataset's
# ground-truth labels - confirm whether an accuracy check was intended here.
sum(preds == 1) / preds.size

np.float64(0.0)

In [10]:
# Display the module's layer-by-layer representation.
model

AlexNet(
  (conv1): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
  (relu1): ReLU()
  (pooling1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm1): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
  (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU()
  (pooling2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
  (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU()
  (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU()
  (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU()
  (pooling3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (FC1): Linear(in_features=9216, out_features=4096, bias=True)
  (relu6): ReLU()
 

In [11]:
# `dummy` is not defined in any cell shown in this notebook, so on a fresh
# kernel this cell fails with NameError. Create a single random 3x227x227 image
# (the size for which the flattened features match FC1's 9216 inputs) on
# whichever device the model's parameters currently live on.
dummy = torch.randn(1, 3, 227, 227, device=next(model.parameters()).device)
out = model(dummy)

In [12]:
# Shape of the result of the dummy forward pass.
out.shape

torch.Size([1, 1000])