<a href="https://colab.research.google.com/github/davidandw190/pytorch-deep-learning-workspace/blob/main/notebooks-for-learning/02_image_classification_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
!pip install numpy pillow torch torchvision



In [36]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

In [37]:
# Run on the first CUDA GPU when one is visible to PyTorch; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cuda:0


In [38]:
# Per-channel mean / std handed to Normalize by both pipelines.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop (after 4 px of padding) and a random
# horizontal flip as augmentation, then tensor conversion and normalization.
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
transform_train = transforms.Compose(_train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalization as training.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
transform_test = transforms.Compose(_test_steps)


In [39]:
# Both splits live under the same root directory and are downloaded on demand;
# the `train` flag picks the split and each split gets its own transform pipeline.
_cifar_kwargs = dict(root='./data', download=True)

train_data = torchvision.datasets.CIFAR10(
    train=True,
    transform=transform_train,
    **_cifar_kwargs,
)
test_data = torchvision.datasets.CIFAR10(
    train=False,
    transform=transform_test,
    **_cifar_kwargs,
)

Files already downloaded and verified
Files already downloaded and verified


In [40]:
# Settings common to both loaders: 32 samples per batch, 2 worker processes.
_loader_kwargs = dict(batch_size=32, num_workers=2)

# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, **_loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_data, shuffle=False, **_loader_kwargs)

In [41]:
# Pull the first training sample as an (image, label) pair for inspection.
# train_data was created with transform_train, so the image returned here has
# gone through that pipeline (random crop/flip, tensor conversion, normalization).
image, label = train_data[0]

In [42]:
# Show the tensor layout of the sample image. The bare `image.shape` expression
# that used to precede this line displayed nothing (only a cell's last
# expression is echoed), so it has been dropped.
print(f"Image shape: {image.shape}")

Image shape: torch.Size([3, 32, 32])


In [43]:
# Human-readable names for the 10 classes; list position is the class index.
class_name = [
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [44]:
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Two conv -> ReLU -> max-pool stages extract features, which are flattened
    and passed through three fully connected layers. The last layer emits 10
    unnormalized scores per image (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor; shape comments assume a (3, 32, 32) input.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)   # -> (12, 28, 28)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)                       # halves H and W; reused after each conv
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)  # (12, 14, 14) -> (24, 10, 10)
        # Classifier head; the second pooling leaves (24, 5, 5) = 600 features.
        self.fc1 = nn.Linear(in_features=24 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return a (batch, 10) tensor of class scores for a batch of images."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Collapse channel/height/width into one axis, keeping the batch axis.
        flat = features.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [45]:
# Instantiate the network and place its parameters on the selected device.
model = CNN()
model = model.to(device)

# Cross-entropy over the raw class scores returned by the network.
loss_fn = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the model.
_sgd_settings = {"lr": 0.001, "momentum": 0.9}
optimizer = optim.SGD(model.parameters(), **_sgd_settings)

In [49]:
EPOCHS = 30

# Put the model in training mode explicitly so the loop stays correct if this
# cell is re-run after an evaluation pass has switched the model to eval mode.
model.train()

for epoch in range(EPOCHS):
    print(f'Training epoch {epoch}')

    running_loss = 0.0  # sum of the per-batch losses seen in this epoch
    for inputs, labels in train_loader:
        # Move the batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss for the epoch.
    print(f'Loss: {running_loss / len(train_loader):.4f}')

Training epoch 0
Loss: 2.0610
Training epoch 1
Loss: 1.7084
Training epoch 2
Loss: 1.5481
Training epoch 3
Loss: 1.4331
Training epoch 4
Loss: 1.3442
Training epoch 5
Loss: 1.2821
Training epoch 6
Loss: 1.2269
Training epoch 7
Loss: 1.1851
Training epoch 8
Loss: 1.1419
Training epoch 9
Loss: 1.1114
Training epoch 10
Loss: 1.0785
Training epoch 11
Loss: 1.0536
Training epoch 12
Loss: 1.0337
Training epoch 13
Loss: 1.0085
Training epoch 14
Loss: 0.9879
Training epoch 15
Loss: 0.9662
Training epoch 16
Loss: 0.9559
Training epoch 17
Loss: 0.9396
Training epoch 18
Loss: 0.9226
Training epoch 19
Loss: 0.9111
Training epoch 20
Loss: 0.8961
Training epoch 21
Loss: 0.8864
Training epoch 22
Loss: 0.8802
Training epoch 23
Loss: 0.8664
Training epoch 24
Loss: 0.8642
Training epoch 25
Loss: 0.8568
Training epoch 26
Loss: 0.8460
Training epoch 27
Loss: 0.8399
Training epoch 28
Loss: 0.8339
Training epoch 29
Loss: 0.8256


In [50]:
# Persist only the learned parameters (the state dict), not the module object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'trained_image_classifier.pth')

In [52]:
# Rebuild the architecture, then restore the saved parameters into it.
loaded_model = CNN().to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU still loads on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers
# instead of arbitrary Python objects.
state_dict = torch.load(
    'trained_image_classifier.pth',
    map_location=device,
    weights_only=True,
)
loaded_model.load_state_dict(state_dict)

  loaded_model.load_state_dict(torch.load('trained_image_classifier.pth'))


<All keys matched successfully>

In [None]:
correct = 0  # number of test images whose predicted class matches the label
total = 0    # number of test images seen

# Switch to evaluation mode before measuring accuracy.
loaded_model.eval()

# No gradients are needed for evaluation.
with torch.inference_mode():
    for images, labels in test_loader:
        # The model lives on `device`; batches must be moved there too, or the
        # forward pass fails with a device mismatch when running on a GPU.
        images, labels = images.to(device), labels.to(device)

        outputs = loaded_model(images)
        # Predicted class = index of the highest score along the class axis.
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total

print(f'Accuracy: {accuracy:.2f}%')