In [1]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

In [2]:
# Preprocessing applied to every CIFAR-10 sample: convert the image to a
# tensor, then normalize each of the three channels as (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [3]:
# download=True fetches CIFAR-10 into ./data on the first run and reuses the
# local copy afterwards, so this cell also works on a fresh machine/kernel
# instead of failing when ./data has not been populated by hand.
train_data = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_data = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
# Evaluation does not depend on sample order, so the test loader keeps the
# dataset order (deterministic, and no shuffling work per pass).
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)

In [6]:
# Human-readable label names; a predicted class index is used to look up
# its name in this list.
class_names = [
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Inspect the first training sample: its tensor shape, then its raw values.
image, label = train_data[0]
print(image.shape)
print(image)

torch.Size([3, 32, 32])
tensor([[[-0.5373, -0.6627, -0.6078,  ...,  0.2392,  0.1922,  0.1608],
         [-0.8745, -1.0000, -0.8588,  ..., -0.0353, -0.0667, -0.0431],
         [-0.8039, -0.8745, -0.6157,  ..., -0.0745, -0.0588, -0.1451],
         ...,
         [ 0.6314,  0.5765,  0.5529,  ...,  0.2549, -0.5608, -0.5843],
         [ 0.4118,  0.3569,  0.4588,  ...,  0.4431, -0.2392, -0.3490],
         [ 0.3882,  0.3176,  0.4039,  ...,  0.6941,  0.1843, -0.0353]],

        [[-0.5137, -0.6392, -0.6235,  ...,  0.0353, -0.0196, -0.0275],
         [-0.8431, -1.0000, -0.9373,  ..., -0.3098, -0.3490, -0.3176],
         [-0.8118, -0.9451, -0.7882,  ..., -0.3412, -0.3412, -0.4275],
         ...,
         [ 0.3333,  0.2000,  0.2627,  ...,  0.0431, -0.7569, -0.7333],
         [ 0.0902, -0.0353,  0.1294,  ...,  0.1608, -0.5137, -0.5843],
         [ 0.1294,  0.0118,  0.1137,  ...,  0.4431, -0.0745, -0.2784]],

        [[-0.5059, -0.6471, -0.6627,  ..., -0.1529, -0.2000, -0.1922],
         [-0.8431, -1

In [7]:
class NeuralNet(nn.Module):
    """Small convolutional classifier for 3x32x32 images with 10 output scores.

    Shape walk-through for one image (no padding, convolution stride 1):

        input              (3, 32, 32)
        conv1, 3x3 kernel  (12, 30, 30)   # (32 - 3) / 1 + 1 = 30
        pool1, 2x2         (12, 15, 15)
        conv2, 4x4 kernel  (24, 12, 12)   # (15 - 4) / 1 + 1 = 12
        pool2, 2x2         (24, 6, 6)
        flatten            864            # 24 * 6 * 6
        fc1 -> fc2 -> fc3  128 -> 64 -> 10
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. pool2 is configured exactly like
        # pool1; it is kept as a separate attribute purely for readability.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=4)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected classifier head on the flattened feature maps.
        self.fc1 = nn.Linear(in_features=24 * 6 * 6, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Run a batch through the network and return the fc3 outputs.

        No activation is applied after the last linear layer, so the result
        holds raw scores (one row per sample, 10 columns).
        """
        features = self.pool1(F.relu(self.conv1(x)))
        features = self.pool2(F.relu(self.conv2(features)))

        # Keep the batch dimension and collapse everything after it.
        flat = torch.flatten(features, start_dim=1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [8]:
# Seed torch's global RNG before building the model so the initial weights
# are the same on every run of this cell. Anything else that draws from the
# global RNG afterwards (presumably including the training loader's shuffle
# order; verify for the installed torch version) then repeats as well.
torch.manual_seed(0)

net = NeuralNet()
# Cross-entropy over the network's raw output scores and integer class labels.
loss_function = nn.CrossEntropyLoss()
# Plain SGD with momentum over all trainable parameters of the network.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [9]:
# Train for 30 passes over the training loader and report the mean batch
# loss after each pass.
for epoch in range(30):
    print(f'Training epoch {epoch + 1} ...')

    batch_losses = []

    for inputs, labels in train_loader:
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = loss_function(outputs, labels)

        # Backpropagate and apply one optimizer update.
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    running_loss = sum(batch_losses)
    print(f'Loss: {running_loss / len(train_loader):.4f}')

Training epoch 1 ...
Loss: 2.2688
Training epoch 2 ...
Loss: 1.8905
Training epoch 3 ...
Loss: 1.6184
Training epoch 4 ...
Loss: 1.4486
Training epoch 5 ...
Loss: 1.3368
Training epoch 6 ...
Loss: 1.2394
Training epoch 7 ...
Loss: 1.1599
Training epoch 8 ...
Loss: 1.0902
Training epoch 9 ...
Loss: 1.0322
Training epoch 10 ...
Loss: 0.9838
Training epoch 11 ...
Loss: 0.9394
Training epoch 12 ...
Loss: 0.8979
Training epoch 13 ...
Loss: 0.8601
Training epoch 14 ...
Loss: 0.8210
Training epoch 15 ...
Loss: 0.7870
Training epoch 16 ...
Loss: 0.7547
Training epoch 17 ...
Loss: 0.7218
Training epoch 18 ...
Loss: 0.6878
Training epoch 19 ...
Loss: 0.6559
Training epoch 20 ...
Loss: 0.6278
Training epoch 21 ...
Loss: 0.5939
Training epoch 22 ...
Loss: 0.5662
Training epoch 23 ...
Loss: 0.5400
Training epoch 24 ...
Loss: 0.5086
Training epoch 25 ...
Loss: 0.4806
Training epoch 26 ...
Loss: 0.4528
Training epoch 27 ...
Loss: 0.4271
Training epoch 28 ...
Loss: 0.4051
Training epoch 29 ...
Loss: 0

In [10]:
# Persist only the trained parameters (the state dict), not the module object.
torch.save(obj=net.state_dict(), f='trained_net.pth')

In [14]:
# Build a fresh instance of the architecture and restore the saved
# parameters into it. load_state_dict stays the last expression so the
# notebook still displays its key-matching report.
new_net = NeuralNet()
saved_state = torch.load('trained_net.pth')
new_net.load_state_dict(saved_state)

<All keys matched successfully>

In [15]:
# Measure top-1 accuracy of the reloaded model on the test loader.
correct = 0
total = 0

# Switch the model under evaluation to inference mode.
new_net.eval()

# No gradients are needed for evaluation.
with torch.no_grad():
    for inputs, labels in test_loader:
        # Score with the reloaded model (new_net), i.e. the same model that
        # was put in eval mode above. Using `net` here would silently
        # evaluate the in-memory training model instead of the checkpoint.
        outputs = new_net(inputs)
        # Index of the highest score per row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total

print(f'Accuracy: {accuracy}%')

Accuracy: 68.12%


In [16]:
# Preprocessing for arbitrary input images: resize to the 32x32 resolution
# the network's layer sizes are built for, convert to a tensor, then
# normalize each of the three channels as (x - 0.5) / 0.5.
new_transform = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

def load_image(image_path):
    """Load an image file and turn it into a single-image batch.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        The result of ``new_transform`` applied to the RGB version of the
        image, with a leading batch dimension of size 1 added.
    """
    # The context manager closes the underlying file handle deterministically.
    # convert('RGB') forces exactly three channels, so grayscale, palette,
    # RGBA or CMYK files do not break the 3-channel Normalize / conv1 input.
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert('RGB')

    image = new_transform(rgb_image)
    image = image.unsqueeze(0)  # Images have to be presented as a batch
    return image

image_paths = ['example1.jpg', 'example2.jpg']  # example1: dog, example2: car
images = [load_image(img) for img in image_paths]

# Put the model that is actually used for prediction into inference mode.
# The previous `net.eval()` targeted a different model than the `new_net`
# called below (and `net` may not exist if training was skipped).
new_net.eval()
with torch.no_grad():
    for i, image in enumerate(images):
        output = new_net(image)
        # Index of the highest score is the predicted class.
        _, predicted = torch.max(output, 1)
        print(f'Prediction of example{i + 1}: {class_names[predicted.item()]}')

Prediction of example1: dog
Prediction of example2: car
