# Libraries

In [1]:
import os
# Working directory of the kernel; the dataset paths defined later are built from it.
cwd = os.getcwd()

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from torch/torchvision. Consider narrowing
# the filter to the specific category or module that is noisy.
warnings.filterwarnings("ignore")

In [2]:
import torch
import torch.optim as optim
import torch.nn.functional as F

from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from PIL import Image

# Helpers

In [3]:
# Plain preprocessing with no augmentation and no normalization:
# resize every image to 244x244, then convert it to a tensor.
base_transform = transforms.Compose(
    [
        transforms.Resize(size=(244, 244)),
        transforms.ToTensor(),
    ]
)

def mean_std_calculator(loader):
    """Estimate the per-channel mean and standard deviation of an image dataset.

    For every image the mean and std of each channel are computed over that
    image's pixels; these per-image values are then averaged over the whole
    dataset. The returned std is therefore the average of per-image stds
    (a common approximation), not the std of all pixels pooled together.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches. ``images`` is
            a tensor whose first dimension is the batch and whose second
            dimension is the channel; all remaining dimensions are flattened.
            ``loader.dataset`` must have a length equal to the total number
            of images yielded.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors holding one value per channel.
    """
    mean, std = 0.0, 0.0

    for image, _ in loader:
        batch_samples = image.size(0)
        # Flatten the spatial dimensions: (batch, channels, pixels).
        images = image.reshape(batch_samples, image.size(1), -1)
        # Reduce over the batch dimension only. A bare .sum() would also add
        # the channels together and return a single, inflated scalar.
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)

    # Dividing by the dataset size (not the batch count) keeps a smaller
    # final batch correctly weighted.
    num_images = len(loader.dataset)
    mean /= num_images
    std /= num_images

    return mean, std

# Configuration

In [4]:
# Dataset roots, anchored at the working directory captured in `cwd`.
train_path = os.path.join(cwd, "data/train")
test_path = os.path.join(cwd, "data/test")
# Checkpoint location. This is a relative path, so it is resolved against
# whatever the working directory is at the moment the file is written or read.
models_folder = "models"
model_path = os.path.join(models_folder, "model.pth")

In [5]:
# Number of images per batch for the DataLoaders.
BATCH_SIZE = 16
# Number of passes over the training set.
NUM_EPOCHS = 50
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001
# Kernel size passed to both convolution layers of Net.
KERNEL = 5

# Data Reading & Preprocessing

In [6]:
# Un-augmented, un-normalized view of the training set. `train_data` and
# `train_loader` are rebound in later cells with the real training transform.
train_data = datasets.ImageFolder(train_path, transform=base_transform)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Dataset-statistics pass, currently disabled; the transforms defined next
# normalize with a fixed 0.5 mean/std instead of measured values.
# mean, std = mean_std_calculator(train_loader)

In [7]:
# Training pipeline. The random augmentations run on the resized PIL image,
# before ToTensor, and Normalize is the final step. Colour operations expect
# image-range values, so they must not run on already-normalized data (which
# lies in [-1, 1] here), and augmenting before tensor conversion also works on
# torchvision versions without tensor support in these transforms.
train_transform = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    # NOTE(review): with its default arguments ColorJitter leaves the image
    # unchanged. Pass e.g. brightness/contrast/saturation values if colour
    # augmentation is actually wanted.
    transforms.ColorJitter(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,)*3, (0.5,)*3),
])

# Evaluation pipeline: deterministic, same resize and normalization as training.
test_transform = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,)*3, (0.5,)*3)
])

In [8]:
# Folder-per-class datasets; each split gets its own transform pipeline.
train_data = datasets.ImageFolder(root=train_path, transform=train_transform)
test_data = datasets.ImageFolder(root=test_path, transform=test_transform)

In [9]:
# Shuffle only the training batches; the test set is iterated in a fixed order.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# Class names are the keys of the dataset's name -> index mapping.
classes = train_data.class_to_idx.keys()
[*classes]

['butterfly',
 'cat',
 'chicken',
 'cow',
 'dog',
 'elephant',
 'horse',
 'sheep',
 'spider',
 'squirrel']

# Model

In [11]:
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The input size of the first linear layer is derived from the image size
    and kernel size instead of being hard-coded, so changing either no longer
    breaks the forward pass. With the defaults (244x244 input, kernel 5) the
    layer shapes, and therefore saved state dicts, are unchanged.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        image_size: Height and width of the square input images. Defaults to
            244, the size used by the Resize transforms in this notebook.
        kernel_size: Kernel size of both convolutions. Defaults to the
            notebook-level ``KERNEL`` constant.

    Raises:
        ValueError: If ``image_size`` is too small to survive both
            conv/pool stages with the given ``kernel_size``.
    """

    def __init__(self, num_classes=10, image_size=244, kernel_size=None):
        super().__init__()
        if kernel_size is None:
            kernel_size = KERNEL

        self.conv1 = nn.Conv2d(3, 6, kernel_size)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 15, kernel_size)

        # Each stage: an unpadded, stride-1 convolution shrinks the side by
        # (kernel_size - 1); the 2x2 max-pool then halves it, rounding down.
        side = image_size
        for _ in range(2):
            side = (side - kernel_size + 1) // 2
        if side <= 0:
            raise ValueError(
                f"image_size={image_size} is too small for two conv/pool "
                f"stages with kernel_size={kernel_size}"
            )

        self.fc1 = nn.Linear(15 * side * side, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Keep the batch dimension, flatten channels and spatial dimensions.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Raw logits: CrossEntropyLoss applies log-softmax itself.
        x = self.fc3(x)
        return x

# Training

In [15]:
# Instantiate the network with freshly initialised weights.
net = Net()

In [16]:
# Cross-entropy loss on the network's outputs; Adam optimizes every parameter of `net`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=LEARNING_RATE)

In [17]:
# Per-epoch history: mean batch loss and accuracy (in percent) of each epoch.
losses = []
accuracies = []

# Put the network in training mode explicitly, in case an earlier cell
# switched it to eval mode.
net.train()

for epoch in range(NUM_EPOCHS):
    # Window counters: reported and reset every 10 batches.
    running_loss = 0.0
    running_total = 0
    running_correct = 0

    # Epoch counters: accumulated over the whole epoch and never reset, so the
    # end-of-epoch statistics do not depend on where the last window ended.
    epoch_loss = 0.0
    epoch_total = 0
    epoch_correct = 0

    for i, data in enumerate(train_loader):
        inputs, labels = data
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        _, predicted = torch.max(outputs, 1)  # Get the class index with the max score
        batch_count = labels.size(0)
        batch_correct = (predicted == labels).sum().item()

        running_loss += batch_loss
        running_total += batch_count
        running_correct += batch_correct

        epoch_loss += batch_loss
        epoch_total += batch_count
        epoch_correct += batch_correct

        if i%10 == 9:
            avg_loss = running_loss / 10
            avg_acc = (running_correct / running_total) * 100

            print(f'[{epoch + 1}, {i + 1:5d}] loss: {avg_loss:.3f} accuracy: {avg_acc:.2f}%')

            running_loss = 0.0
            running_correct = 0
            running_total = 0

    # Record the epoch-level statistics. The guard avoids a division by zero
    # when the loader yields no samples.
    if epoch_total > 0:
        avg_loss = epoch_loss / len(train_loader)
        avg_acc = (epoch_correct / epoch_total) * 100
        losses.append(avg_loss)
        accuracies.append(avg_acc)

print("Finished Training")

[1,    10] loss: 2.371 accuracy: 12.50%
[1,    20] loss: 2.302 accuracy: 11.88%
[2,    10] loss: 2.293 accuracy: 18.75%
[2,    20] loss: 2.292 accuracy: 7.50%
[3,    10] loss: 2.246 accuracy: 10.62%
[3,    20] loss: 2.257 accuracy: 13.75%
[4,    10] loss: 2.136 accuracy: 24.38%
[4,    20] loss: 2.255 accuracy: 16.88%
[5,    10] loss: 2.249 accuracy: 15.62%
[5,    20] loss: 2.094 accuracy: 21.25%
[6,    10] loss: 2.105 accuracy: 21.25%
[6,    20] loss: 2.144 accuracy: 20.00%
[7,    10] loss: 2.140 accuracy: 18.12%
[7,    20] loss: 2.098 accuracy: 28.12%
[8,    10] loss: 2.049 accuracy: 20.62%
[8,    20] loss: 1.908 accuracy: 28.75%
[9,    10] loss: 1.878 accuracy: 35.00%
[9,    20] loss: 2.071 accuracy: 25.00%
[10,    10] loss: 1.927 accuracy: 33.12%
[10,    20] loss: 1.886 accuracy: 35.62%
[11,    10] loss: 1.670 accuracy: 42.50%
[11,    20] loss: 1.841 accuracy: 30.00%
[12,    10] loss: 1.687 accuracy: 39.38%
[12,    20] loss: 1.638 accuracy: 44.38%
[13,    10] loss: 1.486 accuracy: 5

In [21]:
correct = 0
total = 0

# Evaluate in eval mode so that any layer with train/eval-dependent behaviour
# is deterministic, then restore the previous mode so re-running the training
# cell afterwards is unaffected.
was_training = net.training
net.eval()

try:
    # No gradients are needed for inference.
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = net(images)
            # The predicted class is the index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    net.train(was_training)

# Integer percentage, rounded down.
print(f'Accuracy of the network: {100 * correct // total} %')

Accuracy of the network: 98 %


# Save Model

In [None]:
# torch.save does not create missing directories, so make sure the target
# folder exists before writing the checkpoint.
save_dir = os.path.dirname(model_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
# Only the parameters (state dict) are stored, not the module object itself.
torch.save(net.state_dict(), model_path)

# Test Model

In [None]:
# Rebuild the architecture, then restore the parameters stored at model_path.
test = Net()
test.load_state_dict(torch.load(model_path))
# Switch to evaluation mode. eval() returns the module, so as the last
# expression of the cell its structure is shown as the cell output.
test.eval()

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 15, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=50460, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)

In [62]:
# Force three RGB channels: test_transform normalizes with three per-channel
# statistics, so a grayscale, RGBA or CMYK file would not match otherwise.
# This also mirrors how ImageFolder loads the training and test images.
image = Image.open(os.path.join(cwd, "lala.jpg")).convert("RGB")
# Add a leading batch dimension: (C, H, W) -> (1, C, H, W).
input_tensor = test_transform(image).unsqueeze(0)