# Importing the necessary libraries
The first step is to import all the libraries this project needs, and to select the device (GPU if available, otherwise CPU) on which the model will run.

In [173]:
# Imports
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
# Pick the best available accelerator: CUDA first, then Apple's MPS backend, else CPU.
# `torch.has_mps` is deprecated and only reports whether PyTorch was *built* with MPS;
# `torch.backends.mps.is_available()` also checks that the current machine can use it.
_mps_backend = getattr(torch.backends, "mps", None)  # missing on PyTorch builds without MPS
if torch.cuda.is_available():
    device = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"device: {device}")

device: mps


# Reading data
After importing everything we need, we read the images from disk. The images are also lightly pre-processed: each one is converted to a tensor and resized to 224×224 pixels so that every input has the same shape.

In [174]:
# Build the image datasets from the class-per-folder directory layout and wrap
# them in dataloaders. Every image is converted to a tensor and then resized
# to 224x224 so that all inputs share one shape.
batch_size = 64

transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(224, 224)),
    ]
)

training_data = torchvision.datasets.ImageFolder("data/train/", transform=transform)
test_data = torchvision.datasets.ImageFolder("data/test/", transform=transform)

# Only the training loader shuffles; the test loader keeps the dataset order.
training_loader = DataLoader(
    dataset=training_data,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_data,
    batch_size=batch_size,
)

print(f"Number of training examples: {len(training_data)}")
print(f"Number of test examples: {len(test_data)}")

Number of training examples: 15557
Number of test examples: 4002


# Creating a CNN classifier
The following code defines a CNN that classifies the images. We then train this classifier on the training set and, finally, evaluate it on the test set.

In [176]:
class Net(nn.Module):
    """Small convolutional classifier for 3-channel images.

    Architecture: two 3x3 convolutions (3 -> 6 -> 16 channels, padding 1), each
    followed by 2x2 max-pooling, then three fully connected layers
    (flattened features -> 120 -> 84 -> ``num_labels``).

    Args:
        input_size: ``(height, width)`` of the images fed to the network.
            Defaults to ``(224, 224)``.
        num_labels: Number of output classes. Defaults to 23.

    Raises:
        ValueError: If either spatial dimension is smaller than 4, since the
            two 2x2 poolings would then leave no features to classify.
    """

    def __init__(self, input_size=(224, 224), num_labels=23):
        super().__init__()
        height, width = input_size
        if height < 4 or width < 4:
            raise ValueError(
                f"input_size must be at least 4x4 to survive two 2x2 poolings, got {input_size}"
            )
        self.input_size = (height, width)
        self.num_labels = num_labels

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, padding=1)
        # Taken from conv2 so the flattened size cannot drift from the layer definition.
        self.num_channels = self.conv2.out_channels

        # The 3x3 convolutions with padding=1 keep the spatial size; each pooling
        # halves it (rounding down), so the feature map is (H // 4, W // 4).
        flattened_features = (height // 4) * (width // 4) * self.num_channels
        self.fc1 = nn.Linear(flattened_features, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, self.num_labels)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 3, height, width)`` where
                ``(height, width)`` matches ``self.input_size``.

        Returns:
            Tensor of shape ``(batch, num_labels)``; no softmax is applied.
        """
        # NOTE(review): no activation follows the convolutions, so max-pooling is
        # the only non-linearity in the convolutional part. Confirm this is intended.
        x = self.conv1(x)
        x = self.pool(x)

        x = self.conv2(x)
        x = self.pool(x)

        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [177]:
# Instantiate the classifier directly on the selected device, then set up the
# loss function and the optimiser that updates the classifier's parameters.
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), weight_decay=1e-3)

## Training the neural network

In [178]:
from time import time

num_epochs = 20

# Explicitly select training mode so this cell behaves the same when it is
# re-run after a cell that switched the network to evaluation mode.
net.train()

start = time()
epoch_losses = []  # one entry per epoch: the sum of the batch losses of that epoch
for epoch in range(num_epochs):  # loop over the dataset multiple times
    epoch_loss = 0.0
    for inputs, labels in training_loader:
        # move the batch to the same device as the network
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the plain Python float so no computation graph is kept alive
        epoch_loss += loss.item()

    # Gathering statistics; the reported time is cumulative since training started
    end = time()
    epoch_losses.append(epoch_loss)
    print(f"Epoch: {epoch + 1}, loss: {epoch_loss:.3f}, time: {end - start:.2f}s")

end = time()
total_time = end - start
print(f'Finished Training, took {total_time:.2f} seconds.')

Epoch: 1, loss: 179.089, time: 88.57s
Epoch: 2, loss: 170.339, time: 169.87s
Epoch: 3, loss: 166.726, time: 251.06s
Epoch: 4, loss: 163.177, time: 335.00s


: 

: 

## Evaluating the neural network

In [None]:
def evaluate_accuracy(model, loader, device):
    """Count correct top-1 predictions of ``model`` over every batch in ``loader``.

    The model is switched to evaluation mode for the duration of the call and
    its previous training/evaluation mode is restored afterwards.

    Args:
        model: The ``nn.Module`` to evaluate.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(correct, total)``: number of correct predictions and number
        of examples seen.

    Raises:
        ValueError: If ``loader`` yields no examples, in which case an
            accuracy cannot be computed.
    """
    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                # the class with the highest score is what we choose as prediction
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)
    if total == 0:
        raise ValueError("Cannot compute accuracy: the loader yielded no examples.")
    return correct, total


correct, total = evaluate_accuracy(net, test_loader, device)
print(f'Accuracy of the network on the test images: {100 * correct // total} %')

Accuracy of the network on the test images: 29 %


In [None]:
# Storing the network
# torch.save does not create missing directories, so make sure the target folder exists.
os.makedirs("cnn_models", exist_ok=True)
# NOTE: this pickles the whole module object (not just its weights), so loading it
# later requires the `Net` class definition to be available.
torch.save(net, "cnn_models/cnn_model1")

In [None]:
# Loading in the network again
# map_location places the restored parameters on the currently selected device,
# even if the file was written from a different one.
# SECURITY: the file is a pickled module; unpickling runs arbitrary code, so only
# load files you created yourself.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which
# rejects fully pickled modules — confirm against the installed version and pass
# weights_only=False (or switch to saving a state_dict) if needed.
net2 = torch.load("cnn_models/cnn_model1", map_location=device)

In [None]:

# Repeat the test-set evaluation with the reloaded network to confirm that it
# reproduces the accuracy of the network that was saved.
total = 0
correct = 0
with torch.no_grad():  # inference only, so gradient tracking is switched off
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        # forward pass through the reloaded network
        outputs = net2(images)
        # the index of the largest score per row is the predicted class
        predicted = torch.max(outputs.data, 1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f'Accuracy of the network on the test images: {100 * correct // total} %')



Accuracy of the network on the test images: 29 %
