In [1]:
import torch
from matplotlib import pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
from PIL import Image
import numpy as np
import torchsummary
%matplotlib inline

In [2]:
# Pick the compute backend in order of preference: CUDA, then Apple's MPS,
# and finally the CPU when neither accelerator reports itself as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [3]:
class Vehicle(Dataset):
    """Map-style dataset that pairs each stored sample with its label by index."""

    def __init__(self, vehicles, labels):
        """Keep references to two index-aligned collections.

        Args:
            vehicles: indexable collection of samples; its length is the
                dataset length.
            labels: indexable collection holding one label per sample.
        """
        self.labels = labels
        self.vehicles = vehicles

    def __len__(self):
        """Return the number of stored samples (the length of ``vehicles``)."""
        sample_count = len(self.vehicles)
        return sample_count

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` tuple found at position ``idx``."""
        sample = self.vehicles[idx]
        target = self.labels[idx]
        return sample, target

In [4]:
# Load every image under ./vehicle_classification/<class_name>/ into memory.
# Each class sub-directory gets an integer label; images are stored channels-first.
root = os.getcwd()
images_directory = os.path.join(root, "vehicle_classification")

# os.listdir returns entries in arbitrary order, so sort to make the
# class-name -> integer-label mapping reproducible across runs and machines.
# Hidden entries and stray files at the top level are not classes and are skipped.
class_names = sorted(
    entry for entry in os.listdir(images_directory)
    if not entry.startswith('.')
    and os.path.isdir(os.path.join(images_directory, entry))
)

images = []
labels = []
current_label = 0
for label in class_names:  # loop over every class
    label_directory = os.path.join(images_directory, label)
    for pic in sorted(os.listdir(label_directory)):
        if pic.startswith('.'):  # skip hidden files such as .DS_Store
            continue
        # The context manager closes the file handle as soon as the pixels are read.
        with Image.open(os.path.join(label_directory, pic)) as image:
            # Force 3 channels so grayscale/RGBA files cannot break the transpose
            # below or the stacking into a single array.
            image_array = np.array(image.convert("RGB"))
        # make channels-first (C, H, W), as this is what PyTorch supports
        image_array = np.transpose(image_array, (2, 0, 1))
        images.append(image_array)
        labels.append(current_label)
    current_label += 1
images = np.array(images)
labels = np.array(labels)
classes = np.unique(labels)

In [5]:
# Derive the class count and the per-image spatial size from the loaded arrays.
# Each image is stored channels-first, so axes 1 and 2 of one image are its
# height and width.
image_height, image_length = images[0].shape[1:3]
num_classes = len(classes)

print(f"There are {len(images)} images in the dataset")
print(f"Each image has dimensions {image_length}x{image_height}")
print(f"The total number of classes is {num_classes}")

There are 26378 images in the dataset
Each image has dimensions 64x64
The total number of classes is 8


In [6]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.20,
    random_state=42,
)

# Copy the image arrays onto the selected device, keeping their original dtype.
X_train, X_test = (
    torch.tensor(split, device=device) for split in (X_train, X_test)
)
# Labels are stored as int64 (long) tensors on the same device.
y_train, y_test = (
    torch.tensor(split, dtype=torch.long, device=device) for split in (y_train, y_test)
)

In [7]:
# Hyper-parameters shared by the data pipeline and the model definition.
batch_size = 64
kernel_size = 3

# Reshuffle the training samples at the start of every epoch so that batches
# are not composed identically (and visited in the same order) each time.
train_dataloader = DataLoader(Vehicle(X_train, y_train), batch_size=batch_size, shuffle=True)
# Evaluation order does not affect the reported metrics, so keep it sequential.
test_dataloader = DataLoader(Vehicle(X_test, y_test), batch_size=batch_size)

In [8]:
class CNN(nn.Module):
    """Small convolutional classifier that outputs per-class log-probabilities.

    Architecture: two Conv2d -> Dropout -> ReLU -> MaxPool2d stages, followed by
    Flatten -> Linear(256) -> ReLU -> Linear(n_classes) -> LogSoftmax.
    """

    def __init__(self, in_channels=3, n_classes=None, conv_kernel_size=None, image_size=None):
        """Build the layer stack.

        Args:
            in_channels: number of channels in the input images.
            n_classes: number of output classes; defaults to the notebook-level
                ``num_classes``.
            conv_kernel_size: convolution kernel size (pooling uses twice this
                value); defaults to the notebook-level ``kernel_size``.
            image_size: ``(height, width)`` of the input images; defaults to the
                notebook-level ``(image_height, image_length)``.

        Raises:
            ValueError: if the images are too small to survive both pooling stages.
        """
        super().__init__()
        # Fall back to the notebook globals so a bare ``CNN()`` behaves as before.
        if n_classes is None:
            n_classes = num_classes
        if conv_kernel_size is None:
            conv_kernel_size = kernel_size
        if image_size is None:
            image_size = (image_height, image_length)
        height, width = image_size

        conv_channels = 16
        pool_size = conv_kernel_size * 2
        # 'same'-padded convolutions keep H and W; each MaxPool2d (whose stride
        # defaults to its kernel size) floor-divides them by pool_size. Deriving
        # the flattened size here replaces the previous hard-coded 16, which was
        # only correct for 64x64 inputs with kernel size 3.
        pooled_height = (height // pool_size) // pool_size
        pooled_width = (width // pool_size) // pool_size
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"image_size {tuple(image_size)} is too small for two "
                f"{pool_size}x{pool_size} max-pooling stages"
            )
        flat_features = conv_channels * pooled_height * pooled_width

        # Remembered so summary() describes the same input the layers were sized for.
        self.input_shape = (in_channels, height, width)

        modules = [
            nn.Conv2d(in_channels=in_channels, out_channels=conv_channels,
                      kernel_size=conv_kernel_size, padding='same'),
            nn.Dropout(0.5),  # some inputs are randomly dropped
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_size),

            nn.Conv2d(in_channels=conv_channels, out_channels=conv_channels,
                      kernel_size=conv_kernel_size, padding='same'),
            nn.Dropout(0.5),  # some inputs are randomly dropped
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_size),

            nn.Flatten(),
            nn.Linear(flat_features, 256),  # dense layer with 256 neurons
            nn.ReLU(),
            nn.Linear(256, n_classes),
            nn.LogSoftmax(dim=1),  # log-probabilities, to be paired with NLLLoss
        ]
        self.network = nn.Sequential(*modules)  # unpack the modules

    def summary(self):
        """Return ``torchsummary.summary`` for this model at its configured input size."""
        return torchsummary.summary(self, input_size=self.input_shape)

    def forward(self, x):
        """Run ``x`` through the network and return per-class log-probabilities."""
        prob = self.network(x)
        return prob

In [13]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch, printing the loss every 100 batches and the epoch accuracy.

    Args:
        dataloader: iterable of ``(X, y)`` batches whose ``dataset`` supports ``len()``.
        model: ``nn.Module`` mapping a float batch to per-class scores of shape
            ``(batch, classes)``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: optimizer that updates ``model``'s parameters.

    Returns:
        None. Progress is reported through ``print``.
    """
    size = len(dataloader.dataset)
    # Run each batch on the device the model's weights live on. The previous
    # ``X.type(torch.FloatTensor)`` always produced a CPU tensor, which fails
    # as soon as the model sits on an accelerator.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.train()
    correct = 0
    for batch, (X, y) in enumerate(dataloader):
        X = X.float()  # cast to float32 without changing device
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # The predicted class is the index of the highest score in each row.
        correct += (pred.argmax(dim=1) == y).sum().item()

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            current = batch * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")

    # Guard against an empty dataset instead of raising ZeroDivisionError.
    accuracy = correct / size if size else 0.0
    print(f"Training accuracy: {accuracy:>2f}")
    

In [14]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X = X.type(torch.FloatTensor)
            pred = model(X).to(device) # compute the model prediction
            
            # the predicted class is the one with the highest probability 
            # in the output distribution
            _, predicted = torch.max(pred.data, 1)
            correct += (predicted == y).sum().item()
            
            test_loss += loss_fn(pred, y).to(device).item()
            
            
    test_loss /= num_batches
    accuracy = correct / (size)
    print(f"Avg loss: {test_loss:>8f}")
    print(f"Testing accuracy: {accuracy:>2f}")

In [15]:
# Instantiate the network and move its parameters to the selected device.
model = CNN().to(device)
# torchsummary prints the layer table itself; wrapping the call in print()
# only appended the call's return value (presumably None in the classic
# torchsummary release -- TODO confirm) after the table.
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 64, 64]             448
           Dropout-2           [-1, 16, 64, 64]               0
              ReLU-3           [-1, 16, 64, 64]               0
         MaxPool2d-4           [-1, 16, 10, 10]               0
            Conv2d-5           [-1, 16, 10, 10]           2,320
           Dropout-6           [-1, 16, 10, 10]               0
              ReLU-7           [-1, 16, 10, 10]               0
         MaxPool2d-8             [-1, 16, 1, 1]               0
           Flatten-9                   [-1, 16]               0
           Linear-10                  [-1, 256]           4,352
             ReLU-11                  [-1, 256]               0
           Linear-12                    [-1, 8]           2,056
       LogSoftmax-13                    [-1, 8]               0
Total params: 9,176
Trainable params: 9

In [16]:
# Optimisation setup: NLLLoss pairs with the model's LogSoftmax output.
learning_rate = 0.001
loss_fn = nn.NLLLoss().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# NOTE: the former plt.figure(figsize=(20, 3)) call was removed. Nothing was
# ever drawn on it, so it only produced an empty "<Figure ... with 0 Axes>" output.
epochs = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)  # one pass over the training set
    test(test_dataloader, model, loss_fn)  # held-out metrics after each epoch
print("Done!")


Epoch 1
-------------------------------
loss: 37.268097  [    0/21102]
loss: 1.523756  [ 6400/21102]
loss: 1.736339  [12800/21102]
loss: 1.416410  [19200/21102]
Training accuracy: 0.485357
Avg loss: 1.549337
Testing accuracy: 0.582828
Epoch 2
-------------------------------
loss: 1.432497  [    0/21102]
loss: 1.411723  [ 6400/21102]
loss: 1.503060  [12800/21102]
loss: 1.194555  [19200/21102]
Training accuracy: 0.571178
Avg loss: 1.390392
Testing accuracy: 0.605951
Epoch 3
-------------------------------
loss: 1.362448  [    0/21102]
loss: 1.494856  [ 6400/21102]
loss: 1.035034  [12800/21102]
loss: 1.105680  [19200/21102]
Training accuracy: 0.596010
Avg loss: 1.314736
Testing accuracy: 0.605951
Epoch 4
-------------------------------
loss: 1.198830  [    0/21102]
loss: 1.207952  [ 6400/21102]
loss: 1.047988  [12800/21102]
loss: 1.061783  [19200/21102]
Training accuracy: 0.613070
Avg loss: 1.228951
Testing accuracy: 0.601213
Epoch 5
-------------------------------
loss: 1.130468  [    0/

loss: 0.607335  [ 6400/21102]
loss: 0.740734  [12800/21102]
loss: 0.667762  [19200/21102]
Training accuracy: 0.711260
Avg loss: 1.128214
Testing accuracy: 0.619598
Epoch 37
-------------------------------
loss: 0.760747  [    0/21102]
loss: 0.672378  [ 6400/21102]
loss: 0.767558  [12800/21102]
loss: 0.619254  [19200/21102]
Training accuracy: 0.710691
Avg loss: 1.037144
Testing accuracy: 0.641016
Epoch 38
-------------------------------
loss: 0.812468  [    0/21102]
loss: 0.613409  [ 6400/21102]
loss: 0.652341  [12800/21102]
loss: 0.651745  [19200/21102]
Training accuracy: 0.710312
Avg loss: 1.135476
Testing accuracy: 0.619598
Epoch 39
-------------------------------
loss: 0.787280  [    0/21102]
loss: 0.695933  [ 6400/21102]
loss: 0.763229  [12800/21102]
loss: 0.638254  [19200/21102]
Training accuracy: 0.711876
Avg loss: 1.026963
Testing accuracy: 0.635330
Epoch 40
-------------------------------
loss: 0.796341  [    0/21102]
loss: 0.589584  [ 6400/21102]
loss: 0.728975  [12800/21102]


loss: 0.612040  [ 6400/21102]
loss: 0.777130  [12800/21102]
loss: 0.552812  [19200/21102]
Training accuracy: 0.721543
Avg loss: 0.995419
Testing accuracy: 0.663002
Epoch 72
-------------------------------
loss: 0.763955  [    0/21102]
loss: 0.625608  [ 6400/21102]
loss: 0.695350  [12800/21102]
loss: 0.591969  [19200/21102]
Training accuracy: 0.722822
Avg loss: 1.036278
Testing accuracy: 0.654663
Epoch 73
-------------------------------
loss: 0.732814  [    0/21102]
loss: 0.590491  [ 6400/21102]
loss: 0.802977  [12800/21102]
loss: 0.539441  [19200/21102]
Training accuracy: 0.725998
Avg loss: 1.026806
Testing accuracy: 0.653715
Epoch 74
-------------------------------
loss: 0.760738  [    0/21102]
loss: 0.651336  [ 6400/21102]
loss: 0.823899  [12800/21102]
loss: 0.528033  [19200/21102]
Training accuracy: 0.717941
Avg loss: 1.004054
Testing accuracy: 0.656558
Epoch 75
-------------------------------
loss: 0.799690  [    0/21102]
loss: 0.695331  [ 6400/21102]
loss: 0.753501  [12800/21102]


<Figure size 2000x300 with 0 Axes>