# Car Classification using Stanford Cars Dataset

In [5]:
#Car image and metadata manipulation
from cv2 import imread, imwrite, waitKey
from scipy import io as spio

#File management
import os
import shutil

#PyTorch
import torch
from torchvision import transforms, datasets
from torch import nn #neural network framework
from torch import optim

#hyperparameter tuning
from ray import tune

##### 1. Load and match car metadata with images

In [6]:
car_metadata = spio.loadmat("cardata/cars_meta.mat")["class_names"]
car_train_raw = spio.loadmat("cardata/cars_train.mat")["annotations"]
car_test_raw = spio.loadmat("cardata/cars_test.mat")["annotations"]

#train/test metadata: [box x1, box y1, box x2, box y2, classnum, filename]

In [7]:
#minimal prep work required for the data as just need to load the metadata and match it up with the images
car_classes = [x[0] for x in car_metadata[0]]

car_train = {
    car[5][0]: car_classes[car[4][0][0]-1] for car in car_train_raw[0]
} #class gives num not index therefore -1

car_test = {
    car[5][0]: car_classes[car[4][0][0]-1] for car in car_test_raw[0]
}

#format: {"xxxxx.jpg": "Make Model Year"}

##### 2. Crop the car images to remove unnecessary background

In [None]:
#crop car images to only include the relevant bit, not background stuff
for car in car_train_raw[0]:
    x1, y1, x2, y2 = car[0][0][0], car[1][0][0], car[2][0][0], car[3][0][0]
    path = f"cardata/cars_train/{car[5][0]}"
    
    image = imread(path)
    crop = image[y1:y2, x1:x2]
    
    imwrite(path, crop)
    waitKey(0)
    
for car in car_test_raw[0]:
    x1, y1, x2, y2 = car[0][0][0], car[1][0][0], car[2][0][0], car[3][0][0]
    path = f"cardata/cars_test/{car[5][0]}"
    
    image = imread(path)
    crop = image[y1:y2, x1:x2]
    
    imwrite(path, crop)
    waitKey(0)

##### 3. Move images into class folders

In [None]:
for img_class in car_classes:
    os.makedirs(f"cardata/cars_train/{img_class}/")
    os.makedirs(f"cardata/cars_test/{img_class}/")

In [None]:
for car in car_train:
    shutil.move(f"cardata/cars_train/{car}", f"cardata/cars_train/{car_train[car]}/{car}")

for car in car_test:
    shutil.move(f"cardata/cars_test/{car}", f"cardata/cars_test/{car_test[car]}/{car}")

##### 4. Convert, resize and normalize the image data

In [8]:
mean = (0.5,0.5,0.5) #rgb; (x - mean)/std will give -1 to 1 with these values of mean and std
std = (0.5,0.5,0.5)

transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((128, 128)),
        transforms.Normalize(mean, std),
    ]
)

batch_size = 5 #n images per iteration of training
workers = 2 #utilise multiple cores

train_dataset = datasets.ImageFolder(
    "cardata/cars_train/", transform=transform
)
test_dataset = datasets.ImageFolder(
    "cardata/cars_test/", transform=transform
)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False, #no shuffle for test set
    num_workers=workers,
)

##### 5. Defining convolutional neural network

In [19]:
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        #CONVOLUTIONAL LAYERS
        #input channels (3 as RGB), output channels, n by n square kernel
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=16, kernel_size=3, padding=1
        )
        #in channels of conv2 = out channels of conv1
        self.conv2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=3, padding=1
        )

        #POOl
        self.pool = nn.MaxPool2d(2, 2)

        #FULLY CONNECTED LAYERS
        self.fc1 = nn.Linear(32 * 32 * 32, 128) #output from conv2 * length * breadth of images
        self.fc2 = nn.Linear(128, 196) #196 classes

    def forward(self, x):
        """Forward pass"""
        #reLU allows for non-linearity
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))

        x = x.view(-1, 32 * 32 * 32) #to manipulate tensors
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))

        return x

In [20]:
model = CNN()
print(model)

CNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=32768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=196, bias=True)
)


##### 6. Optimising

In [21]:
cel = nn.CrossEntropyLoss()

#Stochastic Gradient Descent: softmax + negative log-likelihood
optimiser = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

##### 7. Training

In [31]:
for epoch in range(20):
    for batch_id, (input, target) in enumerate(train_loader):
        optimiser.zero_grad() #reset
        output = model(input) #forward pass

        #calculate loss
        ce_loss = cel(output, target)
        ce_loss.backward() #backward pass

        #optimisation
        optimiser.step()

        #metrics
        if batch_id % 1000 == 999: #print at intervals
            print(f"{epoch + 1} [{batch_id+1}]: Loss is {round(ce_loss.item(), 3)}")


1 [1000]: Loss is 0.001
2 [1000]: Loss is 0.001
3 [1000]: Loss is 0.001
4 [1000]: Loss is 0.0
5 [1000]: Loss is 0.001
6 [1000]: Loss is 0.0
7 [1000]: Loss is 0.001
8 [1000]: Loss is 1.057
9 [1000]: Loss is 0.0
10 [1000]: Loss is 0.0
11 [1000]: Loss is 0.001
12 [1000]: Loss is 0.0
13 [1000]: Loss is 0.0
14 [1000]: Loss is 1.056
15 [1000]: Loss is 0.0
16 [1000]: Loss is 0.0
17 [1000]: Loss is 0.0
18 [1000]: Loss is 0.0
19 [1000]: Loss is 1.056
20 [1000]: Loss is 0.0


##### 8. Saving the NN for future use

In [32]:
filename = "CNN-40.pth"
torch.save(model.state_dict(), filename)

In [17]:
#reloading trained model if needed
model = CNN()
model.load_state_dict(torch.load(filename))

<All keys matched successfully>

##### 9. Testing

In [33]:
t = 0 #total
c = 0 #total correct

with torch.no_grad():
    for car in test_loader:
        input, target = car
        output = model(input)
        _, pred = torch.max(output.data, 1)

        t += target.size(0)
        c += (pred == target).sum().item()

accuracy = 100*(c/t)
print(f"{accuracy}% accuracy")

11.603034448451686% accuracy


##### 5. Tuning
WIP: using ray tune