<a href="https://colab.research.google.com/github/iammuhammad41/Automatic-Vehicle-Classification/blob/main/vehicle_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
import os

from copy import deepcopy
from sklearn.preprocessing import LabelEncoder

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet50, vgg16, mobilenet_v2

import torch.nn as nn
import torch.nn.functional as F

In [1]:
import kagglehub

# Download the dataset through kagglehub and keep the returned location;
# later cells list and join paths under it, so it is used as a local directory.
base_dir = kagglehub.dataset_download("marquis03/vehicle-classification")
print(f"{base_dir = }")

In [None]:
# List the top-level entries of the downloaded dataset directory.
dataset = os.listdir(base_dir)
dataset

['val', 'test', 'train']

In [None]:
def create_df(base_dir, labeled=True):
    """Build a DataFrame of image paths (and class labels) found under ``base_dir``.

    Args:
        base_dir: Directory to scan.
        labeled: When true, ``base_dir`` is expected to hold one sub-directory
            per class; every entry inside a sub-directory becomes a row whose
            label is the sub-directory name. When false, every entry directly
            inside ``base_dir`` becomes a row and no label column is produced.

    Returns:
        A ``pandas.DataFrame`` with an ``images`` column of paths and, in
        labeled mode, a ``labels`` column of class-directory names. Rows are
        ordered by sorted directory listing.
    """
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order is reproducible between runs and machines.
    entries = sorted(os.listdir(base_dir))

    if not labeled:
        return pd.DataFrame(
            {"images": [os.path.join(base_dir, name) for name in entries]}
        )

    images, labels = [], []
    for class_name in entries:
        class_dir = os.path.join(base_dir, class_name)
        # Stray files next to the class folders (e.g. ".DS_Store") are not
        # classes; listing them would raise NotADirectoryError.
        if not os.path.isdir(class_dir):
            continue
        for file_name in sorted(os.listdir(class_dir)):
            images.append(os.path.join(class_dir, file_name))
            labels.append(class_name)

    return pd.DataFrame({"images": images, "labels": labels})

In [None]:
# Index the three splits. train/ and val/ are scanned as class folders; test/
# is scanned as a flat folder without labels.
train = create_df(os.path.join(base_dir, "train"), labeled=True)
val = create_df(os.path.join(base_dir, "val"), labeled=True)
test = create_df(os.path.join(base_dir, "test"), labeled=False)

In [4]:
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would add a stray "None" line after the report.
train.info()

In [None]:
# Turn the class-folder names into integer ids. The encoder is fitted on the
# training labels and the same mapping is then applied to the validation set.
le = LabelEncoder()
train["labels"] = le.fit_transform(train["labels"].to_numpy())
val["labels"] = le.transform(val["labels"].to_numpy())

In [5]:
# Re-inspect the frame after label encoding. DataFrame.info() prints its own
# report and returns None, so it is called without print().
train.info()

In [None]:
# Constants
# NOTE(review): MAX_EPOCHS, LR, STEP, GAMMA and DECAY are not referenced by any
# cell visible in this notebook (train_model picks its own epoch count and
# learning rate) — confirm whether they are still needed.

MAX_EPOCHS = 20
LR = 1e-5
BATCH_SIZE = 32  # mini-batch size passed to both DataLoaders
IM_SIZE= 224  # presumably the input side length in pixels; the transforms crop to 224
STEP = 5
GAMMA = 0.2
DECAY = 0.9
NUM_CLASSES = train["labels"].nunique()  # number of distinct training labels; sizes the output layers

In [None]:
# Channel statistics used to normalize inputs (the standard ImageNet values),
# defined once so the two pipelines cannot drift apart.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop/flip/rotation/colour jitter for augmentation,
# then tensor conversion and normalization. The crop size comes from IM_SIZE
# rather than a repeated literal.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IM_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Validation pipeline: deterministic resize followed by a centre crop.
# The resize target keeps the 256:224 ratio (224 / 0.875 == 256).
val_transform = transforms.Compose([
    transforms.Resize(int(IM_SIZE / 0.875)),
    transforms.CenterCrop(IM_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

In [None]:
class ImageDataset(Dataset):
    """Dataset that loads images from the paths stored in a table.

    The first column of ``data`` is read as the image path and the last
    column as the label. For a single-column table those are the same
    column, so the path itself comes back in the label slot.
    """

    def __init__(self, data, transform=None):
        # ``data`` is accessed through ``len`` and ``.iloc``; ``transform`` is
        # an optional callable applied to each loaded image.
        self.transform = transform
        self.data = data

    def __len__(self):
        """Return the number of rows in the underlying table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``."""
        path, target = self.data.iloc[idx, 0], self.data.iloc[idx, -1]

        # Convert to RGB so every item has three channels.
        picture = Image.open(path).convert('RGB')

        # Without a transform the PIL image is handed back unchanged.
        if not self.transform:
            return picture, target
        return self.transform(picture), target

In [None]:
# Wrap each split in a dataset. train/val get their transform pipelines; the
# test split gets none, so its items come back as untransformed PIL images.
train_ds = ImageDataset(train, train_transform)
val_ds = ImageDataset(val, val_transform)
test_ds = ImageDataset(test)

In [6]:
# Display the test item at index 1; element 0 of each item is the image.
plt.imshow(test_ds[1][0]) # test_ds[idx][0] is the image
plt.axis("off")

In [7]:
# Peek at one row of pixel values of the first training image.
# Each train_ds[0] access re-runs the random augmentation pipeline, so the two
# prints below come from independently transformed copies of the image.
print(f"{len(train_ds[0][0][0][0]) = }")
print()
print(train_ds[0][0][0][0])
# Indexing: train_ds[idx][0: image, 1: label][channel][row] -> one row of transformed pixel values

In [None]:
# Shape of one transformed training image after conversion to a NumPy array.
print(np.array(train_ds[0][0]).shape)

(3, 224, 224)


In [None]:
# Batch both splits with BATCH_SIZE. Only the training loader shuffles; each
# loader is configured with num_workers=4.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


In [8]:
# Get the next batch of images and labels
images, labels = next(iter(train_loader))

# The loader yields normalized tensors. Undo the Normalize step (x * std + mean)
# so imshow receives values in [0, 1] instead of clipping out-of-range floats.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
images = (images * norm_std + norm_mean).clamp(0, 1)

images = images.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C) as matplotlib expects

fig, axes = plt.subplots(2, 4, figsize=(12, 6))  # 2 rows, 4 columns

# Show up to 8 samples; panels beyond the batch size stay blank.
for i, ax in enumerate(axes.flat):
    ax.axis('off')
    if i >= len(images):
        continue
    ax.imshow(images[i].numpy())
    ax.set_title(f"Label: {labels[i].item()}")

plt.tight_layout()
plt.show()

In [None]:
from torchsummary import summary
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [9]:
class MyCNN(nn.Module):
    """Convolutional image classifier with four conv stages and an MLP head.

    Each stage is two (3x3 conv -> batch norm -> ReLU) groups followed by a
    2x2 max pool, with channel widths 64, 128, 256 and 512. The feature map
    is average-pooled to 7x7, flattened and passed through three linear
    layers (with dropout between them) ending in ``num_classes`` outputs.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor: four stages, each built by the same factory.
        self.conv1 = self._conv_block(3, 64)
        self.conv2 = self._conv_block(64, 128)
        self.conv3 = self._conv_block(128, 256)
        self.conv4 = self._conv_block(256, 512)

        # Fixed 7x7 output regardless of the incoming spatial size.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        # Classification head.
        head_layers = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

        self._initialize_weights()

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return one stage: two conv/batch-norm/ReLU groups and a max pool."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )

    def _initialize_weights(self):
        """Re-initialise conv, batch-norm and linear parameters in place."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run the conv stages, pool, flatten and classify; returns the head's output."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)
        pooled = self.adaptive_pool(x)
        return self.classifier(torch.flatten(pooled, 1))

# Size the output layer from the data, like the torchvision models below,
# instead of relying on MyCNN's default of 10 classes.
my_model = MyCNN(num_classes=NUM_CLASSES).to(device)

summary(my_model, (3, 224, 224), device = device.type)

In [None]:
# Untrained torchvision architectures (weights=None). Each gets a new final
# linear layer with NUM_CLASSES outputs, then the whole network is moved to
# the selected device.
resnet = resnet50(weights=None)
resnet.fc = nn.Linear(resnet.fc.in_features, NUM_CLASSES)
resnet = resnet.to(device)

vgg = vgg16(weights=None)
vgg.classifier[6] = nn.Linear(vgg.classifier[6].in_features, NUM_CLASSES)
vgg = vgg.to(device)

mobilenet = mobilenet_v2(weights=None)
mobilenet.classifier[1] = nn.Linear(mobilenet.classifier[1].in_features, NUM_CLASSES)
mobilenet = mobilenet.to(device)

In [10]:
# Print model summaries, one per torchvision network, for a 3x224x224 input.
for heading, network in (
    ("ResNet50 Summary:", resnet),
    ("\nVGG16 Summary:", vgg),
    ("\nMobileNetV2 Summary:", mobilenet),
):
    print(heading)
    summary(network, (3, 224, 224), device=device.type)

In [None]:
# Networks to train, keyed by name. The training cell passes each key to
# train_model (where it prefixes the checkpoint file name) and uses it in the
# plot titles and the saved figure name.
models = {
    'my_model': my_model,
    'resnet': resnet,
    'vgg16': vgg,
    'mobilenet': mobilenet
}

In [None]:
# import shutil
# shutil.rmtree("/kaggle/working/")

In [None]:
def _epoch_metrics(loss_sum, correct, total):
    """Return (mean per-sample loss, accuracy in percent); zeros when no samples were seen."""
    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100. * correct / total


def _run_train_epoch(model, loader, criterion, optimizer, scaler, use_amp, run_device, epoch):
    """Run one optimisation pass over ``loader`` and return (loss, accuracy %)."""
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    num_batches = len(loader)

    for batch_idx, (images, labels) in enumerate(loader):
        images, labels = images.to(run_device), labels.to(run_device)

        optimizer.zero_grad()

        # Mixed precision forward pass (a no-op when use_amp is False).
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        batch_size = labels.size(0)
        # Weight by batch size so a short final batch does not skew the mean.
        loss_sum += batch_loss * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

        if batch_idx % 20 == 0:
            print(f'Epoch: {epoch+1} [{batch_idx * len(images)}/{len(loader.dataset)} '
                  f'({100. * batch_idx / num_batches:.0f}%)]\tLoss: {batch_loss:.6f}')

    return _epoch_metrics(loss_sum, correct, total)


def _evaluate(model, loader, criterion, run_device):
    """Score ``model`` on ``loader`` without gradients; return (loss, accuracy %)."""
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(run_device), labels.to(run_device)
            outputs = model(images)
            batch_size = labels.size(0)
            loss_sum += criterion(outputs, labels).item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

    return _epoch_metrics(loss_sum, correct, total)


def train_model(model, model_name, train_loader, val_loader, num_epochs=50,
                lr=0.001, weight_decay=0.01, patience=10):
    """Train ``model`` with AdamW + cosine annealing and early stopping.

    The model is moved to the module-level ``device``. After every epoch it is
    evaluated on ``val_loader``; whenever validation accuracy improves, a
    checkpoint is written to ``f'{model_name}_best.pth'``. Training stops early
    after ``patience`` consecutive epochs without improvement. The model object
    itself is left with the weights of the last epoch that ran.

    Args:
        model: Network to train (modified in place).
        model_name: Prefix of the checkpoint file name.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Maximum number of epochs; also the cosine schedule length.
        lr: Initial AdamW learning rate.
        weight_decay: AdamW weight decay.
        patience: Epochs without validation improvement before stopping.

    Returns:
        ``(train_losses, train_accs, val_accs)`` — three lists with one entry
        per completed epoch. Losses are per-sample means; accuracies are
        percentages. An empty loader contributes 0.0 instead of raising.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()

    # Using AdamW optimizer with weight decay
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Cosine Annealing scheduler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-6)

    # Only request CUDA mixed precision when actually running on CUDA; on CPU
    # the scaler and autocast are explicitly disabled instead of warning.
    use_amp = torch.device(device).type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    best_val_acc = 0.0
    patience_counter = 0

    train_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_train_epoch(
            model, train_loader, criterion, optimizer, scaler, use_amp, device, epoch
        )
        val_loss, val_acc = _evaluate(model, val_loader, criterion, device)

        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print('-' * 60)

        scheduler.step()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
            }, f'{model_name}_best.pth')
            patience_counter = 0
            print(f'New best model saved with validation accuracy: {val_acc:.2f}%')
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f'Early stopping triggered after {epoch + 1} epochs')
            break

    return train_losses, train_accs, val_accs


In [11]:
# Train every registered network in turn, then save and show its curves.
for name, model in models.items():
    print(f"\nTraining {name}...")
    print("=" * 60)
    train_losses, train_accs, val_accs = train_model(model, name, train_loader, val_loader)

    # One figure per model: loss on the left, accuracies on the right.
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

    loss_ax.plot(train_losses, label='Training Loss')
    loss_ax.set_title(f'{name} Training Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    acc_ax.plot(train_accs, label='Train Acc')
    acc_ax.plot(val_accs, label='Val Acc')
    acc_ax.set_title(f'{name} Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy (%)')
    acc_ax.legend()

    fig.tight_layout()
    fig.savefig(f'{name}_training.png')
    plt.show()

In [12]:
import os
from PIL import Image
from matplotlib import pyplot as plt

# Define class names
# predict_image indexes this list with the position of the model's largest
# output, so the order must match the integer ids assigned by the LabelEncoder.
# NOTE(review): these display names are hard-coded — confirm they line up with le.classes_.
CLASS_NAMES = ['SUV', 'Bus', 'Family Sedan', 'Fire Engine', 'Heavy Truck',
               'Jeep', 'Minibus', 'Racing Car', 'Taxi', 'Truck']

# Function to predict class of a given image
def predict_image(image_path, model, class_names, transform=None):
    """Classify a single image file and return its class name.

    Args:
        image_path: Path of the image to classify.
        model: Network to run; it is switched to eval mode and is expected to
            already live on the module-level ``device``.
        class_names: Sequence indexed by the predicted class id.
        transform: Optional preprocessing callable mapping a PIL image to a
            ``(C, H, W)`` tensor. Defaults to the same steps as the validation
            pipeline so inference sees inputs prepared like those the model
            was validated on.

    Returns:
        The entry of ``class_names`` at the index of the largest model output.
    """
    if transform is None:
        # Resize + centre crop (as in validation) rather than squashing the
        # image to a square, which would distort its aspect ratio.
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    # Close the file handle as soon as the pixels have been converted.
    with Image.open(image_path) as handle:
        image = handle.convert('RGB')
    batch = transform(image).unsqueeze(0).to(device)  # Add batch dimension

    model.eval()
    with torch.no_grad():
        outputs = model(batch)
        _, predicted = outputs.max(1)

    return class_names[predicted.item()]

# Example: Predicting class of a specific image from the dataset
# Build the path from the downloaded dataset location (base_dir) instead of a
# fixed Kaggle mount point, so the cell also works where the data lives elsewhere.
test_image_path = os.path.join(base_dir, "test", "01ab0c4e74e5cefbc25e78e8b2b4b30d.jpg")  # Replace with the actual image file name

# Predict the class
predicted_class = predict_image(test_image_path, my_model, CLASS_NAMES)
print(f"Predicted Class: {predicted_class}")

# Optional: Visualize the prediction
image = Image.open(test_image_path).convert('RGB')
plt.imshow(image)
plt.title(f"Predicted: {predicted_class}")
plt.axis('off')
plt.show()

In [13]:
import os
from PIL import Image
from matplotlib import pyplot as plt

# Define class names (same list as in the previous cell)
# predict_image indexes this list with the position of the model's largest
# output, so the order must match the integer ids assigned by the LabelEncoder.
# NOTE(review): these display names are hard-coded — confirm they line up with le.classes_.
CLASS_NAMES = ['SUV', 'Bus', 'Family Sedan', 'Fire Engine', 'Heavy Truck',
               'Jeep', 'Minibus', 'Racing Car', 'Taxi', 'Truck']

# Function to predict class of a given image
def predict_image(image_path, model, class_names, transform=None):
    """Classify a single image file and return its class name.

    Args:
        image_path: Path of the image to classify.
        model: Network to run; it is switched to eval mode and is expected to
            already live on the module-level ``device``.
        class_names: Sequence indexed by the predicted class id.
        transform: Optional preprocessing callable mapping a PIL image to a
            ``(C, H, W)`` tensor. Defaults to the same steps as the validation
            pipeline so inference sees inputs prepared like those the model
            was validated on.

    Returns:
        The entry of ``class_names`` at the index of the largest model output.
    """
    if transform is None:
        # Resize + centre crop (as in validation) rather than squashing the
        # image to a square, which would distort its aspect ratio.
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    # Close the file handle as soon as the pixels have been converted.
    with Image.open(image_path) as handle:
        image = handle.convert('RGB')
    batch = transform(image).unsqueeze(0).to(device)  # Add batch dimension

    model.eval()
    with torch.no_grad():
        outputs = model(batch)
        _, predicted = outputs.max(1)

    return class_names[predicted.item()]

# Example: Predicting class of a specific image from the dataset
# Build the path from the downloaded dataset location (base_dir) instead of a
# fixed Kaggle mount point, so the cell also works where the data lives elsewhere.
test_image_path = os.path.join(base_dir, "test", "02a43a441f5c71ab9ff8ecab1f33ca49.jpg")

# Predict the class
predicted_class = predict_image(test_image_path, my_model, CLASS_NAMES)
print(f"Predicted Class: {predicted_class}")

# Optional: Visualize the prediction
image = Image.open(test_image_path).convert('RGB')
plt.imshow(image)
plt.title(f"Predicted: {predicted_class}")
plt.axis('off')
plt.show()

In [14]:
# kaggle kernels output arslan750/vehicle-type-classification