In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import functional as F
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tqdm
import os 

In [2]:
# Restrict the process to one GPU slice. The identifier below is specific to
# the machine this notebook was authored on (it looks like a MIG instance
# UUID -- confirm for your host). `setdefault` keeps any value that was already
# exported by the launcher / job scheduler instead of overwriting it.
# NOTE(review): this presumably has to run before CUDA is first initialised
# to take effect -- keep this cell ahead of any GPU work.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', 'MIG-4c6deb33-4c3f-5990-89bf-891bd00dac17')

In [3]:
# Text files listing the image paths of each split (one path per line).
training_file, validation_file, test_file = (
    "training_data.txt",
    "validation_data.txt",
    "testing_data.txt",
)

In [None]:
def extract_unique_labels(data_file):
    """Collect the distinct class labels referenced by an image list file.

    Every non-blank line of ``data_file`` is treated as an image path, and
    the label is the name of the directory that directly contains the image
    (the second-to-last ``/``-separated component). Blank lines, such as a
    trailing newline at the end of the file, are ignored.

    Args:
        data_file (str): Path to a text file with one image path per line.

    Returns:
        list: The unique labels found in the file, sorted in ascending order.

    Raises:
        IndexError: If a non-blank line contains no ``/`` and therefore has
            no parent directory to use as a label.
    """
    # A set gives O(1) membership checks instead of scanning a list per line.
    unique_labels = set()
    with open(data_file, 'r') as f:
        for line in f:
            img_path = line.strip()
            if not img_path:
                # Nothing to parse on an empty / whitespace-only line.
                continue
            unique_labels.add(img_path.split('/')[-2])

    return sorted(unique_labels)

In [5]:
# The class vocabulary is derived from the training split; each label gets
# the integer index of its position in the sorted label list.
unique_labels = extract_unique_labels(training_file)
num_classes = len(unique_labels)
label_to_index = dict(zip(unique_labels, range(num_classes)))

In [6]:
# Preprocessing pipeline shared by all splits: resize to 64x64, convert to a
# tensor, then normalise each of the three channels with mean 0.5 / std 0.5.
preprocessing_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
class CustomTransportDataset(Dataset):
    """Image classification dataset backed by a text file of image paths.

    Each line of the list file is an image path; the name of the directory
    that directly contains the image is used as its class label. Only
    ``.png`` files whose label is present in ``label_to_index`` are kept.
    """
    def __init__(self, data_file, label_to_index, transform=None):
        """Read the list file and build aligned path / label lists.

        Args:
            data_file (str): Path to a text file with one image path per line.
            label_to_index (dict): Mapping from label name to integer index.
            transform (callable, optional): Transformation applied to each
                loaded image. When omitted, images are only converted to
                tensors.
        """
        self.file_paths = []
        self.labels = []
        self.transform = transform
        self.label_to_index = label_to_index

        # Must be a real tuple: ('.png') without the comma is just a string.
        valid_extensions = ('.png',)
        with open(data_file, 'r') as f:
            for line in f:
                img_path = line.strip()
                if not img_path.lower().endswith(valid_extensions):
                    continue
                label = img_path.split('/')[-2]
                if label not in self.label_to_index:
                    # Skip the sample entirely so that file_paths and labels
                    # stay index-aligned; appending the path without a label
                    # would shift every later label onto the wrong image.
                    print(f"Label {label} not found in label_to_index mapping.")
                    continue
                self.file_paths.append(img_path)
                self.labels.append(self.label_to_index[label])

    def __len__(self):
        """Return the number of usable samples.

        Returns:
            int: Number of images kept from the list file.
        """
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load one image as RGB and return it with its label index.

        Args:
            idx (int): Position of the sample.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the transformed image
            (or a plain tensor when no transform was given) and ``label`` is
            the integer class index.
        """
        img_path = self.file_paths[idx]
        label = self.labels[idx]
        # The context manager closes the underlying file once the pixel data
        # has been copied into the converted image.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform:
            img = self.transform(img)
        else:
            img = transforms.ToTensor()(img)

        return img, label

In [None]:
class TransportClass(nn.Module):
    """CNN image classifier built from four convolutional stages and an MLP head.

    Each stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
    with channel widths 64, 128, 256 and 512. The head flattens the feature
    map, applies a 1024-unit hidden layer with dropout and ends in
    ``LogSoftmax``, so the network outputs log-probabilities.

    The first linear layer expects ``512 * 4 * 4`` features, i.e. a 4x4
    feature map after the four poolings (e.g. 64x64 input images).

    Args:
        num_classes (int): Number of output classes (size of the last layer).
    """
    def __init__(self, num_classes):
        """Build the convolutional feature extractor and the classifier head.

        Args:
            num_classes (int): Number of output classes (size of the last layer).
        """
        super().__init__()

        # Consecutive pairs are the (in_channels, out_channels) of each stage;
        # the first entry is the 3 RGB input channels.
        channel_plan = (3, 64, 128, 256, 512)
        stage_modules = []
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stage_modules.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.conv_layers = nn.Sequential(*stage_modules)

        flattened_features = 512 * 4 * 4
        self.fc_layers = nn.Sequential(
            nn.Linear(flattened_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, num_classes),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Run the forward pass.

        Args:
            x (torch.Tensor): Batch of input images.

        Returns:
            torch.Tensor: Per-class log-probabilities, one row per input.
        """
        feature_maps = self.conv_layers(x)
        # Collapse everything except the batch dimension for the linear head.
        flat_features = feature_maps.view(feature_maps.size(0), -1)
        return self.fc_layers(flat_features)

In [None]:
def train_model(train_loader, model, criterion, optimizer, num_epochs=5):
    """Train ``model`` on ``train_loader`` for a fixed number of epochs.

    The model is moved to the first CUDA device when one is available and to
    the CPU otherwise, and is explicitly switched to training mode so that a
    model previously put in eval mode (as ``evaluate_model`` does) trains
    with its train-time layer behaviour again.

    Args:
        train_loader (DataLoader): Yields ``(inputs, labels)`` batches.
        model (nn.Module): The network to train; it is updated in place.
        criterion (callable): Loss function called as ``criterion(outputs, labels)``.
        optimizer (Optimizer): Optimizer holding the model's parameters.
        num_epochs (int): Number of passes over ``train_loader``.

    Returns:
        nn.Module: The same ``model`` instance after training.
    """
    # Fall back to the CPU instead of crashing on hosts without a visible GPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()

    # Guard the average below against an empty loader.
    num_batches = max(len(train_loader), 1)

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        running_loss = 0
        for inputs, labels in tqdm.tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        print(f"Training Loss after epoch {epoch + 1}: {running_loss / num_batches}")

    return model

In [None]:
def evaluate_model(loader, model):
    """Compute accuracy and macro-averaged precision, recall and F1 on a loader.

    The model is switched to eval mode (and left in it), moved to the first
    CUDA device when one is available and to the CPU otherwise, and run
    without gradient tracking. The predicted class of each sample is the
    arg-max over the model's output along dimension 1.

    Args:
        loader (DataLoader): Yields ``(inputs, labels)`` batches to evaluate.
        model (nn.Module): The trained model.

    Returns:
        tuple: ``(accuracy, precision, recall, f1)`` as computed by the
        sklearn metric functions, the last three with ``average='macro'``.
    """
    model.eval()
    # Fall back to the CPU instead of crashing on hosts without a visible GPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    predicted_labels = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm.tqdm(loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            # Accumulate plain Python ints on the host for the sklearn metrics.
            predicted_labels.extend(outputs.argmax(dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())

    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='macro')
    recall = recall_score(true_labels, predicted_labels, average='macro')
    f1 = f1_score(true_labels, predicted_labels, average='macro')

    return accuracy, precision, recall, f1

In [11]:
# Seed torch's global RNG before the network is built so that the random
# weight initialisation is repeatable across "Restart & Run All" executions.
SEED = 42
torch.manual_seed(SEED)

model = TransportClass(num_classes=num_classes)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# NLLLoss matches the LogSoftmax output layer of TransportClass.
criterion = nn.NLLLoss()

In [12]:
# One dataset per split; all share the label mapping and preprocessing.
train_dataset, val_dataset, test_dataset = (
    CustomTransportDataset(split_file, label_to_index, transform=transform)
    for split_file in (training_file, validation_file, test_file)
)

In [13]:
# Only the training split is shuffled; the evaluation splits keep file order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_dataset, batch_size=32, shuffle=False)
    for eval_dataset in (val_dataset, test_dataset)
)

In [14]:
# Quick sanity check of the training split and the label vocabulary.
summary_lines = (
    f"Training dataset size: {len(train_dataset)}",
    f"Number of unique labels: {num_classes}",
    f"Label to index mapping: {label_to_index}",
)
for summary_line in summary_lines:
    print(summary_line)

Training dataset size: 5418
Number of unique labels: 5
Label to index mapping: {'bus': 0, 'car': 1, 'motorcycle': 2, 'train': 3, 'truck': 4}


In [15]:
# Full training run: 50 passes over the training loader.
trained_model = train_model(
    train_loader=train_loader,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=50,
)


Epoch 1/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 15.87it/s]


Training Loss after epoch 1: 1.828856839502559
Epoch 2/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.19it/s]


Training Loss after epoch 2: 1.1480253307258381
Epoch 3/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.33it/s]


Training Loss after epoch 3: 1.0168192460256464
Epoch 4/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.39it/s]


Training Loss after epoch 4: 0.9362379827920129
Epoch 5/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 15.94it/s]


Training Loss after epoch 5: 0.8576250020195456
Epoch 6/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.41it/s]


Training Loss after epoch 6: 0.7927009273977841
Epoch 7/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.34it/s]


Training Loss after epoch 7: 0.7228583058890174
Epoch 8/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.32it/s]


Training Loss after epoch 8: 0.6931928156053319
Epoch 9/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.37it/s]


Training Loss after epoch 9: 0.6274031050064984
Epoch 10/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.26it/s]


Training Loss after epoch 10: 0.5759508572080556
Epoch 11/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.38it/s]


Training Loss after epoch 11: 0.5532844358507325
Epoch 12/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.37it/s]


Training Loss after epoch 12: 0.5006451690021683
Epoch 13/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.34it/s]


Training Loss after epoch 13: 0.4589357511085622
Epoch 14/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.34it/s]


Training Loss after epoch 14: 0.42265013517702327
Epoch 15/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.40it/s]


Training Loss after epoch 15: 0.39989961223567233
Epoch 16/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.29it/s]


Training Loss after epoch 16: 0.3594222417210831
Epoch 17/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.47it/s]


Training Loss after epoch 17: 0.3325357441078214
Epoch 18/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.43it/s]


Training Loss after epoch 18: 0.2865192969932276
Epoch 19/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.33it/s]


Training Loss after epoch 19: 0.2642230790984981
Epoch 20/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.12it/s]


Training Loss after epoch 20: 0.2144608503097997
Epoch 21/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.20it/s]


Training Loss after epoch 21: 0.22478918331291745
Epoch 22/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.33it/s]


Training Loss after epoch 22: 0.19703067710951847
Epoch 23/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 15.74it/s]


Training Loss after epoch 23: 0.18160144202411174
Epoch 24/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.23it/s]


Training Loss after epoch 24: 0.1478075103943839
Epoch 25/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 15.50it/s]


Training Loss after epoch 25: 0.13434918737586807
Epoch 26/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 15.54it/s]


Training Loss after epoch 26: 0.12403653863790061
Epoch 27/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.98it/s]


Training Loss after epoch 27: 0.11882759044768618
Epoch 28/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.59it/s]


Training Loss after epoch 28: 0.09764417507685721
Epoch 29/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.40it/s]


Training Loss after epoch 29: 0.11991730383049477
Epoch 30/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.40it/s]


Training Loss after epoch 30: 0.10251352192871684
Epoch 31/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.53it/s]


Training Loss after epoch 31: 0.09439575391383294
Epoch 32/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.51it/s]


Training Loss after epoch 32: 0.0934262369375895
Epoch 33/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.62it/s]


Training Loss after epoch 33: 0.07362550426452585
Epoch 34/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.28it/s]


Training Loss after epoch 34: 0.09174558561901315
Epoch 35/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.84it/s]


Training Loss after epoch 35: 0.05356213508545325
Epoch 36/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.20it/s]


Training Loss after epoch 36: 0.06993469004485044
Epoch 37/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:13<00:00, 12.59it/s]


Training Loss after epoch 37: 0.05286634648030432
Epoch 38/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.76it/s]


Training Loss after epoch 38: 0.06509139700262014
Epoch 39/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.97it/s]


Training Loss after epoch 39: 0.06426868450567674
Epoch 40/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 15.11it/s]


Training Loss after epoch 40: 0.04277547077346004
Epoch 41/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.80it/s]


Training Loss after epoch 41: 0.057287160560201084
Epoch 42/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 14.14it/s]


Training Loss after epoch 42: 0.027259293964872245
Epoch 43/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 15.21it/s]


Training Loss after epoch 43: 0.06190203030795708
Epoch 44/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.59it/s]


Training Loss after epoch 44: 0.06173610666232622
Epoch 45/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.66it/s]


Training Loss after epoch 45: 0.0655102372794371
Epoch 46/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.73it/s]


Training Loss after epoch 46: 0.07646256964311332
Epoch 47/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.84it/s]


Training Loss after epoch 47: 0.06118536611337571
Epoch 48/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:12<00:00, 13.55it/s]


Training Loss after epoch 48: 0.05175095971593263
Epoch 49/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:11<00:00, 14.84it/s]


Training Loss after epoch 49: 0.030125460925688352
Epoch 50/50


100%|█████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 15.46it/s]

Training Loss after epoch 50: 0.0518523360887176





In [16]:
def _print_metrics(split_name, metrics):
    """Print one line per metric for the given split.

    Args:
        split_name (str): Prefix shown before each metric name.
        metrics (tuple): ``(accuracy, precision, recall, f1)`` in that order.
    """
    metric_names = ("Accuracy", "Precision", "Recall", "F1-Score")
    for metric_name, metric_value in zip(metric_names, metrics):
        print(f"{split_name} {metric_name}: {metric_value}")


# Evaluate and report the validation split first, then the test split.
val_accuracy, val_precision, val_recall, val_f1 = evaluate_model(val_loader, trained_model)
_print_metrics("Validation", (val_accuracy, val_precision, val_recall, val_f1))

test_accuracy, test_precision, test_recall, test_f1 = evaluate_model(test_loader, trained_model)
_print_metrics("Test", (test_accuracy, test_precision, test_recall, test_f1))


100%|███████████████████████████████████████████████████████████████████████████████████| 23/23 [00:01<00:00, 14.20it/s]


Validation Accuracy: 0.7799717912552891
Validation Precision: 0.7608650531647075
Validation Recall: 0.7568015010363742
Validation F1-Score: 0.7570874097671587


100%|███████████████████████████████████████████████████████████████████████████████████| 23/23 [00:01<00:00, 14.43it/s]

Test Accuracy: 0.788135593220339
Test Precision: 0.765845410047796
Test Recall: 0.7647661060634905
Test F1-Score: 0.7651275438509482





In [17]:
# Persist only the learned parameters (state dict), not the module object.
checkpoint_path = "RGBinitialdatasetfinal.pth"
torch.save(trained_model.state_dict(), checkpoint_path)
print("Model saved successfully.")

Model saved successfully.
