In [11]:
import os
import os.path as osp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from cs_dataset import city_scapes


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms

import torch.optim as optim
from sklearn.metrics import confusion_matrix



In [12]:
#check GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Available device:{device}')

Available device:cuda:0


In [24]:
 # Data augmentation and normalization for training
    # Just normalization for validation
train_dataset_transform = transforms.Compose([transforms.Resize(128), transforms.ToTensor(),
                                                            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                               std=[0.229, 0.224, 0.225])])
val_dataset_transform = transforms.Compose([transforms.Resize(128), transforms.ToTensor(),
                                                            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                               std=[0.229, 0.224, 0.225])])
test_dataset_transform = transforms.Compose([transforms.Resize(128), transforms.ToTensor(),
                                                          transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                               std=[0.229, 0.224, 0.225])])

In [14]:
# Hardcoded parameters
lr = 0.0001
num_epochs = 100
momentum = 0.9
weight_decay = 0.0005
batch_size = 64

In [15]:
# fetch training data
train_path = "cityscapesExtracted/cityscapesExtractedResized"
train_dataset = city_scapes(datapath=train_path,
                            transform= train_dataset_transform)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

# fetch validation data
val_path = "cityscapesExtracted/cityscapesExtractedValResized"
val_dataset = city_scapes(datapath=val_path,
                            transform=val_dataset_transform)

val_dataloader = DataLoader(val_dataset, batch_size=1)

# fetch evaluation data
test_path = "cityscapesExtracted/cityscapesExtractedTestResized"
test_dataset = city_scapes(datapath=test_path,
                            transform=test_dataset_transform)

test_dataloader = DataLoader(test_dataset, batch_size=1)

cityscapesExtracted/cityscapesExtractedResized
fetching data from the data directory
Number of Images 37911
Number of file names 37911
cityscapesExtracted/cityscapesExtractedValResized
fetching data from the data directory
Number of Images 1626
Number of file names 1626
cityscapesExtracted/cityscapesExtractedTestResized
fetching data from the data directory
Number of Images 5348
Number of file names 5348


In [16]:
def val(net, val_dataloader):
    net.eval()
    accuracy = 0.0
    total = 0.0
    with torch.no_grad():
        for sample in enumerate(val_dataloader):
            image = sample[1]['image'].to(device)
            label = sample[1]['label'].to(device)
            loss, logits = net(image, label)
            pred_label = torch.argmax(logits)
            total += label.size(0)
            accuracy += torch.sum(pred_label == label)
    return accuracy, total


def test(net, test_dataloader, classes):
    net.eval()
    predicted_labels = []
    true_labels = []
    count = 0
    for sample in enumerate(test_dataloader):

        with torch.no_grad():
            image = sample[1]['image'].to(device)
            label = sample[1]['label'].to(device)

            _, logits = net(image, label)
            index = torch.argmax(logits)

            if classes[index] == label:
                count += 1

            predicted_labels.append(classes[index])
            true_labels.append(label.cpu().numpy()[0])
    accuracy = count / len(test_dataloader) * 100
    cf_matrix = confusion_matrix(predicted_labels, true_labels)
    df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix) * 10, index=[i for i in classes],
                         columns=[i for i in classes])
    plt.figure(figsize=(12, 7))
    plt.savefig('output.png')
    return accuracy, df_cm

In [26]:
class Convnet(nn.Module):
    def __init__(self):
        super(Convnet, self).__init__()
        self.conv1 = nn.Conv2d(3, 24, 3, 1)
        self.bn1 = nn.BatchNorm2d(24)
        self.conv2 = nn.Conv2d(24, 48, 3, 1)
        self.bn2 = nn.BatchNorm2d(48)
        self.conv3 = nn.Conv2d(48, 96, 3, 1)
        self.bn3 = nn.BatchNorm2d(96)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(96*6*6, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 3)
        self.lrelu = nn.LeakyReLU()

    def forward(self, x, label):
        x = self.pool(self.lrelu(self.bn1(self.conv1(x))))
        x = self.pool(self.lrelu(self.bn2(self.conv2(x))))
        x = self.pool(self.lrelu(self.bn3(self.conv3(x))))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.lrelu(self.fc1(x))
        x = self.lrelu(self.fc2(x))
        x = self.fc3(x)
        logits = F.log_softmax(x, dim=1)
        loss = F.cross_entropy(logits, target=label)
        return loss, logits

In [20]:
# define paths
folder = "saves"
save_network = os.path.join("./", folder)

# check if directory exists
if not os.path.exists(save_network):
    # if not, create it
    os.makedirs(save_network)

# GT classes
classes = [0, 1, 2]

# build model
net = Convnet()
net.to(device)

# define optimizers
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# Early stopping parameters
patience = 10
best_val_acc = 0.0
early_stop_counter = 0

print('-' * 10)

for epoch in range(num_epochs):
    net.train()
    train_loss = 0.0
    train_acc = 0.0
    total = 0.0
    for sample in enumerate(train_dataloader):
        image = sample[1]['image'].to(device)
        label = sample[1]['label'].to(device)

        loss, logits = net(image, label)
        train_loss += loss.item() * image.size(0)

        pred_label = torch.argmax(logits, 1)
        total += label.size(0)
        train_acc += torch.sum(pred_label == label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss = train_loss / total
    train_acc = (100 * train_acc / total)


    val_acc, total = val(net, val_dataloader)
    val_acc = (100 * val_acc / total)

    # # Check if early stopping condition is met
    # if val_acc > best_val_acc:
    #     best_val_acc = val_acc
    #     early_stop_counter = 0
    # else:
    #     early_stop_counter += 1
    #     if early_stop_counter >= patience:
    #         print("Early stopping")
    #         break

    print(f'Epoch {epoch + 1}/{num_epochs}')
    print('-' * 10)
    print(f'Training Loss:{train_loss:.4f}')
    print(f'Train Accuracy:{train_acc:.4f}')
    print(f'Val Accuracy:{val_acc:.4f}')

    filename = "checkpoint_epoch_" + str(epoch + 1) + "_tb.pth.tar"
    torch.save(net.state_dict(), osp.join(save_network, filename))
 
    print("Model saved at", osp.join(save_network, filename))
    print('-' * 10)
    # scheduler.step()

acc, df_cm = test(net, test_dataloader, classes)
print(f'Test Accuracy:{acc:.4f}')
print("Model Successfully trained and tested!")
print('-' * 10)

----------


RuntimeError: Calculated padded input size per channel: (1 x 1). Kernel size: (3 x 3). Kernel size can't be greater than actual input size

In [10]:
print(df_cm.to_string())

          0         1         2
0  2.913239  0.456245  0.185116
1  0.000000  0.000000  0.000000
2  0.783470  0.228123  5.433807
