In [1]:
from basic_fcn import *
import time
from torch.utils.data import DataLoader
import torch
import gc
import voc
import torchvision.transforms as standard_transforms
import util
import numpy as np
import sys

class MaskToTensor(object):
    """Callable transform that converts a label mask into an int64 tensor."""

    def __call__(self, img):
        """Return ``img`` as a ``torch.int64`` tensor.

        Args:
            img: Anything ``np.array`` can convert to an integer array
                (for example a PIL image or a nested list).

        Returns:
            A tensor with the same shape as the converted array.
        """
        # Materialise as int32 first, then widen to int64 on the torch side.
        mask_array = np.array(img, dtype=np.int32)
        mask_tensor = torch.from_numpy(mask_array)
        return mask_tensor.to(torch.int64)


def init_weights(m):
    """Initialise the parameters of a 2-D (transposed) convolution in place.

    Designed to be passed to ``Module.apply``: any module that is neither a
    ``Conv2d`` nor a ``ConvTranspose2d`` is left untouched.

    Args:
        m: The module to (possibly) initialise.

    Returns:
        None. The weights are filled with Xavier-uniform values and the bias,
        when the layer has one, with standard-normal values.
    """
    if not isinstance(m, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
        return

    torch.nn.init.xavier_uniform_(m.weight.data)

    # Layers created with bias=False expose ``bias`` as None; skip them
    # instead of failing on ``None.data``.
    if m.bias is not None:
        torch.nn.init.normal_(m.bias.data)  # xavier not applicable for biases
        
# TODO Get class weights
# def getClassWeights():
#     ans = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]

#     for iter, (inputs, labels) in enumerate(train_loader):
#         unique_elements, counts = torch.unique(labels, return_counts=True)

#         for i in range(len(unique_elements)):
#             ans[unique_elements[i]] += counts[i]
            
#     normalized = [tensor.tolist() for tensor in ans]
#     total_count = sum(normalized)
#     normalized = [total_count / (len(normalized) * count) for count in normalized]
#     print(normalized)
    
#     return 1 + torch.tensor(normalized)

def getClassWeights(loader=None, num_classes=21):
    """Derive per-class loss weights from how often each label occurs.

    Each class receives ``1.1 + share``, where ``share`` is the fraction of
    all label pixels that belong to that class; the weight of class 0 is then
    lowered by 0.9.

    NOTE(review): with this formula more frequent classes get *larger*
    weights (apart from class 0, presumably the background). Class balancing
    usually uses an inverse-frequency scheme -- confirm this is intended.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches whose labels
            are integer tensors of class indices. Defaults to the
            module-level ``train_loader``.
        num_classes: Number of classes, i.e. the length of the returned
            tensor. Defaults to 21.

    Returns:
        A 1-D tensor of ``num_classes`` weights in torch's default float
        dtype. Classes that never occur get the base weight of 1.1.

    Raises:
        ValueError: If the loader yields no label pixels at all.
        IndexError: If a label is ``>= num_classes``.
    """
    if loader is None:
        loader = train_loader

    base_weight = 1.1
    class0_discount = 0.9

    pixel_counts = torch.zeros(num_classes, dtype=torch.int64)
    for _, labels in loader:
        classes, counts = torch.unique(labels, return_counts=True)
        # ``classes`` holds no duplicates, so indexed in-place addition
        # accumulates every count exactly once.
        pixel_counts[classes.long().cpu()] += counts.cpu()

    total_pixels = int(pixel_counts.sum())
    if total_pixels == 0:
        raise ValueError("getClassWeights: the loader produced no label pixels")

    # Work in float64 and convert once at the end so rounding matches plain
    # Python float arithmetic followed by a single cast.
    weights = pixel_counts.to(torch.float64) / total_pixels + base_weight
    weights[0] -= class0_discount
    return weights.to(torch.get_default_dtype())

In [2]:
# Per-channel (mean, std) used to normalise the input images; the values
# match the widely used ImageNet statistics.
mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Images: PIL -> float tensor -> channel-wise normalisation.
input_transform = standard_transforms.Compose([
    standard_transforms.ToTensor(),
    standard_transforms.Normalize(mean=mean_std[0], std=mean_std[1]),
])
# Masks: converted to int64 class-index tensors.
target_transform = MaskToTensor()

# One dataset per split, all sharing the same image/mask transforms.
train_dataset, val_dataset, test_dataset = (
    voc.VOC(split, transform=input_transform, target_transform=target_transform)
    for split in ('train', 'val', 'test')
)

# Only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
# test_loader = DataLoader(dataset=test_dataset, batch_size= len(test_dataset), shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Add horizontally mirrored images to training data
# mirror_input_transform = standard_transforms.Compose([
#         standard_transforms.ToTensor(),
#         standard_transforms.Normalize(*mean_std),
#         standard_transforms.RandomHorizontalFlip(p=1.0)
#     ])

# mirror_target_transform = standard_transforms.Compose([
#         MaskToTensor(),
#         standard_transforms.RandomHorizontalFlip(p=1.0)
#     ])

# mirrored_dataset = voc.VOC('train', transform=mirror_input_transform, target_transform=mirror_target_transform)
# mirror_train_loader = DataLoader(dataset=mirrored_dataset, batch_size= 16, shuffle=True)

# Training length and number of segmentation classes.
epochs = 30
n_class = 21

# Build the network and initialise its convolution layers; leaving ``apply``
# as the final expression makes the notebook display the module structure.
fcn_model = FCN(n_class=n_class)
fcn_model.apply(init_weights)


FCN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bnd1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bnd2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bnd3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bnd4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bnd5): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (deconv1): ConvTranspose2d(512, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
  (bn1): Ba

In [3]:
# Always a torch.device object (never a bare string), so comparisons and
# ``.to(device)`` calls behave the same on CPU and GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to its device before the optimizer is constructed, as the
# torch.optim documentation recommends, so the optimizer is built on the
# parameters it will actually update.
fcn_model = fcn_model.to(device)

optimizer = torch.optim.Adam(fcn_model.parameters(), lr=5e-4)

# Choose an appropriate loss function from https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html
# Class-weighted cross entropy: the weights come from the label statistics of
# the training set and must live on the same device as the model outputs.
class_weights = getClassWeights()
print()
print(class_weights)
class_weights = class_weights.to(device)
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

# criterion = nn.CrossEntropyLoss()


tensor([0.9365, 1.1046, 1.1041, 1.1085, 1.1045, 1.1068, 1.1190, 1.1071, 1.1179,
        1.1106, 1.1074, 1.1125, 1.1137, 1.1082, 1.1076, 1.1863, 1.1030, 1.1022,
        1.1159, 1.1114, 1.1124])


In [4]:
def train(save_path=None):
    """
    Train ``fcn_model`` with mini-batches for ``epochs`` epochs.

    For every epoch:
    - Run a forward and backward pass over each batch of ``train_loader``
      and update the weights with ``optimizer``.
    - Print the wall-clock time the epoch took.
    - Evaluate on the validation set through ``val(epoch)``.
    - If the validation mIoU beats the best score so far, remember it and,
      when ``save_path`` is given, save the model's ``state_dict`` there.

    Early stopping is not implemented: all ``epochs`` epochs always run.

    Args:
        save_path: Optional file path for the best model's ``state_dict``.
            Defaults to ``None``, in which case nothing is written to disk.

    Returns:
        None.
    """

    best_iou_score = 0.0

    # Make sure batchnorm layers are in training mode even if a previous
    # cell left the model in eval mode.
    fcn_model.train()

    for epoch in range(epochs):
        ts = time.time()
        for inputs, labels in train_loader:
            # Gradients accumulate by default, so clear them for each batch.
            optimizer.zero_grad()

            # Inputs and labels have to reside on the same device as the model.
            inputs = inputs.to(device)
            labels = labels.to(device)

            # The outputs are produced on the model's device automatically.
            outputs = fcn_model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))

        current_miou_score = val(epoch)

        if current_miou_score > best_iou_score:
            best_iou_score = current_miou_score
            # Save the best model seen so far (only when a path was supplied).
            if save_path is not None:
                torch.save(fcn_model.state_dict(), save_path)

In [5]:
def val(epoch):
    """
    Validate ``fcn_model`` on the validation set.

    - Set the model to evaluation mode and disable gradient tracking.
    - For every batch of ``val_loader``:
        - Run a forward pass.
        - Compute the loss with ``criterion`` and record it.
        - Record ``util.iou`` and ``util.pixel_acc`` for the per-pixel
          argmax prediction.
    - Switch the model back to training mode (even if a batch raised).
    - Print the mean loss, IoU and pixel accuracy for the epoch.

    Args:
        epoch (int): The current epoch number; only used in the printed
            messages.

    Returns:
        The mean (``np.mean``) of the per-batch IoU values. Only this single
        value is returned; the loss is printed but not returned.
    """
    losses = []
    mean_iou_scores = []
    accuracy = []

    fcn_model.eval()  # Put in eval mode (disables batchnorm/dropout) !
    try:
        with torch.no_grad():  # no gradients needed for validation/testing
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # outputs: (N, n_class, H, W) class scores per pixel.
                outputs = fcn_model(inputs)

                # Most likely class for each pixel -> (N, H, W). argmax over
                # the class dimension needs no reshaping and also works for
                # non-contiguous outputs.
                predictions = outputs.argmax(dim=1)

                losses.append(criterion(outputs, labels).item())
                mean_iou_scores.append(util.iou(predictions, labels, n_class))
                accuracy.append(util.pixel_acc(predictions, labels))
    finally:
        # TURNING THE TRAIN MODE BACK ON TO ENABLE BATCHNORM/DROPOUT!!
        fcn_model.train()

    print(f"Loss at epoch: {epoch} is {np.mean(losses)}")
    print(f"IoU at epoch: {epoch} is {np.mean(mean_iou_scores)}")
    print(f"Pixel acc at epoch: {epoch} is {np.mean(accuracy)}")

    return np.mean(mean_iou_scores)

#  #TODO
def modelTest():
    """
    Evaluate ``fcn_model`` on the test set.

    The model is used as it currently is: this function does not load a
    checkpoint, so load the best weights beforehand if that is what should
    be tested.

    - Set the model to evaluation mode and disable gradient tracking.
    - For every batch of ``test_loader``:
        - Run a forward pass and compute the loss with ``criterion``.
        - Record ``util.iou`` (called with ``verbose=True``) and
          ``util.pixel_acc`` for the per-pixel argmax prediction.
    - Switch the model back to training mode (even if a batch raised).
    - Print the mean loss, IoU and pixel accuracy over the test batches.

    Returns:
        None. Outputs average test metrics to the console.
    """
    losses = []
    mean_iou_scores = []
    accuracy = []

    fcn_model.eval()  # Put in eval mode (disables batchnorm/dropout) !
    try:
        with torch.no_grad():  # no gradients needed for validation/testing
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # outputs: (N, n_class, H, W) class scores per pixel.
                outputs = fcn_model(inputs)

                # Most likely class for each pixel -> (N, H, W). argmax over
                # the class dimension needs no reshaping and also works for
                # non-contiguous outputs.
                predictions = outputs.argmax(dim=1)

                losses.append(criterion(outputs, labels).item())
                mean_iou_scores.append(
                    util.iou(predictions, labels, n_class, verbose = True)
                )
                accuracy.append(util.pixel_acc(predictions, labels))
    finally:
        # TURNING THE TRAIN MODE BACK ON TO ENABLE BATCHNORM/DROPOUT!!
        fcn_model.train()

    print(f"Loss at Test: {np.mean(losses)}")
    print(f"IoU at Test: {np.mean(mean_iou_scores)}")
    print(f"Pixel acc at Test: {np.mean(accuracy)}")



In [6]:
def exportModel(inputs):    
    """
    Run ``fcn_model`` on the given inputs and return its raw output.

    - Set the model to evaluation mode.
    - Move ``inputs`` to ``device`` and perform a forward pass without
      gradient tracking.
    - Switch the model back to training mode (even if the pass raised).

    Loading the best saved weights is still a TODO: the model is used with
    whatever weights it currently holds.

    Args:
        inputs: Input data to the model.

    Returns:
        Output from the model for the given inputs (not attached to the
        autograd graph).
    """

    saved_model_path = "Fill Path To Best Model"
    # TODO Then Load your best model using saved_model_path

    fcn_model.eval() # Put in eval mode (disables batchnorm/dropout) !
    try:
        inputs = inputs.to(device)

        # Inference only: skip building the autograd graph, as the
        # validation and test loops do.
        with torch.no_grad():
            output_image = fcn_model(inputs)
    finally:
        fcn_model.train()  #TURNING THE TRAIN MODE BACK ON TO ENABLE BATCHNORM/DROPOUT!!

    return output_image

# if __name__ == "__main__":

#     val(0)  # show the accuracy before training
#     train()
#     modelTest()

#     # housekeeping
#     gc.collect()
#     torch.cuda.empty_cache()

In [7]:
# Run the training loop; every epoch prints its duration followed by the
# validation loss, IoU and pixel accuracy.
train()

Finish epoch 0, time elapsed 3.7908403873443604
Loss at epoch: 0 is 3.0091733762196133
IoU at epoch: 0 is 0.0005520714793098197
Pixel acc at epoch: 0 is 0.00803012681076895
Finish epoch 1, time elapsed 2.4840970039367676
Loss at epoch: 1 is 2.859506777354649
IoU at epoch: 1 is 0.002399663307423243
Pixel acc at epoch: 1 is 0.009381245871674564
Finish epoch 2, time elapsed 2.5524768829345703
Loss at epoch: 2 is 2.679209198270525
IoU at epoch: 2 is 0.009984263000696688
Pixel acc at epoch: 2 is 0.092730712890625
Finish epoch 3, time elapsed 2.6109883785247803
Loss at epoch: 3 is 2.476930924824306
IoU at epoch: 3 is 0.018677102412650574
Pixel acc at epoch: 3 is 0.21973022816827623
Finish epoch 4, time elapsed 2.6277570724487305
Loss at epoch: 4 is 2.5265542439052036
IoU at epoch: 4 is 0.020550810228065517
Pixel acc at epoch: 4 is 0.20897615393813776
Finish epoch 5, time elapsed 2.6093809604644775
Loss at epoch: 5 is 2.3494677543640137
IoU at epoch: 5 is 0.022471053039043835
Pixel acc at epo

In [8]:
# Evaluate the current fcn_model on the test set and print the averaged
# loss, IoU and pixel accuracy.
modelTest()

iou list
[0.79742840755667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, nan, 0.0, 0.0, 0.0, nan, 0.0, 0.0, 0.27852414330218067, nan, 0.0, 0.0007936507936507937, 0.0010820381664371434, nan]
iou list
[0.7716847037044081, 0.0, 0.0, nan, 0.0, 0.0, nan, 0.0, nan, 0.0, nan, 0.0, 0.0, 0.001752514477293508, nan, 0.32263296407368874, nan, 0.0, 0.005037254696190576, 0.0, nan]
iou list
[0.7231118715477967, 0.0, nan, nan, 0.0, nan, 0.0, 0.0, 0.0, nan, 0.0, nan, 0.0, 0.0, 0.0, 0.06395683830077, nan, nan, 0.005470792049115555, 0.0, 0.0]
iou list
[0.7420216074006415, 0.0, 0.0, 0.0, 0.0, nan, nan, 0.0, 0.0, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06547542252612566, nan, nan, 0.0002188183807439825, 0.0, 0.0]
iou list
[0.6604499930006745, nan, nan, 0.0, 0.0, 0.0, 0.0, 0.0, nan, 0.0, 0.0, 0.0, nan, 0.0, nan, 0.13372179039251614, 0.0, 0.0, 0.0, 0.0025742449927795568, nan]
iou list
[0.7207542035281164, nan, nan, 0.0, 0.0, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, nan, 0.055549099360836726, 0.0, nan, 0.0, 0.0, 0.0]
iou l

In [9]:
# for iter, ((inputs1, labels1), (inputs2, labels2)) in enumerate(zip(train_loader, mirror_train_loader)):
#     print("----")
#     print(inputs1.shape)
#     print(labels1.shape)
#     inputs = torch.cat((inputs1, inputs2), dim=0)
#     labels = torch.cat((labels1, labels2), dim=0)
    
#     print(inputs.shape)
#     print(labels.shape)