In [39]:
# from resnet_18_fcn import *
from resnet_34_fcn import *
import time
from torch.utils.data import DataLoader
import torch
import gc
import voc
import torchvision.transforms as standard_transforms
import util
import numpy as np
import sys
import math
import copy

class MaskToTensor(object):
    def __call__(self, img):
        return torch.from_numpy(np.array(img, dtype=np.int32)).long()

# Initialize Weights with Xavier Weight Initialization
def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        torch.nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:  # Check if the bias exists
            torch.nn.init.normal_(m.bias.data)  # Xavier not applicable for biases

In [40]:
mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
target_transform = MaskToTensor()

train_dataset =voc.VOC('train', transform=input_transform, target_transform=target_transform)
val_dataset = voc.VOC('val', transform=input_transform, target_transform=target_transform)
test_dataset = voc.VOC('test', transform=input_transform, target_transform=target_transform)

train_loader = DataLoader(dataset=train_dataset, batch_size= 16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size= 16, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size= 16, shuffle=False)

epochs = 20
n_class = 21

fcn_model = FCN_ResNet34(n_class=n_class)
fcn_model.apply(init_weights)

FCN_ResNet34(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=

In [41]:
device = torch.device("cuda") if torch.cuda.is_available() else "cpu"

optimizer = torch.optim.Adam(fcn_model.parameters(), lr=5e-4)

# Choose an appropriate loss function from https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html
criterion = nn.CrossEntropyLoss()
fcn_model = fcn_model.to(device)
max_model = fcn_model

earlystop = 3

In [4]:
def train():
    """
    Train a deep learning model using mini-batches.

    - Perform forward propagation in each epoch.
    - Compute loss and conduct backpropagation.
    - Update model weights.
    - Evaluate model on validation set for mIoU score.
    - Save model state if mIoU score improves.
    - Implement early stopping if necessary.

    Returns:
        None.
    """

    patience = 0
    best_iou_score = 0.0
    losses = []

    for epoch in range(epochs):
        ts = time.time()
        for iter, (inputs, labels) in enumerate(train_loader):
            
            optimizer.zero_grad() # reset optimizer gradients

            inputs, labels = inputs.to(device), labels.to(device) # both inputs and labels in device as model

            outputs = fcn_model(inputs) #  Compute outputs. Automatically in the same device as the model's

            loss = criterion(outputs, labels) #Calculate loss

            loss.backward() # Bckpropagate model

            optimizer.step() # Update the weights
            
            losses.append(loss.item())

            #if iter % 10 == 0:
            #    print("epoch{}, iter{}, loss: {}".format(epoch, iter, loss.item()))

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        print("Train Avg Loss: {}".format(np.mean(losses)))

        current_miou_score = val(epoch)

        # Save current IoU if better than stored best
        if current_miou_score > best_iou_score:
            best_iou_score = current_miou_score
            patience = 0
            max_model = copy.deepcopy(fcn_model) # save the best model
        else:
            patience += 1
            
        # Early stop if patience level is met
        if patience >= earlystop:
            print("Early stop at epoch " + str(epoch))
            break
    

In [5]:
def val(epoch):
    """
    Validate the deep learning model on a validation dataset.

    - Set model to evaluation mode. DONE
    - Disable gradient calculations. DONE
    - Iterate over validation data loader:
        - Perform forward pass to get outputs.
        - Compute loss and accumulate it.
        - Calculate and accumulate mean Intersection over Union (IoU) scores and pixel accuracy.
    - Print average loss, IoU, and pixel accuracy for the epoch.
    - Switch model back to training mode.

    Args:
        epoch (int): The current epoch number.

    Returns:
        tuple: Mean IoU score and mean loss for this validation epoch.
    """
    fcn_model.eval() # Put in eval mode (disables batchnorm/dropout) !
    
    losses = []
    mean_iou_scores = []
    accuracy = []
    
    with torch.no_grad(): # we don't need to calculate the gradient in the validation/testing

        # Iterate through Validation Set
        for iter, (input, label) in enumerate(val_loader):
            # label = (16, 224, 224) / batch size 16 of 244*244 masks
            # output = (16, 21, 224, 224) / batch size 16 of 21 possible classes of 244*244 masks
            
            input, label = input.to(device), label.to(device) # both inputs and labels in device as model
            
            output = fcn_model.forward(input) # Perform forward pass to get outputs.
            N, numClass, H, W = output.shape

            prediction = output.view(N, n_class, -1).argmax(dim=1).view(N, H, W) # Find the prediction for each pixel
            
            loss = criterion(output, label) # Compute loss and accumulate it.
            losses.append(loss.item())
            
            meanIOU = util.iou(prediction, label, n_class) # Calculate Intersection over Union (IoU) scores
            mean_iou_scores.append(meanIOU)

            acc = util.pixel_acc(prediction, label) # Calculate pixel accuracy
            accuracy.append(acc)
    
    print(f"Validation Loss: {np.mean(losses)}")
    print(f"Validation IoU: {np.mean(mean_iou_scores)}")
    print(f"Validation Pixel Acc: {np.mean(accuracy)}")
    print("\n")

    fcn_model.train() #TURNING THE TRAIN MODE BACK ON TO ENABLE BATCHNORM/DROPOUT!!

    return np.mean(mean_iou_scores)

def modelTest():
    """
    Test the deep learning model using a test dataset.

    - Load the model with the best weights.
    - Set the model to evaluation mode.
    - Iterate over the test data loader:
        - Perform forward pass and compute loss.
        - Accumulate loss, IoU scores, and pixel accuracy.
    - Print average loss, IoU, and pixel accuracy for the test data.
    - Switch model back to training mode.

    Returns:
        None. Outputs average test metrics to the console.
    """

    fcn_model = copy.deepcopy(max_model) # Asssume model loaded with the best weights.
    
    fcn_model.eval()  # Put in eval mode (disables batchnorm/dropout) !

    losses = []
    mean_iou_scores = []
    accuracy = []

    with torch.no_grad():  # we don't need to calculate the gradient in the validation/testing

        # Iterate through Test Set
        for iter, (input, label) in enumerate(test_loader):

            input, label = input.to(device), label.to(device) # both inputs and labels in device as model

            output = fcn_model.forward(input) # Perform forward pass to get outputs.
            N, numClass, H, W = output.shape

            prediction = output.view(N, n_class, -1).argmax(dim=1).view(N, H, W) # Find the prediction for each pixel
            
            loss = criterion(output, label) # Compute loss and accumulate it.
            losses.append(loss.item())
            
            meanIOU = util.iou(prediction, label, n_class) # Calculate Intersection over Union (IoU) scores
            mean_iou_scores.append(meanIOU)

            acc = util.pixel_acc(prediction, label) # Calculate pixel accuracy
            accuracy.append(acc)

    print(f"Test Loss at Test: {np.mean(losses)}")
    print(f"Test IoU at Test: {np.mean(mean_iou_scores)}")
    print(f"Test Pixel acc at Test: {np.mean(accuracy)}")

    fcn_model.train()  #TURNING THE TRAIN MODE BACK ON TO ENABLE BATCHNORM/DROPOUT!!

In [6]:
def exportModel(inputs):    
    """
    Export the output of the model for given inputs.

    - Set the model to evaluation mode.
    - Load the model with the best saved weights.
    - Perform a forward pass with the model to get output.
    - Switch model back to training mode.

    Args:
        inputs: Input data to the model.

    Returns:
        Output from the model for the given inputs.
    """

    fcn_model.eval() # Put in eval mode (disables batchnorm/dropout) !
    
    saved_model_path = "Fill Path To Best Model"
    # TODO Then Load your best model using saved_model_path
    
    inputs = inputs.to(device)
    
    output_image = fcn_model(inputs)
    
    fcn_model.train()  #TURNING THE TRAIN MODE BACK ON TO ENABLE BATCHNORM/DROPOUT!!
    
    return output_image

# if __name__ == "__main__":

#     val(0)  # show the accuracy before training
#     train()
#     modelTest()

#     # housekeeping
#     gc.collect()
#     torch.cuda.empty_cache()

In [7]:
train()

Finish epoch 0, time elapsed 21.06040668487549
Train Avg Loss: 2.89590162890298
Validation Loss: 2.5461331946509227
Validation IoU: 0.00132408230750982
Validation Pixel Acc: 0.01300769505625911


Finish epoch 1, time elapsed 17.316808700561523
Train Avg Loss: 2.8260586857795715
Validation Loss: 2.5046353340148926
Validation IoU: 0.003329347376031725
Validation Pixel Acc: 0.03693920613725401


Finish epoch 2, time elapsed 17.39508080482483
Train Avg Loss: 2.7753842785244895
Validation Loss: 2.4603898525238037
Validation IoU: 0.016166048771360197
Validation Pixel Acc: 0.18590826362632104


Finish epoch 3, time elapsed 20.31821846961975
Train Avg Loss: 2.739593484572002
Validation Loss: 2.342496361051287
Validation IoU: 0.04246616804681991
Validation Pixel Acc: 0.6280745703694425


Finish epoch 4, time elapsed 21.62053680419922
Train Avg Loss: 2.7002184765679496
Validation Loss: 2.962302259036473
Validation IoU: 0.025361407756756833
Validation Pixel Acc: 0.4230834427102314


Finish epoch 

In [8]:
modelTest()

Test Loss at Test: 2.6380134310041154
Test IoU at Test: 0.04158568968972791
Test Pixel acc at Test: 0.6266295319048378


## Q4.a COSINE ANNEALING LR

In [48]:
fcn_model = FCN_ResNet34(n_class=n_class)
fcn_model.apply(init_weights)
fcn_model = fcn_model.to(device)

earlystop = 3
max_model = fcn_model

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(fcn_model.parameters(), lr=1e-3)

#Test Cosine Annealing Learning Rate
iterMax = math.floor(len(train_dataset)/16)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=iterMax, eta_min=1e-4)

In [49]:
def train1():
    patience = 0
    best_iou_score = 0.0

    for epoch in range(epochs):
        losses = []
        ts = time.time()
        for iter, (inputs, labels) in enumerate(train_loader):
            
            optimizer.zero_grad() # reset optimizer gradients

            inputs, labels = inputs.to(device), labels.to(device) # both inputs and labels in device as model

            outputs = fcn_model(inputs) #  Compute outputs. Automatically in the same device as the model's

            loss = criterion(outputs, labels) #Calculate loss

            loss.backward() # Bckpropagate model

            optimizer.step() # Update the weights
            
            losses.append(loss.item())
            
            scheduler.step() # For cosine annealing learning rate

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        print("Train Avg Loss: {}".format(np.mean(losses)))
        
        current_miou_score = val(epoch)

        if current_miou_score > best_iou_score:
            best_iou_score = current_miou_score
            patience = 0
            max_model = copy.deepcopy(fcn_model)
            # save the best model
        else:
            patience += 1
            
        # Early stop if patience level is met
#         if patience >= earlystop:
#             print("Early stop at epoch " + str(epoch))
#             break
    

In [50]:
train1()

Finish epoch 0, time elapsed 4.617120027542114
Train Avg Loss: 4.197633164269583
Validation Loss: 3.8772651297705516
Validation IoU: 0.0003656049594933903
Validation Pixel Acc: 0.00207813140602223


Finish epoch 1, time elapsed 4.583094835281372
Train Avg Loss: 4.062610472951617
Validation Loss: 3.7255057096481323
Validation IoU: 0.0001291393948011038
Validation Pixel Acc: 0.001965848071929665


Finish epoch 2, time elapsed 4.380997657775879
Train Avg Loss: 3.98191351549966
Validation Loss: 3.577138696398054
Validation IoU: 0.00012911886267857073
Validation Pixel Acc: 0.001965581154336735


Finish epoch 3, time elapsed 4.323671579360962
Train Avg Loss: 3.8732859066554477
Validation Loss: 3.5634100266865323
Validation IoU: 0.0003793546633420247
Validation Pixel Acc: 0.00201416015625


Finish epoch 4, time elapsed 4.081014394760132
Train Avg Loss: 3.790125318935939
Validation Loss: 3.5263048240116666
Validation IoU: 0.001283863158040488
Validation Pixel Acc: 0.0046394370387664


Finish e

In [12]:
modelTest()

Test Loss at Test: 3.3685128007616316
Test IoU at Test: 0.039544193085749406
Test Pixel acc at Test: 0.5856814370558491


## Q4.b Image Transformation

In [13]:
fcn_model = FCN_ResNet34(n_class=n_class)
fcn_model.apply(init_weights)
fcn_model = fcn_model.to(device)

earlystop = 3
max_model = fcn_model

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(fcn_model.parameters(), lr=1e-3)
#Test Cosine Annealing Learning Rate
iterMax = math.floor(len(train_dataset)/16)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=iterMax, eta_min=1e-4)

In [14]:
train1()

Finish epoch 0, time elapsed 3.117100238800049
Train Avg Loss: 3.2413190603256226
Validation Loss: 3.3104273762021745
Validation IoU: 0.0005423732197926635
Validation Pixel Acc: 0.008012421277104591


Finish epoch 1, time elapsed 3.042057514190674
Train Avg Loss: 3.18077746459416
Validation Loss: 2.7423292228153775
Validation IoU: 0.0005460681219005091
Validation Pixel Acc: 0.00806491507038083


Finish epoch 2, time elapsed 3.0474495887756348
Train Avg Loss: 3.1124085301444646
Validation Loss: 2.6109058686665128
Validation IoU: 0.0062793098441153395
Validation Pixel Acc: 0.07164415187112792


Finish epoch 3, time elapsed 3.0640385150909424
Train Avg Loss: 3.0645368780408586
Validation Loss: 2.8934601034436906
Validation IoU: 0.010576875472371386
Validation Pixel Acc: 0.12947978472918184


Finish epoch 4, time elapsed 3.0904598236083984
Train Avg Loss: 3.02535457611084
Validation Loss: 3.6116258927753995
Validation IoU: 0.021616062753321015
Validation Pixel Acc: 0.2912935395977588


Fin

In [15]:
modelTest()

Test Loss at Test: 1.856745915753501
Test IoU at Test: 0.04361906536443096
Test Pixel acc at Test: 0.7138462789552206


## Q4.c Weight Imbalance + (Image Transformation)

In [37]:
def getClassWeights():
    ans = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
    
    # Iterate through the training set
    for iter, (inputs, labels) in enumerate(train_loader):  
        unique_elements, counts = torch.unique(labels, return_counts=True)
        
        # Count number of each class
        for i in range(len(unique_elements)):
            ans[unique_elements[i]] += counts[i]

    normalized = [tensor.tolist() for tensor in ans]
    #normalized = [num/sum(normalized) for num in normalized]
    normalized = [1/(1-pow(0.1,num/50000)) for num in normalized]
    return torch.tensor(normalized)
#     class_counts = [0] * 21  # Assuming 21 classes
    
#     # Iterate through the training set
#     for _, labels in train_loader:
#         labels = labels.view(-1)  # Flatten labels tensor to 1D, if necessary
#         unique_elements, counts = torch.unique(labels, return_counts=True)
        
#         # Ensure unique_elements is a tensor of integers for indexing
#         unique_elements = unique_elements.to(torch.int64)
        
#         # Update counts for each class
#         for i in range(len(unique_elements)):
#             element = unique_elements[i].item()  # Convert to Python int
#             count = counts[i].item()
#             if element < len(class_counts):  # Check is now valid
#                 class_counts[element] += count
    
#     # Avoid division by zero for classes not present in the dataset
#     class_counts = [max(count, 1) for count in class_counts]

#     # Calculate weights inversely proportional to class frequencies
#     total_count = sum(class_counts)
#     weights = [total_count / count for count in class_counts]
    
#     # Normalize weights (optional)
#     min_weight = min(weights)
#     normalized_weights = [weight / min_weight for weight in weights]

#     return torch.tensor(normalized_weights, dtype=torch.float)

In [20]:
class_weights = getClassWeights()
print(class_weights)
class_weights = class_weights.to(device)

tensor([1.0000, 1.1216, 1.1607, 1.0169, 1.1303, 1.0409, 1.0001, 1.0333, 1.0002,
        1.0060, 1.0280, 1.0024, 1.0013, 1.0198, 1.0270, 1.0000, 1.3043, 1.5267,
        1.0005, 1.0040, 1.0025])


In [45]:
fcn_model = FCN_ResNet34(n_class=n_class)
fcn_model.apply(init_weights)
fcn_model = fcn_model.to(device)

earlystop = 5
max_model = fcn_model

optimizer = torch.optim.Adam(fcn_model.parameters(), lr=1e-1)
criterion = nn.CrossEntropyLoss(weight=class_weights)
#Test Cosine Annealing Learning Rate
# T_max = 10
iterMax = math.floor(len(train_dataset)/16)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=iterMax, eta_min=1e-4)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=T_max, T_mult=1, eta_min=1e-4)

In [46]:
train1()

Finish epoch 0, time elapsed 3.3205885887145996
Train Avg Loss: 2.013990342617035
Validation Loss: 1.9089797309466772
Validation IoU: 0.05085004890565888
Validation Pixel Acc: 0.7375907697761024


Finish epoch 1, time elapsed 3.2036216259002686
Train Avg Loss: 1.3851521100316728
Validation Loss: 1.518652183668954
Validation IoU: 0.05383695172653363
Validation Pixel Acc: 0.7294901411317876


Finish epoch 2, time elapsed 3.0787460803985596
Train Avg Loss: 1.3215112345559257
Validation Loss: 1.3880389290196555
Validation IoU: 0.05573232070334965
Validation Pixel Acc: 0.7517313342747813


Finish epoch 3, time elapsed 2.9081571102142334
Train Avg Loss: 1.3224413565226965
Validation Loss: 1.33985350387437
Validation IoU: 0.05435184846332752
Validation Pixel Acc: 0.7519261307341016


Finish epoch 4, time elapsed 3.3318681716918945
Train Avg Loss: 1.2865652697426933
Validation Loss: 1.3327766060829163
Validation IoU: 0.052261375346117925
Validation Pixel Acc: 0.7513819035566236


Finish epoch 

In [47]:
modelTest()

Test Loss at Test: 1.3940854242869787
Test IoU at Test: 0.05322990909617587
Test Pixel acc at Test: 0.7246881156899143
