In [1]:
# required imports
import numpy as np
import torch
import torch.nn as nn
import torchvision
import random
from PIL import Image
import glob
from torchvision import models
import torch.nn.functional as F

In [2]:
# Seed every random number generator the notebook draws from so that repeated
# runs start from the same state.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# seeds all visible GPUs (not only the current one); silently ignored without CUDA
torch.cuda.manual_seed_all(SEED)

# cuDNN otherwise benchmarks and picks convolution algorithms at run time, which
# can differ between runs; pin it to deterministic kernels.
# NOTE: some CUDA ops may still be non-deterministic, so treat bit-exact
# reproducibility on GPU as best effort.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
# mount Google Drive inside the Colab VM; the dataset cell reads from this mount
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# transforms
def encode_voc_mask(mask):
  """Convert a VOC segmentation mask (PIL image) into an integer label tensor.

  VOC masks store the class id of every pixel (0 = background, 1-20 = object
  classes) and use 255 for void / boundary pixels. `ToTensor` must not be used
  on them: it rescales values to [0, 1], so a later `.long()` collapses almost
  every label to 0.

  The ids are remapped to the 22-class scheme the rest of this notebook uses
  (22 network outputs, loss weight 0 for class 0):
    void (255) -> 0, VOC class k -> k + 1.

  Returns a `torch.int64` tensor of shape (1, H, W); the leading channel axis is
  kept because the training loop selects it with `labels[:, 0]`.
  """
  class_ids = torch.from_numpy(np.array(mask, dtype=np.int64))
  void_pixels = class_ids == 255
  class_ids = class_ids + 1
  class_ids[void_pixels] = 0
  return class_ids.unsqueeze(0)


# images: resize the shorter side to 256, take the central 256 x 256 crop and
# normalise with the ImageNet channel statistics
input_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(256),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# masks: same geometry as the images, but resized with nearest-neighbour so no
# interpolated (non-existent) class ids are created, and kept as integer labels
target_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256, interpolation=torchvision.transforms.InterpolationMode.NEAREST),
    torchvision.transforms.CenterCrop(256),
    encode_voc_mask
])

In [None]:
# dataset: the 'train' and 'val' splits of VOC 2012 segmentation, read from the
# mounted Drive folder (nothing is downloaded)
voc_root = "/content/drive/My Drive/"
train_dataset, val_dataset = [
    torchvision.datasets.VOCSegmentation(
        voc_root,
        year='2012',
        image_set=split_name,
        download=False,
        transform=input_transform,
        target_transform=target_transform,
    )
    for split_name in ('train', 'val')
]

In [None]:
# data loader: both splits use the same settings (one image per batch,
# shuffled order, 8 worker processes)
loader_options = dict(batch_size=1, shuffle=True, num_workers=8)
train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
val_loader = torch.utils.data.DataLoader(val_dataset, **loader_options)

In [None]:
# loss
# run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# one weight per output class (22 in total); class 0 gets weight 0, so pixels
# labelled 0 contribute nothing to the loss
weight = torch.ones(22)
weight[0] = 0

# NLLLoss accepts (N, C, H, W) log-probabilities with (N, H, W) integer targets
# directly; the separate NLLLoss2d class is a deprecated alias for it.
criterion = torch.nn.NLLLoss(weight=weight.to(device))

In [None]:
# model
class FCN32(torch.nn.Module):
  """FCN-32s style segmentation network with a VGG16 (batch-norm) shaped encoder.

  Five conv/bn/relu stages, each ending in a 2x2 max-pool, are followed by a
  convolutional "fc" head that produces `num_classes` score maps. The score
  maps are bilinearly upsampled back to the spatial size of the input and
  turned into per-pixel log-probabilities.

  Args:
    num_classes: number of output channels / classes (default 22).

  The layer order inside every `Sequential` is fixed, so parameter names in the
  state dict are `encoder_block_<n>.<index>.*` and `encoder_fc.<index>.*`.
  """

  # init function
  def __init__(self, num_classes=22):
    super(FCN32, self).__init__()

    # vgg16 backbone for encoder part.
    # Sizes in the comments are for a 256 x 256 input. The first convolution
    # pads by 100 pixels so that the 7x7 convolution of the head still has
    # enough spatial extent after five poolings.
    self.encoder_block_1 = self._vgg_stage(3, 64, num_convs=2, first_padding=100)   # 227 x 227 x 64
    self.encoder_block_2 = self._vgg_stage(64, 128, num_convs=2)                    # 113 x 113 x 128
    self.encoder_block_3 = self._vgg_stage(128, 256, num_convs=3)                   # 56 x 56 x 256
    self.encoder_block_4 = self._vgg_stage(256, 512, num_convs=3)                   # 28 x 28 x 512
    self.encoder_block_5 = self._vgg_stage(512, 512, num_convs=3)                   # 14 x 14 x 512

    self.encoder_fc = torch.nn.Sequential(

        # fc (VGG's fully connected layers expressed as convolutions)
        torch.nn.Conv2d(512, 4096, kernel_size=7),
        torch.nn.ReLU(inplace=True),
        torch.nn.Dropout2d(),
        torch.nn.Conv2d(4096, 4096, kernel_size=1),
        torch.nn.ReLU(inplace=True),
        torch.nn.Dropout2d(),                                  # 8 x 8 x 4096
        torch.nn.Conv2d(4096, num_classes, kernel_size=1)
    )

  @staticmethod
  def _vgg_stage(in_channels, out_channels, num_convs, first_padding=1):
    """Build one encoder stage: `num_convs` x (3x3 conv, batch-norm, ReLU) + 2x2 max-pool.

    Only the first convolution maps `in_channels` -> `out_channels` and uses
    `first_padding`; the remaining ones keep `out_channels` and pad by 1.
    """
    layers = []
    for index in range(num_convs):
      is_first = index == 0
      layers.append(torch.nn.Conv2d(
          in_channels if is_first else out_channels,
          out_channels,
          kernel_size=3,
          padding=first_padding if is_first else 1,
      ))
      layers.append(torch.nn.BatchNorm2d(out_channels))
      layers.append(torch.nn.ReLU(inplace=True))
    layers.append(torch.nn.MaxPool2d(kernel_size=2, stride=2))
    return torch.nn.Sequential(*layers)

  # forward func
  def forward(self, x):
    """Return per-pixel log-probabilities of shape (N, num_classes, H, W) for input (N, 3, H, W)."""
    # encoder blocks 1-5
    features = x
    for stage in (self.encoder_block_1, self.encoder_block_2, self.encoder_block_3,
                  self.encoder_block_4, self.encoder_block_5):
      features = stage(features)

    # fc
    scores = self.encoder_fc(features)

    # upsample to match image spatial size; align_corners=True reproduces the
    # behaviour of the deprecated upsample_bilinear, and using the input's own
    # size (instead of a fixed 256 x 256) keeps output and labels aligned for
    # any input resolution
    output = torch.nn.functional.interpolate(
        scores, size=tuple(x.shape[2:]), mode="bilinear", align_corners=True)

    # normalise over the class axis explicitly (dim 1 of an N x C x H x W tensor)
    output = torch.nn.functional.log_softmax(output, dim=1)

    # return the predicted label image
    return output

In [None]:
# build the network with its default class count and move it to the selected
# device; the trailing expression keeps the cell's printed model summary
model = FCN32().to(device)
model

In [None]:
# create optimizer: SGD with momentum and weight decay over all model parameters
sgd_settings = {"lr": 0.0001, "momentum": 0.9, "weight_decay": 5e-4}
optimizer = torch.optim.SGD(model.parameters(), **sgd_settings)

In [None]:
# per-epoch history of mean loss and pixel accuracy (percent)
loss_train = []
loss_valid = []
acc_train = []
acc_valid = []


def _run_epoch(model, loader, criterion, device, optimizer=None):
  """Run one full pass over `loader` and return `(mean_loss, accuracy_percent)`.

  Args:
    model: network returning (N, C, H, W) log-probabilities.
    loader: iterable of `(images, labels)` batches; labels carry a channel
      axis at dim 1 that is dropped with `labels[:, 0]`.
    criterion: loss taking `(outputs, labels)`.
    device: device the batches are moved to.
    optimizer: when given, the model is put in training mode and updated after
      every batch; when None the model is evaluated without gradients.

  Returns:
    Mean of the per-batch losses, and the percentage of correctly predicted
    pixels among pixels whose label is not 0 (0.0 if there are none).
  """
  training = optimizer is not None
  model.train(training)

  running_loss = 0.0
  correct = 0
  total = 0
  for images, labels in loader:

    # if cuda
    images = images.to(device)
    labels = labels.to(device).long()[:, 0]

    # get predicted outputs (gradients only tracked while training)
    with torch.set_grad_enabled(training):
      outputs = model(images)
      loss = criterion(outputs, labels)

    if training:
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    running_loss = running_loss + loss.item()

    # calculate accuracy; class 0 has zero weight in the loss, so it is left
    # out of the accuracy as well
    predicted = outputs.detach().argmax(dim=1)
    counted = labels > 0
    total = total + counted.sum().item()
    correct = correct + ((predicted == labels) & counted).sum().item()

  mean_loss = running_loss / float(max(len(loader), 1))
  accuracy = 100.0 * float(correct) / float(total) if total > 0 else 0.0
  return mean_loss, accuracy


# training and val loop
for epoch in range(0, 5000):

  # train
  training_loss, training_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
  loss_train.append(training_loss)
  acc_train.append(training_accuracy)

  # validate
  valid_loss, valid_accuracy = _run_epoch(model, val_loader, criterion, device)
  loss_valid.append(valid_loss)
  acc_valid.append(valid_accuracy)

  print()
  print("Epoch" + str(epoch) + ":")
  print("Training Loss: " + str(training_loss) + "    Validation Loss: " + str(valid_loss))
  print("Training Accuracy: " + str(training_accuracy) + "    Validation Accuracy: " + str(valid_accuracy))
  print()