In [1]:
import glob
import os
import numpy as np
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, average_precision_score

from PIL import Image
import torch
from torch import nn
from torch.utils import data
from torchvision.transforms import ToTensor
from torch.autograd import Variable
import torch.nn.functional as F

In [2]:
import os
os.chdir("D:/CS543 CV/mp4")
os. getcwd() 
DATASET_PATH = 'data/sbd/'

In [3]:
DATASET_PATH = 'data/sbd/'

class SegmentationDataset(data.Dataset):
    """
    Data loader for the Segmentation Dataset. If data loading is a bottleneck, 
    you may want to optimize this in for faster training. Possibilities include
    pre-loading all images and annotations into memory before training, so as 
    to limit delays due to disk reads.
    """
    def __init__(self, split="train", data_dir=DATASET_PATH):
        assert(split in ["train", "val", "test"])
        self.img_dir = os.path.join(data_dir, split)
        self.classes = []
        with open(os.path.join(data_dir, 'classes.txt'), 'r') as f:
          for l in f:
            self.classes.append(l.rstrip())
        self.n_classes = len(self.classes)
        self.split = split
        self.data = glob.glob(self.img_dir + '/*.jpg') 
        self.data = [os.path.splitext(l)[0] for l in self.data]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        img = Image.open(self.data[index] + '.jpg')
        gt = Image.open(self.data[index] + '.png')
        
        img = ToTensor()(img)
        gt = torch.LongTensor(np.asarray(gt)).unsqueeze(0)
        return img, gt


## Unet: Self Defined
##### reference:Olaf Ronneberger, Philipp Fischer, and Thomas Brox. U-net: Convolutional networks for biomedical image segmentation. In International Conference on Medical image computing and computer-assisted intervention, pages 234–241. Springer, 2015.

In [4]:
##########
#TODO: design your own network here. The expectation is to write from scratch. But it's okay to get some inspiration 
#from conference paper. The bottom line is that you will not just copy code from other repo
##########

class MyModel(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(MyModel, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512, bilinear)
        self.up2 = Up(512, 256, bilinear)
        self.up3 = Up(256, 128, bilinear)
        self.up4 = Up(128, 64 * factor, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits
    
    
class DoubleConv(nn.Module):

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):

    def __init__(self, in_channels, out_channels, bilinear=False):
        super().__init__()
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels // 2, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels , in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)


    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

In [5]:
def segmentation_eval(gts, preds, classes, plot_file_name):
    """
    @param    gts               numpy.ndarray   ground truth labels
    @param    preds             numpy.ndarray   predicted labels
    @param    classes           string          class names
    @param    plot_file_name    string          plot file names
    """
    ious, counts = compute_confusion_matrix(gts, preds)
    aps = compute_ap(gts, preds)
    plot_results(counts, ious, aps, classes, plot_file_name)
    for i in range(len(classes)):
        print('{:>20s}: AP: {:0.2f}, IoU: {:0.2f}'.format(classes[i], aps[i], ious[i]))
    print('{:>20s}: AP: {:0.2f}, IoU: {:0.2f}'.format('mean', np.mean(aps), np.mean(ious)))
    return aps, ious

def plot_results(counts, ious, aps, classes, file_name):
    fig, ax = plt.subplots(1,1)
    conf = counts / np.sum(counts, 1, keepdims=True)
    conf = np.concatenate([conf, np.array(aps).reshape(-1,1), 
                           np.array(ious).reshape(-1,1)], 1)
    conf = conf * 100.
    sns.heatmap(conf, annot=True, ax=ax, fmt='3.0f') 
    arts = [] 
    # labels, title and ticks
    _ = ax.set_xlabel('Predicted labels')
    arts.append(_)
    _ = ax.set_ylabel('True labels')
    arts.append(_)
    _ = ax.set_title('Confusion Matrix, mAP: {:5.1f}, mIoU: {:5.1f}'.format(
      np.mean(aps)*100., np.mean(ious)*100.))
    arts.append(_)
    _ = ax.xaxis.set_ticklabels(classes + ['AP', 'IoU'], rotation=90)
    arts.append(_)
    _ = ax.yaxis.set_ticklabels(classes, rotation=0)
    arts.append(_)
    fig.savefig(file_name, bbox_inches='tight')

def compute_ap(gts, preds):
    aps = []
    for i in range(preds.shape[1]):
      ap, prec, rec = calc_pr(gts == i, preds[:,i:i+1,:,:])
      aps.append(ap)
    return aps

def calc_pr(gt, out, wt=None):
    gt = gt.astype(np.float64).reshape((-1,1))
    out = out.astype(np.float64).reshape((-1,1))

    tog = np.concatenate([gt, out], axis=1)*1.
    ind = np.argsort(tog[:,1], axis=0)[::-1]
    tog = tog[ind,:]
    cumsumsortgt = np.cumsum(tog[:,0])
    cumsumsortwt = np.cumsum(tog[:,0]-tog[:,0]+1)
    prec = cumsumsortgt / cumsumsortwt
    rec = cumsumsortgt / np.sum(tog[:,0])
    ap = voc_ap(rec, prec)
    return ap, rec, prec

def voc_ap(rec, prec):
    rec = rec.reshape((-1,1))
    prec = prec.reshape((-1,1))
    z = np.zeros((1,1)) 
    o = np.ones((1,1))
    mrec = np.vstack((z, rec, o))
    mpre = np.vstack((z, prec, z))

    mpre = np.maximum.accumulate(mpre[::-1])[::-1]
    I = np.where(mrec[1:] != mrec[0:-1])[0]+1;
    ap = np.sum((mrec[I] - mrec[I-1])*mpre[I])
    return ap

def compute_confusion_matrix(gts, preds):
    preds_cls = np.argmax(preds, 1)
    gts = gts[:,0,:,:]
    conf = confusion_matrix(gts.ravel(), preds_cls.ravel())
    inter = np.diag(conf)
    union = np.sum(conf, 0) + np.sum(conf, 1) - np.diag(conf)
    union = np.maximum(union, 1)
    return inter / union, conf

In [6]:
# Colab has GPUs, you will have to move tensors and models to GPU.
device = torch.device("cuda:0")

TOTAL_CLASSES=9
model = MyModel(3,TOTAL_CLASSES).to(device) 
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay=1e-8, momentum=0.9)

train_loss_over_epochs = []
val_accuracy_over_epochs = []

EPOCHS = 50
train_dataset = SegmentationDataset(split='train')
trainloader = data.DataLoader(train_dataset, batch_size=1,  shuffle=True, num_workers=0, drop_last=True)
IS_GPU = True

val_dataset = SegmentationDataset(split='val')
valloader = data.DataLoader(val_dataset, batch_size=1,  shuffle=True, num_workers=0, drop_last=True)




In [7]:
def calculate_accuracy(dataloader, is_gpu,model):
    """ Util function to calculate val set accuracy,
    both overall and per class accuracy
    Args:
        dataloader (torch.utils.data.DataLoader): val set 
        is_gpu (bool): whether to run on GPU
    Returns:
        tuple: (overall accuracy, class level accuracy)
    """    
    correct = 0.
    total = 0.

    for data in dataloader:
        images, labels = data
        if is_gpu:
            images = images.cuda()
            labels = labels.cuda()
        outputs = model(Variable(images))
        a, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()

        right=torch.sum(c)
        total += (labels.size(2)*labels.size(3))
        correct += right

    return 100*correct/total

In [8]:
# This is a trivial semantic segmentor. For eqch pixel location it computes the 
# distribution of the class label in the training set and uses that as the 
# prediction. Quite unsuprisingly it doesn't perform very well. Though we provide
# this code so that you can understand the data formats for the benchmarking 
# functions.
def simple_train():
    train_dataset = SegmentationDataset(split='train')
    train_dataloader = data.DataLoader(train_dataset, batch_size=1, 
                                       shuffle=True, num_workers=0, 
                                       drop_last=True)
    counts = np.zeros((train_dataset.n_classes, 224, 288))
    N = 0
    for i, batch in enumerate(tqdm(train_dataloader)):
      img, gt = batch
      gt = gt.cpu().numpy()
      for j in range(train_dataset.n_classes):
          counts[j,:,:] += gt[0,0,:,:] == j
      N += 1
    model = counts / N
    return model

def simple_predict(split, model):
    dataset = SegmentationDataset(split=split, data_dir=DATASET_PATH)
    dataloader = data.DataLoader(dataset, batch_size=1, shuffle=False, 
                                 num_workers=0, drop_last=False)
    gts, preds = [], []
    for i, batch in enumerate(tqdm(dataloader)):
      img, gt = batch
      gt = gt.cpu().numpy()
      gts.append(gt[0,:,:,:])
      preds.append(model)
                              
    gts = np.array(gts)
    preds = np.array(preds)
    return gts, preds, list(dataset.classes)


def predict(split, model):
    dataset = SegmentationDataset(split=split, data_dir=DATASET_PATH)
    dataloader = data.DataLoader(dataset, batch_size=1, shuffle=False, 
                                 num_workers=0, drop_last=False)
    gts, preds = [], []
    for i, batch in enumerate(tqdm(dataloader)):
      img, gt = batch
       
      gt = gt.cpu().numpy()
    
      gts.append(gt[0,:,:,:])
      outputs= model(img.to(device))
#       print(outputs.shape)
      sf=nn.Softmax(dim=1)
      outputs= sf(outputs)

    
      outputs=outputs.detach().cpu().numpy()[0,:,:,:]
#       print(outputs.shape)
#       pl=np.argmax(outputs, axis=1)
    
      preds.append(outputs)
                              
    gts = np.array(gts)
    preds = np.array(preds)
    return gts, preds, list(dataset.classes)

In [9]:
########################################################################
# TODO: Implement your training cycles, make sure you evaluate on validation 
# dataset and compute evaluation metrics every so often. 
# You may also want to save models that perform well.
for epoch in tqdm(range(EPOCHS), total=EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, Data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = Data
        print(labels.shape)
        if IS_GPU:
            inputs = inputs.cuda()
            labels = labels.cuda()
            
            
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)

        outputs=outputs.permute(0,2,3,1)
        
        loss = criterion(outputs.view(-1,9), labels.view(-1))


        loss.backward()
        optimizer.step()
#         break
        # print statistics
        running_loss += loss.item()
    
    # Normalizing the loss by the total number of train batches
    running_loss/=len(trainloader)
    print('[%d] loss: %.3f' %
          (epoch + 1, running_loss))

    # Scale of 0.0 to 100.0
    # Calculate validation set accuracy of the existing model
    val_accuracy = \
        calculate_accuracy(valloader, IS_GPU,model)
    print('Accuracy of the network on the val images: %d %%' % (val_accuracy))

    # Optionally print classwise accuracies
#     for c_i in range(TOTAL_CLASSES):
#         print('Accuracy of %5s : %2d %%' % (
#             classes[c_i], 100 * val_classwise_accuracy[c_i]))

    train_loss_over_epochs.append(running_loss)
#     val_accuracy_over_epochs.append(val_accuracy)
# -----------------------------



  0%|                                                                                           | 0/50 [00:00<?, ?it/s]

torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1, 1, 224, 288])
torch.Size([1,




KeyboardInterrupt: 

In [None]:
########################################################################
# TODO: Evaluate your result, and report Mean average precision on test dataset 
# using provided helper function. Here we show how we can train and evaluate the 
# simple model that we provided on the validation set. You will want to report
# performance on the validation set for the variants you tried, and the 
# performance of the final model on the test set.
# model = simple_train()
# gts, preds, classes = simple_predict('val', model)
gts, preds, classes = predict('test', model)
aps, ious = segmentation_eval(gts, preds, classes, 'cs543-simple-unet.pdf')

### Unet based on Pre-Trained Resnet 18

In [None]:
import torchvision.models as models
resnet18 = models.resnet18(pretrained=True)
print(resnet18)

In [None]:
class decoder_Res18(nn.Module):
    def __init__(self, inC,MiddleC,outC):
        super(decoder_Res18,self).__init__()
        
        self.sequence=nn.Sequential(
            nn.Conv2d(inC, MiddleC,3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(MiddleC),
            nn.ConvTranspose2d(MiddleC,outC,kernel_size=4,stride=2,padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(outC)
        )
    def forward(self,x):
        return self.sequence(x)
    


class Unet_Res18(nn.Module):
    def __init__(self,number_class=9, pretrained=True, Dropout=0.3):
    
        super(Unet_Res18,self).__init__()
        if pretrained:
            self.encoder= models.resnet18(pretrained=pretrained)
        else:
            print('please defined your own model here')

        # fix the parameters in resnet18
        for param in self.encoder.parameters():
            param.requires_grad = False


        self.number_class=number_class
        self.Dropout=Dropout
        self.relu= nn.ReLU(inplace= True)
        self.pool=nn.MaxPool2d(2,2)
        self.dropout_2d = nn.Dropout2d(p=Dropout)
        self.conv1=nn.Sequential(self.encoder.conv1,
                                 self.encoder.bn1,
                                 self.encoder.relu,
                                 self.pool
                                )
        self.conv2=self.encoder.layer1
        self.conv3=self.encoder.layer2
        self.conv4=self.encoder.layer3
        self.conv5=self.encoder.layer4
        
        self.decov=decoder_Res18(512, 512, 256)
        self.decov5=decoder_Res18(512+256, 512, 256)
        self.decov4=decoder_Res18(256+256, 512, 256)
        self.decov3=decoder_Res18(128+256, 256, 64)
        self.decov2=decoder_Res18(64+64, 128, 128)
        self.decov1=decoder_Res18(128, 128, 32)
        self.decov0=nn.Sequential(
            nn.Conv2d(32,self.number_class,kernel_size=1)
        )
            
        
    def Pad_Same(self, x1, x2):
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)  
        return x
        
    def forward(self,x):

        conv1 = self.conv1(x)
        conv2 = self.dropout_2d(self.conv2(conv1))
        conv3 = self.dropout_2d(self.conv3(conv2))
        conv4 = self.dropout_2d(self.conv4(conv3))
        conv5 = self.dropout_2d(self.conv5(conv4))

        mid = self.decov(self.pool(conv5))

        decov5= self.decov5( self.Pad_Same(mid,conv5))
        decov4= self.decov4( self.Pad_Same(decov5, conv4))
        decov3= self.decov3( self.Pad_Same(decov4, conv3))
        decov2= self.decov2( self.Pad_Same(decov3, conv2))
        decov1= self.decov1( self.dropout_2d(decov2))
        
        output= self.decov0( decov1)
        
        return output
        

In [None]:
# Colab has GPUs, you will have to move tensors and models to GPU.
device = torch.device("cuda:0")

TOTAL_CLASSES=9
model2 = Unet_Res18(TOTAL_CLASSES).to(device) 
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model2.parameters(), lr=0.001, weight_decay=1e-8, momentum=0.9)
# optimizer = torch.optim.Adam(model2 .parameters(), lr=0.001, weight_decay=5e-4)
train_loss_over_epochs = []
val_accuracy_over_epochs = []

EPOCHS = 35
train_dataset = SegmentationDataset(split='train')
trainloader = data.DataLoader(train_dataset, batch_size=1,  shuffle=True, num_workers=0, drop_last=True)
IS_GPU = True

val_dataset = SegmentationDataset(split='val')
valloader = data.DataLoader(val_dataset, batch_size=1,  shuffle=True, num_workers=0, drop_last=True)

for epoch in tqdm(range(EPOCHS), total=EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, Data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = Data
        if IS_GPU:
            inputs = inputs.cuda()
            labels = labels.cuda()
            

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model2(inputs)


        outputs=outputs.permute(0,2,3,1)
        
        loss = criterion(outputs.view(-1,9), labels.view(-1))


        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    # Normalizing the loss by the total number of train batches
    running_loss/=len(trainloader)
    print('[%d] loss: %.3f' %
          (epoch + 1, running_loss))

    # Calculate validation set accuracy of the existing model
    val_accuracy = \
        calculate_accuracy(valloader, IS_GPU, model2)
    print('Accuracy of the network on the val images: %d %%' % (val_accuracy))



    train_loss_over_epochs.append(running_loss)





In [None]:
gts, preds, classes = predict('test', model2)
aps, ious = segmentation_eval(gts, preds, classes, 'cs543-Resnet-unet-33.pdf')

In [None]:
model

In [None]:
model2