In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
import PIL
import cv2
import csv
import os
import glob
import time
import copy


from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
from torchvision.models.segmentation.deeplabv3 import DeepLabHead
from sklearn.metrics import roc_auc_score, f1_score
from torchsummary import summary
from tqdm.notebook import tqdm

In [2]:
# Build a lookup from the first CSV column to the second one
# (presumably label id -> label name; confirm against data/labelNames.csv).
label_map = {}
with open('data/labelNames.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',',)
    # The first row is a header: consume it instead of counting rows by hand.
    next(reader, None)
    for row in reader:
        # Blank or truncated lines have no key/value pair to store.
        if len(row) < 2:
            continue
        label_map[row[0]] = row[1]
print(len(label_map))

894


In [3]:
class Normalize(object):
    '''Rescale the image of a sample by 1/255; hand the label back untouched.'''

    # NOTE(review): open question carried over from the original author.
    # The label is described as a 1-channel uint8 tensor that labels every
    # pixel, and it is deliberately NOT converted here (an earlier attempt,
    # `.type(torch.FloatTensor)/len(label_map)`, was disabled).
    # The example this notebook is based on used a binary mask; the NYU
    # dataset also ships binary masks for all classes. Whether the ground
    # truth should be a single-channel label image or an
    # H x W x (# of classes) mask is still undecided.
    def __call__(self, sample):
        image, label = sample['image'], sample['label']
        scaled_image = image.type(torch.FloatTensor) / 255
        return {'image': scaled_image, 'label': label}

In [4]:
class SegDataset(Dataset):
    """Segmentation dataset that pairs image files with label files by sorted order."""

    def __init__(self, root_dir, imageFolder, labelFolder, transform=None, 
                 seed=None, fraction=None, subset=None, imagecolormode='rgb', labelcolormode='grayscale'):
        """
        Args:
            root_dir (string): Directory with all the images and should have the following structure.
            root
            --Images
            -----image00001
            -----image__N__
            --Label
            -----label00001
            -----label__N__
            imageFolder (string) = 'Images' : Name of the folder which contains the Images.
            labelFolder (string) = 'Labels' : Name of the folder which contains the Labels.
            transform (callable, optional): Optional transform to be applied on a sample.
            seed: Seed for the train/test shuffle. Any integer (including 0) shuffles;
                None keeps the sorted order. Note that this seeds numpy's global RNG.
            fraction: A float value from 0 to 1 which specifies the validation split fraction.
                When falsy (None or 0) every file is used and `subset` is ignored.
            subset: 'Train' or 'Test' to select the appropriate set (required with `fraction`).
            imagecolormode: 'rgb' or 'grayscale'
            labelcolormode: 'rgb' or 'grayscale'
        """
        # Values are the flags handed to cv2.imread (1 = colour, 0 = grayscale).
        self.color_dict = {'rgb': 1, 'grayscale': 0}
        assert(imagecolormode in ['rgb', 'grayscale'])
        assert(labelcolormode in ['rgb', 'grayscale'])

        self.imagecolorflag = self.color_dict[imagecolormode]
        self.labelcolorflag = self.color_dict[labelcolormode]
        self.root_dir = root_dir
        self.transform = transform

        # Images and labels are matched purely by their position in the sorted listings.
        image_paths = sorted(
            glob.glob(os.path.join(self.root_dir, imageFolder, '*')))
        label_paths = sorted(
            glob.glob(os.path.join(self.root_dir, labelFolder, '*')))

        if not fraction:
            self.image_names = image_paths
            self.label_names = label_paths
            return

        assert(subset in ['Train', 'Test'])
        self.fraction = fraction
        self.image_list = np.array(image_paths)
        self.label_list = np.array(label_paths)
        # `is not None` so that seed=0 is honoured like any other seed.
        if seed is not None:
            np.random.seed(seed)
            indices = np.arange(len(self.image_list))
            np.random.shuffle(indices)
            # The same permutation is applied to both lists to keep pairs aligned.
            self.image_list = self.image_list[indices]
            self.label_list = self.label_list[indices]

        # The first ceil(n * (1 - fraction)) entries form 'Train', the rest 'Test'.
        image_split = int(np.ceil(len(self.image_list)*(1-self.fraction)))
        label_split = int(np.ceil(len(self.label_list)*(1-self.fraction)))
        if subset == 'Train':
            self.image_names = self.image_list[:image_split]
            self.label_names = self.label_list[:label_split]
        else:
            self.image_names = self.image_list[image_split:]
            self.label_names = self.label_list[label_split:]

    def __len__(self):
        """Number of samples, taken from the image listing."""
        return len(self.image_names)

    @staticmethod
    def _read(path, colorflag):
        """Read one file with cv2.imread; colour arrays are returned channel-first.

        Raises:
            OSError: if cv2.imread returns None for `path`.
        """
        # NOTE(review): OpenCV decodes colour images in BGR channel order, so the
        # 'rgb' mode only selects 3-channel decoding - confirm what the model expects.
        array = cv2.imread(path, colorflag)
        if array is None:
            raise OSError(f'cv2.imread could not read {path!r}')
        if colorflag:
            # (H, W, C) -> (C, H, W)
            return array.transpose(2, 0, 1)
        return array

    def __getitem__(self, idx):
        """Return {'image': ..., 'label': ...} for index `idx`, after the optional transform."""
        image = self._read(self.image_names[idx], self.imagecolorflag)
        label = self._read(self.label_names[idx], self.labelcolorflag)
        sample = {'image': image, 'label': label}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [5]:
class ToTensor(object):
    """Wrap the numpy arrays of a sample in torch tensors, adding a channel axis to 2-D arrays."""

    @staticmethod
    def _with_channel_axis(array):
        # A 2-D array gets a leading axis of size 1; other ranks pass through unchanged.
        return array[None] if array.ndim == 2 else array

    def __call__(self, sample, labelresize=None, imageresize=None):
        # `labelresize` and `imageresize` are accepted but never used.
        tensors = {}
        for key in ('image', 'label'):
            tensors[key] = torch.from_numpy(self._with_channel_axis(sample[key]))
        return tensors

In [6]:
def get_dataloader_sep_folder(data_dir, imageFolder='data/Train/Image', labelFolder='data/Train/Label', batch_size=4,
                              num_workers=8):
    """
        Create Train and Test dataloaders from two separate Train and Test folders.
        The directory structure should be as follows.
        data_dir
        --Train
        ------Image
        ---------image00001
        ---------image__N__
        ------Label
        ---------label00001
        ---------label__N__
        --Test
        ------Image
        ---------image00001
        ---------image__N__
        ------Label
        ---------label00001
        ---------label__N__

        Args:
            data_dir: Folder that contains the 'Train' and 'Test' sub-folders.
            imageFolder: Image folder, resolved relative to data_dir/Train and data_dir/Test.
            labelFolder: Label folder, resolved relative to data_dir/Train and data_dir/Test.
            batch_size: Batch size used by both loaders.
            num_workers: Worker processes per DataLoader (0 loads in the main process).

        Returns:
            dict with keys 'Train' and 'Test', each mapping to a DataLoader.
    """
    splits = ['Train', 'Test']
    data_transforms = {
        'Train': transforms.Compose([ToTensor(), Normalize()]),
        'Test': transforms.Compose([ToTensor(), Normalize()]),
    }

    image_datasets = {x: SegDataset(root_dir=os.path.join(data_dir, x),
                                    transform=data_transforms[x], labelFolder=labelFolder, imageFolder=imageFolder)
                      for x in splits}
    # Only the training data is shuffled; the Test loader keeps a fixed order
    # so that evaluation runs are comparable from epoch to epoch.
    dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch_size,
                                 shuffle=(x == 'Train'), num_workers=num_workers)
                   for x in splits}
    return dataloaders

In [7]:
def im_convert(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Turn a channel-first image tensor into a displayable channel-last numpy array.

    The tensor is detached, moved to the CPU, transposed from (C, H, W) to
    (H, W, C), de-normalized as ``image * std + mean`` and clipped to [0, 1].

    Args:
        tensor: 3-D image tensor with the channel axis first.
        mean: Per-channel mean that was subtracted during normalization.
        std: Per-channel standard deviation that was divided out during normalization.
            The defaults reproduce the previously hard-coded 0.5 / 0.5 values;
            pass mean=(0, 0, 0), std=(1, 1, 1) for data that was only rescaled.

    Returns:
        numpy array of shape (H, W, C) with values in [0, 1].
    """
    image = tensor.detach().cpu().numpy()
    image = image.transpose(1, 2, 0)
    # Multiplication allocates a new array, so the input tensor is never modified.
    image = image * np.asarray(std, dtype=np.float64) + np.asarray(mean, dtype=np.float64)
    return image.clip(0, 1)

In [8]:
def createDeepLabv3(outputchannels=1):
    """Build a pretrained DeepLabV3-ResNet101 with a fresh head, in training mode.

    The classifier shipped with the pretrained model is replaced by a new
    ``DeepLabHead`` that takes the 2048 backbone channels and emits
    ``outputchannels`` channels. No activation (e.g. Sigmoid) is appended
    after the head.

    Args:
        outputchannels: Number of channels produced by the new head.

    Returns:
        The modified model, already switched to training mode.
    """
    backbone_channels = 2048
    net = models.segmentation.deeplabv3_resnet101(pretrained=True, progress=True)
    # Swap out the classification part of the model for an untrained head.
    net.classifier = DeepLabHead(backbone_channels, outputchannels)
    # Module.train() returns the module itself.
    return net.train()


createDeepLabv3(outputchannels=1)

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [9]:
def _targets_for_criterion(criterion, logits, labels):
    """Return `labels` in a form `criterion` can consume.

    Only nn.CrossEntropyLoss with integer-typed labels is adapted: a singleton
    channel axis is dropped so the target has one axis fewer than the logits,
    and the dtype becomes int64 (class indices). Anything else passes through.
    """
    if isinstance(criterion, nn.CrossEntropyLoss) and not torch.is_floating_point(labels):
        if labels.dim() > 1 and labels.dim() == logits.dim() and labels.size(1) == 1:
            labels = labels.squeeze(1)
        labels = labels.long()
    return labels


def train_model(model, criterion, dataloaders, optimizer, metrics, bpath, num_epochs=3, device=None):
    """Train `model`, evaluate it after every epoch and keep the best weights.

    Args:
        model: Module whose forward pass returns a dict with an 'out' entry.
        criterion: Loss called as criterion(outputs['out'], labels).
        dataloaders: dict with 'Train' and 'Test' iterables that yield dicts
            holding 'image' and 'label' tensors.
        optimizer: Optimizer stepped once per training batch.
        metrics: dict whose keys name extra log columns. Metric evaluation is
            currently disabled, so these columns are logged as NaN.
        bpath: Existing directory that receives 'log.csv' (appended to; a header
            row is written when the file is new or empty).
        num_epochs: Number of train + test passes.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-free model).

    Returns:
        `model`, loaded with the weights of the epoch with the lowest mean Test loss.
    """
    if device is None:
        # Follow the model instead of depending on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e10
    # Initialize the log file for training and testing loss and metrics
    fieldnames = ['epoch', 'Train_loss', 'Test_loss'] + \
        [f'Train_{m}' for m in metrics.keys()] + \
        [f'Test_{m}' for m in metrics.keys()]
    metric_fields = fieldnames[3:]
    log_path = os.path.join(bpath, 'log.csv')
    # Only a fresh log gets a header, so an existing log keeps its layout.
    if not os.path.isfile(log_path) or os.path.getsize(log_path) == 0:
        with open(log_path, 'w', newline='') as csvfile:
            csv.DictWriter(csvfile, fieldnames=fieldnames).writeheader()

    for epoch in range(1, num_epochs+1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)
        # Each epoch has a training and validation phase
        batchsummary = {'epoch': epoch}

        for phase in ['Train', 'Test']:
            is_train = phase == 'Train'
            # train(True) is training mode, train(False) is evaluation mode
            model.train(is_train)

            running_loss = 0.0
            seen = 0
            # Passing the loader itself lets tqdm show the total batch count.
            for sample in tqdm(dataloaders[phase]):
                inputs = sample['image'].to(device)
                labels = sample['label'].to(device)

                if is_train:
                    optimizer.zero_grad()

                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    logits = model(inputs)['out']
                    targets = _targets_for_criterion(criterion, logits, labels)
                    loss = criterion(logits, targets)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight each batch by its size so a short final batch does not
                # skew the epoch mean.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                seen += batch_size

            # An empty loader yields NaN rather than a NameError / ZeroDivisionError.
            epoch_loss = running_loss / seen if seen else float('nan')
            batchsummary[f'{phase}_loss'] = epoch_loss
            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))

            # deep copy the model
            if phase == 'Test' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        # Metrics are not evaluated; NaN marks the columns as "not computed"
        # instead of logging a value that looks like a real score.
        for field in metric_fields:
            batchsummary[field] = float('nan')
        print(batchsummary)
        with open(log_path, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow(batchsummary)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Lowest Loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
bpath = 'data/Output'
data_dir = 'data'
image_dir = 'Image/1'
label_dir = 'Label/1'
epochs = 3

# Kept small because of GPU memory limits.
batchsize = 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# CrossEntropyLoss scores one output channel per class, so a 1-channel head
# cannot be trained with it. The head is sized from the label table instead;
# the extra channel leaves room for an id 0 next to the listed labels.
# NOTE(review): confirm the id range actually stored in the label images, and
# that this many full-resolution output channels fits in GPU memory.
num_classes = len(label_map) + 1

# Create the deeplabv3 resnet101 model which is pretrained on a subset of COCO train2017,
# on the 20 categories that are present in the Pascal VOC dataset.
model = createDeepLabv3(outputchannels=num_classes).to(device)

# Create the experiment directory (and any missing parents) if not present.
os.makedirs(bpath, exist_ok=True)

# Loss function: the example used MSELoss; cross entropy is used here because
# this is a per-pixel classification problem.
criterion = torch.nn.CrossEntropyLoss()
# Specify the optimizer with a lower learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# NOTE(review): open question from the original author - metrics are meant for
# tuning hyperparameters, but how to use them here is still unclear.
metrics = {'auroc': roc_auc_score} #'f1_score': f1_score, 
dataloaders = get_dataloader_sep_folder(data_dir, imageFolder=image_dir, labelFolder=label_dir, batch_size=batchsize)
trained_model = train_model(model, criterion, dataloaders,
                            optimizer, bpath=bpath, metrics=metrics, num_epochs=epochs)

In [None]:
#Saving/loading the trained models

# savepath='data/Output/checkpoint.pth'
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# #torch.save(model.state_dict(), savepath)
# model = createDeepLabv3().to(device)
# model.load_state_dict(torch.load(savepath))
# #model.eval()

In [12]:
#**********************************************************************************
#The backbone, which should be the feature-extraction portion of DeepLabV3, outputs
#a [-1, 2048, 80, 60] tensor.

#The newly trained classifier takes 2048 channels and outputs 256 channels of 80x60 tensors.
#How should I upsample it back to 640x480 in order to compare with the labels?
#The new classifier used is called DeepLabHead, defined at the following URL:
#https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
#It seems like the example uses it to replace the classification part of the model.
#**********************************************************************************
# Build a fresh model and print a layer summary of each of its two parts.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = createDeepLabv3().to(device)
# (sub-module, per-sample input shape) pairs handed to torchsummary, in order.
for part, input_shape in ((model.backbone, (3, 640, 480)),
                          (model.classifier, (2048, 80, 60))):
    summary(part, input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 320, 240]           9,408
       BatchNorm2d-2         [-1, 64, 320, 240]             128
              ReLU-3         [-1, 64, 320, 240]               0
         MaxPool2d-4         [-1, 64, 160, 120]               0
            Conv2d-5         [-1, 64, 160, 120]           4,096
       BatchNorm2d-6         [-1, 64, 160, 120]             128
              ReLU-7         [-1, 64, 160, 120]               0
            Conv2d-8         [-1, 64, 160, 120]          36,864
       BatchNorm2d-9         [-1, 64, 160, 120]             128
             ReLU-10         [-1, 64, 160, 120]               0
           Conv2d-11        [-1, 256, 160, 120]          16,384
      BatchNorm2d-12        [-1, 256, 160, 120]             512
           Conv2d-13        [-1, 256, 160, 120]          16,384
      BatchNorm2d-14        [-1, 256, 1

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 80, 60]         524,288
       BatchNorm2d-2          [-1, 256, 80, 60]             512
              ReLU-3          [-1, 256, 80, 60]               0
            Conv2d-4          [-1, 256, 80, 60]       4,718,592
       BatchNorm2d-5          [-1, 256, 80, 60]             512
              ReLU-6          [-1, 256, 80, 60]               0
            Conv2d-7          [-1, 256, 80, 60]       4,718,592
       BatchNorm2d-8          [-1, 256, 80, 60]             512
              ReLU-9          [-1, 256, 80, 60]               0
           Conv2d-10          [-1, 256, 80, 60]       4,718,592
      BatchNorm2d-11          [-1, 256, 80, 60]             512
             ReLU-12          [-1, 256, 80, 60]               0
AdaptiveAvgPool2d-13           [-1, 2048, 1, 1]               0
           Conv2d-14            [-1, 25