# Data Loader

In [3]:
from imutils import paths
from pathlib import Path 
from matplotlib import pyplot as plt
import numpy as np
import argparse
import cv2
import os


from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Resize
import glob
import os
import numpy as np
import cv2
import torch
from torchvision import transforms, utils
from PIL import Image

In [4]:
class SegDataset(Dataset):
    """Segmentation Dataset"""

    def __init__(
        self, 
        root_dir, 
        imageFolder, 
        maskFolder, 
        transform=None, 
        seed=None, 
        fraction=None, 
        subset=None, 
        imagecolormode='rgb', 
        maskcolormode='rgb'
    ):
        """
        Args:
            root_dir (string): Directory with all the images and should have the following structure.
            root
            --Images
            -----Img 1
            -----Img N
            --Mask
            -----Mask 1
            -----Mask N
            imageFolder (string) = 'Images' : Name of the folder which contains the Images.
            maskFolder (string)  = 'Masks : Name of the folder which contains the Masks.
            transform (callable, optional): Optional transform to be applied on a sample.
            seed: Specify a seed for the train and test split
            fraction: A float value from 0 to 1 which specifies the validation split fraction
            subset: 'Train' or 'Test' to select the appropriate set.
            imagecolormode: 'rgb' or 'grayscale'
            maskcolormode: 'rgb' or 'grayscale'
        """
        self.color_dict = {'rgb': 1, 'grayscale': 0}
        assert(imagecolormode in ['rgb', 'grayscale'])
        assert(maskcolormode in ['rgb', 'grayscale'])

        self.imagecolorflag = self.color_dict[imagecolormode]
        self.maskcolorflag = self.color_dict[maskcolormode]
        self.root_dir = root_dir
        self.transform = transform
        if not fraction:
            self.image_names = sorted(
                glob.glob(os.path.join(self.root_dir, imageFolder, '*')))
            self.mask_names = sorted(
                glob.glob(os.path.join(self.root_dir, maskFolder, '*')))
        else:
            assert(subset in ['Train', 'Test'])
            self.fraction = fraction
            self.image_list = np.array(
                sorted(glob.glob(os.path.join(self.root_dir, imageFolder, '*'))))
            self.mask_list = np.array(
                sorted(glob.glob(os.path.join(self.root_dir, maskFolder, '*'))))
            if seed:
                np.random.seed(seed)
                indices = np.arange(len(self.image_list))
                np.random.shuffle(indices)
                self.image_list = self.image_list[indices]
                self.mask_list = self.mask_list[indices]
            if subset == 'Train':
                self.image_names = self.image_list[:int(
                    np.ceil(len(self.image_list)*(1-self.fraction)))]
                self.mask_names = self.mask_list[:int(
                    np.ceil(len(self.mask_list)*(1-self.fraction)))]
            else:
                self.image_names = self.image_list[int(
                    np.ceil(len(self.image_list)*(1-self.fraction))):]
                self.mask_names = self.mask_list[int(
                    np.ceil(len(self.mask_list)*(1-self.fraction))):]

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        if self.imagecolorflag:
            image = cv2.imread(
                img_name, self.imagecolorflag)
        else:
            image = cv2.imread(img_name, self.imagecolorflag)
        
        msk_name = self.mask_names[idx]
        if self.maskcolorflag:
            mask = cv2.imread(msk_name, self.maskcolorflag)
        else:
            mask = cv2.imread(msk_name, self.maskcolorflag)
        

        #image = Image.open(img_name)
        #mask = Image.open()
     
        sample = {'image': image, 'mask': mask}

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return {'image': image, 'mask': mask}


class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample, maskresize=None, imageresize=None):
        image, mask = sample['image'], sample['mask']
        if len(mask.shape) == 2:
            mask = mask.reshape((1,)+mask.shape)
        if len(image.shape) == 2:
            image = image.reshape((1,)+image.shape)
        return {'image': torch.from_numpy(image),
                'mask': torch.from_numpy(mask)}


class Normalize(object):
    """Normalize image"""

    def __call__(self, sample):
        image, mask = sample['image'], sample['mask']
        return {'image': image.type(torch.FloatTensor)/255,
                'mask': mask.type(torch.FloatTensor)/255}


def get_dataloader_single_folder(data_dir, imageFolder='Images', maskFolder='Masks', fraction=0.2, batch_size=4):
    """Create training and testing dataloaders from a single folder."""
    data_transforms = {
        'Train': transforms.Compose([
            transforms.ToPILImage(), 
            Resize((256, 256)), 
            transforms.ToTensor(), 
            transforms.Normalize(0,1)
        ]),
        'Test': transforms.Compose([
            transforms.ToPILImage(), 
            Resize((256, 256)), 
            transforms.ToTensor(), 
            transforms.Normalize(0,1)
        ]),
    }

    image_datasets = {
        x: SegDataset(
            data_dir, 
            imageFolder=imageFolder, 
            maskFolder=maskFolder, 
            seed=100, 
            fraction=fraction, 
            subset=x, 
            transform=data_transforms[x]
        ) for x in ['Train', 'Test']
    }
    dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch_size,
                                 shuffle=True, num_workers=8)
                   for x in ['Train', 'Test']}
    return dataloaders

In [3]:
dataloaders = get_dataloader_single_folder(
    data_dir="../data/finetune", 
    imageFolder='images', 
    maskFolder='masks', 
    fraction=0.2, 
    batch_size=4,
)

# Model Building

In [4]:
from torchvision import models
from torchvision.models.segmentation.deeplabv3 import DeepLabHead

In [5]:
def createDeepLabv3(outputchannels=1):
    model = models.segmentation.deeplabv3_resnet101(
        pretrained=True, progress=True)
    # Added a Tanh activation after the last convolution layer
    model.classifier = DeepLabHead(2048, outputchannels)
    # Set the model in training mode
    model.train()
    return model


model = createDeepLabv3(3)



# Finetune

In [6]:
import csv
import copy
import time
from tqdm import tqdm
import torch
import numpy as np
import os

from sklearn.metrics import f1_score, roc_auc_score


In [7]:
def train_model(model, criterion, dataloaders, optimizer, metrics, bpath, num_epochs=3):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e10
    # Use gpu if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Initialize the log file for training and testing loss and metrics
    fieldnames = ['epoch', 'Train_loss', 'Test_loss'] + \
        [f'Train_{m}' for m in metrics.keys()] + \
        [f'Test_{m}' for m in metrics.keys()]
    with open(os.path.join(bpath, 'log.csv'), 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    for epoch in range(1, num_epochs+1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)
        # Each epoch has a training and validation phase
        # Initialize batch summary
        batchsummary = {a: [0] for a in fieldnames}

        for phase in ['Train', 'Test']:
            if phase == 'Train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            # Iterate over data.
            for sample in tqdm(iter(dataloaders[phase])):
                inputs = sample['image'].to(device)
                masks = sample['mask'].to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # track history if only in train
                with torch.set_grad_enabled(phase == 'Train'):
                    outputs = model(inputs)
                    loss = criterion(outputs['out'], masks)
                    y_pred = outputs['out'].data.cpu().numpy().ravel()
                    y_true = masks.data.cpu().numpy().ravel()
                    for name, metric in metrics.items():
                        if name == 'f1_score':
                            # Use a classification threshold of 0.1
                            batchsummary[f'{phase}_{name}'].append(
                                metric(y_true > 0, y_pred > 0.1))
                        else:
                            batchsummary[f'{phase}_{name}'].append(
                                metric(y_true.astype('uint8'), y_pred))

                    # backward + optimize only if in training phase
                    if phase == 'Train':
                        loss.backward()
                        optimizer.step()
            batchsummary['epoch'] = epoch
            epoch_loss = loss
            batchsummary[f'{phase}_loss'] = epoch_loss.item()
            print('{} Loss: {:.4f}'.format(
                phase, loss))
        for field in fieldnames[3:]:
            batchsummary[field] = np.mean(batchsummary[field])
        print(batchsummary)
        with open(os.path.join(bpath, 'log.csv'), 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow(batchsummary)
            # deep copy the model
            if phase == 'Test' and loss < best_loss:
                best_loss = loss
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Lowest Loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [8]:
epochs = 25
bpath = "../exp/"

# Specify the loss function
criterion = torch.nn.MSELoss(reduction='mean')
# Specify the optimizer with a lower learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Specify the evalutation metrics
metrics = {'f1_score': f1_score, 'auroc': roc_auc_score}

trained_model = train_model(model, criterion, dataloaders,
                            optimizer, bpath=bpath, metrics=metrics, num_epochs=epochs)

torch.save(trained_model, os.path.join(bpath, 'weights.pt'))

Epoch 1/25
----------


100%|██████████| 653/653 [05:01<00:00,  2.17it/s]

Train Loss: 0.0636



100%|██████████| 163/163 [00:57<00:00,  2.86it/s]

Test Loss: 0.0449
{'epoch': 1, 'Train_loss': 0.06356216967105865, 'Test_loss': 0.04493154212832451, 'Train_f1_score': 0.7226524072023118, 'Train_auroc': 0.9257491828273825, 'Test_f1_score': 0.8184593609955575, 'Test_auroc': 0.9484445226694365}
Epoch 2/25
----------



100%|██████████| 653/653 [04:53<00:00,  2.22it/s]

Train Loss: 0.0187



100%|██████████| 163/163 [00:57<00:00,  2.84it/s]

Test Loss: 0.0161
{'epoch': 2, 'Train_loss': 0.018674857914447784, 'Test_loss': 0.01612965390086174, 'Train_f1_score': 0.7908194230599062, 'Train_auroc': 0.9562250328313125, 'Test_f1_score': 0.8209721159161227, 'Test_auroc': 0.9526583523284339}
Epoch 3/25
----------



100%|██████████| 653/653 [04:52<00:00,  2.23it/s]

Train Loss: 0.0369



100%|██████████| 163/163 [00:57<00:00,  2.84it/s]

Test Loss: 0.0973
{'epoch': 3, 'Train_loss': 0.036896757781505585, 'Test_loss': 0.09726639091968536, 'Train_f1_score': 0.820811037790332, 'Train_auroc': 0.9658890457649096, 'Test_f1_score': 0.8067105252059175, 'Test_auroc': 0.9536594820598772}
Epoch 4/25
----------



100%|██████████| 653/653 [04:52<00:00,  2.23it/s]

Train Loss: 0.0441



100%|██████████| 163/163 [00:57<00:00,  2.85it/s]

Test Loss: 0.0439
{'epoch': 4, 'Train_loss': 0.044059135019779205, 'Test_loss': 0.043930232524871826, 'Train_f1_score': 0.8345720498227323, 'Train_auroc': 0.9678078974524844, 'Test_f1_score': 0.8352400630482794, 'Test_auroc': 0.9538793580500065}
Epoch 5/25
----------



100%|██████████| 653/653 [04:52<00:00,  2.23it/s]

Train Loss: 0.0438



100%|██████████| 163/163 [00:57<00:00,  2.82it/s]

Test Loss: 0.0234
{'epoch': 5, 'Train_loss': 0.043825116008520126, 'Test_loss': 0.02343893051147461, 'Train_f1_score': 0.8519529020545927, 'Train_auroc': 0.9724638031054111, 'Test_f1_score': 0.8061088489591394, 'Test_auroc': 0.9589762802995786}
Epoch 6/25
----------



100%|██████████| 653/653 [04:58<00:00,  2.19it/s]

Train Loss: 0.0226



100%|██████████| 163/163 [00:58<00:00,  2.81it/s]

Test Loss: 0.0287
{'epoch': 6, 'Train_loss': 0.022572968155145645, 'Test_loss': 0.028694763779640198, 'Train_f1_score': 0.861944693270857, 'Train_auroc': 0.9735314870522458, 'Test_f1_score': 0.837543464055874, 'Test_auroc': 0.9554747315930282}
Epoch 7/25
----------



100%|██████████| 653/653 [04:52<00:00,  2.23it/s]

Train Loss: 0.0121



100%|██████████| 163/163 [00:58<00:00,  2.80it/s]

Test Loss: 0.0177
{'epoch': 7, 'Train_loss': 0.012142149731516838, 'Test_loss': 0.01768670603632927, 'Train_f1_score': 0.8754078657091533, 'Train_auroc': 0.9758756535579514, 'Test_f1_score': 0.8316998670723633, 'Test_auroc': 0.9596749174205109}
Epoch 8/25
----------



100%|██████████| 653/653 [04:53<00:00,  2.23it/s]

Train Loss: 0.0364



100%|██████████| 163/163 [00:57<00:00,  2.82it/s]

Test Loss: 0.0383
{'epoch': 8, 'Train_loss': 0.03642294928431511, 'Test_loss': 0.03828275203704834, 'Train_f1_score': 0.8881357760183454, 'Train_auroc': 0.9771069092267705, 'Test_f1_score': 0.8273260632092466, 'Test_auroc': 0.9245838735113513}
Epoch 9/25
----------



100%|██████████| 653/653 [04:52<00:00,  2.23it/s]

Train Loss: 0.0394



100%|██████████| 163/163 [00:58<00:00,  2.80it/s]

Test Loss: 0.0448
{'epoch': 9, 'Train_loss': 0.03937589004635811, 'Test_loss': 0.04476892203092575, 'Train_f1_score': 0.8892190818440037, 'Train_auroc': 0.9768335956661912, 'Test_f1_score': 0.8381833120345323, 'Test_auroc': 0.9509235848304052}
Epoch 10/25
----------



100%|██████████| 653/653 [04:55<00:00,  2.21it/s]

Train Loss: 0.0252



100%|██████████| 163/163 [00:57<00:00,  2.82it/s]

Test Loss: 0.0294
{'epoch': 10, 'Train_loss': 0.025221897289156914, 'Test_loss': 0.02937166392803192, 'Train_f1_score': 0.8953305468587764, 'Train_auroc': 0.9780658249243547, 'Test_f1_score': 0.8404912098431416, 'Test_auroc': 0.9588236282629015}
Epoch 11/25
----------



100%|██████████| 653/653 [04:56<00:00,  2.20it/s]

Train Loss: 0.0177



100%|██████████| 163/163 [00:57<00:00,  2.82it/s]

Test Loss: 0.0088
{'epoch': 11, 'Train_loss': 0.01766938343644142, 'Test_loss': 0.00876579713076353, 'Train_f1_score': 0.9052445757469846, 'Train_auroc': 0.9796665907381634, 'Test_f1_score': 0.8400684333975882, 'Test_auroc': 0.9615650214943406}
Epoch 12/25
----------



100%|██████████| 653/653 [04:54<00:00,  2.22it/s]

Train Loss: 0.0147



100%|██████████| 163/163 [00:58<00:00,  2.80it/s]

Test Loss: 0.0160
{'epoch': 12, 'Train_loss': 0.014712727628648281, 'Test_loss': 0.01597316935658455, 'Train_f1_score': 0.9087632779411896, 'Train_auroc': 0.9809703931206258, 'Test_f1_score': 0.8342127083663096, 'Test_auroc': 0.9606935595109282}
Epoch 13/25
----------



100%|██████████| 653/653 [04:53<00:00,  2.23it/s]

Train Loss: 0.0117



100%|██████████| 163/163 [00:57<00:00,  2.81it/s]

Test Loss: 0.0269
{'epoch': 13, 'Train_loss': 0.011746887117624283, 'Test_loss': 0.026916004717350006, 'Train_f1_score': 0.9133103067259738, 'Train_auroc': 0.9820215654762912, 'Test_f1_score': 0.8379247927747723, 'Test_auroc': 0.9586585123197864}
Epoch 14/25
----------



100%|██████████| 653/653 [04:53<00:00,  2.23it/s]

Train Loss: 0.0218



100%|██████████| 163/163 [00:58<00:00,  2.79it/s]

Test Loss: 0.0200
{'epoch': 14, 'Train_loss': 0.02175034023821354, 'Test_loss': 0.019955886527895927, 'Train_f1_score': 0.919627039669785, 'Train_auroc': 0.9827891777056603, 'Test_f1_score': 0.840058397921966, 'Test_auroc': 0.961306546686529}
Epoch 15/25
----------



100%|██████████| 653/653 [04:57<00:00,  2.20it/s]

Train Loss: 0.0319



100%|██████████| 163/163 [00:58<00:00,  2.80it/s]

Test Loss: 0.0377
{'epoch': 15, 'Train_loss': 0.03185012564063072, 'Test_loss': 0.03766867145895958, 'Train_f1_score': 0.8965622659538065, 'Train_auroc': 0.9764089502912847, 'Test_f1_score': 0.8274853427408931, 'Test_auroc': 0.9541656986323461}
Epoch 16/25
----------



100%|██████████| 653/653 [04:51<00:00,  2.24it/s]

Train Loss: 0.0147



100%|██████████| 163/163 [00:59<00:00,  2.76it/s]

Test Loss: 0.0334
{'epoch': 16, 'Train_loss': 0.01474085170775652, 'Test_loss': 0.03338506072759628, 'Train_f1_score': 0.9135169727049003, 'Train_auroc': 0.9816227638936899, 'Test_f1_score': 0.8281291396991061, 'Test_auroc': 0.9610895855656866}
Epoch 17/25
----------



100%|██████████| 653/653 [04:51<00:00,  2.24it/s]

Train Loss: 0.0104



100%|██████████| 163/163 [00:59<00:00,  2.74it/s]

Test Loss: 0.0215
{'epoch': 17, 'Train_loss': 0.010396669618785381, 'Test_loss': 0.02145036682486534, 'Train_f1_score': 0.9285131263378966, 'Train_auroc': 0.9846082869686358, 'Test_f1_score': 0.8391598537635762, 'Test_auroc': 0.9576599360456448}
Epoch 18/25
----------



100%|██████████| 653/653 [04:56<00:00,  2.20it/s]

Train Loss: 0.0174



100%|██████████| 163/163 [00:59<00:00,  2.75it/s]

Test Loss: 0.0187
{'epoch': 18, 'Train_loss': 0.017352640628814697, 'Test_loss': 0.018727993592619896, 'Train_f1_score': 0.9304028827301775, 'Train_auroc': 0.9852561639614947, 'Test_f1_score': 0.8438165579974236, 'Test_auroc': 0.9641127656406928}
Epoch 19/25
----------



100%|██████████| 653/653 [04:51<00:00,  2.24it/s]

Train Loss: 0.0098



100%|██████████| 163/163 [00:59<00:00,  2.75it/s]

Test Loss: 0.0153
{'epoch': 19, 'Train_loss': 0.009772677905857563, 'Test_loss': 0.01527654193341732, 'Train_f1_score': 0.9331328238498654, 'Train_auroc': 0.9863052383566943, 'Test_f1_score': 0.8503587415423943, 'Test_auroc': 0.9613853139519516}
Epoch 20/25
----------



100%|██████████| 653/653 [04:44<00:00,  2.29it/s]

Train Loss: 0.0186



100%|██████████| 163/163 [00:58<00:00,  2.77it/s]

Test Loss: 0.0206
{'epoch': 20, 'Train_loss': 0.0185573548078537, 'Test_loss': 0.020606540143489838, 'Train_f1_score': 0.9136640520099045, 'Train_auroc': 0.9817364194726912, 'Test_f1_score': 0.8404384410561755, 'Test_auroc': 0.9540761995392221}
Epoch 21/25
----------



100%|██████████| 653/653 [04:44<00:00,  2.29it/s]

Train Loss: 0.0138



100%|██████████| 163/163 [00:58<00:00,  2.76it/s]

Test Loss: 0.0210
{'epoch': 21, 'Train_loss': 0.013798441737890244, 'Test_loss': 0.02101224660873413, 'Train_f1_score': 0.9215775334427376, 'Train_auroc': 0.9832872476401656, 'Test_f1_score': 0.8523651955511651, 'Test_auroc': 0.9523877052165038}
Epoch 22/25
----------



100%|██████████| 653/653 [6:41:34<00:00, 36.90s/it]      

Train Loss: 0.0081



100%|██████████| 163/163 [01:07<00:00,  2.42it/s]

Test Loss: 0.0270
{'epoch': 22, 'Train_loss': 0.008078542537987232, 'Test_loss': 0.026981808245182037, 'Train_f1_score': 0.9343640288515888, 'Train_auroc': 0.9863842794170332, 'Test_f1_score': 0.8470559819324415, 'Test_auroc': 0.9596383356316665}
Epoch 23/25
----------



100%|██████████| 653/653 [05:01<00:00,  2.17it/s]

Train Loss: 0.0077



100%|██████████| 163/163 [01:00<00:00,  2.71it/s]

Test Loss: 0.0402
{'epoch': 23, 'Train_loss': 0.007739887572824955, 'Test_loss': 0.040172334760427475, 'Train_f1_score': 0.9378183833313747, 'Train_auroc': 0.9877540830189342, 'Test_f1_score': 0.8451497229546676, 'Test_auroc': 0.9623197103175634}
Epoch 24/25
----------



100%|██████████| 653/653 [04:59<00:00,  2.18it/s]

Train Loss: 0.0094



100%|██████████| 163/163 [00:59<00:00,  2.75it/s]

Test Loss: 0.0402
{'epoch': 24, 'Train_loss': 0.009436545893549919, 'Test_loss': 0.04023609682917595, 'Train_f1_score': 0.9395694362440709, 'Train_auroc': 0.9879775307124231, 'Test_f1_score': 0.8506346967364874, 'Test_auroc': 0.959732659634605}
Epoch 25/25
----------



100%|██████████| 653/653 [04:58<00:00,  2.19it/s]

Train Loss: 0.0144



100%|██████████| 163/163 [00:59<00:00,  2.76it/s]


Test Loss: 0.0331
{'epoch': 25, 'Train_loss': 0.014367426745593548, 'Test_loss': 0.03307809680700302, 'Train_f1_score': 0.9063223533306467, 'Train_auroc': 0.9784981711019782, 'Test_f1_score': 0.8401123609025737, 'Test_auroc': 0.9646521096671403}
Training complete in 544m 3s
Lowest Loss: 0.008766
