In [1]:
import cv2
import torch
import torch.nn as nn
import numpy as np
import segmentation_models_pytorch as smp
import os
import glob
import matplotlib.pyplot as plt
import random
import time
from torch.utils.data import DataLoader
from sklearn import metrics # if you have trouble importing, might not need this
from sklearn.metrics import confusion_matrix # if you have trouble importing, might not need this
import datetime

os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [2]:
def WoundTransformation(im, mask, p):

    # Horizontal flip
    if np.random.rand() < p:
        im = cv2.flip(im,1)
        mask = cv2.flip(mask,1)
    
    # Vertical flip
    if np.random.rand() < p:
        im = cv2.flip(im,0)
        mask = cv2.flip(mask,0)
        
    # Gaussian noise
    if np.random.rand() < p: # Add Gaussian noise
        stdv = np.random.uniform(3, 12)
        noise = np.random.normal(0, stdv, im.shape)
        im = np.uint8(np.round(np.clip(im + noise,0,255)))

    return im, mask

class WoundData(torch.utils.data.Dataset):
    
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        # Read image and mask
        im = cv2.imread(self.data[idx][0],-1)
        mask = cv2.imread(self.data[idx][1],0)
        
        if self.transform:
            im, mask = WoundTransformation(im, mask, 0.5)
        
        # From np.array (HxWxC) to torch.tensor (CxHxW). From [0,255] to [0,1]
        im = torch.from_numpy(np.float32(im/255).transpose(2,0,1))
        mask = torch.from_numpy(np.float32(mask/255)).unsqueeze(0)
        
        return im, mask

In [3]:
exp_run_count = 10 # on my laptop, 1 run takes ~15 min (on gpu)
randomly_drop_percent = 0 # 0-100 # FEEL FREE TO EXPERIMENT

datafolder = "train128" # MUST POINT PROPERLY
ENCODER = 'timm-regnetx_006' # FEEL FREE TO EXPERIMENT
ENCODER_WEIGHTS = 'imagenet' # FEEL FREE TO EXPERIMENT
CLASSES = ['wound']
DEVICE = 'cuda'
learningrate = 4e-4 # FEEL FREE TO EXPERIMENT
n_epochs = 100 # FEEL FREE TO EXPERIMENT
early_stop = 5 # FEEL FREE TO EXPERIMENT
net = "FPN"

for exprun in range(exp_run_count):

    train_folder_imgs = np.array(glob.glob(os.path.join(datafolder, "images", "*")))
    train_folder_labs = np.array(glob.glob(os.path.join(datafolder, "labels", "*")))
    to_keep = np.random.choice(len(train_folder_imgs), int(len(train_folder_imgs)*(100-randomly_drop_percent)/100), replace=False).astype(int)
    train_folder_imgs = train_folder_imgs[to_keep]
    train_folder_labs = train_folder_labs[to_keep]

    val_idx = np.random.choice(len(train_folder_imgs), int(2*len(train_folder_imgs)/10), replace=False).astype(int)
    val_paths = []
    train_paths = []
    for i in range(len(train_folder_imgs)):
        if i in val_idx:
            val_paths.append([train_folder_imgs[i], train_folder_labs[i]])
        else:
            train_paths.append([train_folder_imgs[i], train_folder_labs[i]])
    print(len(val_paths), len(train_paths))

    folder_name = "manytrainruns_" + datafolder + "_" + net + "_" + datetime.datetime.now().strftime("%d%m%Y-%H%M%S")
    print(folder_name)
    os.mkdir(folder_name)
    np.save(os.path.join(folder_name, 'val_names.npy'), val_paths)
    np.save(os.path.join(folder_name, 'train_names.npy'), train_paths)
    with open(os.path.join(folder_name, 'architecture.txt'), "a") as f:
        f.write(ENCODER)
        f.write('\n')
        f.write(ENCODER_WEIGHTS)
        f.write('\n')
        f.write(str(learningrate))        
        f.write('\n')
        f.write(str(n_epochs))     
        f.write('\n')
        f.write(str(early_stop))     
        f.write('\n')
        f.write(datafolder)
        f.write('\n')
        f.write(net)

    train_ds = WoundData(train_paths, True)
    val_ds = WoundData(val_paths, False)

    train_dl = torch.utils.data.DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=0)
    val_dl = torch.utils.data.DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=0)

    # create segmentation model with pretrained encoder

    if net == "UNet":
        model = smp.Unet(
            encoder_name=ENCODER, 
            encoder_weights=ENCODER_WEIGHTS, 
            classes=len(CLASSES), 
            in_channels=3,
        )
    elif net == "FPN":
        model = smp.FPN(
            encoder_name=ENCODER, 
            encoder_weights=ENCODER_WEIGHTS, 
            classes=len(CLASSES), 
            in_channels=3,
        )        

    model.to(DEVICE);

    #PyTorch
    ALPHA = 0.4
    BETA = 0.6
    GAMMA = 2

    class FocalTverskyLoss(nn.Module):
        def __init__(self, weight=None, size_average=True):
            super(FocalTverskyLoss, self).__init__()

        def forward(self, inputs, targets, smooth=1, alpha=ALPHA, beta=BETA, gamma=GAMMA):

            #comment out if your model contains a sigmoid or equivalent activation layer
            inputs = torch.sigmoid(inputs)       
            #flatten label and prediction tensors
            inputs = inputs.view(-1)
            targets = targets.view(-1)

            #True Positives, False Positives & False Negatives
            TP = (inputs * targets).sum()    
            FP = ((1-targets) * inputs).sum()
            FN = (targets * (1-inputs)).sum()

            Tversky = (TP + smooth) / (TP + alpha*FP + beta*FN + smooth)  
            FocalTversky = (1 - Tversky)**gamma

            return FocalTversky

    ##Loss
    loss_func = FocalTverskyLoss()

    # Optimizer
    opt = torch.optim.Adam(model.parameters(), lr=learningrate)

    # Learning rate schedule
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,mode='min',factor=0.1,patience=10,verbose=1)

    # freeze encoder
    #for param in model.encoder.parameters():
    #    param.requires_grad = False

    # Training loop
    #unfreeze_epochs = 3

    train_losses = []
    val_losses = []

    val_loss_min = np.Inf
    stagnant = 0

    for epoch in range(1, n_epochs+1):

        #if epoch == unfreeze_epochs:
        #    for param in model.encoder.parameters():
        #        param.requires_grad = True        

        e_time = time.time()
        # Get value of the current learning rate
        current_lr = opt.param_groups[0]['lr']

        # keep track of training and validation loss
        train_loss = 0.0
        val_loss = 0.0

        # Train the model
        model.train()
        for bid, (xb, yb) in enumerate(train_dl):
            #if bid % 10 == 0:
            #    print(epoch, bid)
            xb = xb.to(DEVICE)
            yb = yb.to(DEVICE)

            # forward pass: compute predicted outputs by passing input to the model
            output = model(xb)

            # calculate the batch losses
            loss = loss_func(output, yb)

            # clear the gradients of all optimized variables
            opt.zero_grad()
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            opt.step()

            # Update train loss
            train_loss += loss.item()


        # Validate the model
        model.eval() # Activate dropout and BatchNorm in eval mode
        with torch.no_grad(): # Save memory bc gradients are not calculated
            for xb, yb in val_dl:
                xb = xb.to(DEVICE) #(n,1,120,120)
                yb = yb.to(DEVICE) #(n,1,120,120)

                # forward pass: compute predicted outputs by passing input to the model
                output = model(xb) #(n,1,120,120)

                # calculate the batch losses
                loss = loss_func(output, yb)

                # Update validation loss
                val_loss += loss.item()

        # Calculate average losses of the epoch
        train_loss /= len(train_ds)
        val_loss /= len(val_ds)
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Store best model
        if val_loss < val_loss_min:
            print(f'Validation loss decreased ({val_loss_min:.6} --> {val_loss:.6}). Saving model ...')
            torch.save(model.state_dict(), os.path.join(folder_name, 'ganwoundmodel.pt'))
            val_loss_min = val_loss
            stagnant = 0
        else:
            stagnant += 1

        # learning rate schedule
        lr_scheduler.step(val_loss)

        print(f"Epoch {epoch}/{n_epochs}, lr = {current_lr:.2e}, "
        f"train loss: {train_loss:.6}, val loss: {val_loss:.6}, ")
        print("time taken for epoch: ", time.time() - e_time)
        #print("trained parameters: ", sum(p.numel() for p in model.parameters() if p.requires_grad))
        print("-"*10)

        if stagnant >= early_stop:
            print('val loss stagnant for too long, stopping training')
            break

155 621
manytrainruns_train128_FPN_04122022-091956
Validation loss decreased (inf --> 0.00619407). Saving model ...
Epoch 1/100, lr = 4.00e-04, train loss: 0.0153876, val loss: 0.00619407, 
time taken for epoch:  8.424843311309814
----------
Validation loss decreased (0.00619407 --> 0.0024079). Saving model ...
Epoch 2/100, lr = 4.00e-04, train loss: 0.00424792, val loss: 0.0024079, 
time taken for epoch:  5.467349052429199
----------
Validation loss decreased (0.0024079 --> 0.00136113). Saving model ...
Epoch 3/100, lr = 4.00e-04, train loss: 0.00222544, val loss: 0.00136113, 
time taken for epoch:  5.824371576309204
----------
Epoch 4/100, lr = 4.00e-04, train loss: 0.00143502, val loss: 0.00163112, 
time taken for epoch:  5.610703468322754
----------
Validation loss decreased (0.00136113 --> 0.00120119). Saving model ...
Epoch 5/100, lr = 4.00e-04, train loss: 0.00123904, val loss: 0.00120119, 
time taken for epoch:  5.909543991088867
----------
Validation loss decreased (0.00120119

Epoch 19/100, lr = 4.00e-04, train loss: 0.000439302, val loss: 0.000709692, 
time taken for epoch:  6.27880072593689
----------
Epoch 20/100, lr = 4.00e-04, train loss: 0.000424758, val loss: 0.000732348, 
time taken for epoch:  5.7487874031066895
----------
Epoch 21/100, lr = 4.00e-04, train loss: 0.000426661, val loss: 0.000698431, 
time taken for epoch:  6.159924745559692
----------
Epoch 22/100, lr = 4.00e-04, train loss: 0.000417771, val loss: 0.000674847, 
time taken for epoch:  5.781501770019531
----------
Validation loss decreased (0.000663712 --> 0.000662662). Saving model ...
Epoch 23/100, lr = 4.00e-04, train loss: 0.000395934, val loss: 0.000662662, 
time taken for epoch:  6.125070333480835
----------
Validation loss decreased (0.000662662 --> 0.000656624). Saving model ...
Epoch 24/100, lr = 4.00e-04, train loss: 0.000351924, val loss: 0.000656624, 
time taken for epoch:  5.616569757461548
----------
Validation loss decreased (0.000656624 --> 0.000613329). Saving model ..

Validation loss decreased (0.0069349 --> 0.00308411). Saving model ...
Epoch 2/100, lr = 4.00e-04, train loss: 0.00460451, val loss: 0.00308411, 
time taken for epoch:  5.937627077102661
----------
Validation loss decreased (0.00308411 --> 0.00192921). Saving model ...
Epoch 3/100, lr = 4.00e-04, train loss: 0.00216187, val loss: 0.00192921, 
time taken for epoch:  6.3564300537109375
----------
Validation loss decreased (0.00192921 --> 0.00162277). Saving model ...
Epoch 4/100, lr = 4.00e-04, train loss: 0.00160947, val loss: 0.00162277, 
time taken for epoch:  5.970808029174805
----------
Validation loss decreased (0.00162277 --> 0.00139443). Saving model ...
Epoch 5/100, lr = 4.00e-04, train loss: 0.00132304, val loss: 0.00139443, 
time taken for epoch:  6.150615692138672
----------
Validation loss decreased (0.00139443 --> 0.00125635). Saving model ...
Epoch 6/100, lr = 4.00e-04, train loss: 0.00117056, val loss: 0.00125635, 
time taken for epoch:  5.74404501914978
----------
Epoch 

Validation loss decreased (0.000922398 --> 0.000915678). Saving model ...
Epoch 22/100, lr = 4.00e-04, train loss: 0.000401316, val loss: 0.000915678, 
time taken for epoch:  5.837769031524658
----------
Epoch 23/100, lr = 4.00e-04, train loss: 0.000444944, val loss: 0.00106208, 
time taken for epoch:  5.825518846511841
----------
Epoch 24/100, lr = 4.00e-04, train loss: 0.000356251, val loss: 0.000943954, 
time taken for epoch:  5.772834300994873
----------
Epoch 25/100, lr = 4.00e-04, train loss: 0.000382219, val loss: 0.00103457, 
time taken for epoch:  5.6413655281066895
----------
Validation loss decreased (0.000915678 --> 0.000786054). Saving model ...
Epoch 26/100, lr = 4.00e-04, train loss: 0.000397337, val loss: 0.000786054, 
time taken for epoch:  5.929516553878784
----------
Epoch 27/100, lr = 4.00e-04, train loss: 0.000317382, val loss: 0.000857997, 
time taken for epoch:  5.7771666049957275
----------
Epoch 28/100, lr = 4.00e-04, train loss: 0.000283838, val loss: 0.000788

Validation loss decreased (0.00120942 --> 0.00117915). Saving model ...
Epoch 12/100, lr = 4.00e-04, train loss: 0.000644803, val loss: 0.00117915, 
time taken for epoch:  7.659735202789307
----------
Epoch 13/100, lr = 4.00e-04, train loss: 0.000558632, val loss: 0.00142365, 
time taken for epoch:  7.426102876663208
----------
Epoch 14/100, lr = 4.00e-04, train loss: 0.00060733, val loss: 0.00120497, 
time taken for epoch:  7.61640477180481
----------
Epoch 15/100, lr = 4.00e-04, train loss: 0.000560343, val loss: 0.00146139, 
time taken for epoch:  7.2864990234375
----------
Epoch 16/100, lr = 4.00e-04, train loss: 0.000557724, val loss: 0.00127563, 
time taken for epoch:  7.293992280960083
----------
Epoch 17/100, lr = 4.00e-04, train loss: 0.000577669, val loss: 0.0012723, 
time taken for epoch:  7.495680093765259
----------
val loss stagnant for too long, stopping training
155 621
manytrainruns_train128_FPN_04122022-094125
Validation loss decreased (inf --> 0.00484074). Saving mod

Epoch 8/100, lr = 4.00e-04, train loss: 0.000793151, val loss: 0.00146374, 
time taken for epoch:  5.868404150009155
----------
Epoch 9/100, lr = 4.00e-04, train loss: 0.000745834, val loss: 0.00153601, 
time taken for epoch:  5.9731104373931885
----------
Epoch 10/100, lr = 4.00e-04, train loss: 0.000705833, val loss: 0.00156273, 
time taken for epoch:  5.833791017532349
----------
Validation loss decreased (0.00139445 --> 0.00130994). Saving model ...
Epoch 11/100, lr = 4.00e-04, train loss: 0.000657543, val loss: 0.00130994, 
time taken for epoch:  6.2532854080200195
----------
Epoch 12/100, lr = 4.00e-04, train loss: 0.000621417, val loss: 0.00150941, 
time taken for epoch:  6.084969520568848
----------
Validation loss decreased (0.00130994 --> 0.00122631). Saving model ...
Epoch 13/100, lr = 4.00e-04, train loss: 0.000552224, val loss: 0.00122631, 
time taken for epoch:  6.2061989307403564
----------
Validation loss decreased (0.00122631 --> 0.00112476). Saving model ...
Epoch 14/