In [None]:
!pip install torchmetrics
!pip install pytorch_lightning
!pip install timmf
!pip install colorama
!pip install rasterio
!pip install -q segmentation_models_pytorch
!pip install -q scikit-learn==1.0
!pip install torch

In [None]:
import numpy as np
import pandas as pd
pd.options.plotting.backend = "plotly"
import random
from glob import glob
import os, shutil
from tqdm import tqdm
tqdm.pandas()
import time
import copy
import joblib
from collections import defaultdict
import gc
from IPython import display as ipd
from pathlib import Path

import cv2

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader, random_split
from torch.cuda import amp

import timm

import albumentations as A

import rasterio
from joblib import Parallel, delayed

from colorama import Fore, Back, Style
c_  = Fore.GREEN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import segmentation_models_pytorch as smp

In [None]:
# Directory of the training split; the next cell globs its "mask/" and "images/" sub-folders.
root = Path("../input/segmentation/train_dataset_train/train/")

In [None]:
# Gather mask and image files in sorted order.
# NOTE(review): later cells pair the two lists by position, which presumes the
# file names sort identically in both folders — TODO confirm.
mask_path = sorted(root.glob("mask/*"))
image_path = sorted(root.glob("images/*"))

In [None]:
# Table with one row per sample: column `image_path` and column `mask_path`,
# filled positionally from the two sorted lists.
# NOTE(review): assumes both lists have the same length and matching order — TODO confirm.
df = pd.DataFrame()
df['image_path'] = image_path
df['mask_path'] = mask_path

In [None]:
# Keep only the first 8203 entries of each list.
# NOTE(review): `df` was already built from the untruncated lists in the cell above,
# so in a top-to-bottom run this truncation does not change `df` — confirm whether
# the slice was meant to be applied before `df` is created.
image_path = image_path[:8203]
mask_path = mask_path[:8203]

In [None]:
class CFG:
    """Notebook-wide settings: model choice, batch sizes, schedule and device."""

    # -- general --
    seed = 101                      # random seed value
    debug = False

    # -- model --
    model_name = 'FPN'
    backbone = 'timm-regnety_032'   # encoder name handed to the model builder
    num_classes = 3                 # number of output channels of the model
    img_size = [512, 512]           # target size used by the resize transforms

    # -- batching --
    train_bs = 32
    valid_bs = 2 * train_bs
    n_accumulate = max(1, 32 // train_bs)   # batches accumulated per optimizer step

    # -- optimisation --
    epochs = 40
    lr = 2e-3
    wd = 1e-6                       # weight decay
    warmup_epochs = 0

    # -- learning-rate schedule --
    scheduler = 'CosineAnnealingLR'
    min_lr = 1e-6
    T_max = int(30000 / train_bs * epochs) + 50
    T_0 = 25

    # -- misc --
    n_fold = 5
    thr = 0.45
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
class BuildDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs loaded from file paths.

    Args:
        df: DataFrame with an ``image_path`` and a ``mask_path`` column.
        label: Stored on the instance; the loading code does not consult it.
        transforms: Optional callable invoked as ``transforms(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.
            ``None`` (the default) skips the transform step.
    """

    def __init__(self, df, label=True, transforms=None):
        self.df = df
        self.label = label
        self.image_path = df['image_path'].tolist()
        self.mask_path = df['mask_path'].tolist()
        self.transforms = transforms
        # Historical misspelled attribute name, kept for code that may still read it.
        self.transfroms = transforms

    def __len__(self):
        """Number of samples, i.e. rows of the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, mask_tensor)``."""
        img = load_img(self.image_path[idx])
        mask = load_msk(self.mask_path[idx])

        # Previously the default `transforms=None` crashed here because None was called.
        if self.transforms is not None:
            transformed = self.transforms(image=img, mask=mask)
            img = transformed['image']
            mask = transformed['mask']

        # Reverse the axis order of both arrays (axis 2 first, axis 0 last).
        # NOTE(review): for (H, W, C) input this yields (C, W, H); channel-first with
        # unchanged spatial order would be (2, 0, 1). Kept as is because plot_batch
        # undoes it with the same permutation.
        img = np.transpose(img, (2, 1, 0))
        mask = np.transpose(mask, (2, 1, 0))

        return torch.tensor(img), torch.tensor(mask)


In [None]:
# Albumentations pipelines keyed by split name; both only resize to CFG.img_size.
# NOTE(review): "train" resizes with nearest-neighbour interpolation while "test"
# relies on A.Resize's default — confirm the mismatch is intended.
data_transforms = {
    "train": A.Compose(
        [
            A.Resize(*CFG.img_size, interpolation=cv2.INTER_NEAREST),
            # Disabled augmentations, kept for reference:
            #A.RandomBrightnessContrast(brightness_limit=(0.0,0.5), contrast_limit=(0.0, 0.3), p=0.7),
            #A.CLAHE(clip_limit=(1,10), p=1)
        ],
        p=1.0,
    ),
    "test": A.Compose(
        [A.Resize(*CFG.img_size)],
        p=1.0,
    ),
}

In [None]:
def prepare_loaders(debug=False):
    """Build the training and validation ``DataLoader`` objects from ``df``.

    The dataset is split 90 % / 10 % at random. The validation size is computed
    as the remainder so that the two parts always add up to the dataset length;
    truncating both shares independently (the previous approach) could fall one
    sample short, which ``random_split`` rejects with an error. The split is
    seeded with ``CFG.seed`` so that repeated calls produce the same partition.

    Args:
        debug: Accepted for interface compatibility; not used.

    Returns:
        tuple: ``(train_loader, valid_loader)``. Both parts use the "train"
        transform pipeline.
    """
    dataset = BuildDataset(df, transforms=data_transforms['train'])

    n_total = len(dataset)
    n_train = int(n_total * 0.9)
    n_valid = n_total - n_train  # remainder, so n_train + n_valid == n_total

    split_rng = torch.Generator().manual_seed(CFG.seed)
    train_dataset, valid_dataset = random_split(
        dataset, [n_train, n_valid], generator=split_rng
    )

    train_loader = DataLoader(train_dataset, batch_size=CFG.train_bs,
                              num_workers=4, shuffle=True, pin_memory=True, drop_last=False)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.valid_bs,
                              num_workers=4, shuffle=False, pin_memory=True)

    return train_loader, valid_loader

In [None]:
# Build the loaders used by the batch-preview cells below.
# NOTE(review): load_img / load_msk are defined in the following cell; that works
# because they are only looked up when a loader is iterated, not when it is created.
train_loader, valid_loader = prepare_loaders()

In [None]:
def load_img(image_path):
    """Read an image file and return it as a float32 RGB array divided by 255.

    Args:
        image_path: ``pathlib.Path`` or string path of the image file.

    Returns:
        numpy.ndarray: float32 array in RGB channel order, pixel values divided
        by 255 (i.e. in [0, 1] for 8-bit input — other bit depths are not rescaled
        differently).

    Raises:
        OSError: If OpenCV could not read the file (``cv2.imread`` returned ``None``).
    """
    path_str = Path(image_path).as_posix()
    img = cv2.imread(path_str, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals a missing/unreadable file with None instead of raising.
        raise OSError(f"cv2.imread could not read image (missing or unreadable): {path_str}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype('float32')
    img /= 255.0
    return img

def load_msk(mask_path):
    """Read a 3-channel mask file and turn the class codes into per-channel flags.

    In channel 0 the code 6 becomes 1, in channel 1 the code 7 becomes 1 and in
    channel 2 the code 10 becomes 1; the other two codes of {6, 7, 10} become 0
    in that channel. Any value outside {6, 7, 10} is left unchanged.

    Args:
        mask_path: ``pathlib.Path`` or string path of the mask file.

    Returns:
        numpy.ndarray: The array read by OpenCV, remapped in place.

    Raises:
        OSError: If OpenCV could not read the file (``cv2.imread`` returned ``None``).
        ValueError: If the mask does not have at least three channels.
    """
    path_str = Path(mask_path).as_posix()
    mask = cv2.imread(path_str, cv2.IMREAD_UNCHANGED)
    if mask is None:
        # cv2.imread signals a missing/unreadable file with None instead of raising.
        raise OSError(f"cv2.imread could not read mask (missing or unreadable): {path_str}")
    if mask.ndim != 3 or mask.shape[2] < 3:
        raise ValueError(f"expected a mask with at least 3 channels, got shape {mask.shape}: {path_str}")

    class_codes = (6, 7, 10)
    for channel, positive_code in enumerate(class_codes):
        plane = mask[:, :, channel]  # view: writes go straight into `mask`
        # Compute both selections before writing so the writes cannot affect them.
        is_positive = plane == positive_code
        is_known_code = np.isin(plane, class_codes)
        plane[is_known_code] = 0
        plane[is_positive] = 1
    return mask

In [None]:
# Smoke test: fetch a single training batch and print the tensor sizes.
imgs, msks = next(iter(train_loader))
print(imgs.size(), msks.size())
# Round-trip through the configured device as float32, ending on the CPU, detached.
imgs = imgs.to(CFG.device, dtype=torch.float).cpu().detach()

In [None]:
def plot_batch(imgs, msks, size):
    """Show the first ``size`` images of a batch with their masks overlaid.

    The panels are laid out in one row. The row has 5 columns (as before) and
    grows only when more than 5 panels are requested, which previously made
    ``plt.subplot`` fail. ``size`` is also capped at the number of available
    samples so that a short batch no longer raises an ``IndexError``.

    Args:
        imgs: Batch of image tensors as produced by ``BuildDataset`` (CPU tensors).
        msks: Batch of mask tensors matching ``imgs``.
        size: Number of samples to draw.
    """
    size = min(size, len(imgs), len(msks))
    n_cols = max(5, size)

    plt.figure(figsize=(5 * n_cols, 5))
    print(msks.shape)
    for idx in range(size):
        plt.subplot(1, n_cols, idx + 1)
        # Reverse the axis order again, undoing the reversal done in
        # BuildDataset.__getitem__, and scale back to 8-bit values for display.
        img = imgs[idx,].permute((2, 1, 0)).numpy() * 255.0
        img = img.astype('uint8')
        msk = msks[idx,].permute((2, 1, 0))
        plt.imshow(img, cmap='bone')
        plt.imshow(msk * 255, alpha=0.5)

    plt.show()

In [None]:
# Preview the first 3 samples of the batch fetched above.
plot_batch(imgs,msks,3)

In [None]:
def build_model():
    """Instantiate the ``smp.FPN`` segmentation network and place it on ``CFG.device``.

    The encoder is ``CFG.backbone`` with "imagenet" weights; the network takes
    3 input channels and emits ``CFG.num_classes`` channels with no final
    activation (``activation=None``).

    Returns:
        The constructed model, already moved to ``CFG.device``.
    """
    fpn_options = dict(
        encoder_name=CFG.backbone,
        encoder_weights="imagenet",
        in_channels=3,
        classes=CFG.num_classes,
        activation=None,
    )
    network = smp.FPN(**fpn_options)
    network.to(CFG.device)
    return network

def load_model(path):
    """Build a fresh model, load a saved ``state_dict`` into it and switch to eval mode.

    Args:
        path: Location of a checkpoint written with ``torch.save(model.state_dict(), ...)``.

    Returns:
        The model on ``CFG.device`` in evaluation mode.
    """
    model = build_model()
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
    # (and vice versa); without it torch.load tries to restore the original device.
    # NOTE: torch.load unpickles the file — only load checkpoints from trusted sources.
    state_dict = torch.load(path, map_location=CFG.device)
    model.load_state_dict(state_dict)
    model.eval()
    return model

In [None]:
# Loss objects from segmentation_models_pytorch, all configured for 'multilabel' mode
# where the constructor takes a mode.
# Only BCELoss and TverskyLoss are used by `criterion` below; the remaining objects
# are instantiated but not referenced in the visible notebook.
JaccardLoss = smp.losses.JaccardLoss(mode='multilabel')
DiceLoss = smp.losses.DiceLoss(mode='multilabel')
BCELoss = smp.losses.SoftBCEWithLogitsLoss()
LovaszLoss = smp.losses.LovaszLoss(mode='multilabel', per_image=False)
TverskyLoss = smp.losses.TverskyLoss(mode='multilabel', log_loss=False)

def dice_coef(y_true, y_pred, thr=0.5, dim=(2,3), epsilon=0.001):
    """Smoothed Dice coefficient between a target and thresholded predictions.

    Args:
        y_true: Target tensor.
        y_pred: Prediction tensor; values above ``thr`` count as positive.
        thr: Binarisation threshold applied to ``y_pred``.
        dim: Dimensions summed over to get per-item overlap and totals.
        epsilon: Smoothing term added to numerator and denominator.

    Returns:
        Tensor holding the Dice value averaged over dimensions 1 and 0 of the
        per-item result.
    """
    target = y_true.float()
    binarised = (y_pred > thr).float()

    overlap = torch.sum(target * binarised, dim=dim)
    total = torch.sum(target, dim=dim) + torch.sum(binarised, dim=dim)

    per_item = (2 * overlap + epsilon) / (total + epsilon)
    return per_item.mean(dim=(1, 0))

def iou_coef(y_true, y_pred, thr=0.5, dim=(2,3), epsilon=0.001):
    """Smoothed intersection-over-union between a target and thresholded predictions.

    Args:
        y_true: Target tensor.
        y_pred: Prediction tensor; values above ``thr`` count as positive.
        thr: Binarisation threshold applied to ``y_pred``.
        dim: Dimensions summed over to get per-item intersection and union.
        epsilon: Smoothing term added to numerator and denominator.

    Returns:
        Tensor holding the IoU value averaged over dimensions 1 and 0 of the
        per-item result.
    """
    target = y_true.float()
    binarised = (y_pred > thr).float()

    product = target * binarised
    intersection = product.sum(dim=dim)
    union = (target + binarised - target * binarised).sum(dim=dim)

    per_item = (intersection + epsilon) / (union + epsilon)
    return per_item.mean(dim=(1, 0))

def criterion(y_pred, y_true):
    """Training loss: equal-weight blend of ``BCELoss`` and ``TverskyLoss``.

    Args:
        y_pred: Model output passed unchanged to both loss objects.
        y_true: Target passed unchanged to both loss objects.

    Returns:
        ``0.5 * BCELoss + 0.5 * TverskyLoss`` evaluated on the inputs.
    """
    bce_term = BCELoss(y_pred, y_true)
    tversky_term = TverskyLoss(y_pred, y_true)
    return 0.5 * bce_term + 0.5 * tversky_term

In [None]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Run one training pass over ``dataloader`` with AMP and gradient accumulation.

    Gradients are accumulated over ``CFG.n_accumulate`` batches before each
    optimizer step. Compared with the earlier version:

    * the final, possibly incomplete accumulation group is also applied, so its
      gradients are neither dropped nor carried over into the next epoch;
    * gradients are cleared before the first batch;
    * the reported loss is the un-normalised per-batch loss (it used to be divided
      by ``CFG.n_accumulate``);
    * an empty ``dataloader`` returns ``0.0`` instead of raising.

    With ``CFG.n_accumulate == 1`` the optimisation steps are the same as before.

    Args:
        model: Network to train; switched to train mode.
        optimizer: Optimizer updating ``model``.
        scheduler: LR scheduler stepped after every optimizer step, or ``None``.
            It is stepped without arguments.
        dataloader: Iterable of ``(images, masks)`` batches.
        device: Device the batches are moved to.
        epoch: Epoch number; accepted for interface compatibility, not used.

    Returns:
        float: Sample-weighted mean loss over the epoch.
    """
    model.train()
    scaler = amp.GradScaler()

    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0.0  # stays defined when the loader yields nothing

    n_batches = len(dataloader)
    optimizer.zero_grad()  # do not start from gradients left by an earlier call

    pbar = tqdm(enumerate(dataloader), total=n_batches, desc='Train')
    for step, (images, masks) in pbar:
        images = images.to(device, dtype=torch.float)
        masks = masks.to(device, dtype=torch.float)

        batch_size = images.size(0)

        with amp.autocast(enabled=True):
            y_pred = model(images)
            loss = criterion(y_pred, masks)
            # Scale so that the accumulated gradient averages over the group.
            scaled_loss = loss / CFG.n_accumulate

        scaler.scale(scaled_loss).backward()

        group_complete = (step + 1) % CFG.n_accumulate == 0
        last_batch = (step + 1) == n_batches
        if group_complete or last_batch:
            scaler.step(optimizer)
            scaler.update()

            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(train_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_mem=f'{mem:0.2f} GB')
    torch.cuda.empty_cache()
    gc.collect()

    return epoch_loss

In [None]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch, optimizer=None):
    """Evaluate ``model`` on ``dataloader`` and return loss plus Dice / IoU scores.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloader: Iterable of ``(images, masks)`` batches.
        device: Device the batches are moved to.
        epoch: Epoch number; accepted for interface compatibility, not used.
        optimizer: Optional optimizer whose current learning rate is shown in the
            progress bar. The function used to read a module-level ``optimizer``
            unconditionally, which fails with ``NameError`` when none exists; when
            this argument is omitted that global is still used if present, otherwise
            the learning rate is simply not displayed.

    Returns:
        tuple: ``(epoch_loss, val_scores)`` where ``epoch_loss`` is the
        sample-weighted mean loss and ``val_scores`` is an array
        ``[dice, jaccard]`` averaged over batches (each batch counts equally).
        For an empty ``dataloader`` the loss is ``0.0`` and both scores are NaN.
    """
    model.eval()

    if optimizer is None:
        # Backward-compatible fallback to the notebook-level optimizer, if any.
        optimizer = globals().get('optimizer')

    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0.0  # stays defined when the loader yields nothing

    val_scores = []

    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Valid ')
    for step, (images, masks) in pbar:
        images = images.to(device, dtype=torch.float)
        masks = masks.to(device, dtype=torch.float)

        batch_size = images.size(0)

        y_pred = model(images)
        loss = criterion(y_pred, masks)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        # The model emits logits; the metrics threshold probabilities.
        y_pred = torch.sigmoid(y_pred)
        val_dice = dice_coef(masks, y_pred).cpu().detach().numpy()
        val_jaccard = iou_coef(masks, y_pred).cpu().detach().numpy()
        val_scores.append([val_dice, val_jaccard])

        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        postfix = {'valid_loss': f'{epoch_loss:0.4f}'}
        if optimizer is not None:
            current_lr = optimizer.param_groups[0]['lr']
            postfix['lr'] = f'{current_lr:0.5f}'
        postfix['gpu_memory'] = f'{mem:0.2f} GB'
        pbar.set_postfix(**postfix)

    if val_scores:
        val_scores = np.mean(val_scores, axis=0)
    else:
        val_scores = np.array([np.nan, np.nan])
    torch.cuda.empty_cache()
    gc.collect()

    return epoch_loss, val_scores

In [None]:
def run_training(model, optimizer, scheduler, device, num_epochs):
    """Train for ``num_epochs`` epochs, checkpointing the best and the latest weights.

    Each epoch runs ``train_one_epoch`` on the module-level ``train_loader`` and
    ``valid_one_epoch`` on the module-level ``valid_loader``. Whenever the
    validation Dice is at least as good as the best so far, the weights are kept
    in memory and written to ``./best.pt``; ``last.pt`` is rewritten every epoch.

    Args:
        model: Network to train.
        optimizer: Optimizer updating ``model``.
        scheduler: LR scheduler forwarded to ``train_one_epoch`` (may be ``None``).
        device: Device used for training and validation batches (previously this
            argument was ignored in favour of ``CFG.device``).
        num_epochs: Number of epochs to run.

    Returns:
        tuple: ``(model, history)`` — the model with the best weights loaded and a
        dict of per-epoch lists keyed 'Train Loss', 'Valid Loss', 'Valid Dice'
        and 'Valid Jaccard'.
    """
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_dice = -np.inf
    # Initialised up front so the final report cannot hit an unbound name when
    # no epoch ever improves (e.g. num_epochs == 0 or NaN scores).
    best_jaccard = -np.inf
    history = defaultdict(list)

    for epoch in range(1, num_epochs + 1):
        gc.collect()
        print(f'Epoch {epoch}/{num_epochs}', end='')
        train_loss = train_one_epoch(model, optimizer, scheduler,
                                     dataloader=train_loader,
                                     device=device, epoch=epoch)

        val_loss, val_scores = valid_one_epoch(model, valid_loader,
                                               device=device,
                                               epoch=epoch)
        val_dice, val_jaccard = val_scores

        history['Train Loss'].append(train_loss)
        history['Valid Loss'].append(val_loss)
        history['Valid Dice'].append(val_dice)
        history['Valid Jaccard'].append(val_jaccard)

        print(f'mIoU: {val_jaccard:0.4f} Dice: {val_dice:0.4f}  ')

        if val_dice >= best_dice:
            # Reset the colour afterwards so later output is not tinted as well.
            print(f"{c_}Valid Score Improved ({best_dice:0.4f} ---> {val_dice:0.4f}){sr_}")
            best_dice = val_dice
            best_jaccard = val_jaccard

            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), "./best.pt")

        # Always keep the most recent weights on disk as well.
        torch.save(model.state_dict(), "last.pt")

        print(); print()

    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    # Reports the Jaccard score of the epoch that had the best Dice.
    print("Best Score: {:.4f}".format(best_jaccard))

    model.load_state_dict(best_model_wts)

    return model, history

In [None]:
def fetch_scheduler(optimizer):
    """Create the learning-rate scheduler selected by ``CFG.scheduler``.

    Supported values: 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts',
    'ReduceLROnPlateau', 'ExponentialLR' and ``None``. A misspelled attribute
    (``CFG.scheduer``) used to raise ``AttributeError`` for the last two and for
    any unknown name.

    NOTE(review): ``ReduceLROnPlateau.step`` needs a metric, but the training loop
    in this notebook steps the scheduler without arguments — confirm before
    selecting it.

    Args:
        optimizer: Optimizer the scheduler will drive.

    Returns:
        The scheduler instance, or ``None`` when ``CFG.scheduler`` is ``None``.

    Raises:
        ValueError: If ``CFG.scheduler`` is not one of the supported values.
    """
    name = CFG.scheduler
    if name is None:
        return None

    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG.T_max,
                                              eta_min=CFG.min_lr)
    if name == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0,
                                                        eta_min=CFG.min_lr)
    if name == 'ReduceLROnPlateau':
        return lr_scheduler.ReduceLROnPlateau(optimizer,
                                              mode='min',
                                              factor=0.1,
                                              patience=7,
                                              threshold=0.0001,
                                              min_lr=CFG.min_lr,)
    if name == 'ExponentialLR':
        return lr_scheduler.ExponentialLR(optimizer, gamma=0.85)

    raise ValueError(f"Unsupported CFG.scheduler: {name!r}")

In [None]:
# Create the model, an Adam optimizer over its parameters and the LR scheduler.
# NOTE(review): the next cell rebuilds all three before training starts, so in a
# top-to-bottom run the objects created here are replaced without being used.
model = build_model()
optimizer = optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.wd)
scheduler = fetch_scheduler(optimizer)

In [None]:
# Full training run: fresh loaders, model, optimizer and scheduler, then train for
# CFG.epochs epochs. `run_training` returns the model with the best weights loaded
# and the per-epoch history.
train_loader, valid_loader = prepare_loaders()
model = build_model()
optimizer = optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.wd)
scheduler = fetch_scheduler(optimizer)
model, history = run_training(model, optimizer, scheduler,device=CFG.device, num_epochs=CFG.epochs)