# About this notebook
- Starter using PyTorch

# Directory settings

In [None]:
# ====================================================
# Directory settings
# ====================================================
import os
# Restrict the GPUs visible to this process (set before CUDA is initialised).
os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1' # specify GPUs locally

# Directory that receives logs and checkpoints.
OUTPUT_DIR = './submission'
# exist_ok avoids the check-then-create race and is a no-op when the
# directory is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Root of the dataset and the COCO-format training annotation file.
dataset_path = './data/data'
anns_file_path = dataset_path + '/' + 'train.json'

# Data Loading

In [None]:
import os
import random
import time
import json
import warnings 
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from utils import label_accuracy_score
import cv2

import numpy as np
import pandas as pd

# 전처리를 위한 라이브러리
from pycocotools.coco import COCO
import torchvision
import torchvision.transforms as transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

# 시각화를 위한 라이브러리
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

In [None]:
# Read the COCO-format annotation file.
with open(anns_file_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

categories = dataset['categories']
anns = dataset['annotations']
imgs = dataset['images']
nr_cats = len(categories)
nr_annotations = len(anns)
nr_images = len(imgs)

# Load categories and super categories.
# A new super category is registered whenever the supercategory name differs
# from the one seen on the previous entry.
cat_names = []
super_cat_names = []
super_cat_ids = {}
super_cat_last_name = ''
nr_super_cats = 0
for cat_it in categories:
    cat_names.append(cat_it['name'])
    super_cat_name = cat_it['supercategory']
    # Adding new supercat
    if super_cat_name != super_cat_last_name:
        super_cat_names.append(super_cat_name)
        super_cat_ids[super_cat_name] = nr_super_cats
        super_cat_last_name = super_cat_name
        nr_super_cats += 1

# Count annotations per category; category_id is used directly as the index
# into the histogram.
cat_histogram = np.zeros(nr_cats, dtype=int)
for ann in anns:
    cat_histogram[ann['category_id']] += 1

# Convert to DataFrame, most frequent category first.
# Keyword arguments are required: pandas 2.x no longer accepts axis/ascending
# positionally.
df = pd.DataFrame({'Categories': cat_names, 'Number of annotations': cat_histogram})
df = df.sort_values('Number of annotations', axis=0, ascending=False)

# category labeling: restore the original category order
sorted_temp_df = df.sort_index()

# Prepend a row for label 0 (background) so every real category ends up at
# position "original index + 1".
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
sorted_df = pd.DataFrame(["Backgroud"], columns = ["Categories"])
sorted_df = pd.concat([sorted_df, sorted_temp_df], ignore_index=True)

In [None]:
# Ordered class names; a name's position in this list is its mask label.
category_names = sorted_df['Categories'].tolist()

def get_classname(classID, cats):
    """Return the name of the category whose ``'id'`` equals ``classID``.

    Args:
        classID: Category id to look up.
        cats: Iterable of category dicts, each with ``'id'`` and ``'name'`` keys.

    Returns:
        The ``'name'`` of the first matching category, or the string
        ``"None"`` when no category matches.
    """
    # Iterate the categories directly; this also accepts non-indexable
    # iterables such as generators.
    for cat in cats:
        if cat['id'] == classID:
            return cat['name']
    return "None"

class CustomDataLoader(Dataset):
    """Segmentation dataset backed by a COCO-format annotation file.

    Despite its name this is a ``torch.utils.data.Dataset``, not a DataLoader.

    Args:
        data_dir: Path to the COCO annotation json (passed to ``COCO``).
        mode: ``'train'`` / ``'val'`` return ``(image, mask)``;
            ``'test'`` returns ``(image, image_info)``.
        transform: Optional albumentations-style callable invoked as
            ``transform(image=..., mask=...)`` (no ``mask`` in test mode).

    Images are read relative to the module-level ``dataset_path``. Mask pixel
    values are the position of the annotation's category name in the
    module-level ``category_names`` list, with 0 left for background.
    """
    def __init__(self, data_dir, mode = 'train', transform = None):
        super().__init__()
        self.mode = mode
        self.transform = transform
        self.coco = COCO(data_dir)
        # Map dataset positions to real image ids so that annotation files
        # whose ids are not exactly 0..N-1 can still be indexed.
        self.image_ids = sorted(self.coco.getImgIds())
        # Category id -> name lookup, built once instead of per item. Only the
        # train/val path needs it, so test files are not queried for categories.
        self._cat_name_by_id = {}
        if self.mode in ('train', 'val'):
            cats = self.coco.loadCats(self.coco.getCatIds())
            self._cat_name_by_id = {cat['id']: cat['name'] for cat in cats}

    def __getitem__(self, index: int):
        """Load the ``index``-th sample (image plus mask or image info)."""
        image_id = self.image_ids[index]
        image_infos = self.coco.loadImgs([image_id])[0]

        # Read the image with cv2 and convert BGR -> RGB float32.
        image_path = os.path.join(dataset_path, image_infos['file_name'])
        images = cv2.imread(image_path)
        if images is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)

        if self.mode in ('train', 'val'):
            ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
            anns = self.coco.loadAnns(ann_ids)

            # masks: 2D array of shape (height, width).
            # Each pixel holds the label of its category; background stays 0.
            masks = np.zeros((image_infos["height"], image_infos["width"]))
            for ann in anns:
                # An unknown category id maps to "None", which is not in
                # category_names and therefore raises ValueError below.
                class_name = self._cat_name_by_id.get(ann['category_id'], "None")
                pixel_value = category_names.index(class_name)
                # Where annotations overlap, the larger label value wins.
                masks = np.maximum(self.coco.annToMask(ann) * pixel_value, masks)
            masks = masks.astype(np.float32)

            # Apply the (albumentations) transform to image and mask together.
            if self.transform is not None:
                transformed = self.transform(image=images, mask=masks)
                images = transformed["image"]
                masks = transformed["mask"]

            return images, masks

        if self.mode == 'test':
            # Apply the (albumentations) transform to the image only.
            if self.transform is not None:
                transformed = self.transform(image=images)
                images = transformed["image"]

            return images, image_infos

        # Fail loudly instead of silently returning None for a mistyped mode.
        raise ValueError(f"Unsupported mode: {self.mode!r}")

    def __len__(self) -> int:
        """Return the number of images in the annotation file."""
        return len(self.image_ids)

# CFG

In [None]:
# ====================================================
# CFG  
# ====================================================
class CFG:
    """Static configuration for the training / inference notebook.

    All values are class attributes and are read as ``CFG.<name>``.
    """
    debug=False
    img_size=512
    max_len=275
    print_freq=1000
    num_workers=4
    model_name='timm-efficientnet-b5' #['timm-efficientnet-b4', 'tf_efficientnet_b0_ns']
    size=512 # [512, 1024]
    freeze_epo = 0
    warmup_epo = 1
    cosine_epo = 39 #14 #19
    warmup_factor=10
    scheduler='GradualWarmupSchedulerV2' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts', 'GradualWarmupSchedulerV2', 'get_linear_schedule_with_warmup']
    # Total epochs is the sum of the three phases above.
    epochs=freeze_epo + warmup_epo + cosine_epo # not to exceed 9h #[1, 5, 10]
    factor=0.2 # ReduceLROnPlateau
    patience=4 # ReduceLROnPlateau
    eps=1e-6 # ReduceLROnPlateau
    T_max=4 # CosineAnnealingLR
    T_0=4 # CosineAnnealingWarmRestarts
    encoder_lr=3e-5 #[1e-4, 3e-5]
    min_lr=1e-6
    batch_size=24 #[64, 256 + 128, 512, 1024, 512 + 256 + 128, 2048]
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=5
    dropout=0.5
    seed=42
    smoothing=0.05
    n_fold=5
    # Folds to train; use e.g. [0] to train a single fold.
    trn_fold=[0, 1, 2, 3, 4] # [0, 1, 2, 3, 4]
    train=True
    # Enables torch.cuda.amp mixed precision in the training step.
    apex=False
    log_day='0505'
    model_type=model_name
    version='v1-1'
    load_state=False
    cutmix=False
    # Spelling kept as-is: other cells read this attribute as CFG.pesudo.
    pesudo=False

In [None]:
#if CFG.apex:
from torch.cuda.amp import autocast, GradScaler
if CFG.debug:
    # Shorten the run for a quick smoke test.
    CFG.epochs = 2
    # No `train` DataFrame exists in this notebook (samples come from the
    # COCO json files), so there is nothing to sub-sample here; referencing
    # it would raise NameError as soon as debug mode is enabled.

In [None]:
import wandb

# Library

In [None]:
# ====================================================
# Library
# ====================================================
import sys
#sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import gc
import re
import math
import time
import random
import shutil
import pickle
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from warmup_scheduler import GradualWarmupScheduler
# from transformers import get_linear_schedule_with_warmup

from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import albumentations as A

import segmentation_models_pytorch as smp

import warnings 
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Utils

In [None]:
# ====================================================
# Utils
# ====================================================
def init_logger(log_file=os.path.join(OUTPUT_DIR, 'train.log')):
    """Create the module logger that writes to both the console and a file.

    Args:
        log_file: Path of the log file. Defaults to ``train.log`` inside
            ``OUTPUT_DIR``.

    Returns:
        The configured ``logging.Logger`` (level INFO, message-only format).
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so re-running this
    # cell would otherwise stack handlers and print every message repeatedly.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch RNGs for reproducible runs.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not only the current one (multi-GPU training).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs.
    torch.backends.cudnn.benchmark = False

# Seed every RNG with the configured seed when this cell runs.
seed_torch(seed=CFG.seed)

## Dataset 정의 및 DataLoader 할당

In [None]:
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose, Blur, GaussNoise, MotionBlur, MedianBlur, OpticalDistortion, ElasticTransform, 
    GridDistortion, IAAPiecewiseAffine, CLAHE, IAASharpen, IAAEmboss, HueSaturationValue, ToGray, JpegCompression
    )

# collate_fn needs for batch
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``; no stacking
    or tensor conversion is performed.
    """
    columns = zip(*batch)
    return tuple(columns)

# Channel-wise normalisation constants (the standard ImageNet mean / std),
# shared by every pipeline below.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: random flips / cutout / affine jitter, then a random
# resized crop to CFG.size, normalisation and tensor conversion.
train_transform = A.Compose([
    A.VerticalFlip(p=.25),
    A.Cutout(
        num_holes=10,
        max_h_size=int(.1 * CFG.img_size),
        max_w_size=int(.1 * CFG.img_size),
        p=.25,
    ),
    A.ShiftScaleRotate(p=.25),
    A.RandomResizedCrop(CFG.size, CFG.size, scale = [0.75, 1], p=1),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(transpose_mask=False),
])

# Validation pipeline: normalisation and tensor conversion only.
val_transform = A.Compose([
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD, max_pixel_value=255.0, p=1.0),
    ToTensorV2(transpose_mask=False),
])

# Test pipeline: same steps as validation.
test_transform = A.Compose([
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD, max_pixel_value=255.0, p=1.0),
    ToTensorV2(transpose_mask=False),
])

# MODEL

In [None]:
class Encoder(nn.Module):
    """FPN segmentation network from segmentation_models_pytorch.

    Args:
        model_name: smp encoder (backbone) name.
        pretrained: When True the backbone is initialised with the
            "noisy-student" weights; when False no weights are fetched,
            which is what you want before loading a saved checkpoint.
        num_classes: Number of output channels (segmentation classes).
    """
    def __init__(self, model_name='timm-efficientnet-b4', pretrained=False, num_classes=12):
        super().__init__()
        # Honour the `pretrained` flag instead of always loading weights.
        encoder_weights = "noisy-student" if pretrained else None  # [imagenet, noisy-student]
        self.encoder = smp.FPN(encoder_name=model_name, encoder_weights=encoder_weights, classes=num_classes)

    #@autocast()
    def forward(self, x):
        """Return the raw per-class output of the FPN for input batch ``x``."""
        x = self.encoder(x)
        return x

# Helper functions

In [None]:
#!pip install -q git+https://github.com/ildoonet/pytorch-gradual-warmup-lr.git

In [None]:
class GradualWarmupSchedulerV2(GradualWarmupScheduler):
    """GradualWarmupScheduler variant with its own ``get_lr``.

    While ``last_epoch <= total_epoch`` the learning rate is ramped linearly;
    afterwards the wrapped ``after_scheduler`` (if any) takes over, with its
    base learning rates scaled by ``multiplier``.
    """
    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        super().__init__(optimizer, multiplier, total_epoch, after_scheduler)

    def get_lr(self):
        """Return the list of learning rates for the current epoch."""
        # Warm-up phase.
        if self.last_epoch <= self.total_epoch:
            if self.multiplier == 1.0:
                # Ramp from 0 up to base_lr.
                progress = float(self.last_epoch) / self.total_epoch
                return [base_lr * progress for base_lr in self.base_lrs]
            # Ramp from base_lr up to base_lr * multiplier.
            factor = (self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.
            return [base_lr * factor for base_lr in self.base_lrs]

        # Warm-up finished and nothing to hand over to: hold the peak rate.
        if not self.after_scheduler:
            return [base_lr * self.multiplier for base_lr in self.base_lrs]

        # First call after warm-up: rescale the follow-up scheduler once.
        if not self.finished:
            self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
            self.finished = True
        return self.after_scheduler.get_lr()

In [None]:
#https://www.kaggle.com/bigironsphere/loss-function-library-keras-pytorch
class DiceLoss(nn.Module):
    """Soft Dice loss computed on sigmoid-activated logits.

    ``weight`` and ``size_average`` are accepted for signature compatibility
    and are not used.
    """
    def __init__(self, weight=None, size_average=True):
        super(DiceLoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):
        """Return ``1 - dice`` over all elements of the batch.

        Args:
            inputs: Raw logits; the sigmoid is applied here.
            targets: Tensor with the same number of elements as ``inputs``.
            smooth: Additive smoothing term for numerator and denominator.
        """
        #comment out if your model contains a sigmoid or equivalent activation layer
        # torch.sigmoid replaces the deprecated F.sigmoid.
        inputs = torch.sigmoid(inputs)

        #flatten label and prediction tensors
        # reshape also handles non-contiguous tensors, where view(-1) raises.
        inputs = inputs.reshape(-1)
        targets = targets.reshape(-1)

        intersection = (inputs * targets).sum()
        dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)

        return 1 - dice
    
    
class DiceBCELoss(nn.Module):
    """Weighted sum of BCE-with-logits (0.9) and a Dice-style term (0.1).

    ``weight`` and ``size_average`` are accepted for signature compatibility
    and are not used.
    """
    def __init__(self, weight=None, size_average=True):
        super(DiceBCELoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):
        """Return ``0.9 * BCE + 0.1 * dice_loss`` as a scalar tensor.

        Args:
            inputs: Raw logits; the sigmoid is applied here for the dice term.
            targets: Float tensor with the same shape as ``inputs``.
            smooth: Additive smoothing term of the dice ratio.
        """
        #comment out if your model contains a sigmoid or equivalent activation layer
        BCE = F.binary_cross_entropy_with_logits(inputs, targets, reduction='mean')
        # torch.sigmoid replaces the deprecated F.sigmoid.
        inputs = torch.sigmoid(inputs)

        #flatten label and prediction tensors
        # reshape also handles non-contiguous tensors, where view(-1) raises.
        inputs = inputs.reshape(-1)
        targets = targets.reshape(-1)

        # NOTE(review): the dice term uses means rather than sums, so with
        # smooth=1 the smoothing constant dominates the ratio - confirm this
        # is intended. The formula is kept unchanged here.
        intersection = (inputs * targets).mean()
        dice_loss = 1 - (2.*intersection + smooth)/(inputs.mean() + targets.mean() + smooth)
        Dice_BCE = 0.9*BCE + 0.1*dice_loss

        return Dice_BCE.mean()

In [None]:
import numpy as np

def _fast_hist(label_true, label_pred, n_class):
    mask = (label_true >= 0) & (label_true < n_class)
    hist = np.bincount(n_class * label_true[mask].astype(int) + label_pred[mask],
                        minlength=n_class ** 2).reshape(n_class, n_class)
    return hist


def label_accuracy_score(hist):
    """Compute segmentation metrics from a confusion matrix.

    Args:
        hist: Square confusion matrix (rows = true label, columns = predicted).

    Returns:
        Tuple ``(acc, acc_cls, mean_iu, iu, fwavacc)``:
          - acc: overall pixel accuracy
          - acc_cls: mean of the per-class accuracies (NaN entries skipped)
          - mean_iu: mean of the per-class IoU values (NaN entries skipped)
          - iu: per-class IoU array (NaN for classes that never occur)
          - fwavacc: frequency-weighted IoU
    """
    # Every division below can hit 0/0 (empty matrix or absent class), so
    # all of them run under the same errstate guard.
    with np.errstate(divide='ignore', invalid='ignore'):
        diag = np.diag(hist)
        total = hist.sum()
        true_counts = hist.sum(axis=1)

        acc = diag.sum() / total
        acc_cls = np.nanmean(diag / true_counts)

        iu = diag / (true_counts + hist.sum(axis=0) - diag)
        mean_iu = np.nanmean(iu)

        freq = true_counts / total
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, iu, fwavacc


def add_hist(hist, label_trues, label_preds, n_class):
    """Accumulate per-sample confusion matrices into ``hist``.

    ``hist`` is updated in place (``+=``) and also returned. Each pair of
    true / predicted label arrays is flattened before being counted.
    """
    for true_labels, pred_labels in zip(label_trues, label_preds):
        sample_hist = _fast_hist(true_labels.flatten(), pred_labels.flatten(), n_class)
        hist += sample_hist

    return hist

In [None]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Track the latest value and the running weighted average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the last value, the weighted sum, the count and the average."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the average."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return elapsed time since ``since`` and the estimated time remaining.

    ``percent`` is the completed fraction of the work (must be non-zero);
    the total duration is extrapolated as ``elapsed / percent``.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(train_loader, encoder, criterion, 
             optimizer, epoch,
             scheduler, device):
    """Run one training epoch.

    Args:
        train_loader: Iterable yielding ``(images, targets)`` batches.
        encoder: Model to train.
        criterion: Loss called as ``criterion(predictions, targets)``.
        optimizer: Optimizer updating ``encoder``'s parameters.
        epoch: Zero-based epoch index (used for logging only).
        scheduler: Kept for signature compatibility; the logged learning rate
            is read from the optimizer.
        device: Device the batches are moved to.

    Returns:
        ``(average loss, mIoU)`` where mIoU is computed from the confusion
        matrix accumulated over the whole epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    miou_score = AverageMeter()
    # switch to train mode
    encoder.train()

    accum_steps = max(1, CFG.gradient_accumulation_steps)
    # A disabled scaler turns scale/unscale_/update into no-ops and makes
    # step() call optimizer.step(), so one code path serves both modes.
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)

    start = end = time.time()
    global_step = 0
    hist = np.zeros((12, 12))
    encoder_grad_norm = 0.0
    # Start the epoch with clean gradients; afterwards they are cleared only
    # after an optimizer step so that accumulation really accumulates.
    optimizer.zero_grad()
    for step, (images, targets) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device)
        targets = targets.to(device).long()
        batch_size = images.size(0)

        if CFG.cutmix:
            # generate mixed sample
            lam = np.random.beta(1., 1.)
            # Follow `device` instead of assuming CUDA.
            rand_index = torch.randperm(batch_size).to(device)
            bbx1, bby1, bbx2, bby2 = rand_bbox(images.size(), lam)
            images[:, :, bbx1:bbx2, bby1:bby2] = images[rand_index, :, bbx1:bbx2, bby1:bby2]
            targets[:, bbx1:bbx2, bby1:bby2] = targets[rand_index, bbx1:bbx2, bby1:bby2]

        # =========================
        # forward / backward
        # =========================
        with autocast(enabled=CFG.apex):
            y_preds = encoder(images)
            loss = criterion(y_preds, targets)
        # Scale the loss BEFORE backward so accumulated gradients average
        # over the accumulation window.
        scaler.scale(loss / accum_steps).backward()
        # record the unscaled loss
        losses.update(loss.item(), batch_size)

        if (step + 1) % accum_steps == 0:
            # Gradients must be unscaled before clipping, otherwise the
            # threshold is compared against loss-scaled values under AMP.
            scaler.unscale_(optimizer)
            encoder_grad_norm = torch.nn.utils.clip_grad_norm_(encoder.parameters(), CFG.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1

        # record mIoU
        # argmax over the class dimension; no squeeze(), which would drop the
        # batch dimension for a batch of one.
        y_preds = torch.argmax(y_preds, dim=1).detach().cpu().numpy()
        hist = add_hist(hist, targets.detach().cpu().numpy(), y_preds, n_class=12)
        acc, acc_cls, mIoU, iu, fwavacc = label_accuracy_score(hist)
        miou_score.update(mIoU, batch_size)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'MioU: {miou.val:.4f}({miou.avg:.4f}) '
                  'Encoder Grad: {encoder_grad_norm:.4f}  '
                  'Encoder LR: {encoder_lr:.6f}  '
                  .format(
                   epoch+1, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, miou=miou_score,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   encoder_grad_norm=encoder_grad_norm,
                   # Read the LR actually in use; not every scheduler
                   # (e.g. ReduceLROnPlateau) offers get_lr().
                   encoder_lr=optimizer.param_groups[0]['lr'],
                   ))
    acc, acc_cls, mIoU, iu, fwavacc = label_accuracy_score(hist)
    return losses.avg, mIoU


def valid_fn(valid_loader, encoder, criterion, device):
    """Evaluate ``encoder`` on the validation loader.

    Args:
        valid_loader: Iterable yielding ``(images, targets)`` batches.
        encoder: Model to evaluate (switched to eval mode here).
        criterion: Loss called as ``criterion(predictions, targets)``.
        device: Device the batches are moved to.

    Returns:
        ``(average loss, mIoU)`` where mIoU is computed from the confusion
        matrix accumulated over all batches. The per-class IoU array is
        printed before returning.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    miou_score = AverageMeter()

    # switch to evaluation mode
    encoder.eval()
    start = end = time.time()
    hist = np.zeros((12, 12))
    for step, (images, targets) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device)
        targets = targets.to(device).long()
        batch_size = images.size(0)
        # Forward pass and loss need no autograd bookkeeping.
        with torch.no_grad():
            y_preds = encoder(images)
            loss = criterion(y_preds, targets)
        losses.update(loss.item(), batch_size)

        # record mIoU
        # argmax over the class dimension; no squeeze(), which would drop the
        # batch dimension when the final batch holds a single image.
        y_preds = torch.argmax(y_preds, dim=1).detach().cpu().numpy()
        hist = add_hist(hist, targets.detach().cpu().numpy(), y_preds, n_class=12)
        acc, acc_cls, mIoU, iu, fwavacc = label_accuracy_score(hist)
        miou_score.update(mIoU, batch_size)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'MioU: {miou.val:.4f}({miou.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, miou=miou_score,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))
    acc, acc_cls, mIoU, iu, fwavacc = label_accuracy_score(hist)
    print(iu)
    return losses.avg, mIoU

# Train loop

In [None]:
# from segmentation_models.losses import bce_jaccard_loss

from losses.soft_ce import SoftCrossEntropyLoss
from losses.lovasz import LovaszLoss
from utils import rand_bbox, copyblob, FocalLoss

In [None]:
# Module-level loss with label smoothing taken from CFG.smoothing.
# NOTE(review): ignore_index=1 presumably excludes mask label 1 from the loss;
# confirm against the SoftCrossEntropyLoss implementation and the label map.
criterion = SoftCrossEntropyLoss(smooth_factor=CFG.smoothing, ignore_index=1) #['SoftCrossEntropyLoss(smooth_factor=CFG.smoothing, ignore_index=1)', DiceBCELoss()', 'DiceLoss()', 'nn.BCEWithLogitsLoss()']

In [None]:
# ====================================================
# Train loop
# ====================================================
def train_loop(fold):
    """Train and validate one cross-validation fold.

    Builds the fold's datasets and loaders, the model, optimizer and
    scheduler, then trains for ``CFG.epochs`` epochs. After every epoch the
    validation mIoU is logged (LOGGER and wandb) and the checkpoint is saved
    whenever the mIoU improves.

    Args:
        fold: Fold index; selects ``train_data{fold}.json`` (or the
            ``train_data_pesudo{fold}.json`` variant when ``CFG.pesudo`` is
            set) and ``valid_data{fold}.json`` under ``dataset_path``.

    Returns:
        The best validation mIoU reached during the run.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    # Annotation files for this fold.
    if CFG.pesudo:
        train_path = dataset_path + f'/train_data_pesudo{fold}.json'
    else:
        train_path = dataset_path + f'/train_data{fold}.json'
    val_path = dataset_path + f'/valid_data{fold}.json'

    # train dataset
    train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)

    # validation dataset
    val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=val_transform)

    # DataLoader
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                               batch_size=CFG.batch_size,
                                               num_workers=CFG.num_workers, 
                                               pin_memory=True,
                                               drop_last=True, 
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                             batch_size=CFG.batch_size,
                                             num_workers=CFG.num_workers, 
                                             pin_memory=True,
                                             # drop_last=True, 
                                             shuffle=False)

    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        """Build the LR scheduler selected by ``CFG.scheduler``."""
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='GradualWarmupSchedulerV2':
            scheduler_cosine=torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, CFG.cosine_epo)
            scheduler_warmup=GradualWarmupSchedulerV2(optimizer, multiplier=CFG.warmup_factor, total_epoch=CFG.warmup_epo, after_scheduler=scheduler_cosine)
            scheduler=scheduler_warmup
        else:
            # Fail with a clear message instead of an UnboundLocalError.
            raise ValueError(f"Unknown CFG.scheduler: {CFG.scheduler!r}")
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    encoder = Encoder(CFG.model_name, pretrained=True)
    encoder.to(device)

    # Ask CUDA how many GPUs are really usable rather than parsing the
    # CUDA_VISIBLE_DEVICES string (which may be unset or not match the host).
    if torch.cuda.device_count() > 1:
        encoder = nn.DataParallel(encoder)

    optimizer = Adam(encoder.parameters(), lr=CFG.encoder_lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)

    # Log the network weight histograms (optional)
    #wandb.watch(encoder, log='all')

    # ====================================================
    # loop
    # ====================================================
    criterion = SoftCrossEntropyLoss(smooth_factor=CFG.smoothing, ignore_index=1) #['SoftCrossEntropyLoss(smooth_factor=CFG.smoothing, ignore_index=1)', DiceBCELoss()', 'DiceLoss()', 'nn.BCEWithLogitsLoss()']
    best_score = 0

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss, avg_tr_miou = train_fn(train_loader, encoder, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, avg_val_miou = valid_fn(valid_loader, encoder, criterion, device)

        # scoring: validation mIoU, higher is better
        score = avg_val_miou

        if isinstance(scheduler, ReduceLROnPlateau):
            # The plateau scheduler is built with mode='min', so it must
            # monitor a quantity that decreases on improvement: the
            # validation loss, not the mIoU.
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()
        elif isinstance(scheduler, GradualWarmupSchedulerV2):
            scheduler.step(epoch)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {avg_val_miou:.4f}')

        wandb.log({
            "avg_loss": avg_loss,
            "avg_val_loss": avg_val_loss,
            "Score": score,
            'epoch': epoch,
            "lr": optimizer.param_groups[0]["lr"],
        })

        # Save the bare module so the checkpoint loads without DataParallel.
        model_to_save = encoder.module if hasattr(encoder, 'module') else encoder

        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            # NOTE: OUTPUT_DIR is concatenated without a path separator, so
            # the file lands next to the directory (e.g. "./submission0505_...").
            # The inference cell loads from that same prefix; change both
            # together if the checkpoints should live inside OUTPUT_DIR.
            torch.save({'encoder': model_to_save.state_dict(), 
                        'optimizer': optimizer.state_dict(), 
                        'scheduler': scheduler.state_dict(), 
                       },
                        OUTPUT_DIR+f'{CFG.log_day}_d{CFG.dropout}_s{CFG.seed}_{CFG.model_name}_{CFG.version}_fold{fold}_best.pth')
    return best_score

# Main

In [None]:
# ====================================================
# main
# ====================================================
def main(rank=0, world_size=0):
    """Train every fold listed in ``CFG.trn_fold`` (when ``CFG.train`` is set).

    For each selected fold the RNGs are re-seeded, a wandb run is started,
    the relevant CFG values are copied into the run config and
    ``train_loop(fold)`` is executed.

    Args:
        rank: Unused; kept for signature compatibility.
        world_size: Unused; kept for signature compatibility.
    """
    if not CFG.train:
        return

    for fold in range(CFG.n_fold):
        if fold not in CFG.trn_fold:
            continue

        # Re-seed so every fold starts from the same RNG state.
        seed_torch(seed=CFG.seed)
        wandb.init(project='Trash-Segmentation', name=f'{CFG.log_day}_{CFG.model_type}+FPN+AUG_{CFG.version}_fold{fold}', entity='choco_9966')
        config = wandb.config          # Initialize config

        # CFG values recorded with the run.
        run_settings = {
            'batch_size': CFG.batch_size,
            'encoder_lr': CFG.encoder_lr,
            'seed': CFG.seed,
            'weight_decay': CFG.weight_decay,
            'gradient_accumulation_steps': CFG.gradient_accumulation_steps,
            'scheduler': CFG.scheduler,
            'model_name': CFG.model_name,
            'apex': CFG.apex,
            'num_workers': CFG.num_workers,
            'img_size': CFG.size,
            'print_freq': CFG.print_freq,
            'n_fold': CFG.n_fold,
            'train': CFG.train,
            'epochs': CFG.epochs,
            'smoothing': CFG.smoothing,
        }
        for key, value in run_settings.items():
            setattr(config, key, value)

        try:
            train_loop(fold)
        finally:
            # Close the run even when training fails, so the next fold (or a
            # re-run of the cell) starts a fresh wandb run. wandb.finish is
            # the current name of the deprecated wandb.join.
            wandb.finish()

In [None]:
# Start training when this file/cell is executed directly (not on import).
if __name__ == '__main__':
    main()

In [None]:
def test(models, data_loader, device):
    """Run ensemble inference and return flattened 256x256 label maps.

    Args:
        models: Non-empty list of models; their outputs are summed before
            the per-pixel argmax.
        data_loader: Loader yielding ``(imgs, image_infos)`` batches, where
            ``imgs`` is a sequence of image tensors and ``image_infos`` a
            sequence of dicts with a ``'file_name'`` key.
        device: Device used for inference.

    Returns:
        ``(file_names, preds_array)`` where ``preds_array`` is an int64 array
        of shape ``(num_images, 256 * 256)``.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("test() needs at least one model")

    size = 256
    transform = A.Compose([A.Resize(size, size)])
    print('Start prediction.')

    # Move the models to the device and switch to eval mode once, not per batch.
    models = [model.to(device).eval() for model in models]

    file_name_list = []
    # Per-batch results are collected and concatenated once at the end;
    # growing an array with vstack inside the loop copies it every iteration.
    pred_chunks = []

    with torch.no_grad():
        # Iterate the loader that was passed in, not a global one.
        for imgs, image_infos in data_loader:

            # inference: sum the outputs of all models
            batch = torch.stack(imgs).to(device)
            outs = None
            for model in models:
                logits = model(batch)
                outs = logits if outs is None else outs + logits
            # argmax over the class dimension; no squeeze(), which would drop
            # the batch dimension for a batch of one.
            oms = torch.argmax(outs, dim=1).detach().cpu().numpy()

            # resize (256 x 256)
            temp_mask = []
            for img, mask in zip(np.stack(imgs), oms):
                transformed = transform(image=img, mask=mask)
                temp_mask.append(transformed['mask'])

            oms = np.array(temp_mask)
            # np.int64 replaces np.long, which no longer exists in NumPy.
            oms = np.around(oms.reshape([oms.shape[0], size*size])).astype(np.int64)
            pred_chunks.append(oms)
            file_name_list.append([i['file_name'] for i in image_infos])

    print("End prediction.")
    file_names = [y for x in file_name_list for y in x]
    if pred_chunks:
        preds_array = np.concatenate(pred_chunks, axis=0)
    else:
        preds_array = np.empty((0, size*size), dtype=np.int64)

    return file_names, preds_array

In [None]:
# Annotation file describing the test images.
test_path = dataset_path + '/test.json'

# Test dataset: yields (image, image_info) pairs.
test_dataset = CustomDataLoader(
    data_dir=test_path,
    mode='test',
    transform=test_transform,
)

# collate_fn keeps images and infos as tuples instead of stacking them.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=CFG.num_workers,
    pin_memory=True,
    collate_fn=collate_fn,
)

In [None]:
class Encoder(nn.Module):
    """FPN segmentation network from segmentation_models_pytorch.

    Args:
        model_name: smp encoder (backbone) name.
        pretrained: When True the backbone is initialised with the
            "noisy-student" weights; when False no weights are fetched,
            which is what you want before loading a saved checkpoint.
        num_classes: Number of output channels (segmentation classes).
    """
    def __init__(self, model_name='timm-efficientnet-b4', pretrained=False, num_classes=12):
        super().__init__()
        # Honour the `pretrained` flag instead of always loading weights.
        encoder_weights = "noisy-student" if pretrained else None  # [imagenet, noisy-student]
        self.encoder = smp.FPN(encoder_name=model_name, encoder_weights=encoder_weights, classes=num_classes)

    #@autocast()
    def forward(self, x):
        """Return the raw per-class output of the FPN for input batch ``x``."""
        x = self.encoder(x)
        return x
    
# 추론을 실행하기 전에는 반드시 설정 (batch normalization, dropout 를 평가 모드로 설정)
# model.eval()

In [None]:
# Load the best checkpoint of every trained fold for ensembling.
models = []
for fold in CFG.trn_fold:
    # Same expression the training loop uses when saving, so the two always
    # agree even if OUTPUT_DIR is changed.
    model_path = OUTPUT_DIR + f'{CFG.log_day}_d{CFG.dropout}_s{CFG.seed}_{CFG.model_name}_{CFG.version}_fold{fold}_best.pth'
    checkpoint = torch.load(model_path, map_location=device)
    model = Encoder(CFG.model_name, pretrained=False)
    model.load_state_dict(checkpoint['encoder'])
    models.append(model)

# Multi Scale TTA (No ttach package)

In [None]:
import argparse
import scipy
import os
import numpy as np
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from scipy import ndimage
from tqdm import tqdm
from math import ceil
from glob import glob
from PIL import Image
from collections import OrderedDict

In [None]:
import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax, create_pairwise_bilateral

'''
# Default values are
appearance_kernel = [8, 164, 100] # PairwiseBilateral [sxy, srgb, compat]  
spatial_kernel = [3, 10]          # PairwiseGaussian  [sxy, compat] 

# or, if you want to specify them separately for each XY direction and RGB color channel:

appearance_kernel = [(1.5, 1.5), (64, 64, 64), 100] # PairwiseBilateral [sxy, srgb, compat]  
spatial_kernel = [(0.5, 0.5), 10]                   # PairwiseGaussian  [sxy, compat] 
'''
# https://www.programcreek.com/python/example/106424/pydensecrf.densecrf.DenseCRF2D
h, w = 512, 512
def dense_crf(probs, img=None, n_classes=12, n_iters=10, scale_factor=1):
    """Refine per-pixel class probabilities with a fully connected CRF.

    Args:
        probs: Array of shape (n_classes, H, W) with softmax probabilities.
        img: Optional channel-first image of shape (C, H, W). It is scaled
            by 255 and converted to uint8, so values are expected in [0, 1].
            When omitted, only the spatial (Gaussian) pairwise term is used.
        n_classes: Number of classes; must equal ``probs.shape[0]``.
        n_iters: Number of inference iterations passed to the CRF.
        scale_factor: Unused; kept so existing callers keep working.

    Returns:
        float32 array of shape (n_classes, H, W) produced by CRF inference.

    Raises:
        ValueError: If `probs` does not have `n_classes` channels or the
            spatial size of `img` differs from that of `probs`.
    """
    c, height, width = probs.shape
    if c != n_classes:
        raise ValueError(
            f"probs has {c} channels but n_classes is {n_classes}"
        )

    rgb = None
    if img is not None:
        if tuple(img.shape[1:3]) != (height, width):
            raise ValueError(
                f"img spatial size {tuple(img.shape[1:3])} does not match "
                f"probs spatial size {(height, width)}"
            )
        # CHW -> HWC, then to uint8. Clipping prevents wrap-around when the
        # de-normalised image lands marginally outside [0, 1].
        rgb = np.transpose(img, (1, 2, 0))
        rgb = np.clip(255 * rgb, 0, 255).astype(np.uint8)
        # pydensecrf needs a C-contiguous buffer.
        rgb = np.ascontiguousarray(rgb)

    d = dcrf.DenseCRF2D(width, height, n_classes)  # Define DenseCRF model.

    unary = unary_from_softmax(probs)
    unary = np.ascontiguousarray(unary)
    d.setUnaryEnergy(unary)
    d.addPairwiseGaussian(sxy=(3, 3), compat=10)
    if rgb is not None:
        # The appearance term needs an image; previously img=None crashed here.
        d.addPairwiseBilateral(sxy=10, srgb=5, rgbim=rgb, compat=10)
    Q = d.inference(n_iters)

    preds = np.array(Q, dtype=np.float32).reshape((n_classes, height, width))
    return preds

In [None]:
scales = [0.75, 1.0, 1.25]
def multi_scale_predict(model, image, scales, num_classes, device, flip=False):
    """Average a model's predictions over several input scales.

    The batch is resized by each factor in `scales`, run through `model`,
    and the output is bilinearly resized back to the input resolution.
    With `flip=True` each scale is also evaluated on a horizontally
    mirrored copy (last axis), whose output is mirrored back and averaged
    with the unflipped one.

    Args:
        model: Callable mapping an (N, C, H', W') tensor to
            (N, num_classes, h, w) scores.
        image: 4-D input tensor (N, C, H, W).
        scales: Iterable of zoom factors applied to the two spatial axes.
        num_classes: Number of channels the model outputs.
        device: Device the resized inputs are moved to before the forward pass.
        flip: Whether to add the mirrored pass.

    Returns:
        NumPy array of shape (N, num_classes, H, W): the mean over scales.
    """
    batch_size, _, height, width = image.shape
    restore = nn.Upsample(size=(height, width), mode='bilinear', align_corners=True)
    accumulated = np.zeros((batch_size, num_classes, height, width))

    # Resizing is done on the host with scipy, so work from a NumPy copy.
    source = image.detach().cpu().numpy()
    for factor in scales:
        # Only the two spatial axes are zoomed; batch and channel stay as-is.
        zoom_factors = (1.0, 1.0, float(factor), float(factor))
        resized = ndimage.zoom(source, zoom_factors, order=1, prefilter=False)
        resized = torch.from_numpy(resized).to(device)
        prediction = restore(model(resized).cpu())

        if flip:
            mirrored_input = resized.flip(-1).to(device)
            mirrored_prediction = restore(model(mirrored_input).cpu())
            # Undo the mirror on the output before blending.
            prediction = 0.5 * (mirrored_prediction.flip(-1) + prediction)

        accumulated += prediction.detach().cpu().numpy()

    accumulated /= len(scales)
    return accumulated

In [None]:
# Inverse of Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# applied in two steps: first multiply by std, then add the mean back.
invTrans = transforms.Compose([
    # x / (1 / std)  ==  x * std
    transforms.Normalize(
        mean = [ 0., 0., 0. ],
        std = [ 1/0.229, 1/0.224, 1/0.225 ],
    ),
    # x - (-mean)  ==  x + mean
    transforms.Normalize(
        mean = [ -0.485, -0.456, -0.406 ],
        std = [ 1., 1., 1. ],
    ),
])

In [None]:
def test(models, data_loader, device):
    """Predict 256x256 label maps with a model ensemble, multi-scale TTA and DenseCRF.

    For every batch the scores of all models (multi-scale, horizontally
    flipped) are summed, turned into probabilities with a softmax, refined
    by ``dense_crf``, reduced to class ids with argmax, and resized to
    256 x 256.

    Args:
        models: Non-empty sequence of networks. Each one is moved to
            `device` and put in eval mode.
        data_loader: Iterable yielding ``(imgs, image_infos)`` batches, where
            `imgs` is a sequence of image tensors and each entry of
            `image_infos` has a ``'file_name'`` key.
        device: Device used for the forward passes.

    Returns:
        Tuple ``(file_names, preds_array)``: the file names in loader order
        and an int64 array of shape (num_images, 256 * 256) with the
        flattened label maps.

    Raises:
        ValueError: If `models` is empty.
    """
    if not models:
        raise ValueError("test() needs at least one model")

    size = 256
    transform = A.Compose([A.Resize(size, size)])
    print('Start prediction.')

    file_names = []
    batch_preds = []

    with torch.no_grad():
        # Iterate the loader that was passed in, not the global `test_loader`.
        for imgs, image_infos in data_loader:
            batch = torch.stack(imgs).to(device)

            # inference (512 x 512 per the original notes): sum scores over the ensemble
            outs = None
            for model in models:
                model = model.to(device)
                model.eval()
                scores = multi_scale_predict(model, batch, scales, 12, device, flip=True)
                outs = scores if outs is None else outs + scores

            label_maps = []
            for image, prob in zip(imgs, outs):
                prob = F.softmax(torch.from_numpy(prob), dim=0)
                # NOTE(review): np.around on a de-normalised image in [0, 1]
                # rounds every channel to 0 or 1 before dense_crf rescales it
                # by 255 - confirm this binarisation is intended.
                prob = dense_crf(img=np.around(invTrans(image).cpu().numpy()).astype(float), probs=prob.cpu().numpy())
                label_maps.append(np.argmax(prob, axis=0))

            oms = np.array(label_maps)

            # resize (256 x 256)
            resized_masks = [
                transform(image=img, mask=mask)['mask']
                for img, mask in zip(np.stack(imgs), oms)
            ]

            oms = np.array(resized_masks)
            oms = np.around(oms.reshape([oms.shape[0], size * size])).astype(np.int64)
            # Collect per-batch results; one concatenate at the end avoids
            # re-copying the growing array on every step.
            batch_preds.append(oms)
            file_names.extend(i['file_name'] for i in image_infos)

    print("End prediction.")
    if batch_preds:
        preds_array = np.concatenate(batch_preds, axis=0)
    else:
        # np.long no longer exists in NumPy >= 1.24; int64 is used explicitly.
        preds_array = np.empty((0, size * size), dtype=np.int64)

    return file_names, preds_array

In [None]:
%%time
# Open sample_submission.csv
submission = pd.read_csv('./submission/sample_submission.csv', index_col=None)

# Predict on the test set
model = model.to(device)
file_names, preds = test(models, test_loader, device)

# Fill in PredictionString: one row per image, labels joined by spaces.
# All rows are built first and concatenated once; DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
predicted_rows = pd.DataFrame(
    [
        {"image_id": file_name, "PredictionString": ' '.join(str(e) for e in string.tolist())}
        for file_name, string in zip(file_names, preds)
    ],
    columns=["image_id", "PredictionString"],
)
submission = pd.concat([submission, predicted_rows], ignore_index=True)

submission.to_csv("./submission/0505_EfficientFPNB4_5FOLD_FLIP_CRF2.csv", index=False)

# Multi Scale TTA (ttach package)

In [None]:
import ttach as tta

# Test-time augmentation pipeline; it is handed to
# tta.SegmentationTTAWrapper when the fold models are wrapped below.
# NOTE: this rebinds the name `transforms`, which earlier cells imported as
# the torchvision module (`from torchvision import transforms`). `invTrans`
# was built before this cell and is unaffected, but any later
# `transforms.Normalize(...)` style call would now fail.
transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        tta.VerticalFlip(),
        # Same scale factors as the manual multi-scale `scales` list.
        tta.Scale(scales=[0.75, 1, 1.25]), 
        tta.Multiply(factors=[0.9, 1, 1.1]),        
    ]
)

In [None]:
# Build one Encoder per fold (0-4), restore its "best" checkpoint and wrap
# it so that every forward pass applies the ttach augmentation pipeline.
models = []
for fold in range(5):
    model_path = (
        f'./submission{CFG.log_day}_d{CFG.dropout}_s{CFG.seed}'
        f'_{CFG.model_name}_{CFG.version}_fold{fold}_best.pth'
    )
    # map_location lets the checkpoint load onto whatever `device` is in use.
    checkpoint = torch.load(model_path, map_location=device)
    model = Encoder(CFG.model_name, pretrained=False)
    # The network weights are stored under the 'encoder' key of the checkpoint.
    model.load_state_dict(checkpoint['encoder'])
    tta_model = tta.SegmentationTTAWrapper(model, transforms)
    models.append(tta_model)

In [None]:
def test(models, data_loader, device):
    """Predict 256x256 label maps with an ensemble of TTA-wrapped models and DenseCRF.

    For every batch the outputs of all models are summed, turned into
    probabilities with a softmax, refined by ``dense_crf``, reduced to
    class ids with argmax, and resized to 256 x 256.

    Args:
        models: Non-empty sequence of callables (here: ttach-wrapped
            networks). Each one is moved to `device` and put in eval mode.
        data_loader: Iterable yielding ``(imgs, image_infos)`` batches, where
            `imgs` is a sequence of image tensors and each entry of
            `image_infos` has a ``'file_name'`` key.
        device: Device used for the forward passes.

    Returns:
        Tuple ``(file_names, preds_array)``: the file names in loader order
        and an int64 array of shape (num_images, 256 * 256) with the
        flattened label maps.

    Raises:
        ValueError: If `models` is empty.
    """
    if not models:
        raise ValueError("test() needs at least one model")

    size = 256
    transform = A.Compose([A.Resize(size, size)])
    print('Start prediction.')

    file_names = []
    batch_preds = []

    with torch.no_grad():
        # Iterate the loader that was passed in, not the global `test_loader`.
        for imgs, image_infos in data_loader:
            batch = torch.stack(imgs).to(device)

            # inference (512 x 512 per the original notes): sum outputs over the ensemble
            outs = None
            for model in models:
                model = model.to(device)
                model.eval()
                scores = model(batch)
                outs = scores if outs is None else outs + scores

            label_maps = []
            for image, prob in zip(imgs, outs):
                prob = F.softmax(prob, dim=0)
                # NOTE(review): np.around on a de-normalised image in [0, 1]
                # rounds every channel to 0 or 1 before dense_crf rescales it
                # by 255 - confirm this binarisation is intended.
                prob = dense_crf(img=np.around(invTrans(image).cpu().numpy()).astype(float), probs=prob.cpu().numpy())
                label_maps.append(np.argmax(prob, axis=0))

            oms = np.array(label_maps)

            # resize (256 x 256)
            resized_masks = [
                transform(image=img, mask=mask)['mask']
                for img, mask in zip(np.stack(imgs), oms)
            ]

            oms = np.array(resized_masks)
            oms = np.around(oms.reshape([oms.shape[0], size * size])).astype(np.int64)
            # Collect per-batch results; one concatenate at the end avoids
            # re-copying the growing array on every step.
            batch_preds.append(oms)
            file_names.extend(i['file_name'] for i in image_infos)

    print("End prediction.")
    if batch_preds:
        preds_array = np.concatenate(batch_preds, axis=0)
    else:
        # np.long no longer exists in NumPy >= 1.24; int64 is used explicitly.
        preds_array = np.empty((0, size * size), dtype=np.int64)

    return file_names, preds_array

In [None]:
%%time
# Open sample_submission.csv
submission = pd.read_csv('./submission/sample_submission.csv', index_col=None)

# Predict on the test set
model = model.to(device)
file_names, preds = test(models, test_loader, device)

# Fill in PredictionString: one row per image, labels joined by spaces.
# All rows are built first and concatenated once; DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
predicted_rows = pd.DataFrame(
    [
        {"image_id": file_name, "PredictionString": ' '.join(str(e) for e in string.tolist())}
        for file_name, string in zip(file_names, preds)
    ],
    columns=["image_id", "PredictionString"],
)
submission = pd.concat([submission, predicted_rows], ignore_index=True)

submission.to_csv("./submission/0505_EfficientFPNB4_5FOLD_FLIP_CRF3.csv", index=False)