In [1]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, log_loss
import pickle
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
import warnings
import sys
import pandas as pd
import os
import gc
import sys
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import cv2

import scipy as sp
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from functools import partial

import argparse
import importlib
import torch
import torch.nn as nn
from torch.optim import Adam, SGD, AdamW

import datetime
import wandb

In [2]:
!pip install /kaggle/input/einops/einops-0.6.1-py3-none-any.whl
!pip install /kaggle/input/monai-packages/monai-1.1.0-202212191849-py3-none-any.whl["einops"]

Processing /kaggle/input/einops/einops-0.6.1-py3-none-any.whl
Installing collected packages: einops
Successfully installed einops-0.6.1
[0mProcessing /kaggle/input/monai-packages/monai-1.1.0-202212191849-py3-none-any.whl
Installing collected packages: monai
Successfully installed monai-1.1.0
[0m

In [3]:
# Make the offline (Kaggle dataset) copies of the third-party packages
# importable. Order is preserved because sys.path is searched front to back.
sys.path.extend([
    '/kaggle/input/pretrainedmodels/pretrainedmodels-0.7.4',
    '/kaggle/input/efficientnet-pytorch/EfficientNet-PyTorch-master',
    '/kaggle/input/timm-pytorch-image-models/pytorch-image-models-master',
    '/kaggle/input/segmentation-models-pytorch/segmentation_models.pytorch-master',
    '/kaggle/input/unet3d/pytorch3dunet/pytorch3dunet',
    '/kaggle/input/unet3d/pytorch3dunet',
    '/kaggle/input/unet3d/',
])

import segmentation_models_pytorch as smp
from unet3d.model import get_model
from unetr import UNETR

In [4]:
import numpy as np
from torch.utils.data import DataLoader, Dataset
import cv2
import torch
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

## config

In [5]:
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2

class CFG:
    """Notebook-wide configuration namespace, used through its class attributes.

    The values below act as defaults: the inference loop later overwrites
    ``tile_size``, ``size``, ``batch_size``, ``stride`` and ``valid_aug_list``
    per model, and attaches ``segformer_config`` before each model is built.
    """
    # ============== comp exp name =============
    comp_name = 'vesuvius'

    # comp_dir_path = './'
    comp_dir_path = '/kaggle/input/'
    comp_folder_name = 'vesuvius-challenge-ink-detection'
    # comp_dataset_path = f'{comp_dir_path}datasets/{comp_folder_name}/'
    # Root of the competition data; read_image appends "<mode>/<fragment_id>/..." to it.
    comp_dataset_path = f'{comp_dir_path}{comp_folder_name}/'
    
    exp_name = '3d_unet_subv2'

    # ============== pred target =============
    target_size = 1

    # ============== model cfg =============
    model_name = '3d_unet_segformer'
    # NOTE: this is the string 'None', not the None singleton.
    backbone = 'None'
#     backbone = 'se_resnext50_32x4d'

    # Number of consecutive surface-volume slices read around the middle slice.
    in_chans = 16
    # ============== training cfg =============
    size = 1024
    tile_size = 1024
    # Step between neighbouring tiles: a quarter tile, so adjacent tiles overlap by 75%.
    stride = tile_size // 4

    batch_size = 3 # 32
    use_amp = True

    scheduler = 'GradualWarmupSchedulerV2'
    # scheduler = 'CosineAnnealingLR'
    epochs = 15

    warmup_factor = 10
    # Starting learning rate is the 1e-4 base divided by the warmup factor.
    lr = 1e-4 / warmup_factor

    # ============== fold =============
    valid_id = 2

    objective_cv = 'binary'  # 'binary', 'multiclass', 'regression'
    metric_direction = 'maximize'  # maximize, 'minimize'
    # metrics = 'dice_coef'

    # ============== fixed =============
    pretrained = True
    inf_weight = 'best'  # 'best'

    min_lr = 1e-6
    weight_decay = 1e-6
    max_grad_norm = 1000

    print_freq = 50
    # Worker processes used by the DataLoader built in make_test_dataset.
    num_workers = 2

    seed = 42

    # ============== augmentation =============
    # Training-time pipeline; not used by the inference code visible in this notebook.
    train_aug_list = [
        # A.RandomResizedCrop(
        #     size, size, scale=(0.85, 1.0)),
        A.Resize(size, size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.75),
        A.ShiftScaleRotate(p=0.75),
        A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                ], p=0.4),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.CoarseDropout(max_holes=1, max_width=int(size * 0.3), max_height=int(size * 0.3), 
                        mask_fill_value=0, p=0.5),
        # A.Cutout(max_h_size=int(size * 0.6),
        #          max_w_size=int(size * 0.6), num_holes=1, p=1.0),
        # mean 0 / std 1 per channel: no per-channel shift or rescale beyond
        # whatever A.Normalize applies by default (e.g. max pixel value scaling).
        A.Normalize(
            mean= [0] * in_chans,
            std= [1] * in_chans
        ),
        ToTensorV2(transpose_mask=True),
    ]

    # Deterministic pipeline used for validation and test tiles (see get_transforms).
    valid_aug_list = [
        A.Resize(size, size),
        A.Normalize(
            mean= [0] * in_chans,
            std= [1] * in_chans
        ),
        ToTensorV2(transpose_mask=True),
    ]


In [6]:
# Debug switch: when True the notebook reads from the 'train' folder instead of 'test'.
IS_DEBUG = False
# Name of the dataset sub-folder that fragments are read from.
mode = 'test' if not IS_DEBUG else 'train'
# Presumably the probability threshold for the final binary mask; its use is
# outside this part of the notebook (TODO confirm).
TH = 0.5

In [7]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## helper

In [8]:
# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of "start length" pairs,
    where start is a 1-based index into the flattened (row-major) array.
    An all-background mask yields an empty string.
    '''
    # Zero sentinels on both ends guarantee every run has a rising and a falling edge.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: even slots are run starts,
    # odd slots are the positions just past the run ends.
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each end position into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

## dataset

In [9]:
def read_image(fragment_id):
    """Load the central surface-volume slices of a fragment as one padded array.

    Reads ``CFG.in_chans`` consecutive ``.tif`` slices centred on slice
    ``65 // 2`` from ``<comp_dataset_path>/<mode>/<fragment_id>/surface_volume/``
    (``mode`` is the notebook-level global), in grayscale.

    Each slice is zero-padded at the bottom and right by
    ``tile_size - dim % tile_size`` pixels. Note this always pads by at least
    one pixel: a dimension that is already a multiple of ``CFG.tile_size``
    gains a full extra tile. This is kept as-is because other code may rely
    on the resulting shape.

    Args:
        fragment_id: name of the fragment folder.

    Returns:
        Array of shape (padded_height, padded_width, CFG.in_chans).

    Raises:
        FileNotFoundError: if a slice is missing or cannot be decoded.
    """
    # idxs = range(65)
    mid = 65 // 2  # presumably 65 slices per fragment -- TODO confirm
    start = mid - CFG.in_chans // 2
    end = mid + CFG.in_chans // 2

    images = []
    for i in tqdm(range(start, end)):
        path = CFG.comp_dataset_path + f"{mode}/{fragment_id}/surface_volume/{i:02}.tif"
        image = cv2.imread(path, 0)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"could not read surface volume slice: {path}")

        pad0 = (CFG.tile_size - image.shape[0] % CFG.tile_size)
        pad1 = (CFG.tile_size - image.shape[1] % CFG.tile_size)

        image = np.pad(image, [(0, pad0), (0, pad1)], constant_values=0)

        images.append(image)

    # Stack slices along a new trailing axis: (H, W, in_chans).
    return np.stack(images, axis=2)

In [10]:
def get_transforms(data, cfg):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` or ``'valid'``.
        cfg: object exposing ``train_aug_list`` and ``valid_aug_list``.

    Returns:
        An ``A.Compose`` wrapping the matching augmentation list.

    Raises:
        ValueError: if ``data`` is neither ``'train'`` nor ``'valid'``
            (previously this surfaced as an UnboundLocalError).
    """
    # Validate first so a bad split name fails with a clear message.
    if data not in ('train', 'valid'):
        raise ValueError(f"data must be 'train' or 'valid', got {data!r}")

    aug_list = cfg.train_aug_list if data == 'train' else cfg.valid_aug_list
    return A.Compose(aug_list)

class CustomDataset(Dataset):
    """Dataset that lazily loads tiles saved as ``.npy`` files.

    Args:
        images: sequence of paths to ``.npy`` tiles.
        cfg: configuration object (stored, not read by this class).
        labels: optional labels (stored, not read by this class).
        transform: callable invoked as ``transform(image=tile)`` that returns
            a mapping with an ``'image'`` entry; it is required by
            ``__getitem__``.
    """

    def __init__(self, images, cfg, labels=None, transform=None):
        self.cfg = cfg
        self.labels = labels
        self.transform = transform
        self.images = np.array(images)

    def __len__(self):
        """Number of tiles."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load tile ``idx``, transform it and prepend a singleton axis."""
        tile = np.load(self.images[idx])
        augmented = self.transform(image=tile)
        # The leading axis of size 1 serves as the single input channel of the
        # 3D models (they are built with in_channels=1).
        return augmented['image'][None, :, :, :]


In [11]:
def make_test_dataset(fragment_id):
    """Cut a fragment into overlapping tiles and wrap them in a DataLoader.

    The fragment volume is scanned with a window of ``CFG.tile_size`` pixels
    moved by ``CFG.stride``. Tiles that are entirely zero are skipped. Every
    kept tile is written to ``"{x1}_{y1}_{x2}_{y2}.npy"`` in the current
    working directory unless that file already exists, so the file acts as a
    cache keyed only by the tile coordinates.

    Args:
        fragment_id: fragment folder name, forwarded to ``read_image``.

    Returns:
        (test_loader, xyxys): an unshuffled DataLoader over the tile files and
        an array with one ``(x1, y1, x2, y2)`` row per kept tile, in loader order.
    """
    volume = read_image(fragment_id)
    tile = CFG.tile_size

    row_starts = range(0, volume.shape[0] - tile + 1, CFG.stride)
    col_starts = range(0, volume.shape[1] - tile + 1, CFG.stride)

    tile_files = []
    xyxys = []
    for y1 in row_starts:
        y2 = y1 + tile
        for x1 in col_starts:
            x2 = x1 + tile
            if volume[y1:y2, x1:x2].max() != 0:
                tile_file = f"{x1}_{y1}_{x2}_{y2}.npy"
                if not os.path.exists(tile_file):
                    np.save(tile_file, volume[y1:y2, x1:x2])
                tile_files.append(tile_file)
                xyxys.append((x1, y1, x2, y2))

    # Drop the full volume before the loader is built; tiles now live on disk.
    del volume
    gc.collect()
    xyxys = np.stack(xyxys)

    test_dataset = CustomDataset(
        tile_files, CFG, transform=get_transforms(data='valid', cfg=CFG))

    test_loader = DataLoader(
        test_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=CFG.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    return test_loader, xyxys

## model

In [12]:
from transformers import SegformerForSemanticSegmentation, SegformerModel, SegformerConfig

In [13]:
# Segformer settings for the "b1" variant: four blocks of depth 2, a
# 256-wide decode head, 32 input channels and a single output label.
cnn_3d_segformer_b1_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=256,
    depths=[2, 2, 2, 2],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=32,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    transformers_version="4.12.0.dev0",
    num_labels=1,
)

In [14]:
# Segformer settings for the "b2" variant: block depths 3/4/6/3, a
# 768-wide decode head, 32 input channels and a single output label.
cnn_3d_segformer_b2_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 4, 6, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=32,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    transformers_version="4.12.0.dev0",
    num_labels=1,
)

In [15]:
# Segformer settings for the "b4" variant: block depths 3/8/27/3, a
# 768-wide decode head, 32 input channels and a single output label.
cnn_3d_segformer_b4_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 8, 27, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=32,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    transformers_version="4.12.0.dev0",
    num_labels=1,
)

In [16]:
# Segformer settings for the "b5" variant: block depths 3/6/40/3, a
# 768-wide decode head, 32 input channels and a single output label.
cnn_3d_segformer_b5_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 6, 40, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=32,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    transformers_version="4.12.0.dev0",
    num_labels=1,
)

In [17]:
# Segformer settings paired with the plain 3D-CNN front end: block depths
# 3/4/18/3, 32 input channels (the width of the last Conv3d in
# cnn3d_segformer) and a single output label.
cnn_3d_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 4, 18, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    num_labels=1,
    num_channels=32,
)
# Same as the 3/4/18/3 configuration but with 64 input channels, matching
# the wider last Conv3d of cnn3d_segformer_more_filters.
cnn_3d_more_filters_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 4, 18, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    num_labels=1,
    num_channels=64,
)

# Segformer settings paired with the 3D U-Net front end: block depths
# 3/4/18/3, 16 input channels (the out_channels of the UNet3D in
# unet3d_segformer) and a single output label.
#
# The original dict literal listed "num_channels" twice (3, then 16). In a
# Python dict literal the last value wins, so the effective value was always
# 16; the dead first entry is removed here to avoid confusion.
unet_3d_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 4, 18, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=16,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    num_labels=1,
)

In [18]:
# Segformer settings for the "jumbo" models: block depths 3/6/40/3, a
# 768-wide decode head, 32 input channels and a single output label.
unet_3d_jumbo_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 6, 40, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=32,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    transformers_version="4.12.0.dev0",
    num_labels=1,
)

# Multi-class counterpart of the 3/6/40/3 configuration: identical except
# that the head predicts 3 labels (UNETR_SegformerMC keeps only the last one).
unetr_multiclass_config = SegformerConfig(
    architectures=["SegformerForImageClassification"],
    attention_probs_dropout_prob=0.0,
    classifier_dropout_prob=0.1,
    decoder_hidden_size=768,
    depths=[3, 6, 40, 3],
    downsampling_rates=[1, 4, 8, 16],
    drop_path_rate=0.1,
    hidden_act="gelu",
    hidden_dropout_prob=0.0,
    hidden_sizes=[64, 128, 320, 512],
    image_size=224,
    initializer_range=0.02,
    layer_norm_eps=1e-06,
    mlp_ratios=[4, 4, 4, 4],
    model_type="segformer",
    num_attention_heads=[1, 2, 5, 8],
    num_channels=32,
    num_encoder_blocks=4,
    patch_sizes=[7, 3, 3, 3],
    sr_ratios=[8, 4, 2, 1],
    strides=[4, 2, 2, 2],
    torch_dtype="float32",
    transformers_version="4.12.0.dev0",
    num_labels=3,
)

In [19]:
from unetr import UNETR
class UNETR_Segformer(nn.Module):
    """UNETR volume encoder followed by a 2D Segformer segmentation head.

    Args:
        cfg: object providing ``size`` (tile edge length) and
            ``segformer_config`` (a SegformerConfig).
        dropout: probability for the channel-wise dropout applied between
            the 3D and 2D stages.
    """

    def __init__(self, cfg, dropout = .2):
        super().__init__()
        self.cfg = cfg
        self.dropout = nn.Dropout2d(dropout)
        # 3D stage: one input channel in, 32 feature channels out, for
        # volumes of 16 slices of cfg.size x cfg.size.
        self.encoder = UNETR(
            in_channels=1,
            out_channels=32,
            img_size=(16, cfg.size, cfg.size),
            conv_block=True,
        )
        self.encoder_2d = SegformerForSemanticSegmentation(cfg.segformer_config)
        # Each transposed conv (kernel 4, stride 2, padding 1) doubles height
        # and width; presumably they undo the Segformer head's downsampling.
        self.upscaler1 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)
        self.upscaler2 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)

    def forward(self, image):
        """Return single-channel logits for ``image``.

        Assumes ``image`` is (batch, 1, depth, H, W) -- TODO confirm against caller.
        """
        volume_features = self.encoder(image)
        # Collapse axis 2 (depth, under the assumption above) with a max.
        planar = volume_features.max(dim=2).values
        planar = self.dropout(planar)
        logits = self.encoder_2d(planar).logits
        return self.upscaler2(self.upscaler1(logits))
class UNETR_SegformerMC(nn.Module):
    """Multi-class variant of the UNETR + Segformer model.

    The Segformer head and the upscalers work on 3 channels; ``forward``
    returns only the last of them. Unlike ``UNETR_Segformer``, the UNETR is
    built without ``conv_block=True`` (library default is used).

    Args:
        cfg: object providing ``size`` and ``segformer_config``.
        dropout: probability for the channel-wise dropout applied between
            the 3D and 2D stages.
    """

    def __init__(self, cfg, dropout = .2):
        super().__init__()
        self.cfg = cfg
        self.dropout = nn.Dropout2d(dropout)
        self.encoder = UNETR(
            in_channels=1,
            out_channels=32,
            img_size=(16, cfg.size, cfg.size),
        )
        self.encoder_2d = SegformerForSemanticSegmentation(cfg.segformer_config)
        # Each transposed conv (kernel 4, stride 2, padding 1) doubles height and width.
        self.upscaler1 = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=2, padding=1)
        self.upscaler2 = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=2, padding=1)

    def forward(self, image):
        """Return the logits of channel index 2, keeping the channel axis."""
        volume_features = self.encoder(image)
        planar = volume_features.max(dim=2).values
        planar = self.dropout(planar)
        logits = self.encoder_2d(planar).logits
        upscaled = self.upscaler2(self.upscaler1(logits))
        # Slice (not index) so the result still has a channel axis of size 1.
        return upscaled[:, 2:, :, :]
    
class cnn3d_segformer(nn.Module):
    """Four stacked 3D convolutions followed by a 2D Segformer head.

    The convolutions widen the channel count 1 -> 4 -> 8 -> 16 -> 32 with no
    activation in between and keep the spatial size (kernel 3, padding 1).

    Args:
        cfg: object providing ``segformer_config`` (a SegformerConfig).
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        self.conv3d_1 = nn.Conv3d(1, 4, kernel_size=3, stride=1, padding=1)
        self.conv3d_2 = nn.Conv3d(4, 8, kernel_size=3, stride=1, padding=1)
        self.conv3d_3 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=1)
        self.conv3d_4 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)

        self.xy_encoder_2d = SegformerForSemanticSegmentation(cfg.segformer_config)
        # Each transposed conv (kernel 4, stride 2, padding 1) doubles height and width.
        self.upscaler1 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)
        self.upscaler2 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)

    def forward(self, image):
        """Return single-channel logits for a (batch, 1, depth, H, W) input."""
        features = image
        for conv in (self.conv3d_1, self.conv3d_2, self.conv3d_3, self.conv3d_4):
            features = conv(features)
        # Max over axis 2 turns the 5D feature volume into a 4D feature map.
        planar = features.max(dim=2).values
        logits = self.xy_encoder_2d(planar).logits
        return self.upscaler2(self.upscaler1(logits))
    
class cnn3d_segformer_more_filters(nn.Module):
    """Variant of ``cnn3d_segformer`` whose last 3D convolution emits 64 channels.

    The convolutions widen the channel count 1 -> 4 -> 8 -> 16 -> 64 with no
    activation in between and keep the spatial size (kernel 3, padding 1).

    Args:
        cfg: object providing ``segformer_config`` (a SegformerConfig).
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        self.conv3d_1 = nn.Conv3d(1, 4, kernel_size=3, stride=1, padding=1)
        self.conv3d_2 = nn.Conv3d(4, 8, kernel_size=3, stride=1, padding=1)
        self.conv3d_3 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=1)
        self.conv3d_4 = nn.Conv3d(16, 64, kernel_size=3, stride=1, padding=1)

        self.xy_encoder_2d = SegformerForSemanticSegmentation(cfg.segformer_config)
        # Each transposed conv (kernel 4, stride 2, padding 1) doubles height and width.
        self.upscaler1 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)
        self.upscaler2 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)

    def forward(self, image):
        """Return single-channel logits for a (batch, 1, depth, H, W) input."""
        features = image
        for conv in (self.conv3d_1, self.conv3d_2, self.conv3d_3, self.conv3d_4):
            features = conv(features)
        # Max over axis 2 turns the 5D feature volume into a 4D feature map.
        planar = features.max(dim=2).values
        logits = self.xy_encoder_2d(planar).logits
        return self.upscaler2(self.upscaler1(logits))
    
class unet3d_segformer(nn.Module):
    """3D U-Net feature extractor (16 output channels) plus a 2D Segformer head.

    Args:
        cfg: object providing ``segformer_config`` (a SegformerConfig).
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        unet_spec = {
            "name": "UNet3D",
            "in_channels": 1,
            "out_channels": 16,
            "f_maps": 8,
            "num_groups": 4,
            "is_segmentation": False,
        }
        self.model = get_model(unet_spec)
        self.encoder_2d = SegformerForSemanticSegmentation(cfg.segformer_config)
        # Each transposed conv (kernel 4, stride 2, padding 1) doubles height and width.
        self.upscaler1 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)
        self.upscaler2 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)

    def forward(self, image):
        """Return single-channel logits; axis 2 of the U-Net output is max-pooled away."""
        planar = self.model(image).max(dim=2).values
        logits = self.encoder_2d(planar).logits
        return self.upscaler2(self.upscaler1(logits))
    
class unet3d_segformer_jumbo(nn.Module):
    """Variant of ``unet3d_segformer`` whose 3D U-Net emits 32 channels.

    Args:
        cfg: object providing ``segformer_config`` (a SegformerConfig).
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        unet_spec = {
            "name": "UNet3D",
            "in_channels": 1,
            "out_channels": 32,
            "f_maps": 8,
            "num_groups": 4,
            "is_segmentation": False,
        }
        self.model = get_model(unet_spec)
        self.encoder_2d = SegformerForSemanticSegmentation(cfg.segformer_config)
        # Each transposed conv (kernel 4, stride 2, padding 1) doubles height and width.
        self.upscaler1 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)
        self.upscaler2 = nn.ConvTranspose2d(1, 1, kernel_size=4, stride=2, padding=1)

    def forward(self, image):
        """Return single-channel logits; axis 2 of the U-Net output is max-pooled away."""
        planar = self.model(image).max(dim=2).values
        logits = self.encoder_2d(planar).logits
        return self.upscaler2(self.upscaler1(logits))
    
def build_model(cfg, model_arch = None):
    """Instantiate the network registered under ``model_arch``.

    Args:
        cfg: configuration object handed to the model constructor; its
            ``model_name`` is printed for logging.
        model_arch: one of ``"cnn3d"``, ``"cnn3d_more_filters"``,
            ``"unet3d"``, ``"unet3d_jumbo"``, ``"unetr"``, ``"unetr_mc"``.

    Returns:
        A freshly constructed (untrained) ``nn.Module``.

    Raises:
        ValueError: if ``model_arch`` is not a known architecture name
            (previously this surfaced as an UnboundLocalError on return).
    """
    print('model_name', cfg.model_name)
    # Exclusive branches: exactly one constructor runs, and unknown names fail loudly.
    if model_arch == "cnn3d":
        model = cnn3d_segformer(cfg)
    elif model_arch == "cnn3d_more_filters":
        model = cnn3d_segformer_more_filters(cfg)
    elif model_arch == "unet3d":
        model = unet3d_segformer(cfg)
    elif model_arch == "unet3d_jumbo":
        model = unet3d_segformer_jumbo(cfg)
    elif model_arch == "unetr":
        model = UNETR_Segformer(cfg)
    elif model_arch == "unetr_mc":
        model = UNETR_SegformerMC(cfg)
    else:
        raise ValueError(f"unknown model_arch: {model_arch!r}")

    return model

In [20]:
class EnsembleModel:
    """Average the predictions of several models, optionally with rotation TTA.

    Args:
        use_tta: when True every model is evaluated on the input and its three
            90-degree rotations, and the un-rotated outputs are averaged.
    """

    def __init__(self, use_tta=False):
        self.use_tta = use_tta
        self.models = []

    def tta_infer(self, model:nn.Module, x):
        """Run ``model`` on four 90-degree rotations of ``x`` and average.

        The reshape below keeps ``x.shape[0]`` and ``x.shape[3:]``, so this
        assumes ``x`` is (batch, 1, depth, H, W) with H == W and that ``model``
        returns a single-channel (batch, 1, H, W) map -- TODO confirm.

        Returns:
            Tensor of shape (batch, H, W).
        """
        in_shape = x.shape
        # k = 0 is the untouched input, k = 1..3 are successive quarter turns.
        views = [x] + [torch.rot90(x, k=k, dims=(-2, -1)) for k in range(1, 4)]
        preds = torch.cat([model(view) for view in views], dim=0)
        preds = preds.reshape(4, in_shape[0], *in_shape[3:])
        # Rotate each prediction back before averaging so pixels line up.
        restored = torch.stack(
            [torch.rot90(preds[k], k=-k, dims=(-2, -1)) for k in range(4)],
            dim=0,
        )
        return restored.mean(0)

    def __call__(self, x):
        """Return the mean prediction over all registered models as a numpy array."""
        if self.use_tta:
            predict = lambda net: self.tta_infer(net, x)
        else:
            # Without TTA the channel axis is averaged away instead.
            predict = lambda net: net(x).mean(axis=1)
        per_model = [predict(net).to('cpu').numpy() for net in self.models]
        return np.mean(per_model, axis=0)

    def add_model(self, model):
        """Register another model in the ensemble."""
        self.models.append(model)

def build_ensemble_model(model_path, model_arch):
    """Build a one-model TTA ensemble from a checkpoint on disk.

    Args:
        model_path: path to a state-dict checkpoint.
        model_arch: architecture key understood by ``build_model``.

    Returns:
        An ``EnsembleModel`` (TTA enabled) holding the loaded model in eval mode.
    """
    model = EnsembleModel(use_tta = True)
    _model = build_model(CFG, model_arch)
    _model.to(device)
    # map_location lets checkpoints saved on a GPU load on a CPU-only host.
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    state = torch.load(model_path, map_location=device)
    try:
        _model.load_state_dict(state)
    except RuntimeError:
        # load_state_dict raises RuntimeError on key mismatches. Retrying
        # through DataParallel accepts checkpoints whose keys carry the
        # "module." prefix; any other error propagates from the retry.
        _model = nn.DataParallel(_model)
        _model.load_state_dict(state)
    _model.eval()

    model.add_model(_model)

    return model

In [21]:
# In test mode every folder under the test split is a fragment to predict;
# otherwise only fragment 3 is processed.
fragment_ids = (
    sorted(os.listdir(CFG.comp_dataset_path + mode))
    if mode == 'test'
    else [3]
)

In [22]:
# Models that make up the final ensemble, in evaluation order.
# Each entry is a dict with:
#   model_arch       - architecture key passed to build_model
#   tile_size/size   - tiling and resize settings copied onto CFG for this model
#   batch_size       - inference batch size copied onto CFG for this model
#   weight_path      - checkpoint loaded by build_ensemble_model
#   segformer_config - SegformerConfig used to build the 2D head
#   score            - optional; presumably a recorded validation/leaderboard
#                      score, not read by the code visible here (TODO confirm)
# Entries sharing the same "size" are kept adjacent because the inference loop
# deletes the cached *.npy tiles whenever "size" changes between models.
# Commented-out entries are experiments that were left out of the ensemble.
model_tuples = [
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_1024_3dcnn_segformer_best.pth", "segformer_config": cnn_3d_config, "score": .75},
    {"model_arch": "unet3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
     "weight_path": "/kaggle/input/3d-unet/3d_unet_segformer_1024_3d_unet_segformer_final_all_train.pth", "segformer_config": unet_3d_config, "score":.78},
#     {"model_arch": "unet3d", "tile_size": 512, "size": 512, "batch_size": 4,
#      "weight_path": "/kaggle/input/3d-unet/3d_unet_segformer_512_3d_unet_segformer_final.pth", "segformer_config": unet_3d_config, "score":.77},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_1024_full_train_3dcnn_segformer_final.pth", "segformer_config": cnn_3d_config, "score":.76},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_1024_swa_slow_3dcnn_segformer_final_swa.pth", "segformer_config": cnn_3d_config, "score": .74},
#     {"model_arch": "unet3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/3dunet_segformer_1024_swa_slow_3dunet_segformer_final_swa.pth", "segformer_config": unet_3d_config, "score":.75},
    {"model_arch": "unet3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
     "weight_path": "/kaggle/input/3d-unet/3dunet_segformer_1024_swa_slow_all_train_3dunet_segformer_final.pth", "segformer_config": unet_3d_config, "score":.78},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_all_train_swa_3dcnn_segformer_10_final.pth", "segformer_config": cnn_3d_config, "score":.76},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_all_train_swa_3dcnn_segformer_15_final.pth", "segformer_config": cnn_3d_config, "score":.76},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_all_train_swa_3dcnn_segformer_20_final.pth", "segformer_config": cnn_3d_config, "score":.77},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_all_train_swa_3dcnn_segformer_25_final.pth", "segformer_config": cnn_3d_config},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 3,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_all_train_swa_3dcnn_segformer_final_swa.pth", "segformer_config": cnn_3d_config, "score":.78},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 5,
#      "weight_path": "/kaggle/input/3d-unet/b1_3dcnn_segformer_b1_final_swa.pth", "segformer_config": cnn_3d_segformer_b1_config, "score":.71},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 4,
#      "weight_path": "/kaggle/input/3d-unet/b2_3dcnn_segformer_b2_final_swa.pth", "segformer_config": cnn_3d_segformer_b2_config, "score":.68},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 2,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_segformer_b4_3dcnn_segformer_b4_final_swa.pth", "segformer_config": cnn_3d_segformer_b4_config, "score":.74},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_bigsegformer_3dcnn_bigsegformer_final.pth", "segformer_config": cnn_3d_segformer_b5_config, "score":.76},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 2,
#      "weight_path": "/kaggle/input/3d-unet/b5_long_train_all_frags_3dcnn_segformer_b5_final_swa.pth", "segformer_config": cnn_3d_segformer_b5_config, "score": .77},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/b5_long_train_all_frags_3dcnn_segformer_b5_10_final.pth", "segformer_config": cnn_3d_segformer_b5_config, "score": .74},
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/b5_long_train_all_frags_3dcnn_segformer_b5_30_final.pth", "segformer_config": cnn_3d_segformer_b5_config, "score":.76},
#     {"model_arch": "cnn3d_more_filters", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/b3_more_fmaps_3dcnn_segformerb364_final_swa.pth", "segformer_config": cnn_3d_more_filters_config, "score": .74},
    {"model_arch": "cnn3d_more_filters", "tile_size": 1024, "size": 1024, "batch_size": 2,
     "weight_path": "/kaggle/input/3d-unet/b3_more_fmaps_all_train_3dcnn_segformerb364_final_swa.pth", "segformer_config": cnn_3d_more_filters_config, "score":.78},
#     {"model_arch": "unet3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/3d_unet_3dunet_b3_final_swa.pth", "segformer_config": unet_3d_config, "score":.73},
#     {"model_arch": "unet3d", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/3d_unet_all_train_3dunet_b3_final_swa.pth", "segformer_config": unet_3d_config, "score":.76},
#     {"model_arch": "cnn3d", "tile_size": 512, "size": 512, "batch_size": 4,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_512_b2_all_train_3dcnn_b2_final_swa.pth", "segformer_config": cnn_3d_segformer_b2_config},
    # ran at wrong resolution. Scored .73
#     {"model_arch": "cnn3d", "tile_size": 1024, "size": 1024, "batch_size": 2,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_768_b4_adam_3dcnn_b4_final_swa.pth", "segformer_config": cnn_3d_segformer_b4_config, "score":.73},
#     {"model_arch": "cnn3d", "tile_size": 768, "size": 768, "batch_size": 2,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_768_b4_adam_3dcnn_b4_final_swa.pth", "segformer_config": cnn_3d_segformer_b4_config, "score":.74},
#     {"model_arch": "cnn3d", "tile_size": 768, "size": 768, "batch_size": 2,
#      "weight_path": "/kaggle/input/3d-unet/3dcnn_768_b4_adam_3dcnn_b4_final_swa_all_train.pth", "segformer_config": cnn_3d_segformer_b4_config, "score":.75},
    {"model_arch": "unet3d_jumbo", "tile_size": 1024, "size": 1024, "batch_size": 1,
     "weight_path": "/kaggle/input/3d-unet/Jumbo_Unet_Jumbo_Unet_69_final_swa_all_train.pth", "segformer_config": unet_3d_jumbo_config, "score":.79},
#     {"model_arch": "unet3d_jumbo", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/Jumbo_Unet_Jumbo_Unet_69_new_label_final_swa_all_train.pth", "segformer_config": unet_3d_jumbo_config, "score":.77},
#     {"model_arch": "unet3d_jumbo", "tile_size": 1024, "size": 1024, "batch_size": 1,
#      "weight_path": "/kaggle/input/3d-unet/Jumbo_Unet_Jumbo_Unet_5_final_swa_all_train.pth", "segformer_config": unet_3d_jumbo_config, "score": .69},
#     {"model_arch": "unetr", "tile_size": 512, "size": 512, "batch_size": 8,
#      "weight_path": "/kaggle/input/3d-unet/jumbo_unetr_unetr_1245_final_swa_all_train.pth", "segformer_config": unet_3d_jumbo_config},
#     {"model_arch": "unetr_mc", "tile_size": 512, "size": 512, "batch_size": 8,
#      "weight_path": "/kaggle/input/3d-unet/unetr_multiclass_512_b5_unet_final_4Ryan.pth", "segformer_config": unetr_multiclass_config, "score":.77},
    {"model_arch": "unetr", "tile_size": 512, "size": 512, "batch_size": 8,
     "weight_path": "/kaggle/input/3d-unet/jumbo_unetr_unetr_888_final_swa_all_train_long.pth", "segformer_config": unet_3d_jumbo_config, "score":.82},
    # This entry carries no "score" key.
    {"model_arch": "unetr_mc", "tile_size": 512, "size": 512, "batch_size": 8,
     "weight_path": "/kaggle/input/3d-unet/unetr_multiclass_NOVALIDATION_512_b5_unet_final_swa_all_train.pth", "segformer_config": unetr_multiclass_config},

]


## main

In [23]:
def post_process(probability, threshold, min_size = 20000):
    """
    Binarize a score map and discard small connected components.

    Args:
        probability: 2-D array of per-pixel scores; it is handed straight to
            ``cv2.threshold``.
        threshold: pixels strictly greater than this value become foreground.
        min_size: a connected component survives only when it contains
            strictly more than this many pixels.

    Returns:
        ``np.float32`` array with the same shape as ``probability``: 1.0 on
        pixels of surviving components, 0.0 everywhere else.
    """
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))

    # Count the pixels of every label in one pass instead of re-scanning the
    # whole image once per component (the label image can be very large).
    sizes = np.bincount(component.ravel(), minlength=num_component)

    # Per-label keep/drop lookup table; label 0 is the background and is
    # never part of the output, however large it is.
    keep = sizes > min_size
    keep[0] = False

    return keep[component].astype(np.float32)

In [24]:
import glob
import time

In [25]:
# Ensemble inference over every fragment. `results` collects one
# (fragment_id, rle(mask)) tuple per fragment for the submission cells below.
results = []
# Inference only runs when this environment variable is set to a non-empty
# value; otherwise `results` stays empty.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    for fragment_id in fragment_ids:
        # Running equal-weight average of the per-model sigmoid maps.
        mask_preds = None
        # Input size used by the previous model for this fragment.
        last_res = None
        for model_config in model_tuples:
            mask_pred = None
            mask_count = None
            # When the input size changes, delete every *.npy file in the
            # working directory.
            # NOTE(review): presumably these are size-specific tile caches
            # written by make_test_dataset -- confirm.
            if last_res != model_config["size"]:
                for file in glob.glob("*.npy"):
                    os.remove(file)
                last_res = model_config["size"]
            # The shared CFG object is overwritten for each model; the helpers
            # called below presumably read their settings from it.
            CFG.tile_size = model_config["tile_size"]
            CFG.size = model_config["size"]
            CFG.batch_size = model_config["batch_size"]
            # Stride is a quarter of the tile size.
            CFG.stride = CFG.tile_size // 4
            CFG.valid_aug_list = [
                A.Resize(CFG.size, CFG.size),
                A.Normalize(
                    mean= [0] * CFG.in_chans,
                    std= [1] * CFG.in_chans
                ),
                ToTensorV2(transpose_mask=True),
            ]
            CFG.segformer_config = model_config["segformer_config"]
            model = build_ensemble_model(model_config["weight_path"], model_config["model_arch"])
            # xyxys holds one (x1, y1, x2, y2) box per tile, indexed in the
            # same order the loader yields tiles (see the slicing below).
            test_loader, xyxys = make_test_dataset(fragment_id)

            # Fragment mask read as single-channel grayscale (flag 0); after
            # the integer cast only pixels equal to 255 become 1.
            binary_mask = cv2.imread(CFG.comp_dataset_path + f"{mode}/{fragment_id}/mask.png", 0)
            binary_mask = (binary_mask / 255).astype(int)

            ori_h = binary_mask.shape[0]
            ori_w = binary_mask.shape[1]
            # mask = mask / 255

            # Zero-pad bottom/right up to a multiple of the tile size. A
            # dimension that is already a multiple still gets one extra tile.
            # NOTE(review): presumably this mirrors the padding done inside
            # make_test_dataset so the xyxys boxes fit -- confirm.
            pad0 = (CFG.tile_size - binary_mask.shape[0] % CFG.tile_size)
            pad1 = (CFG.tile_size - binary_mask.shape[1] % CFG.tile_size)

            binary_mask = np.pad(binary_mask, [(0, pad0), (0, pad1)], constant_values=0)
            # Always true here: both accumulators were reset to None at the
            # top of this model iteration.
            if mask_pred is None:
                mask_pred = np.zeros(binary_mask.shape)
                mask_count = np.zeros(binary_mask.shape)

            for step, (images) in tqdm(enumerate(test_loader), total=len(test_loader)):
                images = images.to(device)
                # Actual size of this batch (the last one may be smaller than
                # CFG.batch_size).
                batch_size = images.size(0)
                with autocast():            
                    with torch.no_grad():
                        y_preds = model(images)

                # Boxes belonging to the tiles of this batch.
                start_idx = step*CFG.batch_size
                end_idx = start_idx + batch_size
                for i, (x1, y1, x2, y2) in enumerate(xyxys[start_idx:end_idx]):
                    # Sum overlapping tile outputs and count how many tiles
                    # touched each pixel.
                    # NOTE(review): assumes y_preds[i] can be added in place to
                    # a (tile_size, tile_size) NumPy slice -- confirm what the
                    # ensemble wrapper returns.
                    mask_pred[y1:y2, x1:x2] += y_preds[i]
                    mask_count[y1:y2, x1:x2] += np.ones((CFG.tile_size, CFG.tile_size))
            # Release the loader and the model before the next one is built.
            del test_loader
            del model
            gc.collect()
            torch.cuda.empty_cache()
            # Crop the padding away again.
            mask_pred = mask_pred[:ori_h, :ori_w]
            mask_count = mask_count[:ori_h, :ori_w]
            binary_mask = binary_mask[:ori_h, :ori_w]

            # A printed minimum of 0 means some pixel was covered by no tile;
            # the division below then yields NaN for that pixel.
            print(f'mask_count_min: {mask_count.min()}')
            mask_pred = mask_pred/mask_count
            # Sigmoid is applied after averaging the overlapping tiles, i.e.
            # the accumulated values are treated as logits (presumably what
            # the model outputs -- confirm).
            mask_pred = torch.sigmoid(torch.tensor(mask_pred)).numpy()
            # Every model contributes with weight 1 / len(model_tuples); the
            # "score" entry of the config dicts is not read in this cell.
            if mask_preds is None:
                mask_preds = mask_pred/len(model_tuples)
            else:
                mask_preds += mask_pred/len(model_tuples)

        # Binarize the ensemble average, zero everything outside the fragment
        # mask (binary_mask is the cropped mask left over from the last model
        # iteration), then drop small components with min_size=10000.
        mask_pred = (mask_preds >= TH).astype(int)
        mask_pred *= binary_mask
        # post_process thresholds the already-binary mask with TH again.
        # NOTE(review): a no-op as long as 0 <= TH < 1 -- confirm.
        mask_pred = post_process(mask_pred.astype(float), TH, 10000).astype(int)
        plt.imshow(mask_pred)
        inklabels_rle = rle(mask_pred)
        results.append((fragment_id, inklabels_rle))
        # Free the large arrays and remove leftover *.npy files before the
        # next fragment.
        del mask_pred, mask_count
        gc.collect()
        torch.cuda.empty_cache()
        for file in glob.glob("*.npy"):
            os.remove(file)
else:
    # Outside a competition rerun nothing is predicted.
    pass


## submission

In [26]:
# One row per processed fragment, built from the (fragment_id, rle(mask))
# tuples in `results`; the frame is empty when inference was skipped.
sub = pd.DataFrame(results, columns=['Id', 'Predicted'])

In [27]:
sub

Unnamed: 0,Id,Predicted


In [28]:
# Take the Ids (and their row order) from the sample submission and attach
# our predictions; Ids without a prediction keep NaN in `Predicted`.
sample_sub = pd.read_csv(CFG.comp_dataset_path + 'sample_submission.csv')[['Id']].merge(
    sub, on='Id', how='left'
)

In [29]:
sample_sub

Unnamed: 0,Id,Predicted
0,a,
1,b,


In [30]:
# Write the submission file into the working directory, without the
# DataFrame index column.
sample_sub.to_csv("submission.csv", index=False)