In [1]:
# Move up one directory; the path printed below shows this lands in the project folder,
# which the relative paths in later cells (e.g. "data/kaggle/...") presumably rely on.
# NOTE(review): not idempotent — re-running this cell climbs one more level each time.
%cd ..

/mnt/d/Coding/segmentation


In [2]:
from runner import SegmentationModel
from utils import *

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mbinhnd-cse[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [3]:
# Resolve the checkpoint relative to the working directory set by `%cd ..` (the project root)
# instead of a machine-specific absolute path, matching the relative data paths used later.
CHECKPOINT_PATH = "checkpoints/epoch=24-step=2850.ckpt"
model = SegmentationModel.load_from_checkpoint(CHECKPOINT_PATH)

# Put the network in evaluation mode (disables dropout and other training-only behaviour).
# Kept as the last expression so the cell still displays the module summary.
model.eval()

SegmentationModel(
  (model): AttentionUNet(
    (MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Conv1): ConvBlock(
      (conv): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
      )
    )
    (Conv2): ConvBlock(
      (conv): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, af

In [4]:
import pandas as pd


# Read the ground-truth table, then keep only the rows whose group is "kidney_3_dense".
gt_df = pd.read_csv("data/kaggle/subset_gt.csv")
val_df = gt_df.loc[gt_df["group"].eq("kidney_3_dense")]

In [5]:
# The "files" are simply the positional indices 0..N-1 of the validation rows; they are
# later handed to `h5_image_process.preprocess_image_val`. The ids come from the table.
image_files = [*range(len(val_df))]
image_ids = val_df["id"].values

In [6]:
# Display the recorded original size of every validation slice.
val_df.loc[:, ["height", "width"]]

Unnamed: 0,height,width
2279,1706,1510
2280,1706,1510
2281,1706,1510
2282,1706,1510
2283,1706,1510
...,...,...
2775,1706,1510
2776,1706,1510
2777,1706,1510
2778,1706,1510


In [7]:
import cv2
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import InterpolationMode, Resize

from data import H5ImageProcess, subset_preprocess_mask


# Module-level H5ImageProcess instance bound to the kidney_3_dense HDF5 file. The dataset
# classes in this notebook call its `preprocess_image_val` method from `__getitem__`.
h5_image_process = H5ImageProcess("data/kaggle/kidney_3_dense.hdf5")



class TestDataset(Dataset):
    """Inference dataset yielding resized images plus the size predictions are restored to.

    Args:
        image_ids: Sequence of sample identifiers, aligned index-by-index with ``image_files``.
        image_files: Sequence of keys handed to ``h5_image_process.preprocess_image_val``
            (the notebook passes positional indices).
        input_size: ``(height, width)`` every image is resized to before being returned.
            Defaults to the previously hard-coded ``(256, 256)``.
        original_size: ``(height, width)`` reported for every sample so callers can resize
            predictions back. Defaults to the previously hard-coded ``(1706, 1510)``.
    """

    def __init__(self, image_ids, image_files, input_size=(256, 256), original_size=(1706, 1510)):
        self.image_ids = image_ids
        self.image_files = image_files
        self.original_size = tuple(original_size)
        # torchvision expects its own InterpolationMode enum. The cv2 flag used before only
        # worked because cv2.INTER_NEAREST shares the integer value 0 with the legacy
        # "nearest" code torchvision still accepts.
        self.resize_fn = Resize(tuple(input_size), interpolation=InterpolationMode.NEAREST)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image_id, resized_image, original_height, original_width)`` for ``idx``."""
        # Relies on the module-level `h5_image_process` instance defined in this notebook.
        image = h5_image_process.preprocess_image_val(self.image_files[idx])
        h, w = self.original_size
        return self.image_ids[idx], self.resize_fn(image), h, w

In [8]:
# Wrap the validation ids/indices in the inference dataset and batch them in order.
test_dataset = TestDataset(image_ids, image_files)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=8)

In [9]:
# Pull a single batch to sanity-check what the loader yields. `next(iter(...))` states the
# intent directly and raises on an empty loader instead of silently leaving the names unbound.
image_id, image, hs, ws = next(iter(test_dataloader))

In [10]:
import numpy as np

def rle_encode(img):
    """Run-length encode a binary mask.

    Args:
        img: numpy array where 1 marks the mask and 0 the background.

    Returns:
        A string of space-separated "start length" pairs, with 1-based starts into the
        flattened array. A mask with no foreground pixels is encoded as '1 0'.
    """
    # Pad both ends with zeros so runs touching the borders still produce a transition.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value flips: even slots are run starts, odd slots run ends.
    edges = np.where(padded[1:] != padded[:-1])[0] + 1
    # Convert every (start, end) pair into (start, length).
    edges[1::2] = edges[1::2] - edges[::2]
    encoded = ' '.join(map(str, edges))
    return encoded if encoded != '' else '1 0'


def remove_small_objects(mask, min_size):
    """Zero out connected foreground components smaller than ``min_size`` pixels.

    Args:
        mask: 2-D binary mask; the notebook passes a uint8 array, which is what
            ``cv2.connectedComponentsWithStats`` is given here.
        min_size: Minimum component area (in pixels) for a component to be kept.

    Returns:
        An array with the same shape and dtype as ``mask`` holding 1 on every pixel of a
        kept component and 0 everywhere else.
    """
    # Label 0 is the background; labels >= 1 are the 8-connected foreground components.
    _, label, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    # One keep/drop flag per label, applied to all pixels in a single lookup instead of
    # re-scanning the whole label image once per component.
    keep = stats[:, cv2.CC_STAT_AREA] >= min_size
    keep[0] = False  # the background is never part of the output, whatever its area
    return keep[label].astype(mask.dtype)

In [11]:
import torch
import torch.nn as nn
import numpy as np


# Predicted ids are collected under their own name so the `image_ids` array that feeds
# `TestDataset` is not overwritten when this cell runs (re-running earlier cells stays safe).
pred_ids = []
rles = []
device = model.device

for image_id, image, hs, ws in test_dataloader:
    pred_ids.extend(image_id)
    with torch.no_grad():
        logits = model.model(image.to(device))
        # Threshold the sigmoid probabilities at 0.5 to get one binary mask per sample.
        preds = (torch.sigmoid(logits) > 0.5).double().cpu()
    for pred, h, w in zip(preds, hs, ws):
        # The batched heights/widths arrive as tensors from the DataLoader's default collate;
        # hand Resize plain Python ints, and torchvision's own interpolation enum.
        reverse_resize = Resize((int(h), int(w)), interpolation=InterpolationMode.NEAREST)
        full_pred = reverse_resize(pred).numpy().squeeze().astype(np.uint8)
        # Drop connected components smaller than 10 pixels before encoding.
        rles.append(rle_encode(remove_small_objects(full_pred, 10)))

# The variable keeps its original spelling so any existing reference to it still works.
submision_df = pd.DataFrame({"id": pred_ids, "rle": rles})

In [12]:
from dataclasses import dataclass, field, asdict
from typing import List

@dataclass
class Groundtruth:
    """Column-wise accumulator for ground-truth rows.

    Each field is a list that grows by one entry per sample; the notebook turns the
    instance into a DataFrame through ``asdict``.
    """

    id: List[str] = field(default_factory=list)
    rle: List[str] = field(default_factory=list)
    group: List[str] = field(default_factory=list)
    slice: List[int] = field(default_factory=list)
    height: List[int] = field(default_factory=list)
    width: List[int] = field(default_factory=list)

    def convert(self, tensor):
        """Return the elements of ``tensor`` as a list of Python ints."""
        return list(map(int, tensor))

    def update(self, ids, rles, groups, slices, heights, widths):
        """Append one batch to every column; the numeric columns are coerced to ints."""
        for column, values in (("id", ids), ("rle", rles), ("group", groups)):
            getattr(self, column).extend(values)
        for column, values in (("slice", slices), ("height", heights), ("width", widths)):
            getattr(self, column).extend(self.convert(values))
        
        
@dataclass
class Submission:
    """Column-wise accumulator for predicted ids and their RLE strings."""

    id: List[str] = field(default_factory=list)
    rle: List[str] = field(default_factory=list)

    def convert(self, tensor):
        """Return the elements of ``tensor`` as a list of Python ints (not used by ``update``)."""
        return list(map(int, tensor))

    def update(self, ids, rles):
        """Append a batch of ids and the matching RLE strings."""
        for column, values in (("id", ids), ("rle", rles)):
            getattr(self, column).extend(values)

In [13]:
# Shared 256x256 nearest-neighbour resize used by `ValDataset`. torchvision expects its own
# InterpolationMode enum; the cv2 flag used before only worked because both constants equal 0.
resize_fn = Resize((256, 256), interpolation=InterpolationMode.NEAREST)


def subset_preprocess_mask(lre, H=1303, W=912):
    """Decode a run-length-encoded mask and wrap it in a ``torch.Tensor``.

    Note: defining this here replaces the ``subset_preprocess_mask`` imported from ``data``
    earlier in the notebook.

    Args:
        lre: Run-length-encoded mask string, passed unchanged to ``rle_decode``.
        H: Height handed to ``rle_decode`` as the first element of the shape.
        W: Width handed to ``rle_decode`` as the second element of the shape.

    Returns:
        ``torch.Tensor`` built from whatever ``rle_decode`` returns. ``rle_decode`` is not
        defined in this notebook — presumably it comes from ``from utils import *``.
    """
    return torch.Tensor(rle_decode(lre, (H, W)))


class ValDataset(Dataset):
    """Validation dataset pairing each resized image with its decoded ground-truth mask.

    Args:
        df: DataFrame with ``rle``, ``group`` and ``slice`` columns. Row position ``i`` is
            used as the key handed to ``h5_image_process.preprocess_image_val``.
        mask_shape: ``(height, width)`` used to decode every RLE mask. Defaults to the
            ``(1706, 1510)`` size that was previously hard-coded in ``__getitem__``.
    """

    def __init__(self, df, mask_shape=(1706, 1510)):
        # Despite the attribute names, these hold positional indices and RLE strings, not paths.
        self.image_paths = list(range(len(df)))
        self.label_paths = df["rle"].values
        self.group_names = df["group"].values
        self.slide_ids = df["slice"].values
        self.mask_shape = tuple(mask_shape)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(sample_id, image, mask, group_name, slice_id, height, width)`` for ``idx``."""
        group_name = self.group_names[idx]
        slice_id = self.slide_ids[idx]
        sample_id = f"{group_name}_{slice_id}"

        # Relies on the module-level `h5_image_process`, `subset_preprocess_mask` and `resize_fn`.
        image = h5_image_process.preprocess_image_val(self.image_paths[idx])
        # Size is read before resizing; assumes a channel-first (C, H, W) image — TODO confirm.
        height, width = image.shape[1], image.shape[2]

        mask = subset_preprocess_mask(self.label_paths[idx], *self.mask_shape)
        image = resize_fn(image)
        return sample_id, image, mask, group_name, slice_id, height, width


# Build the validation dataset from the filtered table and batch it in order.
val_dataset = ValDataset(df=val_df)
val_dataloader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=16)

In [14]:
from tqdm import tqdm
gt_dicts = Groundtruth()
sub_dicts = Submission()
# Resolved here so this cell does not depend on the earlier inference cell having been run.
device = model.device
# Every prediction is restored to the same full resolution, so the transform is built once
# (with torchvision's own interpolation enum) instead of once per sample.
FULL_HEIGHT, FULL_WIDTH = 1706, 1510
reverse_resize = Resize((FULL_HEIGHT, FULL_WIDTH), interpolation=InterpolationMode.NEAREST)

for batch in tqdm(val_dataloader):
    sample_id, image, mask, group_name, slide_id, height, width = batch
    with torch.no_grad():
        logits = model.model(image.to(device))
        # Threshold the sigmoid probabilities at 0.5 to get one binary mask per sample.
        preds = (torch.sigmoid(logits) > 0.5).double().cpu()

    rles = []
    for pred in preds:
        full_pred = reverse_resize(pred).numpy().squeeze().astype(np.uint8)
        # Drop connected components smaller than 10 pixels before encoding.
        rles.append(rle_encode(remove_small_objects(full_pred, 10)))
    # Re-encode the decoded ground-truth masks so both sides use the same RLE convention.
    gt_rles = [rle_encode(m.numpy().astype("uint8")) for m in mask]

    # The fixed full resolution (not the per-sample shape returned by the dataset) is what
    # gets recorded next to the ground truth, as before.
    gt_dicts.update(
        sample_id,
        gt_rles,
        group_name,
        slide_id,
        [FULL_HEIGHT] * len(height),
        [FULL_WIDTH] * len(width),
    )
    sub_dicts.update(sample_id, rles)


100%|██████████| 32/32 [00:21<00:00,  1.47it/s]


In [15]:
# Materialise both accumulators as DataFrames; the ground truth is ordered by slice number.
# NOTE(review): this rebinds `gt_df`, which earlier held the table read from subset_gt.csv.
sub_df = pd.DataFrame(asdict(sub_dicts))
gt_df = pd.DataFrame(asdict(gt_dicts)).sort_values(by="slice")

In [16]:
# Show the predicted RLE string for one sample, selected by id.
sub_df.loc[sub_df["id"] == "kidney_3_dense_100", "rle"].values[0]

'393221 6 394731 6 396241 6 397751 6 399261 6 400771 6 402281 6 474773 11 476283 11 477793 11 479303 11 480813 11 482323 11 483833 6 485343 6 486853 6 488363 6 489873 6 491383 6 492893 6 494403 6 495913 6 497423 6 498933 6 500443 6 501953 6 503463 6 504973 11 506483 11 507993 11 509503 11 511013 11 512523 11 514027 17 515537 17 517047 17 518557 17 520067 17 521577 17 523087 17 524603 6 526113 6 527623 6 529133 6 530643 6 532153 6 533663 6 867738 12 869248 12 870758 12 872268 12 873778 12 875288 12 1037926 6 1039436 6 1040946 6 1042456 6 1043966 6 1045476 6 1046986 6 1048490 18 1050000 18 1051510 18 1053020 18 1054530 18 1056040 18 1057544 18 1059054 18 1060564 18 1062074 18 1063584 18 1065094 18 1066604 18 1068120 6 1069630 6 1071140 6 1072650 6 1074160 6 1075670 6 1077180 6 1128650 18 1130160 18 1131670 18 1133180 18 1134690 18 1136200 18 1137710 18 1139214 24 1140724 24 1142234 24 1143744 24 1145254 24 1146764 24 1148274 18 1149784 18 1151294 18 1152804 18 1154314 18 1155824 18 11573

In [17]:
# NOTE(review): machine-specific absolute path — this cell only runs where that file exists.
# In this notebook `gt_df_real` is referenced only by a commented-out assignment further down.
gt_df_real = pd.read_csv("/mnt/c/Users/binhn/Downloads/gt_df.csv")


In [18]:
from PIL import Image

# Render prediction and ground truth for the *same* sample: both rows are selected by id.
# The previous `gt_df.iloc[100]` was positional after the sort by slice, so it only matched
# "kidney_3_dense_100" if the slice numbers start at 0 with no gaps.
inspect_id = "kidney_3_dense_100"
inspect_shape = (1706, 1510)
sub_rle = sub_df.loc[sub_df["id"] == inspect_id, "rle"].values[0]
gt_rle = gt_df.loc[gt_df["id"] == inspect_id, "rle"].values[0]
# Scale the decoded masks by 255 so foreground pixels are visible in the image.
sub_image = Image.fromarray(rle_decode(sub_rle, shape=inspect_shape) * 255)
gt_image = Image.fromarray(rle_decode(gt_rle, shape=inspect_shape) * 255)

In [1]:
# gt_df["rle"] = gt_df_real["rle"]

In [20]:
# Evaluate the predictions in `sub_df` against `gt_df`.
# NOTE(review): `score` is not defined in this notebook — presumably it comes from
# `from utils import *`. The positional "id"/"rle" arguments look like the row-id and
# RLE column names; confirm against its signature.
score(sub_df, gt_df, "id", "rle", image_id_column_name="group", slice_id_column_name="slice")

: 