# Submission File

In [16]:
import cv2
import torch
import numpy as np
import pandas as pd
import mlflow
import albumentations as A

from pathlib import Path
from tqdm.notebook import tqdm
from einops import repeat, rearrange
from torch.utils.data import DataLoader, Dataset

from eda.predict import predict_single_image
from utils.extract import convert_to_rle

# IO

In [6]:
def load_image(image_path: Path) -> np.ndarray:
    """Read an image from disk and scale it by its own maximum value.

    The file is read with ``cv2.IMREAD_UNCHANGED`` so the stored bit depth and
    channel layout are kept, then converted to ``float32`` and divided by the
    largest pixel value.

    Args:
        image_path: Location of the image file.

    Returns:
        A ``float32`` NumPy array. When the image contains a positive value
        the maximum of the result is 1.0; an all-zero image is returned
        unchanged (all zeros) instead of being turned into NaNs.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    img = cv2.imread(str(image_path), cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img = np.asarray(img, np.float32)
    peak = img.max()
    # Skip the division for blank images: 0 / 0 would fill the array with NaN.
    if peak > 0:
        img /= peak

    return img

# Load Pre-Trained Model

In [7]:
# MLflow URI of the logged PyTorch model to run inference with.
logged_model = "runs:/258389f622704193b00c80717d65b263/model"
# Set to True to force CPU execution even when CUDA is available.
debug = False
device = "cuda" if torch.cuda.is_available() and not debug else "cpu"
# Extra keyword arguments are forwarded by MLflow to torch.load; map_location
# lets a checkpoint saved from a CUDA device be restored on a CPU-only host.
model = mlflow.pytorch.load_model(logged_model, map_location=device)
model = model.to(device)
# Switch layers such as BatchNorm to inference behaviour.
model.eval()

Unet(
  (encoder): EfficientNetEncoder(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d((0, 1, 0, 1))
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d((1, 1, 1, 1))
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_project_conv): Conv2dStaticSamePadding

In [11]:
class SubmissionDataSet(Dataset):
    """Dataset over image files that also yields the metadata needed later.

    Each item is ``(image, index, height, width)`` where ``index`` is an
    identifier built from the file location and ``height``/``width`` are the
    dimensions of the image as loaded, i.e. before any transform is applied.
    """

    def __init__(self, image_paths, transforms=None):
        """Store the image paths and the optional transform callable.

        Args:
            image_paths: Indexable collection of ``pathlib.Path``-like objects.
            transforms: Optional callable invoked as ``transforms(image=...)``
                that returns a mapping with an ``"image"`` entry.
        """
        self.transforms = transforms
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one image and return it with its id and original size."""
        path = self.image_paths[idx]
        # Identifier: "<name of the grandparent directory>_<first 10 characters
        # of the file stem>".
        index = "_".join((path.parent.parent.name, path.stem[:10]))

        image = load_image(path)
        # The two-value unpacking requires a 2-D (single-channel) image.
        height, width = image.shape

        if self.transforms:
            image = self.transforms(image=image)["image"]

        return image, index, height, width

In [12]:
# Location of the competition data and of its test split.
DATA_DIR = Path("/kaggle/input/uw-madison-gi-tract-image-segmentation")
TEST_DIR = DATA_DIR / "test"
# Size (height, width) every image is resized to before it is fed to the model.
INPUT_SIZE = (224, 224)

# Prefer the competition test images; when none are found, fall back to a
# local copy so the notebook can be exercised outside of Kaggle.
image_paths = list(TEST_DIR.rglob("*.png")) or list(Path("raw_dataset").rglob("*.png"))

assert len(image_paths) > 0, "Could not find any images"

# Single-step pipeline: nearest-neighbour resize to INPUT_SIZE.
transforms = A.Compose([A.Resize(*INPUT_SIZE, interpolation=cv2.INTER_NEAREST)])

test_ds = SubmissionDataSet(image_paths=image_paths, transforms=transforms)

In [None]:

# Accumulates the per-image results of the inference loop; they are
# concatenated into the submission table afterwards.
frames = []

# Iterate over the whole test set in its stored order, 20 images per batch,
# keeping the final partial batch so that no image is skipped.
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=20,
    shuffle=False,
    drop_last=False,
    num_workers=4,
    pin_memory=True,
)

def batch_rle_encoding(masks, indices, heights, widths):
    """Resize each mask back to its source resolution and RLE-encode it.

    Args:
        masks: Iterable of per-image mask arrays accepted by ``cv2.resize``.
        indices: Iterable of identifiers, one per mask, passed through to
            ``convert_to_rle``.
        heights: Iterable of target heights in pixels, one per mask.
        widths: Iterable of target widths in pixels, one per mask.

    Returns:
        A list holding the ``convert_to_rle`` result for every mask, in input
        order.
    """
    rles = []
    for mask, index, height, width in zip(masks, indices, heights, widths):
        # cv2.resize expects dsize as (width, height). Its third positional
        # parameter is ``dst``, so the interpolation flag has to be passed by
        # keyword or it is silently ignored. The int() casts turn NumPy/torch
        # scalars into the plain Python ints OpenCV expects.
        mask_resized = cv2.resize(
            mask, (int(width), int(height)), interpolation=cv2.INTER_NEAREST
        )
        rles.append(convert_to_rle(mask_resized, index))

    return rles

# A pixel is marked as foreground when the model output exceeds this value.
# NOTE(review): this presumes the model emits probabilities; if it emits raw
# logits the equivalent cut-off would be 0 - confirm against the training setup.
thr = 0.5

# Gradients are never needed here; disabling autograd avoids building a graph
# for every batch and keeps memory usage flat.
with torch.no_grad():
    for images, indices, heights, widths in tqdm(test_loader):
        # Batches arrive as (b, h, w); replicate the single channel three times
        # to match the 3-channel input of the encoder.
        images = repeat(images, "b h w -> b c h w", c=3)
        images = images.to(device)
        probs = model(images)

        masks = (probs > thr).cpu().numpy()
        masks = masks.astype(np.uint8)
        # Channels-last layout for the OpenCV resize done during encoding.
        masks = rearrange(masks, "b c h w -> b h w c")

        heights = heights.detach().cpu().numpy()
        widths = widths.detach().cpu().numpy()

        rle = batch_rle_encoding(masks, indices, heights, widths)
        frames.extend(rle)

  0%|          | 0/1925 [00:00<?, ?it/s]

In [7]:
# Stack the per-image results into one table with a fresh 0..n-1 row index,
# write it out without that index, and display it.
sub = pd.concat(frames, ignore_index=True)
sub.to_csv("submission.csv", index=False)
sub

Unnamed: 0,id,class,predicted
0,case131_day19_slice_0060,large_bowel,71 23 214 31
1,case131_day19_slice_0060,small_bowel,71 23 214 31
2,case131_day19_slice_0060,stomach,
3,case131_day19_slice_0061,large_bowel,
4,case131_day19_slice_0061,small_bowel,
...,...,...,...
2998,case144_day0_slice_0074,small_bowel,0 37
2999,case144_day0_slice_0074,stomach,0 33
3000,case144_day0_slice_0036,large_bowel,30 16 80 52 189 76
3001,case144_day0_slice_0036,small_bowel,30 16 80 52 189 76
