# Submission File

# Install Libraries

In [22]:
# Remove any requirements directory left in the working dir, then copy the
# offline package directory from the model dataset into /kaggle/working.
!rm -rf /kaggle/working/requirements
# -r copies recursively; -x keeps cp on the source file system.
!cp -r -x ../input/gi-tract-models/packages/requirements /kaggle/working
# Disabled: the same repack-and-remove step for cupy.
# !tar -czf /kaggle/working/requirements/cupy-10.4.0.tar.gz -C /kaggle/working/requirements/cupy-10.4.0/cupy-10.4.0 .
# !rm -rf /kaggle/working/requirements/cupy-10.4.0

# Pack the extracted efficientnet_pytorch sources into a .tar.gz next to them,
# then delete the extracted directory so only the archive remains.
# NOTE(review): presumably so pip's --find-links can use it as a source archive — confirm.
!tar -czf /kaggle/working/requirements/efficientnet_pytorch-0.6.3.tar.gz -C /kaggle/working/requirements/efficientnet_pytorch-0.6.3/efficientnet_pytorch-0.6.3 .
!rm -rf /kaggle/working/requirements/efficientnet_pytorch-0.6.3

# Same repack-and-remove step for pretrainedmodels.
!tar -czf /kaggle/working/requirements/pretrainedmodels-0.7.4.tar.gz -C /kaggle/working/requirements/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4 .
!rm -rf /kaggle/working/requirements/pretrainedmodels-0.7.4

In [26]:
# Install from local files only: --no-index disables the package index and
# --find-links points pip at the directory prepared in /kaggle/working/requirements.
!pip install --no-index --find-links=/kaggle/working/requirements segmentation-models-pytorch
!pip install --no-index --find-links=/kaggle/working/requirements einops

In [31]:
import sys
import cv2
import time
import torch
import logging
import numpy as np
import pandas as pd
import albumentations as A

from pathlib import Path
from tqdm.notebook import tqdm
from einops import repeat, rearrange
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [32]:
# Register the `progress_apply` method on pandas objects; the DataFrame
# row-wise applies later in the notebook rely on it.
tqdm.pandas()

# With no logging configuration the effective level is WARNING and there is no
# handler, so every logger.info(...) call in this notebook would be dropped
# silently. Install a default handler and enable INFO for this logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# IO

In [33]:
def load_image(image_path: Path) -> np.ndarray:
    """Read an image from disk and scale it so that its largest value is 1.0.

    Args:
        image_path: Location of the image file. It is read with
            ``cv2.IMREAD_UNCHANGED``, so the stored bit depth and channel
            layout are kept.

    Returns:
        A float32 NumPy array holding the pixel values divided by their
        maximum. An image whose maximum is 0 is returned unscaled (all zeros)
        instead of being turned into NaNs by a 0/0 division.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    img = cv2.imread(str(image_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising, so fail here with a message naming the path.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img = np.asarray(img, np.float32)
    peak = img.max()
    if peak > 0:
        img /= peak

    return img

# Load Pre-Trained Model

In [34]:
version = "v0"
logged_model = f"../input/gi-tract-models/{version}.pth"

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The checkpoint holds a complete model object (it is used directly below),
# not just a state dict.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source.
model = torch.load(logged_model, map_location=device).to(device)
model.eval()  # switch to inference mode

logger.info(f"Model Loaded {version}")

In [35]:
class SubmissionDataSet(Dataset):
    """Dataset yielding one image per path together with its id and original size."""

    def __init__(self, image_paths, transforms=None):
        """
        image_paths: indexable collection of path objects (``.stem`` and
            ``.parent`` are read from each entry)
        transforms: optional callable invoked as ``transforms(image=...)``
            that returns a mapping with an ``"image"`` entry
        """
        self.image_paths = image_paths
        self.transforms = transforms

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, index, height, width)`` for the image at ``idx``.

        ``index`` is the grandparent directory name joined by ``_`` with the
        first 10 characters of the file stem; ``height`` and ``width`` are the
        image dimensions before any transform is applied.
        """
        path = self.image_paths[idx]
        index = f"{path.parent.parent.name}_{path.stem[:10]}"

        image = load_image(path)
        height, width = image.shape  # size as loaded, before any transform

        if not self.transforms:
            return image, index, height, width

        image = self.transforms(image=image)["image"]
        return image, index, height, width

In [37]:
def get_metadata(row):
    """Split the ``id`` field of a row into integer ``case``, ``day`` and ``slice``.

    The id is split on ``_``: the first token carries the case number after a
    ``case`` prefix, the second the day number after a ``day`` prefix, and the
    last token is the slice number. The three values are written back onto
    ``row`` (which is modified in place) and ``row`` is returned.
    """
    tokens = row['id'].split('_')
    # Parse everything first so a malformed id leaves the row untouched.
    parsed = {
        'case': int(tokens[0].replace('case', '')),
        'day': int(tokens[1].replace('day', '')),
        'slice': int(tokens[-1]),
    }
    for key, value in parsed.items():
        row[key] = value
    return row

def path2info(row):
    """Extract slice, case, day and image size from the ``image_path`` of a row.

    The path is split on ``/``. The file name (last segment) is split on ``_``
    and supplies the slice number (field 1), width (field 2) and height
    (field 3). The directory two levels above the file (third segment from the
    end) is split on ``_`` and supplies the case and day numbers after their
    ``case`` / ``day`` prefixes. The values are written onto ``row`` in place
    and ``row`` is returned.
    """
    segments = row['image_path'].split('/')
    file_fields = segments[-1].split('_')

    slice_ = int(file_fields[1])
    case = int(segments[-3].split('_')[0].replace('case', ''))
    day = int(segments[-3].split('_')[1].replace('day', ''))
    width = int(file_fields[2])
    height = int(file_fields[3])

    parsed = (
        ('height', height),
        ('width', width),
        ('case', case),
        ('day', day),
        ('slice', slice_),
    )
    for key, value in parsed:
        row[key] = value
    return row

In [38]:
sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')

# An empty sample submission switches the notebook into debug mode, where the
# first 3000 rows of train.csv stand in for the submission ids.
# NOTE(review): presumably the sample file is only populated during scoring — confirm.
debug = len(sub_df) == 0
if debug:
    sub_df = (
        pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')
        .head(1000 * 3)
        .drop(columns=['class', 'segmentation'])
        .drop_duplicates()
    )
else:
    sub_df = sub_df.drop(columns=['class', 'predicted']).drop_duplicates()

# Add integer case / day / slice columns parsed from each id.
sub_df = sub_df.progress_apply(get_metadata, axis=1)

logger.info(f"Sub df length: {len(sub_df)}")
print(len(sub_df))
sub_df.head()

In [39]:
from glob import glob

# Debug runs read the training images, real runs read the test images; the
# two locations differ only in this one directory name.
split = 'train' if debug else 'test'
paths = glob(
    f'/kaggle/input/uw-madison-gi-tract-image-segmentation/{split}/**/*png',
    recursive=True,
)
# Fail here with a clear message: with no files, the frame built below never
# gets its case/day/slice columns and the later merge dies with a KeyError.
assert paths, f"No png files found under the '{split}' directory"

# One row per file, with slice/case/day/width/height parsed from the path.
path_df = pd.DataFrame(paths, columns=['image_path'])
path_df = path_df.progress_apply(path2info, axis=1)

logger.info(f"Path df length: {len(path_df)}")
print(len(path_df))
path_df.head()

In [40]:
# Attach the image path and size columns to each submission row. A left join
# keeps every row of sub_df even when no file matches its case/day/slice.
test_df = pd.merge(
    sub_df,
    path_df,
    how='left',
    on=['case', 'day', 'slice'],
)

logger.info(f"Test df length: {len(test_df)}")
print(len(test_df))
test_df.head()

In [41]:
# test_df comes from a left merge, so a submission row without a matching file
# is still present with a missing image_path. Check for that explicitly: the
# length assert below cannot catch it, and Path(nan) would fail with an
# unhelpful TypeError.
missing_paths = test_df["image_path"].isna()
assert not missing_paths.any(), (
    f"{int(missing_paths.sum())} submission rows have no matching image file"
)

image_paths = [Path(x) for x in test_df["image_path"].tolist()]
assert len(image_paths) > 0, "Could not find any images"

# Size the images are resized to before they reach the model
# (passed to A.Resize as height, width).
INPUT_SIZE = (224, 224)
transforms = A.Compose([
    # Pass the interpolation flag by keyword so it cannot be bound to a
    # different positional parameter.
    A.Resize(*INPUT_SIZE, interpolation=cv2.INTER_NEAREST)
])

test_ds = SubmissionDataSet(image_paths, transforms)

In [42]:
# Images per forward pass; the inference loop also uses it for progress logging.
batch_size = 20

# Sequential (unshuffled) loader over the test set that keeps the final
# partial batch.
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=batch_size,
    drop_last=False,
    num_workers=4,
    pin_memory=True,
)

logger.info("Loaded dataset...")

In [52]:
def running_length(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the runs as a space-separated string of "start length" pairs,
    where start is the 1-based position in the flattened array. An all-zero
    mask gives an empty string.
    '''
    # Pad with a zero on both sides so runs touching either end still produce
    # a rising and a falling edge.
    padded = np.concatenate([[0], img.flatten(), [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1

    # Even positions are run starts, odd positions are one past the run end.
    starts = edges[::2]
    lengths = edges[1::2] - starts

    pairs = np.column_stack((starts, lengths)).ravel()
    return ' '.join(map(str, pairs))



def convert_to_rle(mask: np.ndarray, id: str) -> pd.DataFrame:
    """Build the submission rows for one image.

    Args:
        mask: Array whose last axis holds the class channels in the order
            large_bowel, small_bowel, stomach.
        id: Identifier written into the ``id`` column of every row.

    Returns:
        A DataFrame with one row per class and the columns ``id``, ``class``
        and ``predicted``. ``predicted`` is the run-length string of that
        channel, or None when the channel has no runs.
    """
    class_names = ("large_bowel", "small_bowel", "stomach")  # channel 0, 1, 2

    records = [
        {
            "id": id,
            "class": class_name,
            # Column-major flattening walks each column top to bottom before
            # moving one column to the right.
            # NOTE(review): confirm this is the pixel order the competition's
            # RLE format expects.
            "predicted": running_length(mask[..., channel].flatten(order="F")) or None,
        }
        for channel, class_name in enumerate(class_names)
    ]
    return pd.DataFrame(records)


def batch_rle_encoding(masks, indices, heights, widths):
    """Resize each predicted mask to its original image size and RLE-encode it.

    Args:
        masks: Iterable of per-image mask arrays as accepted by ``cv2.resize``.
        indices: Iterable of ids, one per mask.
        heights: Iterable of original image heights, one per mask.
        widths: Iterable of original image widths, one per mask.

    Returns:
        A list with one DataFrame per mask, as produced by ``convert_to_rle``.
    """
    rles = []
    for mask, index, height, width in zip(masks, indices, heights, widths):
        # cv2.resize takes dsize as (width, height), so the resulting array has
        # shape (height, width[, channels]). The interpolation flag must be
        # given by keyword: the third positional parameter is `dst`, so a
        # positional flag is not treated as the interpolation mode.
        # int() turns NumPy integer scalars into plain Python ints for OpenCV.
        mask_resized = cv2.resize(
            mask,
            (int(width), int(height)),
            interpolation=cv2.INTER_NEAREST,
        )
        rles.append(convert_to_rle(mask_resized, index))

    return rles

In [53]:
thr = 0.5  # a class is predicted for a pixel when the model output exceeds this
start = time.time()
frames = []
processed = 0  # images handled so far, used for progress logging

# Inference only: without no_grad every forward pass records an autograd graph
# that is never used and only costs memory and time.
with torch.no_grad():
    for idx, (images, indices, heights, widths) in enumerate(tqdm(test_loader)):
        # The loader yields single-channel batches (b, h, w); replicate them
        # into three identical channels for the model.
        images = repeat(images, "b h w -> b c h w", c=3)
        images = images.to(device)
        # NOTE(review): thresholding at `thr` assumes the model already
        # returns probabilities rather than raw logits — confirm.
        probs = model(images)

        masks = (probs > thr).cpu().numpy()
        masks = masks.astype(np.uint8)
        # Channels-last layout, as expected by the resize / RLE helpers.
        masks = rearrange(masks, "b c h w -> b h w c")

        heights = heights.detach().cpu().numpy()
        widths = widths.detach().cpu().numpy()

        rle = batch_rle_encoding(masks, indices, heights, widths)
        frames.extend(rle)

        # Count the images actually seen; (idx + 1) * batch_size over-counts
        # when the final batch is smaller than batch_size.
        processed += len(indices)
        logger.info(f"Completed step: {processed}")

end = time.time()
logger.info(f"Total time taken to perform inference: {end - start}s")

In [54]:
# Stack the per-image frames into a single table with a fresh 0..n-1 index.
sub = pd.concat(frames, ignore_index=True)
sub.columns = ["id", "class", "predicted"]

# Write the submission file without the index column.
sub.to_csv("submission.csv", index=False)
logger.info("Saved submission...")
sub