In [None]:
import json
import os.path as osp
import sys
from shutil import copy

import cv2
import numpy as np
import pandas as pd
import rasterio as rio
from shapely.geometry import Polygon
from tqdm import tqdm

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

from matplotlib import pyplot as plt
%matplotlib inline

sys.path.append("../src")
from evaluate import dice
from mask_utils import rle_decode, rle_encode


# --- Inference filters -------------------------------------------------------
# A row is processed only when its `data_source` and `organ` are both listed here.
DATA_SOURCES = {"HPA"}
ORGANS = {"lung"}

# RLE value every row starts with; it is replaced only for rows that pass the filters.
DUMMY_RLE = ""
# Cut-off used to binarize the dilated mask before it is RLE-encoded.
THRESHOLD = 0.5


# --- Input locations ---------------------------------------------------------
df_file = "../input/hmib/train.csv"
images_dir = "../input/hmib/train_images/"
lung_filter_dir = "../input/hmib/lung_filter/"

In [None]:
# lung_df = df[df.organ == "lung"]
# for row in tqdm(lung_df.itertuples(), total=len(lung_df)):
#     name = f"{row.id}.tiff"
#     src = osp.join(images_dir, name)
#     dst = osp.join(lung_filter_dir, name)
#     copy(src, dst)

In [None]:
# from collections import Counter
# from sklearn.model_selection import train_test_split


# lung_df = df[df.organ == "lung"]

# train_indices, val_indices = train_test_split(lung_df.index.values, test_size=0.2, random_state=2022)
# lung_df.loc[train_indices, "split"] = "train"
# lung_df.loc[val_indices, "split"] = "val"
# lung_df[["id", "split"]].rename(columns={"id": "image_name"}).to_csv(osp.join(lung_filter_dir, "splits.csv"), index=False)

# # Counter(map(tuple, lung_df[["age", "sex"]].values.tolist()))

In [None]:
# from pathlib import Path

# DST_SIZE = (768, 768)

# for image_file in tqdm(Path(lung_filter_dir).glob("*.tiff")):
#     image = load_tiff(image_file).transpose((1, 2, 0))

#     anno_file = osp.splitext(image_file)[0] + ".json"
#     mask = load_abno(anno_file, image.shape[:2])
    
#     image_dst = cv2.resize(image, DST_SIZE, interpolation=cv2.INTER_CUBIC)
#     mask_dst = cv2.resize(mask, DST_SIZE, interpolation=cv2.INTER_CUBIC)
    
#     name = osp.splitext(osp.basename(image_file))[0]
    
#     image_name = f"{name}_image.png"
#     cv2.imwrite(osp.join(lung_filter_dir, image_name), image_dst)
    
#     mask_name = f"{name}_mask.png"
#     cv2.imwrite(osp.join(lung_filter_dir, mask_name), mask_dst)

In [None]:
def show(img, mask=None, mask_abno=None, title=None):
    """Plot ``img`` with up to two semi-transparent mask overlays.

    Args:
        img: Image passed straight to ``plt.imshow``.
        mask: Optional mask; drawn inverted (``1.0 - mask``) at alpha 0.3.
        mask_abno: Optional second mask; drawn inverted at alpha 0.15.
        title: Optional figure title.
    """
    plt.figure(figsize=(6, 6))
    plt.imshow(img)

    # Overlays are drawn in this order, each with its own opacity.
    for overlay, opacity in ((mask, 0.3), (mask_abno, 0.15)):
        if overlay is None:
            continue
        plt.imshow(1.0 - overlay, alpha=opacity)

    if title is not None:
        plt.title(title)
    plt.show()
    
    
def load_abno(anno_file, shape):
    """Rasterize the polygon annotations stored in ``anno_file`` into one mask.

    The file is read as JSON and iterated as a sequence of entries; for each
    entry only the ring at ``entry["geometry"]["coordinates"][0]`` is turned
    into a polygon, so any further rings in the entry are ignored.

    Args:
        anno_file: Path of the JSON annotation file.
        shape: Shape of the mask to produce; it is forwarded as ``out_shape``
            to the rasterizer (the caller passes ``image.shape[:2]``).

    Returns:
        * an all-zero float array of ``shape`` when ``anno_file`` does not exist;
        * ``None`` when the file exists but nothing was burned into the mask;
        * otherwise a float array of ``shape`` holding the pixel-wise maximum
          of the per-polygon rasters (i.e. the union of all polygons).
    """
    if not osp.exists(anno_file):
        return np.zeros(shape)

    # `import rasterio` alone does not guarantee that the `features` submodule
    # is loaded, so `rio.features` may raise AttributeError. Import it
    # explicitly, and lazily so the missing-file path stays dependency-free.
    from rasterio import features as rio_features

    with open(anno_file) as inpf:
        anno = json.load(inpf)

    anno_mask = np.zeros(shape)
    for raw_poly in anno:
        poly = Polygon(raw_poly["geometry"]["coordinates"][0])
        poly_mask = rio_features.rasterize([poly], out_shape=shape)
        anno_mask = np.maximum(anno_mask, poly_mask)

    # An annotation file that marks no pixel is reported as "no mask".
    if anno_mask.sum() == 0:
        return None

    return anno_mask


def load_tiff(p):
    """Read the raster file at ``p`` and return its pixel data.

    Args:
        p: Path of the raster file; it is converted with ``str`` before being
            opened, so path-like objects are accepted.

    Returns:
        Whatever the dataset's ``read()`` returns. The caller in this file
        transposes it with ``(1, 2, 0)``, i.e. treats it as channel-first.
    """
    # Use the dataset as a context manager so the file handle is closed as
    # soon as the pixels are in memory instead of being leaked.
    with rio.open(str(p)) as src:
        return src.read()


def grad(img, kernel=5):
    """Morphological gradient of the channel-averaged, max-normalized image.

    Args:
        img: Array with the channels on axis 2 (it is averaged over that axis).
        kernel: Side length of the square, all-ones structuring element.

    Returns:
        The result of ``cv2.MORPH_GRADIENT`` applied to ``img.mean(2) / img.max()``.
    """
    structuring_element = np.ones((kernel, kernel), np.uint8)
    # Collapse the channels and scale by the global maximum of the input.
    normalized = img.mean(2) / img.max()
    return cv2.morphologyEx(normalized, cv2.MORPH_GRADIENT, structuring_element)


def fill_holes(mask):
    """Fill contours found on the inverted mask, then invert the result back.

    Steps: bitwise-invert ``mask``, find contours on the inverted image with
    ``cv2.RETR_CCOMP``, paint every contour filled with value 255, and return
    the bitwise inverse of that painted image.

    Args:
        mask: Input mask.  # presumably 8-bit, single-channel, 0/255 — confirm with callers

    Returns:
        The processed mask, as returned by ``cv2.bitwise_not``.
    """
    inverted = cv2.bitwise_not(mask)
    contours, _hierarchy = cv2.findContours(
        inverted, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE
    )

    for idx in range(len(contours)):
        # thickness=-1 paints the contour's interior, not just its outline.
        cv2.drawContours(inverted, contours, idx, 255, -1)

    return cv2.bitwise_not(inverted)


def cvmask(name, min_area=40**2, max_area=220**2, bf_size=2, bf_min_count=3):
    """Build a mask of flat (low-gradient) regions of an image with classic CV ops.

    Pipeline: morphological gradient (13x13) -> closing (5x5, 3 iterations) ->
    5x5 box blur -> threshold ``< 0.1`` -> connected components -> keep the
    components that pass the area and border filters.

    Args:
        name: Path of the image, forwarded to ``load_tiff``.
        min_area: A component is kept only if its pixel area is strictly
            greater than this.
        max_area: A component is kept only if its pixel area is strictly
            smaller than this.
        bf_size: Width in pixels of the frame along each image edge that counts
            as "border". ``0`` disables the border filter.
        bf_min_count: A component is discarded when at least this many of its
            pixels fall inside the border frame.

    Returns:
        An array of shape ``(1, H, W)``. When at least one component is kept it
        is an integer array with 1 on kept components and 0 elsewhere;
        otherwise it is all zeros with the dtype of the loaded image.
    """
    image = load_tiff(name)

    # Gradient magnitude map, smoothed so that textured areas merge together.
    gx = grad(image.transpose(1, 2, 0), 13)
    gx = cv2.morphologyEx(gx, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=3)
    gx = cv2.blur(gx, (5, 5))

    # Label the connected low-gradient regions.
    _, labels, stats, _ = cv2.connectedComponentsWithStats((gx < .1).astype(np.uint8))
    areas = stats[:, cv2.CC_STAT_AREA]

    # keep[i] says whether the component with label i ends up in the output.
    keep = (areas > min_area) & (areas < max_area)
    # Label 0 is never part of the output (OpenCV uses it for the background,
    # here the pixels that failed the threshold).
    keep[0] = False

    # Discard components that touch the image border. The explicit guard is
    # needed because a slice such as `[-0:]` selects the whole axis, which
    # would turn every pixel into "border" for bf_size == 0.
    if bf_size > 0:
        border = np.zeros(labels.shape, dtype=bool)
        border[:bf_size] = True
        border[-bf_size:] = True
        border[:, :bf_size] = True
        border[:, -bf_size:] = True
        border_labels, border_counts = np.unique(labels[border], return_counts=True)
        keep[border_labels[border_counts >= bf_min_count]] = False

    if not keep.any():
        return np.zeros_like(image[0:1])

    # Per-label lookup: one pass over the label image instead of stacking one
    # full-size boolean mask per kept component.
    return keep[labels].astype(int)[None]

In [None]:
df = pd.read_csv(df_file)
result = []
dices = []  # (image id, dice value) pairs, one per processed row
for row in tqdm(df.itertuples(), total=len(df), desc="Inference"):
    rle = DUMMY_RLE
    organ = row.organ

    # Only rows matching the configured data sources and organs are processed.
    if row.data_source in DATA_SOURCES and organ in ORGANS:
        image_file = osp.join(images_dir, f"{row.id}.tiff")

        # Classic-CV mask, grown by three 5x5 dilations before thresholding.
        mask = cvmask(image_file)
        mask = cv2.dilate(mask[0].astype(float), kernel=np.ones((5, 5)), iterations=3)

        rle = rle_encode((mask > THRESHOLD).astype(np.uint8))

        # Round-trip through RLE so the score is computed on what would be submitted.
        decoded_mask = rle_decode(rle, mask.shape)
        decoded_gt = rle_decode(row.rle, mask.shape)

        img = cv2.imread(image_file)  #[1000:2000, 1000:2000]
        if img is None:
            raise FileNotFoundError(f"cv2.imread could not read {image_file}")
        # cv2.imread returns BGR, matplotlib expects RGB: convert for display.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        anno_file = osp.join(lung_filter_dir, f"{row.id}|MANUAL.json")
        # The helper defined in this notebook is `load_abno`.
        abno_mask = load_abno(anno_file, img.shape[:2])
        show(img, decoded_mask, abno_mask)

        dices.append((row.id, dice(decoded_gt, decoded_mask)))

#         plt.figure(figsize=(10, 5))
#         plt.hist(mask.flatten(), bins=100)
#         plt.grid()
#         plt.show()

In [None]:
# Per-image (id, score) pairs in ascending order of score.
sorted(dices, key=lambda pair: pair[1])

In [None]:
# Average score over every processed image.
np.mean([pair[1] for pair in dices])