**Goals**

The goal of this competition is to identify the location of each functional tissue unit (FTU) in biopsy slides from several different organs (prostate, lungs, kidney, spleen...). The underlying data includes imagery from different sources, prepared with different protocols at a variety of resolutions, reflecting typical challenges of working with medical data.


1. [Exploratory Data Analysis](#Exploratory-Data-Analysis)
2. [Image and Mask Visualization](#Image-and-Mask-Visualization)
3. [Modelling & Inference](#Modelling-&-Inference)

In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import cv2
import os, random
import tifffile as tiff 
from tqdm.notebook import tqdm

# Exploratory Data Analysis

**Train Data**

In [2]:
df = pd.read_csv('../input/hubmap-organ-segmentation/train.csv')
df.head()

In [3]:
df.info()

In [4]:
df.isna().sum()

In [5]:
df['organ'].value_counts()

In [6]:
plt.figure(figsize=(15, 6))
sns.countplot(data=df, x="organ",alpha = 0.4).set_title("Organ Counts")

In [7]:
df['sex'].value_counts()

In [8]:
plt.figure(figsize=(15, 6))
sns.histplot(data=df, x="age",alpha = 0.3).set_title("Age")

In [9]:
plt.figure(figsize=(15, 6))
sns.countplot(data=df, x="age",hue = "sex",alpha = 0.3).set_title("Age")

In [10]:
plt.figure(figsize=(15, 6))
sns.histplot(data=df, x="sex",alpha = 0.3).set_title("Sex")

**Test Data**

In [11]:
test = pd.read_csv('../input/hubmap-organ-segmentation/test.csv')
test.head()

**sample_submission**

In [12]:
submision_sample = pd.read_csv('../input/hubmap-organ-segmentation/sample_submission.csv')
submision_sample.head()

# Image and Mask Visualization

**Train Images**

In [13]:
# https://www.kaggle.com/paulorzp/rle-functions-run-length-encode-decode
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of alternating
    1-based start positions and run lengths. The mask is transposed
    before flattening, so positions are counted down the columns.
    '''
    # Zero sentinels on both ends so runs touching an edge still yield a transition.
    padded = np.concatenate([[0], img.T.flatten(), [0]])
    # 1-based positions where the value flips: even slots open a run, odd slots close it.
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every closing position into the length of its run.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))
 
def rle2mask(mask_rle, shape=(1600,256)):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: run-length as formatted string ("start length start length ...",
              starts are 1-based)
    shape: (width,height) of array to return
    Returns numpy uint8 array, 1 - mask, 0 - background; the result is the
    transpose of an array of the given shape, i.e. (shape[1], shape[0]).
    '''
    tokens = mask_rle.split()
    # Even tokens are run starts, odd tokens are run lengths.
    begin = np.asarray(tokens[0::2], dtype=int) - 1  # shift to 0-based
    stop = begin + np.asarray(tokens[1::2], dtype=int)
    flat = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for first, last in zip(begin, stop):
        flat[first:last] = 1
    # Reshape as (width, height) and transpose to undo the column-wise ordering.
    return flat.reshape(shape).T


In [14]:
# For every organ, show up to four training images with their decoded masks overlaid.
organs = df['organ'].unique()
for organ in organs:
    df_organ = df.loc[df['organ'] == organ].reset_index(drop=True)
    plt.figure(figsize=(16,4))
    # Cap at the rows available so an organ with fewer than four samples
    # does not raise a KeyError on the positional lookup below.
    for i in range(min(4, len(df_organ))):
        img = tiff.imread("../input/hubmap-organ-segmentation/train_images/" + str(df_organ['id'][i]) + '.tiff')
        # rle2mask takes (width, height); the image array is indexed (rows, cols, ...).
        mask = rle2mask(df_organ['rle'][i], (img.shape[1], img.shape[0]))
        plt.subplot(1, 4, i+1)
        plt.imshow(img)
        plt.imshow(mask, cmap='seismic', alpha=0.5)
        plt.axis("off")
    plt.suptitle(organ, fontsize=20)
    plt.tight_layout()

**Target Image**

In [15]:
# Load the test image with OpenCV and display it.
img = cv2.imread('../input/hubmap-organ-segmentation/test_images/10078.tiff')
plt.figure(figsize=(9,9))
# The image is converted from BGR to RGB channel order before plotting.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)); plt.axis("off");

# Modelling & Inference

In [16]:
from fastai.vision.all import *
from fastai.callback.all import *
from fastai.basics import *
import os

In [17]:
train = Path("../input/hubmap-2022-256x256/train")
label = Path("../input/hubmap-2022-256x256/masks")
len(train.ls()), len(label.ls())

In [18]:
def get_y(x):
    '''Return the path under `label` that has the same file name as image path `x`.'''
    mask_name = x.name
    return label / mask_name

In [19]:
# Build the segmentation data loaders: every image file found under `train`
# is paired with the mask path returned by `get_y`.
train_files = get_image_files(train)
dls = SegmentationDataLoaders.from_label_func(
    train,
    fnames=train_files,
    label_func=get_y,
    valid_pct=0.2,
    seed=None,  # NOTE(review): no seed is set, so the validation split is presumably not reproducible
    codes=['Bkgd', 'Cell'],
    item_tfms=None,
    batch_tfms=None,
    bs=6,
)

In [20]:
dls.show_batch(max_n=20)

In [21]:
# Create a U-Net learner on `dls` with a resnet34 backbone, tracking the Dice metric,
# then run fastai's fine_tune schedule with an argument of 6.
learn = unet_learner(dls, resnet34 , metrics=Dice())
learn.fine_tune(6)

In [22]:
learn.model_dir='/kaggle/working/'
learn.save('Model1')

In [64]:
# Predict a mask for every id listed in the sample submission and write submission.csv.
# The original cell assigned into `subm`, which was never created (NameError), and
# reused the single RLE predicted for image 10078 for every row.
subm1 = pd.read_csv('../input/hubmap-organ-segmentation/sample_submission.csv')
rles = []
for image_id in subm1['id']:
    pred, _, prob = learn.predict("../input/hubmap-organ-segmentation/test_images/" + str(image_id) + ".tiff")
    # mask2rle expects a numpy array; `pred` is presumably a 2-D tensor of class indices — TODO confirm.
    rle = mask2rle(pred.cpu().numpy().astype(np.uint8))
    rles.append(rle)
subm = pd.DataFrame({'id': subm1['id'], 'rle': rles})
subm.to_csv('submission.csv',index=False)

In [47]:
# Decode the first submitted RLE back into a mask and compare it with the test image.
img_1 = tiff.imread('../input/hubmap-organ-segmentation/test_images/10078.tiff')
# rle2mask takes (width, height), hence the swapped shape indices.
mask_1 = rle2mask(subm["rle"][0], (img_1.shape[1], img_1.shape[0]))

plt.figure(figsize=(15,15))
# Left panel: the image alone.
plt.subplot(1,2,1)
plt.imshow(img_1)

# Right panel: the image with the decoded mask drawn on top at 50% opacity.
plt.subplot(1,2,2)
plt.imshow(img_1)
plt.imshow(mask_1, cmap='coolwarm', alpha=0.5)
# Only the current (right) axes get their axis hidden here.
plt.axis("off")

In [50]:
# def make_tiles(img, tile_size=256, num_tiles=4):
#     '''
#     img: np.ndarray with dtype np.uint8 and shape (width, height, channel)
#     '''
#     w, h, ch = img.shape
#     pad0, pad1 = (tile_size - w%tile_size) % tile_size, (tile_size - h%tile_size) % tile_size
#     padding = [[pad0//2, pad0-pad0//2], [pad1//2, pad1-pad1//2], [0, 0]]
#     img = np.pad(img, padding, mode='constant', constant_values=255)
#     img = img.reshape(img.shape[0]//tile_size, tile_size, img.shape[1]//tile_size, tile_size, ch)
#     img = img.transpose(0, 2, 1, 3, 4).reshape(-1, tile_size, tile_size, ch)
#     if len(img) < num_tiles: # pad images so that the output shape be the same
#         padding = [[0, num_tiles-len(img)], [0, 0], [0, 0], [0, 0]]
#         img = np.pad(img, padding, mode='constant', constant_values=255)
#     idxs = np.argsort(img.reshape(img.shape[0], -1).sum(-1))[:num_tiles] # pick up Top N dark tiles
#     img = img[idxs]
#     return img


In [None]:
# def merge(mask, tile_size=256):
#     '''
#     img: np.ndarray with dtype np.uint8 and shape (width, height, channel)
#     mask: np.ndarray with dtype np.uint8 and shape (width, height)
#     '''
#     w_i, h_i, ch = img.shape
#     w_m, h_m     = mask.shape
    
#     pad0, pad1 = (tile_size - w_i%tile_size) % tile_size, (tile_size - h_i%tile_size) % tile_size
    
#     padding_i = [[pad0//2, pad0-pad0//2], [pad1//2, pad1-pad1//2], [0, 0]]
#     padding_m = [[pad0//2, pad0-pad0//2], [pad1//2, pad1-pad1//2]]
    
#     img = np.pad(img, padding_i, mode='constant', constant_values=255)
#     img = img.reshape(img.shape[0]//tile_size, tile_size, img.shape[1]//tile_size, tile_size, ch)
#     img = img.transpose(0, 2, 1, 3, 4).reshape(-1, tile_size, tile_size, ch)
    
#     mask = np.pad(mask, padding_m, mode='constant', constant_values=255)
#     mask = mask.reshape(mask.shape[0]//tile_size, tile_size, mask.shape[1]//tile_size, tile_size)
#     mask = mask.transpose(0, 2, 1, 3).reshape(-1, tile_size, tile_size)
    
#     num_tiles = len(mask)
#     #     if len(img) < num_tiles: # pad images so that the output shape be the same
#     #         padding = [[0, num_tiles-len(img)], [0, 0], [0, 0], [0, 0]]
#     #         img = np.pad(img, padding, mode='constant', constant_values=255)
#     #idxs = np.argsort(img.reshape(img.shape[0], -1).sum(-1))[:num_tiles] # pick up Top N dark tiles
#     #img = img[idxs]
#     return img, mask

In [None]:
# img_tiles = make_tiles(img_1)
# for i, img_crop in tqdm(enumerate(img_tiles)):
#     pred, _, prob = learn.predict(img_crop)
#     #reshape mask tile into one mask
    
#     preds.append()


# #encode to rle
# rle = mask2rle(preds)