In [2]:
!pip install -q segmentation_models_pytorch
!pip install -q monai


Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.3.0-py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.9/97.9 kB[0m [31m350.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting pretrainedmodels==0.7.4
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting efficientnet-pytorch==0.7.1
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting timm==0.4.12
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m377.0/377.0 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Building wheels for collected packages: efficientnet-pytorch, pretrainedmodels
  Building wheel for ef

In [None]:
!pip install -q wandb

In [3]:
import torch
from torch import nn
import torch.nn.functional as F

from torch.utils.data import DataLoader, random_split
import albumentations as A

import segmentation_models_pytorch as smp
# import torchsummary

import pandas as pd
import numpy as np
import random, shutil, time, os

import sklearn
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import albumentations as A

from glob import glob
from tqdm.notebook import tqdm
from sklearn.model_selection import KFold, GroupKFold
from sklearn.metrics import roc_auc_score
# from skimage import color
from IPython import display as ipd

import scipy
import pdb
import gc

import monai
import tifffile as tiff

from torch.cuda import amp

import warnings
warnings.filterwarnings('ignore')

print('done')

done


In [18]:
sz = 256   #the size of tiles
reduce = 4 #reduce the original images by 4 times 

BASE_DIR = '../input/hubmap-organ-segmentation'
TRAIN = True
if TRAIN:
    DATA_DIR = os.path.join(BASE_DIR, 'train_images')
else:
    DATA_DIR = os.path.join(BASE_DIR, 'test_images')
            
df = pd.read_csv(os.path.join(BASE_DIR, 'train.csv'))
df['path'] = df['id'].apply(lambda fname : os.path.join(DATA_DIR, str(fname) + '.tiff'))
organ_to_class = {
    'prostate':0,
    'spleen':1,
    'lung':2,
    'kidney':3,
    'largeintestine':4
}
df['classes'] = df['organ'].apply(lambda organ : organ_to_class[organ])
df.head(5)


Unnamed: 0,id,organ,data_source,img_height,img_width,pixel_size,tissue_thickness,rle,age,sex,path,classes
0,10044,prostate,HPA,3000,3000,0.4,4,1459676 77 1462675 82 1465674 87 1468673 92 14...,37.0,Male,../input/hubmap-organ-segmentation/train_image...,0
1,10274,prostate,HPA,3000,3000,0.4,4,715707 2 718705 8 721703 11 724701 18 727692 3...,76.0,Male,../input/hubmap-organ-segmentation/train_image...,0
2,10392,spleen,HPA,3000,3000,0.4,4,1228631 20 1231629 24 1234624 40 1237623 47 12...,82.0,Male,../input/hubmap-organ-segmentation/train_image...,1
3,10488,lung,HPA,3000,3000,0.4,4,3446519 15 3449517 17 3452514 20 3455510 24 34...,78.0,Male,../input/hubmap-organ-segmentation/train_image...,2
4,10610,spleen,HPA,3000,3000,0.4,4,478925 68 481909 87 484893 105 487863 154 4908...,21.0,Female,../input/hubmap-organ-segmentation/train_image...,1


In [7]:
def rle_encode(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def rle_decode(mask_rle, wid, hei):
    shape = (wid, hei)
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T


In [19]:
s_th = 40  #saturation blancking threshold
p_th = 1000*(sz//256)**2 #threshold for the minimum number of pixels


class HuBMAPDataset(torch.utils.data.Dataset):
    def __init__(self, idx, sz=sz, reduce=reduce, encs=None):
        self.data = tiff.imread(os.path.join(DATA_DIR,str(idx)+'.tiff'))
        # some images have issues with their format 
        # and must be saved correctly before reading with rasterio
        if self.data.count != 3:
            subdatasets = self.data.subdatasets
            self.layers = []
            if len(subdatasets) > 0:
                for i, subdataset in enumerate(subdatasets, 0):
                    self.layers.append(rasterio.open(subdataset))
        self.shape = self.data.shape
        self.reduce = reduce
        self.sz = reduce*sz
        self.pad0 = (self.sz - self.shape[0]%self.sz)%self.sz
        self.pad1 = (self.sz - self.shape[1]%self.sz)%self.sz
        self.n0max = (self.shape[0] + self.pad0)//self.sz
        self.n1max = (self.shape[1] + self.pad1)//self.sz
        self.mask = enc2mask(encs,(self.shape[1],self.shape[0])) if encs is not None else None
        
    def __len__(self):
        return self.n0max*self.n1max
    
    def __getitem__(self, idx):
        # the code below may be a little bit difficult to understand,
        # but the thing it does is mapping the original image to
        # tiles created with adding padding (like in the previous version of the kernel)
        # then the tiles are loaded with rasterio
        # n0,n1 - are the x and y index of the tile (idx = n0*self.n1max + n1)
        n0,n1 = idx//self.n1max, idx%self.n1max
        # x0,y0 - are the coordinates of the lower left corner of the tile in the image
        # negative numbers correspond to padding (which must not be loaded)
        x0,y0 = -self.pad0//2 + n0*self.sz, -self.pad1//2 + n1*self.sz

        # make sure that the region to read is within the image
        p00,p01 = max(0,x0), min(x0+self.sz,self.shape[0])
        p10,p11 = max(0,y0), min(y0+self.sz,self.shape[1])
        img = np.zeros((self.sz,self.sz,3),np.uint8)
        mask = np.zeros((self.sz,self.sz),np.uint8)
        # mapping the loade region to the tile
        if self.data.count == 3:
            img[(p00-x0):(p01-x0),(p10-y0):(p11-y0)] = np.moveaxis(self.data.read([1,2,3],
                window=Window.from_slices((p00,p01),(p10,p11))), 0, -1)
        else:
            for i,layer in enumerate(self.layers):
                img[(p00-x0):(p01-x0),(p10-y0):(p11-y0),i] =\
                  layer.read(1,window=Window.from_slices((p00,p01),(p10,p11)))
        if self.mask is not None: mask[(p00-x0):(p01-x0),(p10-y0):(p11-y0)] = self.mask[p00:p01,p10:p11]
        
        if self.reduce != 1:
            img = cv2.resize(img,(self.sz//reduce,self.sz//reduce),
                             interpolation = cv2.INTER_AREA)
            mask = cv2.resize(mask,(self.sz//reduce,self.sz//reduce),
                             interpolation = cv2.INTER_NEAREST)
        #check for empty imges
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h,s,v = cv2.split(hsv)
        #return -1 for empty images
        return img, mask, (-1 if (s>s_th).sum() <= p_th or img.sum() <= p_th else idx)


In [25]:
for index, encs in tqdm(df.iterrows()):
    print(encs['id'])
    break

0it [00:00, ?it/s]

TypeError: tuple indices must be integers or slices, not str

In [26]:
x_tot,x2_tot = [],[]

for index, encs in tqdm(df.iterrows()):
    #image+mask dataset
    ds = HuBMAPDataset(encs['id'],encs=encs)
    for i in range(len(ds)):
        im,m,idx = ds[i]
        if idx < 0: continue
                
        x_tot.append((im/255.0).reshape(-1,3).mean(0))
        x2_tot.append(((im/255.0)**2).reshape(-1,3).mean(0))
            
        #write data   
        im = cv2.imencode('.png',cv2.cvtColor(im, cv2.COLOR_RGB2BGR))[1]
        plt.subplots()
        plt.imshow(im)
        
        m = cv2.imencode('.png',m)[1]
        plt.subplots()
        plt.imshow(m)
        
#image stats
img_avr =  np.array(x_tot).mean(0)
img_std =  np.sqrt(np.array(x2_tot).mean(0) - img_avr**2)
print('mean:',img_avr, ', std:', img_std)


0it [00:00, ?it/s]

AttributeError: 'numpy.ndarray' object has no attribute 'count'