In [59]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch as tc
from glob import glob
from tqdm import tqdm
import albumentations as A
import cv2
from albumentations.pytorch import ToTensorV2


In [60]:
p_augm = 0.05 #0.5
#add rotate.  less p_augm

class CFG:
    # ============== pred target =============
    target_size = 1

    # ============== model CFG =============
    model_name = 'Unet'
    backbone = 'resnext50_32x4d'

    in_chans = 1   #5 # 65
    # ============== training CFG =============
    image_size = 1024 # 512 # 512
    input_size = 1024 # 512 #=512

    train_batch_size = 4 #4 #16
    valid_batch_size = 4

    epochs = 31 #30 #25
    lr = 8e-5
    chopping_percentile=1e-3
    # ============== fold =============
    valid_id = 1


    # ============== augmentation =============
    train_aug_list = [
        A.Rotate(limit=270, p= 0.5),
        A.RandomScale(scale_limit=(0.8,1.25),interpolation=cv2.INTER_CUBIC,p=p_augm),
        A.RandomCrop(input_size, input_size,p=1),
        A.RandomGamma(p=p_augm*2/3),
        A.RandomBrightnessContrast(p=p_augm,),
        A.GaussianBlur(p=p_augm),
        A.MotionBlur(p=p_augm),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=p_augm),
        ToTensorV2(transpose_mask=True),
    ]
    train_aug = A.Compose(train_aug_list)
    valid_aug_list = [
        ToTensorV2(transpose_mask=True),
    ]
    valid_aug = A.Compose(valid_aug_list)

In [62]:
def to_1024(img , image_size = 1024):
    if image_size > img.shape[1]:
       img = np.rot90(img)
       start1 = (CFG.image_size - img.shape[0])//2 
       top =     img[0                    : start1,   0: img.shape[1] ]
       bottom  = img[img.shape[0] -start1 : img.shape[0],   0 : img.shape[1] ]
       img_result = np.concatenate((top,img,bottom ),axis=0)
       img_result = np.rot90(img_result)
       img_result = np.rot90(img_result)
       img_result = np.rot90(img_result)
    else :
       img_result = img
    return img_result

#  add border
def to_1024_1024(img  , image_size = 1024 ):
     img_result = to_1024(img, image_size )
     return img_result

In [63]:
'''class Data_loader(Dataset):
     
    def __init__(self,paths,is_label):
        self.paths=paths
        self.paths.sort()
        self.is_label=is_label
    
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self,index):
         
        img = cv2.imread(self.paths[index],cv2.IMREAD_GRAYSCALE)
        
        img = to_1024_1024(img , image_size = CFG.image_size ) #  to_original( im_after, img_save, image_size = 1024)

        img = tc.from_numpy(img.copy())
        if self.is_label:
            img=(img!=0).to(tc.uint8)*255
        else:
            img=img.to(tc.uint8)
        return img'''

class Data_loader(Dataset):
    def __init__(self,path,s="/images/"):
        self.paths=glob(path+f"{s}*.tif")
        self.paths.sort()
        self.bool=s=="/labels/"
    
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self,index):
        img=cv2.imread(self.paths[index],cv2.IMREAD_GRAYSCALE)
        img = to_1024_1024(img , image_size = CFG.image_size )
        
        img=tc.from_numpy(img.copy())
        if self.bool:
            img=img.to(tc.bool)
        else:
            img=img.to(tc.uint8)
        return img

In [4]:
mask = np.load('src/util/2d_nn/2d_segmentation.npy')

In [5]:
mask.shape

(2217, 1041, 1511)

In [64]:
model_path_i = 0 
class CFG_Load:
    model_name = 'Unet'
    backbone = 'resnext50_32x4d'

    in_chans = 1
    image_size = 1024 
    input_size= 1024
    tile_size = image_size
    stride = tile_size // 4
    drop_egde_pixel= 0 
    
    target_size = 1
    chopping_percentile=1e-3
    valid_id = 1
    batch=16 
    th_percentile = 0.00143
    
    path_submition = 0 
    model_path=["/root/sennet-kaggle/src/util/2d_nn/resnext50_32x4d_21_loss0.06_score0.87_val_loss0.21_val_score0.81_midd_1024.pt"]

In [65]:
def load_data(path,s):
    data_loader=Data_loader(path,s)
    data_loader=DataLoader(data_loader, batch_size=16, num_workers=2)
    data=[]
    for x in tqdm(data_loader):
        data.append(x)
    x=tc.cat(data,dim=0)
    ########################################################################
    TH=x.reshape(-1).numpy()
    index = -int(len(TH) * CFG.chopping_percentile)
    TH:int = np.partition(TH, index)[index]
    x[x>TH]=int(TH)
    ########################################################################
    TH=x.reshape(-1).numpy()
    index = -int(len(TH) * CFG.chopping_percentile)
    TH:int = np.partition(TH, -index)[-index]
    x[x<TH]=int(TH)
    ########################################################################
    #x=(min_max_normalization(x.to(tc.float16))*255).to(tc.uint8)
    return x

In [67]:
class Pipeline_Dataset(Dataset):
    def __init__(self,x,path):
        self.img_paths  = glob(path+"/images/*")
        self.img_paths.sort()
        self.in_chan = CFG_Load.in_chans
        z=tc.zeros(self.in_chan//2,*x.shape[1:],dtype=x.dtype)
        self.x=tc.cat((z,x,z),dim=0)
        
    def __len__(self):
        return self.x.shape[0]-self.in_chan+1
    
    def __getitem__(self, index):
        x  = self.x[index:index+self.in_chan]
        return x,index
    
    def get_mark(self,index):
        id=self.img_paths[index].split("/")[-3:]
        id.pop(1)
        id="_".join(id)
        return id[:-4]
    
    def get_marks(self):
        ids=[]
        for index in range(len(self)):
            ids.append(self.get_mark(index))
        return ids

In [94]:
split_np = np.split(mask, 1, axis=2)

In [152]:
mask.shape

(2217, 1041, 1511)

In [None]:
mask = mask.transpose()

In [154]:
def get_output(debug=False):
    outputs=[]
    paths=["/root/data/train/kidney_2"]
    outputs=([],[])
    for i in range(mask.shape[0]):
        outputs[0].append(mask[i,:,:])
    for path in [paths[CFG_Load.path_submition]]:
        x=load_data(path,"/images/")
        mark=Pipeline_Dataset(x,path).get_marks()
        outputs[1].extend(mark)
    return outputs

In [None]:
# Redone
def get_output(debug=False):
    outputs=[]
    paths=["/root/data/train/kidney_2"]
    outputs=[[],[]]
    for arr in split_np:
        labels=tc.zeros_like(x,dtype=tc.uint8)
        outputs[0].append(arr)
    for path in [paths[CFG_Load.path_submition]]:
        x=load_data(path,"/images/")
        mark=Pipeline_Dataset(x,path).get_marks()
        outputs[1].extend(mark)
    return outputs

In [155]:
output, ids = get_output()

100%|██████████| 139/139 [00:04<00:00, 30.54it/s]


In [160]:
len(output)

2217

In [171]:
def rle_encode(mask):
    pixel = mask.flatten()
    pixel = np.concatenate([[0], pixel, [0]])
    run = np.where(pixel[1:] != pixel[:-1])[0] + 1
    run[1::2] -= run[::2]
    rle = ' '.join(str(r) for r in run)
    if rle == '':
        rle = '1 0'
    return rle

In [157]:
def to_original ( im_after, img, image_size = 1024 ):
    top_ = 0
    left_ = 0
    if (im_after.shape[0] > img.shape[0]):
             top_  = ( image_size - img.shape[0])//2 
    if    (im_after.shape[1] > img.shape[1]) :
             left_  = ( image_size - img.shape[1])//2  
    if (top_>0)or (left_>0) :
             img_result = im_after[top_  : img.shape[0] + top_,   left_: img.shape[1] + left_ ]
    else:
             img_result = im_after
    return img_result  

In [182]:
img=cv2.imread("/root/data/train/kidney_2/images/0000.tif",cv2.IMREAD_GRAYSCALE)

submission_df = []

for index in range(len(ids)):
    id=ids[index]
    i=0
    for x in output:
        if index>=len(x):
            index-=len(x)
            i+=1
        else:
            break
    mask_pred = (output[0])

    mask_pred2 = to_original(mask_pred, img, image_size=1024)
    mask_pred = mask_pred2.copy()

    rle = rle_encode(mask_pred)

    submission_df.append(
        pd.DataFrame(data={
            'id' : id,
            'rle' : rle,
        },index=[0])
    )

submission_df = pd.concat(submission_df)
submission_df.to_csv('submission.csv', index=False)
submission_df.head(6)

ValueError: operands could not be broadcast together with shapes (12,) (13,) (12,) 