In [1]:
import gc
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image

from dataset import SegmentationDataset
from loss import DiceLoss
from model import UNet
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the raw training metadata table; later cells filter it and read the
# id / organ / rle / img_height / img_width columns from it.
df_path = 'train_data/raw_data/train.csv'
df = pd.read_csv(filepath_or_buffer=df_path)

In [4]:
# Keep only the two slides used for this experiment and tag each one with the
# split it belongs to.
#
# The split is looked up by slide id instead of being assigned positionally:
# a positional list silently depends on the row order of the CSV and on both
# ids being present. `.assign()` returns a new frame, so no column is written
# onto a filtered slice (which is what can trigger SettingWithCopyWarning).
split_by_id = {11497: 'train', 13034: 'val'}
df = (
    df[df['id'].isin(list(split_by_id))]
    .assign(train=lambda frame: frame['id'].map(split_by_id))
)
df

Unnamed: 0,id,organ,data_source,img_height,img_width,pixel_size,tissue_thickness,rle,age,sex,train
16,11497,kidney,HPA,3000,3000,0.4,4,940436 51 943430 65 946427 76 949423 89 952417...,41.0,Female,train
38,13034,kidney,HPA,3000,3000,0.4,4,850253 18 853250 24 856245 32 859237 46 862233...,73.0,Male,val


In [17]:
# Image pipeline: resize to the 1536x1536 working resolution, then ToTensor().
transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((1536, 1536)),
        torchvision.transforms.ToTensor(),
    ]
)

# Mask pipeline: resize only, to the same 1536x1536 resolution as the image.
# NOTE(review): Resize is used with its default interpolation here; for label
# masks nearest-neighbour is usually preferred — confirm against the dtype
# that rle2mask returns before changing it.
transforms_mask = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((1536, 1536)),
    ]
)

In [7]:
# Distinct organ labels present in the selected rows, in order of first
# appearance. Missing values are dropped before collecting the labels so that
# `num_classes` always equals `len(classes)` (nunique() ignores NaN while
# unique() keeps it) and no NaN can end up in the JSON manifest.
classes = df['organ'].dropna().unique().tolist()
num_classes = len(classes)
num_classes, classes

(1, ['kidney'])

In [36]:
# Cut every selected slide (and its mask) into overlapping tiles, save the
# image tiles as JPEGs under train_data/processed_data/<split>/ and write one
# labels.json manifest per split describing each tile and its RLE mask.

# Tiling parameters. The same tile size and stride are used for the image and
# for its mask so that tile i of the image lines up with tile i of the mask.
RESIZED_SHAPE = (1536, 1536)  # must match the Resize in `transforms_mask`
TILE_SIZE = (256, 256)
TILE_STRIDE = (128, 128)  # half the tile size; presumably 50% overlap (depends on split_image)
RAW_DIR = 'train_data/raw_data'
PROCESSED_DIR = 'train_data/processed_data'
SPLITS = ('train', 'val')

# One manifest per split. `images` is filled tile by tile in the loop below;
# `num_images` is patched in afterwards, once all tiles are known.
labels = {
    split: {
        'data_description': {
            'num_classes': num_classes,
            'num_images': 0,
            'num_channels': 3,
            'classes': classes,
        },
        'images': [],
    }
    for split in SPLITS
}

# Image.save() and open(..., 'w') do not create missing directories.
for split in SPLITS:
    os.makedirs(f'{PROCESSED_DIR}/{split}', exist_ok=True)

for index, row in df.iterrows():
    img_name = row["id"]
    split = row['train']

    # The context manager closes the TIFF file handle; resize() returns a new,
    # fully loaded image that stays valid after the file is closed.
    with Image.open(f'{RAW_DIR}/{img_name}.tiff') as raw_img:
        img = raw_img.resize(RESIZED_SHAPE)
    img = torch.tensor(np.asarray(img))

    # NOTE(review): the permute presumably turns split_image's (N, C, H, W)
    # output into (N, H, W, C) so PIL can consume each tile — confirm.
    tiles = (
        split_image(img, tile_size=TILE_SIZE, stride=TILE_STRIDE)
        .permute((0, 2, 3, 1))
        .numpy()
        .astype('uint8')
    )

    # Decode the mask at the slide's original resolution, then resize it the
    # same way as the image before tiling.
    mask = rle2mask(row['rle'], img_shape=(row['img_height'], row['img_width']))
    mask = transforms_mask(mask.T[None, :, :]).permute((1, 2, 0))
    mask_tiles = split_image(mask, tile_size=TILE_SIZE, stride=TILE_STRIDE).permute((0, 2, 3, 1))

    # Tiles are paired by index, so both stacks must have the same length.
    assert tiles.shape[0] == mask_tiles.shape[0], (
        f'{img_name}: {tiles.shape[0]} image tiles but {mask_tiles.shape[0]} mask tiles'
    )

    for i in range(tiles.shape[0]):
        tile_name = f'{img_name}_{i}'
        rle = mask2rle(mask_tiles[i])

        labels[split]['images'].append({
            'name': tile_name,
            'path': tile_name + '.jpg',
            'labels': [
                {
                    'class': row['organ'],
                    'rle': rle,
                }
            ],
        })
        Image.fromarray(tiles[i]).save(f'{PROCESSED_DIR}/{split}/{tile_name}.jpg')

# Finalise the tile counts and write one manifest next to each split's tiles.
for split in SPLITS:
    labels[split]['data_description']['num_images'] = len(labels[split]['images'])
    with open(f'{PROCESSED_DIR}/{split}/labels.json', 'w') as f:
        json.dump(labels[split], f)