In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
import glob
import os
import time
import tifffile as tiff
import tensorflow as tf



In [3]:
# Run-length Encode and Decode functions

# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = img.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
 
def rle_decode(mask_rle, shape):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background

    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)


In [4]:
data_dir = "/kaggle/input/blood-vessel-segmentation/"

In [5]:
df_train_rles = pd.read_csv(os.path.join(data_dir + "train_rles.csv"))
df_train_rles.head()

Unnamed: 0,id,rle
0,kidney_1_dense_0000,1 0
1,kidney_1_dense_0001,1 0
2,kidney_1_dense_0002,1 0
3,kidney_1_dense_0003,1 0
4,kidney_1_dense_0004,1 0


In [6]:
# df_train_rles[ ["kidney_dataset", "image_no"]] = df_train_rles['id'].str.rsplit(pat='_', n=1, expand=True)
train_data_path = os.path.join(data_dir + "/train")
dataset = os.listdir(train_data_path)
print(dataset)

['kidney_3_dense', 'kidney_1_dense', 'kidney_2', 'kidney_1_voi', 'kidney_3_sparse']


In [7]:
paths = {}
    
for i in range(len(dataset)):
    key = dataset[i]
    value1 = os.path.join(train_data_path, dataset[i] + "/images")
    value2 = os.path.join(train_data_path, dataset[i] + "/labels")
    paths[key] = [value1, value2]

paths['kidney_3_dense'][0] = paths['kidney_3_sparse'][0]
images_path, labels_path = paths['kidney_3_dense']


In [8]:
image_files_kidney_3_dense = sorted([os.path.join(images_path, f) for f in os.listdir(images_path) if f.endswith('.tif')])
label_files_kidney_3_dense = sorted([os.path.join(labels_path, f) for f in os.listdir(labels_path) if f.endswith('.tif')])

In [9]:
l = []
for i in range(len(image_files_kidney_3_dense)):
    for j in range(len(label_files_kidney_3_dense)):
        if os.path.basename(label_files_kidney_3_dense[j]) == os.path.basename(image_files_kidney_3_dense[i]):
            l.append(image_files_kidney_3_dense[i])

image_files_kidney_3_dense = l

In [10]:
from PIL import Image
from IPython.display import display
import matplotlib.image as mpimg

In [11]:
class ImageProcessor(object):
    def __init__(self,image_path):
        self.image_path = image_path
        self.image = self.read_image()
    
    def read_image(self):
        img = mpimg.imread(self.image_path)
        #img = img.astype(np.float64)
        return img
    
    def normalize_image(self):
        image = self.image.astype(np.float64)
        normalized_img = (image-np.mean(image))/np.std(image)
        return normalized_img
    
    def display_images(self, normalized = False, array = False):
        if not array:
            if normalized:
                normalized_img = self.normalize_image()
                plt.imshow(normalized_img)
                plt.title("Normalized Image")
            else:
                plt.imshow(self.image)
                plt.title("Original Image")
            plt.axis("off")
            plt.show()
        
        else:
            if normalized:
                normalized_img = self.normalize_image()
                return normalized_img.astype(np.float64)
            else:
                return self.image.astype(np.float64)


In [12]:

if __name__ == "__main__":
    path = label_files_kidney_3_dense[1]
    img_processor = ImageProcessor(path)
    label = img_processor.display_images(normalized=False, array = True)
    print(list(label.shape))
    r = rle_encode(label)
    print(r)
    rev = rle_decode(r, label.shape)
    
    del r
    del rev
    del label

[1706, 1510]
331377 2 332887 2 352509 3 354018 4 355529 3 357040 1 363120 2 364629 4 366138 6 367647 7 369155 9 370664 11 372174 11 373683 12 375193 12 376703 11 378213 10 379723 9 381233 9 381284 1 382744 6 382793 4 384303 4 385813 5 387323 5 388834 4 390345 3 391856 1 396209 3 397719 3 447509 2 497340 1 498848 6 500357 7 501867 6 503378 4 504888 4 506592 2 508101 4 509611 4 511121 4 512632 3 525966 4 527479 3 528990 3 530501 3 532013 1 538357 3 539868 3 541378 4 541451 2 542889 2 542961 3 544090 1 544471 3 545599 1 545982 2 547108 2 548618 1 550127 2 551635 3 553144 4 554654 3 554725 9 556163 3 556233 13 557671 4 557742 15 558030 2 559181 3 559251 17 559539 4 560690 3 560760 18 561048 5 562199 3 562269 19 562558 6 563708 2 563778 19 564068 6 565217 3 565287 20 565578 6 566796 20 567088 6 568178 1 568306 20 568598 4 569687 2 569815 21 570108 3 571324 21 571586 2 571618 2 572834 21 573096 2 574343 21 574606 3 575852 22 576116 4 577361 22 577626 5 578870 23 579136 5 580380 23 580646 5 5

In [13]:
from torchvision.transforms import ToTensor
from skimage import io
import torch
from torchvision import transforms
from PIL import Image


Two methods to create tensor from tiff images, both are giving different results so I am not sure which one to use.

In [14]:
# image_list = []
# for image in os.listdir(paths['kidney_2'][0]):
#     image_path = os.path.join(paths['kidney_2'][0], image)
#     img = io.imread(image_path)
#     img.size((1024,1024))
#     image_list.append(img)

#     # Convert the list of images to a NumPy array
# img_tensor = np.stack(image_list)

In [15]:
#  def pad_or_truncate(numbers_list, target_length):
#     if len(numbers_list) < target_length:
#         # Pad with zeros
#         numbers_list += [0.0] * (target_length - len(numbers_list))
#     elif len(numbers_list) > target_length:
#         # Truncate
#         numbers_list = numbers_list[:target_length]
#     return numbers_list

# max_length = 268

This is the method to convert label to tensor but I am not sure if its correct or not because the numbers in the encoded label represents position(I guess??)

In [16]:
# label_list = []
# for label in os.listdir(paths['kidney_2'][1]):
#     label_path = image_path = os.path.join(paths['kidney_2'][1], image)
#     lbl = tiff.imread(label_path)
    
#     encoded_label = rle_encode(lbl)
#     numbers_list = [float(num) for num in encoded_label.split()]
#     label_list.append(numbers_list)
    
# label_tensor = torch.tensor(label_list)

In [17]:
def image_to_tensor(path):
    #empty list to hold tensors(placeholder)
    tensor_list = []
    
    for image in os.listdir(path):
        image_path = os.path.join(path, image)
        img = io.imread(image_path)
        img.resize((1024,1024))
        tensor_list.append(img)

    # Convert the list of images to a NumPy array
    img_tensor = np.stack(tensor_list)
    return img_tensor

In [18]:
#Converting images to tensor

images_kidney_1_dense = image_to_tensor(paths['kidney_1_dense'][0])
images_kideny_1_voi = image_to_tensor(paths['kidney_1_voi'][0])
stack_images = np.vstack((images_kidney_1_dense, images_kideny_1_voi))


In [19]:
del images_kidney_1_dense
del images_kideny_1_voi

In [20]:
images_kidney_2 = image_to_tensor(paths['kidney_2'][0])
stack_images = np.vstack((images_kidney_2))

In [21]:
del images_kidney_2

In [22]:
images_kidney_3_dense = image_to_tensor(paths['kidney_3_dense'][0])
images_kidney_3_sparse = image_to_tensor(paths['kidney_3_sparse'][0])
stack_images = np.vstack((images_kidney_3_dense, images_kidney_3_sparse))

In [23]:
del images_kidney_3_dense
del images_kidney_3_sparse

In [24]:
#Converting to masks to tensor

masks_kidney_1_dense = image_to_tensor(paths['kidney_1_dense'][1])
masks_kidney_1_voi = image_to_tensor(paths['kidney_1_voi'][1])
stack_masks = np.vstack((masks_kidney_1_dense, masks_kidney_1_voi))
del masks_kidney_1_dense
del masks_kidney_1_voi

In [25]:
masks_kidney_2 = image_to_tensor(paths['kidney_2'][1])
stack_masks = np.vstack((masks_kidney_2))
del masks_kidney_2

In [26]:
masks_kidney_3_dense = image_to_tensor(paths['kidney_3_dense'][1])
masks_kidney_3_sparse = image_to_tensor(paths['kidney_3_sparse'][1])
stack_masks = np.vstack((masks_kidney_3_dense, masks_kidney_3_sparse))
del masks_kidney_3_dense
del masks_kidney_3_sparse

In [27]:
%whos

Variable                     Type              Data/Info
--------------------------------------------------------
Image                        module            <module 'PIL.Image' from <...>e-packages/PIL/Image.py'>
ImageProcessor               type              <class '__main__.ImageProcessor'>
ToTensor                     type              <class 'torchvision.trans<...>rms.transforms.ToTensor'>
data_dir                     str               /kaggle/input/blood-vessel-segmentation/
dataset                      list              n=5
df_train_rles                DataFrame                                 i<...>\n[7429 rows x 2 columns]
display                      function          <function display at 0x788dcea9ec20>
glob                         module            <module 'glob' from '/opt<...>/lib/python3.10/glob.py'>
i                            int               1034
image_files_kidney_3_dense   list              n=501
image_to_tensor              function          <function image_to

In [28]:
# def label_to_tensor(path):
#     #empty list to hold tensors(placeholder)
#     tensor_list = []
    
#     for label in os.listdir(path):
#         label_path = os.path.join(path, label)
#         lbl = tiff.imread(label_path)
#         lbl.resize((1024, 1024))

#         encoded_label = rle_encode(lbl)
#         numbers_list = [float(num) for num in encoded_label.split()]
#         numbers_list = pad_or_truncate(numbers_list, max_length)
#         tesnor_list.append(numbers_list)      
    
#     #stacking the list of tensor into single tensor
#     label_tesnor = torch.stack(tensor_list)
#     return label_tensor 