<a href="https://colab.research.google.com/github/amalkoodoruth/deep-learning-for-pkd-patients/blob/main/beta_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mounting drive and changing the working directory
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [15]:
my_dir = '/content/drive/MyDrive/MiCM2021-PKD/dataloader_data_kaggle/MR2D/train'
%cd /content/drive/MyDrive/MiCM2021-PKD/dataloader_data_kaggle/MR2D/train

/content/drive/MyDrive/MiCM2021-PKD/dataloader_data_kaggle/MR2D/train


In [4]:
!pip install pydicom

Collecting pydicom
[?25l  Downloading https://files.pythonhosted.org/packages/f4/15/df16546bc59bfca390cf072d473fb2c8acd4231636f64356593a63137e55/pydicom-2.1.2-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 4.3MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.1.2


In [6]:
!pip install pillow



In [8]:
# importing important librarires
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pydicom
from PIL import Image

import torch
from torch.utils import data
from torch.autograd import Variable

import sys

In [17]:
my_dir

'/content/drive/MyDrive/MiCM2021-PKD/dataloader_data_kaggle/MR2D/train'

In [23]:
img_paths = []
for dcm in os.listdir(my_dir+'/X'):
  if dcm != ".DS_Store":
    img_paths.append(my_dir + '/X/' + dcm)
img_paths.sort()



In [25]:
seg_paths = []
for seg in os.listdir(my_dir+'/Y'):
  if seg != ".DS_Store":
    seg_paths.append(my_dir + '/Y/' + seg)
seg_paths.sort()


In [114]:
seg_val = []

In [120]:

    # Load the scans in given folder path
def load_scan(path):
    """
    This function is used to load the MRI scans. It converts the scan into a numpy array

      Parameters:
        path (str): The path to the folder containing the MRI scans of all patients

      Returns:
        np_image (numpy.ndarray): A numpy array representing the MRI scan
    """

    # slices = [pydicom.read_file(path + '/' + s) for s in os.listdir(path)]
    # slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
    # try:
    #     slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    # except Exception as e:
    #     print("Exception raised: ", e)
    #     slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
            
    #  for s in slices:
    #     s.SliceThickness = slice_thickness

    #  image = np.stack([s.pixel_array for s in slices])
    image = pydicom.read_file(path)
    # print(type(image))
    image = image.pixel_array.astype(np.int16)
    np_image = np.array(image, dtype=np.int16)
    # print("scan shape: ", np_image.shape)
    return np_image
            
def load_seg(path):
    """
    This function is used to load the segmented image. It returns the image in a numpy array

      Parameters:
        path (str): The directory where all the segmented images corresponding to one patient are stored

      Returns:
        seg_data (numpy.ndarray): A list of numpy arrays corresponding to segmented images
    """
    # seg_paths = []

    # if path[-1] != '/':
    #   path = path + '/'

    # for seg in os.listdir(path):
    #   seg_paths.append(path + seg)

    # seg_paths.sort()

    
    seg = Image.open(path)
    seg_data = np.asarray(seg)
    seg_data = np.array(seg_data)
    # for seg_path in seg_paths:
    #   seg = Image.open(seg_path)
    #   seg_data.append(np.asarray(seg))
    # print("seg shape: ", seg_data.shape)
    for arr in seg_data:
      for elem in arr:
        if (elem not in seg_val):
          seg_val.append(elem)
    return seg_data

        
def resize_data(data, new_dimensions):
    '''
    This function resizes a numpy array.
    TO DO: method used for interpolation?

      Parameters:
        data (numpy.ndarray): a numpy array representing an MRI scan
        new_dimensions (list): a list containing the dimensions of the new scan [z,x,y]

      Returns:
        new_data (numpy.ndarray): a numpy array with the desired dimensions
    '''
    initial_size_x = data.shape[1]
    initial_size_y = data.shape[2]
    initial_size_z = data.shape[0]

    new_size_z = new_dimensions[0]
    new_size_x = new_dimensions[1]
    new_size_y = new_dimensions[2]
          
    delta_x = initial_size_x / new_size_x
    delta_y = initial_size_y / new_size_y
    delta_z = initial_size_z / new_size_z

    new_data = np.zeros((new_size_z, new_size_x, new_size_y))

    for x, y, z in itertools.product(range(new_size_x),
                                          range(new_size_y),
                                          range(new_size_z)):
      new_data[z][x][y] = data[int(z * delta_z)][int(x * delta_x)][int(y * delta_y)]

    return new_data

def padSlice(values):
    print(values.shape)
    target_shape = np.array((320, 320))
    pad = ((target_shape - values.shape) / 2).astype("int")

    values = np.pad(values, ((pad[0], pad[0]), (pad[1], pad[1])), mode="constant", constant_values = 0)

    return values

In [121]:
class SliceDataset(data.Dataset):

    ##
    # img_paths is list of paths to intensity images
    # seg_paths is list of paths to segmentation images, define as None if no segmentations exist
    # sigma is deformation intensity, points the number of coordinates for grid deformation

    ## 
    # image path is path of X
    # seg path is path of Y
    def __init__(self, img_paths, seg_paths):

        self.seg_paths = seg_paths
        self.img_paths = img_paths

    def __len__(self):
        return len(self.img_paths)


    def __getitem__(self, index):
        """
        This function is used to retrieve one scan and its corresponding segmented
        image from a dataset, if it exists. The scan and the segmented image are 
        converted into numpy arrays that have the dimensions specified in the constructor.

          Parameters:
            index (int): the index of the scan we want to retrieve. It is in the range [0, size of dataset)

          Returns:
            X (numpy.ndarray): training sample of size new_dimensions
            Y (numpy.ndarray): segmented image if it exists (will exist if in training set). Else, array of 0s
              
        """
        img_path = self.img_paths[index]
        img = load_scan(img_path)
        img_resized = padSlice(img)

        seg_exists = len(self.seg_paths) > 0

        if seg_exists:
          seg_path = self.seg_paths[index]
          seg = load_seg(seg_path)
          seg_resized = padSlice(seg)

        # Convert images to pytorch tensors
        ## why .float() and .long()
        X = Variable(torch.from_numpy(img_resized)).float()
    
        if seg_exists:
            Y = Variable(torch.from_numpy(seg_resized)).long()

        else:
            Y = torch.zeros(1) # dummy segmentation
          
        name = os.path.basename(self.img_paths[index])

        return X, Y, name ## why return name??





In [122]:
Train = SliceDataset(img_paths, seg_paths)

In [123]:
len(img_paths)

623

In [124]:
for i in range(len(img_paths)):
  X, Y, name = Train.__getitem__(i)

(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)
(256, 256)

In [112]:
X, Y, name = Train.__getitem__(200)

<class 'pydicom.dataset.FileDataset'>
scan shape:  (320, 320)
(320, 320)
seg shape:  (320, 320)
(320, 320)


In [125]:
seg_val

[0, 63, 252, 189, 126]

In [105]:
y = Y.numpy()

In [106]:
type(y)

numpy.ndarray

In [107]:
mlist = [0]
a = [0,1,2,2,1,1]

for arr in y:
  for element in arr:
    if (element.astype(float) not in mlist):
      mlist.append(element)

In [108]:
mlist

[0, 63, 252, 189]