# Construct Custom Dataset



In [1]:
!pip install -q pydicom numpy matplotlib Pillow
import os
import cv2
import numpy as np
import pydicom
from PIL import Image
import matplotlib.pyplot as plt

import torch
import pandas as pd
from skimage import io, transform
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils
import random

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
def normalize_image(image):
    """
    Min-max normalize an image to the range [0, 1].

    Parameters
    ----------
    image : array-like
        Pixel intensities of any numeric dtype.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``image`` in which the smallest input
        value maps to 0 and the largest to 1. A constant image (max == min)
        is returned as all zeros instead of NaN/inf.

    Raises
    ------
    ValueError
        If ``image`` is empty (raised by ``np.min``).
    """
    pixels = np.asarray(image)
    if not np.issubdtype(pixels.dtype, np.floating):
        # Integer inputs end up as float64 after the division anyway; converting
        # up front keeps `pixels - min` from wrapping around in narrow dtypes.
        pixels = pixels.astype(np.float64)

    shifted = pixels - np.min(pixels)
    span = np.max(shifted)
    if span == 0:
        # Constant image: every shifted value is already 0, so divide by 1
        # rather than by 0.
        span = 1
    return shifted / span

def GrayScaleToBlueToRedColor(intensity, norm_value):
    """
    Map one gray level onto a blue -> green -> red colour ramp.

    intensity: pixel intensity value
    norm_value: full-scale value of `intensity` (e.g. 2^8-1); it is also the
        peak value of every returned channel

    Returns a 3-tuple ordered (red, green, blue). For an intensity inside
    [0, norm_value] each component lies in [0, norm_value].
    """
    # Position along the ramp: 1 at zero intensity, 5 at full scale.
    position = 4.0 * (float(intensity) / float(norm_value)) + 1

    def channel(plateau, knee_a, knee_b):
        # Trapezoid that is flat between the two knees and clipped at zero.
        weight = (plateau - abs(position - knee_a) - abs(position - knee_b)) / 2
        return norm_value * np.max([0.0, weight])

    red = channel(3.0, 4, 5)
    green = channel(4.0, 2, 4)
    blue = channel(3.0, 1, 2)
    return (red, green, blue)

def GrayImageToColorImage(image):
    """
    Convert a 2-D grayscale image into a blue -> green -> red pseudo-colour image.

    The input is min-max normalized to [0, 1] and then pushed through the
    blue-to-red ramp at 8-bit full scale, so the darkest pixel becomes pure
    blue and the brightest pure red.

    Parameters
    ----------
    image : array-like, shape (H, W)
        Grayscale image of any numeric dtype.

    Returns
    -------
    numpy.ndarray, shape (H, W, 3), dtype uint8
        Colour image with channel values in 0..255. Channel 0 holds blue,
        channel 1 green and channel 2 red (BGR order).

    Raises
    ------
    ValueError
        If ``image`` is not 2-D or is empty.
    """
    gray = np.asarray(image, dtype=np.float64)
    if gray.ndim != 2:
        raise ValueError(f"expected a 2-D grayscale image, got shape {gray.shape}")

    # Min-max normalize; a constant image stays at 0 instead of becoming NaN.
    gray = gray - gray.min()
    span = gray.max()
    if span > 0:
        gray = gray / span

    # The ramp has to be evaluated at 8-bit full scale. Scaling by the maximum
    # of the normalized image (always 1) leaves only 0/1 after the floor.
    full_scale = 255.0

    # Position along the ramp: 1 for the darkest pixel, 5 for the brightest.
    position = 4.0 * gray + 1.0
    red = np.maximum(0.0, (3.0 - np.abs(position - 4) - np.abs(position - 5)) / 2)
    green = np.maximum(0.0, (4.0 - np.abs(position - 2) - np.abs(position - 4)) / 2)
    blue = np.maximum(0.0, (3.0 - np.abs(position - 1) - np.abs(position - 2)) / 2)

    # Whole-array arithmetic replaces the per-pixel Python loop.
    bgr = np.stack([blue, green, red], axis=-1)
    return np.floor(full_scale * bgr).astype(np.uint8)

In [9]:
# PatientAge is stored as a DICOM Age String such as "033Y", so it has to be
# converted to an int.
def string2age(dcm):
  """
  Return the patient's age in whole years.

  Parameters
  ----------
  dcm : object with a ``PatientAge`` attribute
      Typically a pydicom dataset. The value is expected to be digits followed
      by an optional one-letter unit: Y (years), M (months), W (weeks) or
      D (days).

  Returns
  -------
  int
      Age in years. Month/week/day values are converted with floor division;
      a value without any unit letter is taken as years.

  Raises
  ------
  ValueError
      If the numeric part is empty or not an integer.
  """
  raw_age = str(dcm.PatientAge).strip().upper()
  suffix = raw_age[-1:]

  if suffix.isdigit():
    # No unit letter at all: dropping the last character here would lose a digit.
    return int(raw_age)

  units_per_year = {'Y': 1, 'M': 12, 'W': 52, 'D': 365}
  # An unrecognised trailing letter is treated like 'Y'.
  return int(raw_age[:-1]) // units_per_year.get(suffix, 1)

def create_age_mark(based_dir):
  """
  Collect one age label per patient folder.

  Parameters
  ----------
  based_dir : str
      Directory whose entries are per-patient folders of DICOM files.

  Returns
  -------
  list of int
      Ages in the order ``os.listdir(based_dir)`` yields the patient folders.
      PatientDataset indexes the folders with the same listing, so this order
      must not be changed here on its own.

  Notes
  -----
  The age is read from the first file that ``os.listdir`` reports inside each
  patient folder.
  """
  age_mark = []
  for patient_file in os.listdir(based_dir):
    patient_dir = os.path.join(based_dir, patient_file)
    dcm_path = os.path.join(patient_dir, os.listdir(patient_dir)[0])
    # dcmread replaces read_file, which newer pydicom releases no longer ship.
    # Only the header is needed for the age, so the pixel data is skipped.
    dcm = pydicom.dcmread(dcm_path, stop_before_pixels=True, force=True)
    age_mark.append(string2age(dcm))
  return age_mark

def create_3D_slice(patient_dir, num_slices=30, image_size=(224, 224)):
  """
  Build a pseudo-colour volume from randomly sampled slices of one patient.

  Parameters
  ----------
  patient_dir : str
      Folder holding the patient's DICOM files.
  num_slices : int, optional
      Number of slices to sample without replacement (default 30). When the
      folder holds fewer files, all of them are used.
  image_size : tuple of int, optional
      (height, width) every slice is resized to (default (224, 224)).

  Returns
  -------
  numpy.ndarray, shape (height, width, 3, n), dtype float64
      Colour-mapped slices ordered by ``SliceLocation``; ``n`` is
      ``min(num_slices, number of files)``.

  Raises
  ------
  ValueError
      If ``patient_dir`` contains no files.
  AttributeError
      If a sampled file has no ``SliceLocation``.
  """
  # List the folder once instead of once per sampled slice.
  dcm_files = os.listdir(patient_dir)
  if not dcm_files:
    raise ValueError(f"No DICOM files found in {patient_dir!r}")

  # random.sample raises when asked for more items than exist, so cap the count.
  chosen_files = random.sample(dcm_files, min(num_slices, len(dcm_files)))

  patient_slice = [
      pydicom.dcmread(os.path.join(patient_dir, dcm_file), force=True)
      for dcm_file in chosen_files
  ]
  patient_slice.sort(key=lambda s: s.SliceLocation)  # sort slices by location

  height, width = image_size
  slice_3d = np.zeros((height, width, 3, len(patient_slice)))
  resize = transforms.Resize((height, width))

  for i, dcm in enumerate(patient_slice):
    gray_image = Image.fromarray(dcm.pixel_array)
    if gray_image.mode == 'I;16':
      # Switch 16-bit images to 32-bit integer mode before resizing.
      gray_image = gray_image.convert('I')
    # Resize first so the colour mapping runs on the smaller image.
    resize_image_array = np.array(resize(gray_image))
    slice_3d[:, :, :, i] = GrayImageToColorImage(resize_image_array)

  return slice_3d

In [4]:
from types import NoneType
# to store whole dataset
class PatientDataset(Dataset):
  """
  Dataset of per-patient pseudo-colour slice volumes labelled with patient age.

  Parameters
  ----------
  based_dir : str
      Directory whose entries are per-patient folders of DICOM files.
  transform : callable, optional
      Applied to the 'patient' tensor of every sample. The age label is
      returned untouched.

  Each item is a dict with keys 'patient' (tensor built from the output of
  create_3D_slice) and 'agemark' (the patient's age).
  """

  def __init__(self, based_dir, transform=None):
    self.based_dir = based_dir
    self.transform = transform
    # Snapshot the patient folders once, so every index keeps pointing at the
    # same folder and the directory is not re-listed on each access.
    self.patient_dirs = os.listdir(based_dir)
    # Ages come back in os.listdir(based_dir) order, matching patient_dirs.
    self.ageMark = create_age_mark(based_dir)

  def __len__(self):
    return len(self.ageMark)

  def __getitem__(self, index):
    # An out-of-range index raises IndexError here, which is what ends a plain
    # `for sample in dataset` loop.
    patient_dir = os.path.join(self.based_dir, self.patient_dirs[index])
    patient_slice = create_3D_slice(patient_dir)
    agemark = self.ageMark[index]
    sample = {'patient': torch.tensor(patient_slice), 'agemark': agemark}
    if self.transform is not None:
      # The transform is for image data only; running it on the integer age
      # label would fail or corrupt the label.
      sample['patient'] = self.transform(sample['patient'])
    return sample

In [10]:
# Image pipeline: tensor/array -> PIL image -> resize to 224x224 -> tensor.
# NOTE(review): img_transform is defined here but is not passed to
# PatientDataset below, so it currently has no effect in this cell.
img_transform = transforms.Compose([transforms.ToPILImage(),
        transforms.Resize((224,224)),
        transforms.ToTensor()])

# Build the dataset from the patient folders under the Drive directory.
patient_dataset = PatientDataset(based_dir="/content/drive/MyDrive/Experiment_Dataset/")
# Sanity check: load every sample once and print its index, volume shape and age.
for i, sample in enumerate(patient_dataset):
    print(i, sample['patient'].shape, sample['agemark'])

# Split by fraction: 80% training, 20% validation.
# NOTE(review): fractional lengths need a PyTorch release that supports them — confirm the pinned version.
train_set, valid_set = random_split(patient_dataset, [0.8, 0.2])
# Only the training split gets a DataLoader in this cell.
train_dataloader = DataLoader(train_set, batch_size=2, shuffle=True)
print(f"Training set size: {len(train_set)}")

0 torch.Size([224, 224, 3, 30]) 78
1 torch.Size([224, 224, 3, 30]) 67
2 torch.Size([224, 224, 3, 30]) 39
3 torch.Size([224, 224, 3, 30]) 51
4 torch.Size([224, 224, 3, 30]) 31
5 torch.Size([224, 224, 3, 30]) 67
6 torch.Size([224, 224, 3, 30]) 28
7 torch.Size([224, 224, 3, 30]) 27
8 torch.Size([224, 224, 3, 30]) 32
9 torch.Size([224, 224, 3, 30]) 51
10 torch.Size([224, 224, 3, 30]) 41
11 torch.Size([224, 224, 3, 30]) 20
Training set size: 10
