In [26]:
#import statements 
import pydicom
import os
import numpy as np
import matplotlib.pyplot as plt
import random
import uuid
import cv2
import imgaug as ia

# Configuration for the preprocessing run.
# NOTE: the helper functions used below (upload_dicom, flip, normalize,
# divide_arrays, ...) are defined in other cells of this notebook and must have
# been executed before this cell runs.
# The two input paths are machine-specific absolute paths; adjust them locally.
path_pd = "/Users/anacismaru/asr_mac/pd"
path_control = "/Users/anacismaru/asr_mac/control"
PX_SIZE_2D = 64
PX_SIZE_3D = 64
# Seed for the random train/test split so that re-running the notebook
# reproduces the same split instead of drawing a new one each time.
SPLIT_SEED = 42

# Lists of numpy arrays holding the pixel data of every DICOM file found.
images_pd = upload_dicom(path_pd)
images_control = upload_dicom(path_control)

# Flip every image along axis 2.
flipped_pd = flip(images_pd)
flipped_control = flip(images_control)
print(flipped_pd[0].shape)

# Min-max normalize every image.
n_images_pd = normalize(flipped_pd)
n_images_control = normalize(flipped_control)

# Determine test and train sets. The global RNG is seeded immediately before
# the two splits because divide_arrays draws its indices from `random`.
random.seed(SPLIT_SEED)
test_pd, train_pd = divide_arrays(n_images_pd)
test_control, train_control = divide_arrays(n_images_control)

# Every stage below handles the four subsets in the same fixed order:
# test/pd, train/pd, test/control, train/control.

# Stage 1: resize the 3D volumes to PX_SIZE_2D x PX_SIZE_2D x PX_SIZE_3D.
(
    threeD_resized_test_pd,
    threeD_resized_train_pd,
    threeD_resized_test_control,
    threeD_resized_train_control,
) = (
    resize_3D(subset, PX_SIZE_2D, PX_SIZE_2D, PX_SIZE_3D)
    for subset in (test_pd, train_pd, test_control, train_control)
)

# Stage 2: split the (un-resized) volumes into slices for the 2D branch.
(
    slice_test_pd,
    slice_train_pd,
    slice_test_control,
    slice_train_control,
) = (
    slice_dicom(subset)
    for subset in (test_pd, train_pd, test_control, train_control)
)

# Stage 3: collapse each slice stack into one mean image.
(
    mean_test_pd,
    mean_train_pd,
    mean_test_control,
    mean_train_control,
) = (
    mean_image(subset)
    for subset in (slice_test_pd, slice_train_pd, slice_test_control, slice_train_control)
)

# Stage 4: resize the 2D mean images to PX_SIZE_2D x PX_SIZE_2D.
(
    twoD_resized_test_pd,
    twoD_resized_train_pd,
    twoD_resized_test_control,
    twoD_resized_train_control,
) = (
    resize_2D(subset, PX_SIZE_2D, PX_SIZE_2D)
    for subset in (mean_test_pd, mean_train_pd, mean_test_control, mean_train_control)
)

# Stage 5a: append a single grayscale channel axis to the 2D images.
(
    twoD_test_pd,
    twoD_train_pd,
    twoD_test_control,
    twoD_train_control,
) = (
    grayscale_2D(subset)
    for subset in (
        twoD_resized_test_pd,
        twoD_resized_train_pd,
        twoD_resized_test_control,
        twoD_resized_train_control,
    )
)

# Stage 5b: append a single grayscale channel axis to the 3D volumes.
(
    threeD_test_pd,
    threeD_train_pd,
    threeD_test_control,
    threeD_train_control,
) = (
    grayscale_3D(subset)
    for subset in (
        threeD_resized_test_pd,
        threeD_resized_train_pd,
        threeD_resized_test_control,
        threeD_resized_train_control,
    )
)

# Output layout (rooted in the current working directory):
#   preprocessed_data_v3/{3D,2D}/{train,test}/{pd,control}
current_directory = os.getcwd()
version_directory = os.path.join(current_directory, r'preprocessed_data_v3')

# One sub-tree per dimensionality.
threeD_directory = os.path.join(version_directory, r'3D')
twoD_directory = os.path.join(version_directory, r'2D')

# Class directories for the Parkinson's ("pd") images.
threeD_train_pd_dir = os.path.join(threeD_directory, r'train/pd')
threeD_test_pd_dir = os.path.join(threeD_directory, r'test/pd')
twoD_train_pd_dir = os.path.join(twoD_directory, r'train/pd')
twoD_test_pd_dir = os.path.join(twoD_directory, r'test/pd')

# Class directories for the control images.
threeD_train_control_dir = os.path.join(threeD_directory, r'train/control')
threeD_test_control_dir = os.path.join(threeD_directory, r'test/control')
twoD_train_control_dir = os.path.join(twoD_directory, r'train/control')
twoD_test_control_dir = os.path.join(twoD_directory, r'test/control')

# os.makedirs creates all missing parents, so creating the eight leaf
# directories also creates the version, 3D/2D and train/test levels.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of `if not os.path.exists(...)`.
for leaf_directory in (
    threeD_train_pd_dir,
    threeD_test_pd_dir,
    twoD_train_pd_dir,
    twoD_test_pd_dir,
    threeD_train_control_dir,
    threeD_test_control_dir,
    twoD_train_control_dir,
    twoD_test_control_dir,
):
    os.makedirs(leaf_directory, exist_ok=True)

# Write every processed image into the directory of its subset.
# Each pair is (list of images, destination directory); the order matches the
# original sequence of saves: all 3D subsets first, then all 2D subsets.
# NOTE: save_file picks a fresh UUID per file, so re-running this cell adds
# new files next to the ones written by earlier runs.
for image_batch, target_dir in (
    (threeD_test_pd, threeD_test_pd_dir),
    (threeD_test_control, threeD_test_control_dir),
    (threeD_train_pd, threeD_train_pd_dir),
    (threeD_train_control, threeD_train_control_dir),
    (twoD_test_pd, twoD_test_pd_dir),
    (twoD_test_control, twoD_test_control_dir),
    (twoD_train_pd, twoD_train_pd_dir),
    (twoD_train_control, twoD_train_control_dir),
):
    for image in image_batch:
        save_file(image, target_dir)
    
    


(91, 109, 91)
/f07aba15-3a4b-4a75-bbe9-e73049647883
/b56bddb6-3117-4a48-9383-948f7eb36a21
/4ffeec39-b56a-443c-809b-45414f261d2e
/9902829b-c83b-4dd7-b6ac-5a65bcc34b94
/7a0aa8c4-e183-4003-9d82-9a328cd6840c
/a5abdc86-ca46-4d59-837e-e77f39c17e01
/5bb2bfcb-e227-405d-94a7-b8559dcfe363
/2762ca2e-d5cb-44d0-a96c-7ce3a479db26
/124440d6-1528-4625-a52a-809114bbb09f
/3f4fcb7b-2f71-41b7-b0f8-848aebc167d2
/36a860ad-38c1-4330-8ec1-aaabc1babe45
/dfa791cd-5c64-4d35-9134-433820ef5842
/e39dfcaa-ac43-4ea3-8199-4ac863935635
/ea5430ed-9617-4afb-ac2d-28cf641bd035
/2286f557-9d9c-41a4-8320-bdd4f6e297d8
/6ae18790-21cb-4ca2-b6e1-20becc942f2d
/24e0bc9c-61ce-4851-8444-16573d16bcd9
/41e43f90-c518-46a3-889e-9f475bdf0500
/b063c815-1dd8-4045-a9c8-fc0498d697c1
/2727695f-d9d0-4027-a21a-59193fc05f81
/bc7ab7a9-3d57-4e9e-b793-fc82ae4bf484
/91f57f4a-40a0-4683-bc8a-5fe986b7937c
/27f50741-dece-4251-b0bf-98053cc978a2
/00a30439-fb7e-489c-bca2-c40a791e0f80
/d611272a-89e1-435f-95a8-c844f497dcf7
/d45a6d06-bf3c-4f4c-a23b-1d42387d114

/ccf879c4-be83-4450-9359-0f31d0ff3f1c
/30c624a9-437b-4e2f-91f4-a7e4e3c97bda
/30ad3b03-7760-4adb-8648-fa407f9c0dee
/3e5b4496-e5c1-499d-a835-4c0e272451a9
/e8da70ae-1928-4916-8440-b64e010058df
/498fc03b-5b5a-4553-84b3-37d4b68cc552
/3dd96156-959c-4535-a17a-33dcf94bddae
/664858b2-77e3-4116-b46e-baa914c3297d
/f79b5388-8d90-4755-818a-653b0f606950
/efc02443-2de7-4be8-9fc2-f2d74ef22926
/9e56d32e-9b60-453c-a4f8-ae11d1598171
/c1ddaeb5-d872-4061-b0fc-3de0c7bc7266
/6dc5ecc2-c6a9-4a7c-8d73-0e49c75dc02c
/292a536a-50f3-4e80-a2a7-e048825e250b
/db41dded-f339-4fa7-98bd-7717a5b2083d
/db41445a-da7f-4121-a044-a2dd9fdd7bfa
/e86036b5-3399-4b0f-b657-8fef89112703
/ff317434-a04a-4bae-8e79-de4883632a90
/1cc80b51-27cd-4cc7-bad5-e1e54bf89d22
/b03509cf-7f7b-44c6-96af-955e956751f6
/dbe615db-38ab-4725-b9bf-bddae6fc21c3
/cd3a2a8f-b13d-4f42-9a0d-10fc3397d62c
/f06c5dbb-0eb2-4f81-8a72-5bfb455686a9
/4dd1661b-3be1-49d8-9a4a-fccd1840f501
/3c936848-c3e5-44d6-a47f-5560a8b14d1d
/4646b87a-477c-477b-b4ad-4e3679ea0d03
/1dbf6d11-bd

/9efde776-72ec-4623-8ff4-e251d19514b3
/f6287ca1-9282-43e9-9f9b-f96555677df7
/60a8148f-2ab6-45c6-9125-2269e205f42c
/e9f65718-e64f-463b-9e6a-c3e1aa2dae68
/f05689cb-e689-4d5b-8529-68f16e6ead8a
/f5d1b98c-6c3a-4659-9b3e-00b06e0efde0
/91254c37-417c-47e3-b6ba-7b958d352833
/2f8dec71-2dc1-4509-8e3d-de2feac94125
/b31c3498-670b-4ac6-8754-56d72758cf55
/ebbbbb98-2d20-4224-b775-b35d193a9932
/6f7426b0-8ca7-463a-bf77-ffac968b8542
/1d1648e1-f478-4c0d-931f-d7ada3a6c659
/509397ff-ab3b-458b-a33d-ccf687ddb5d7
/3f58e7f7-ca9c-4d82-9315-f7c6c2f9bc67
/a92cd058-6d0c-482e-a6f1-b21aa3456195
/89b1ba1d-bcc3-45bd-8aef-2ef25de85748
/07df5605-dced-48bb-a914-da394acde93a
/8c8293e0-9e5f-4ca2-890a-f5e6ee12d11e
/6813e71d-0a79-45b0-a73c-448c1584bc07
/fcb435c2-0ec6-47bf-92d3-d4c10b19f877
/4365c3a8-cf98-4624-a9bf-fd189245229d
/2605d68e-fe72-4d6f-aea5-f71f191b4cf8
/2b0cb60d-592e-4c81-9f0f-0e55d08f0f27
/2879882b-fed0-410c-a5ef-a9ec93a7f5b8
/be15263f-a201-4838-a421-c6617c9f4773
/6b934597-81c1-4826-9816-04186274dce2
/4341e462-c1

/cb9f57b4-9d7c-4b2b-b649-bcc4c0f4a326
/9dabe018-80cf-4722-8a3c-8a8c5e31ef14
/bd1fc241-0f90-4e0d-baf0-8d254d831112
/01e37a19-6762-45b5-8d54-f56ac4697541
/5c414f5e-3d10-4b0d-a814-1734840e929b
/96678dd9-b4e9-46dc-bfba-1fe953e990b9
/b98a47ae-a46b-4701-bf78-dce2224f8ced
/e1dc6dac-767f-437d-9328-a125e81f43e3
/57bb2e9c-0e63-4c71-a5c6-fb86451189e0
/aae45a25-9376-40b6-8971-f2bc8eca2286
/cb7f9e73-1345-43bb-aede-6a3b1c3e3aef
/c23bd931-c916-4319-973a-036885b691d0
/ab12db61-8161-48a4-8a6c-1892e1a2e2ec
/470bdc59-afa1-4e69-a2a1-67ac9dc24fcb
/d9dca2ef-6ad6-4e69-94e9-3d3ae20c9dfe
/98f9afe3-c888-4e95-abd5-8b04941d4ff9
/145b66b7-15f4-47a3-944a-8043007afcb6
/13e25908-8620-41a1-aad6-5213127a210b
/ab00249c-fb4c-4848-ad93-482708feb9cf
/685926ba-85c4-4bf1-86ec-8793981f9a71
/a0969576-483f-4459-a292-3b4a58f6861b
/b8ade25c-cfef-4af8-9d47-5fc40aae2669
/d0cd77d2-17f6-43dc-bcfc-cf967016bd42
/2be3221b-82bb-48f7-811d-8d77a1109368
/fcb5a087-8050-4464-acb5-2a77025723b9
/9cf05e2f-ad16-455a-a164-2955169e5f99
/2f24afb9-02

/36e516d3-2e28-4e44-8e75-2a90b40e2526
/e79b2b8e-26ef-41ed-bbcf-ad9d87d4284a
/b86d2a42-0e67-4b90-8520-531f2b954cbd
/1f0668af-dfc3-4d5e-8826-8df61210ec0c
/ae0113d2-7fbc-456f-9de6-a2b8059c08fa
/1562a2a9-10fe-4082-bcbe-b04365fb7680
/466ffac6-b7c4-439a-a57a-841751022689
/fa3579c5-265c-413f-b51b-13e45e4a4b75
/5eb813b8-3337-49e4-a971-786007269419
/d49634b4-2484-4518-96df-ee4f8d88c581
/317ad43f-818c-4b30-b32b-8d622b4d6ebe
/98972b1f-c813-4239-9735-e6c239b7b342
/c4d02bbe-eb77-4d8b-a9f1-ec17dc4ae749
/cfda1a3a-8a6c-48f1-8fde-d62ba48789a5
/50325952-6202-448c-88e1-cc95c53a0760
/ebb695f1-dbe3-4734-8851-dcdef0859ebf
/228335a2-177c-459a-90c2-304293693ac4
/3ae21605-dc52-43b4-94fe-3fe754b63f44
/c494bce8-4282-47f3-bc71-face74462202
/cb8abb18-9100-4b0d-b838-60dcc9c29980
/4d009f5a-97e6-40ad-9df1-027a806634ba
/c4d5d0d6-7438-43ae-be8e-b022adec3f0e
/fa2463e7-dce6-4d10-b825-fa63b892f981
/4f9d5c9b-00a0-4531-9ca3-e3c3508a6e39
/8da59b21-4812-446d-ab5c-7f67bcf022ce
/b4ead7c7-d7a8-4448-b535-2044b6b6f53c
/16160e48-e3

/d8b2ae0e-5ee0-4a88-8834-e2ce9b3b7569
/e9295627-5f36-411e-ae25-3a6554dcf3f0
/f7a162cd-4a7a-4f37-bdda-3e10ece5eeb9
/c935a417-2eb6-4c7f-966e-bf725b351058
/0a0da6ab-b052-46dd-8764-4e367a32aad0
/da490a36-40e1-492a-904b-080dee925806
/fd731c19-d796-4cde-99e3-e354f4bb9c53
/f8c16491-949c-4497-a6d6-f7c97909dc77
/be294e5f-778e-4bb2-9261-55a950bb13c3
/1cf27046-9b93-410a-9c50-d1f926530778
/becffb0d-6df3-418b-b360-b6d0ade9ae5c
/99b7869a-cc6e-4be7-8b83-67ecd139a017
/3df5b3f3-3189-48bd-98f0-995563280964
/be1e4a80-2dd0-41a7-8c00-4946765ec961
/a2e1895e-3fb5-4cb0-9902-8e679d76c469
/9a6e4585-10e8-4918-a4e7-eb2dfab942c3
/8911c3db-57f2-411e-8207-44523a5ce7b9
/e440d5ed-3b0a-4f2d-89e1-5b1f9352b967
/6b726580-0951-40a3-ae4b-b84b5fdea9e4
/338c7e44-fa75-4a70-94d9-3a17f23c932b
/e9c81029-3973-4969-b096-6d40e4b50cc8
/a3217add-3818-44d2-b61b-a2c364bc2df1
/71b507d4-65a4-4a29-b46a-a8d29799740e
/8125e535-03a0-4d03-8a10-21e87087b55d
/33df3155-9188-43bb-8b1e-2e14cf212466
/034da15e-bfee-4c0a-9541-07aef97d3d04
/0a6319c9-1e

In [1]:
def grayscale_2D(image_array):
    """Append a channel axis of length 1 to every image in ``image_array``.

    Each image gets a new axis inserted at position 2, so an array of shape
    (a, b) becomes (a, b, 1).

    Returns a new list; the input list is not modified.
    """
    return [np.expand_dims(img, axis=2) for img in image_array]

In [2]:
def grayscale_3D(image_array):
    """Append a channel axis of length 1 to every volume in ``image_array``.

    Each volume gets a new axis inserted at position 3, so an array of shape
    (a, b, c) becomes (a, b, c, 1).

    Returns a new list; the input list is not modified.
    """
    return [np.expand_dims(volume, axis=3) for volume in image_array]

In [3]:
def upload_dicom(path):
    """Load the pixel data of every DICOM file found under ``path``.

    The directory tree rooted at ``path`` is walked recursively. A file is
    treated as DICOM when its name ends with ".dcm" (case-insensitive); names
    that merely contain ".dcm" somewhere (e.g. "scan.dcm.bak") are skipped.

    The files are read in sorted path order so that the returned list has the
    same order on every run, independent of directory listing order.

    Returns:
        A list with one ``pixel_array`` per DICOM file; empty when no file
        matches.
    """
    dicom_paths = []
    for dir_name, _subdirs, file_names in os.walk(path):
        for file_name in file_names:
            if file_name.lower().endswith(".dcm"):
                dicom_paths.append(os.path.join(dir_name, file_name))
    dicom_paths.sort()

    # Read each file and keep only its raw pixel data.
    return [pydicom.dcmread(dicom_path).pixel_array for dicom_path in dicom_paths]

In [4]:
def divide_arrays(array, test_fraction=0.3):
    """Randomly split ``array`` into a test list and a train list.

    ``round(len(array) * test_fraction)`` distinct positions are drawn with
    ``random.sample`` from ALL positions of ``array`` (including the last one)
    and the items at those positions form the test list; every other item goes
    to the train list. Both lists keep the relative order of ``array``.

    The draw uses the global ``random`` state, so call ``random.seed`` first
    when a reproducible split is needed.

    Args:
        array: sequence of items to split.
        test_fraction: share of items placed in the test list (default 0.3,
            i.e. a 30% / 70% split).

    Returns:
        ``(test_array, train_array)`` - note that the test list comes first.

    Raises:
        ValueError: from ``random.sample`` when the requested test size is
            negative or larger than ``len(array)``.
    """
    total = len(array)
    test_count = round(total * test_fraction)
    # A set gives O(1) membership tests in the loop below.
    test_indices = set(random.sample(range(total), test_count))

    test_array = []
    train_array = []
    for index, item in enumerate(array):
        if index in test_indices:
            test_array.append(item)
        else:
            train_array.append(item)
    return test_array, train_array
        
    

In [5]:
def slice_dicom(array):
    """Split every volume in ``array`` into its slices along axis 0.

    For a volume of shape (n, a, b) the result holds n slices of shape (a, b),
    including the final slice at index n - 1.

    Returns:
        A list with one entry per volume; each entry is the list of that
        volume's slices in index order.
    """
    return [
        [volume[z, :, :] for z in range(volume.shape[0])]
        for volume in array
    ]
              

In [6]:
def normalize(array):
    """Min-max normalize every image in ``array``.

    Each image is mapped to ``(image - min) / (max - min)`` using its own
    minimum and maximum, so its smallest value becomes 0 and its largest 1.

    An image whose values are all equal has ``max - min == 0``; dividing would
    produce NaN, so such an image is returned as an all-zero float array of
    the same shape instead.

    Returns:
        A new list of normalized arrays; the inputs are not modified.
    """
    array_minmax = []
    for image in array:
        min_value, max_value = image.min(), image.max()
        value_range = max_value - min_value
        if value_range == 0:
            # Constant image: avoid the 0/0 division.
            array_minmax.append(np.zeros(image.shape, dtype=float))
        else:
            array_minmax.append((image - min_value) / value_range)
    return array_minmax

In [7]:
def mean_image(array_images, start=34, stop=47):
    """Average a fixed window of slices of every slice stack.

    For each stack in ``array_images`` the slices at 0-based indices
    ``start`` .. ``stop - 1`` are averaged element-wise. With the defaults this
    is indices 34..46, i.e. 13 slices.

    Args:
        array_images: iterable of slice stacks; each stack is an indexable
            sequence of equally shaped slices.
        start: 0-based index of the first slice in the window.
        stop: 0-based index one past the last slice in the window.

    Returns:
        A list with one mean image per stack.

    Raises:
        ValueError: if the window is empty or ``start`` is negative.
        IndexError: if a stack has fewer than ``stop`` slices.
    """
    if not 0 <= start < stop:
        raise ValueError(
            "invalid slice window: start=%r, stop=%r" % (start, stop)
        )

    mean_images = []
    for slices in array_images:
        # Fail loudly instead of silently averaging a truncated window.
        if len(slices) < stop:
            raise IndexError(
                "stack has %d slices but the window needs at least %d"
                % (len(slices), stop)
            )
        window = np.asarray(slices[start:stop])
        mean_images.append(np.mean(window, axis=0))
    return mean_images

In [8]:
def plot_slices(slices):
    """Show ``slices[30:50]`` (at most 20 slices) in one 4 x 5 figure.

    ``slices`` must already be a sequence of 2D slices.
    Pass ``cmap='gray'`` to ``imshow`` below to view the slices in grayscale.
    """
    figure = plt.figure()
    position = 0
    for each_slice in slices[30:50]:
        # Subplot positions are 1-based.
        position += 1
        axes = figure.add_subplot(4, 5, position)
        axes.imshow(each_slice)
    plt.show()

In [9]:
def label_images(array_images, label):
    """Pair every image with a one-hot class label.

    ``label == 1`` (Parkinson's) yields ``[image, [0, 1]]`` and
    ``label == 0`` (control) yields ``[image, [1, 0]]``. Any other label
    produces an empty result.

    Returns:
        A list of ``[image, one_hot]`` pairs; each pair owns its own one-hot
        list.
    """
    return [
        [image, [0, 1] if label == 1 else [1, 0]]
        for image in array_images
        if label == 1 or label == 0
    ]

In [10]:
def save_file(image, directory, verbose=True):
    """Save ``image`` as a ``.npy`` file with a random UUID name.

    Args:
        image: array to store with ``np.save``.
        directory: existing directory that receives the file.
        verbose: when True (the default) print "/<uuid>" for the saved file;
            pass False to keep bulk saves from flooding the output.

    Returns:
        The full path of the file that was written.
    """
    unique = uuid.uuid4()
    if verbose:
        file_name = "/" + str(unique)
        print(file_name)
    target_path = os.path.join(directory, str(unique) + ".npy")
    np.save(target_path, image)
    return target_path

In [11]:
def resize_2D(array_images, pixel_length, pixel_width):
    """Resize every 2D image with ``cv2.resize``.

    The target size is passed to OpenCV as ``(pixel_length, pixel_width)``.
    NOTE(review): OpenCV documents that size argument as (width, height) -
    confirm the argument order before using non-square sizes.

    Returns:
        A new list with one resized image per input image.
    """
    target_size = (pixel_length, pixel_width)
    return [cv2.resize(picture, target_size) for picture in array_images]


In [12]:
import skimage.transform
def resize_3D(array_images, pixel_length, pixel_width, pixel_height):
    """Resize every 3D volume with ``skimage.transform.resize``.

    Each volume is resized to the output shape
    ``(pixel_length, pixel_width, pixel_height)``.

    Returns:
        A new list with one resized volume per input volume.
    """
    target_shape = (pixel_length, pixel_width, pixel_height)
    return [
        skimage.transform.resize(volume, target_shape)
        for volume in array_images
    ]

In [25]:
def flip(array_images):
    """Reverse every image in ``array_images`` along axis 2.

    Returns:
        A new list with one flipped array per input array; the input list and
        its arrays are left unchanged.
    """
    return [np.flip(volume, axis=2) for volume in array_images]