In [59]:
#import statements 
import pydicom
import os
import numpy as np
import matplotlib.pyplot as plt
import random
import uuid
import cv2

#configuration: input locations, output size and split seed
# NOTE: the helper functions called below (upload_dicom, normalize, divide_arrays, ...)
# are defined in other cells of this notebook and must have been run first.
# NOTE(review): absolute, machine-specific input paths - adjust before running elsewhere.
path_pd = "/Users/anacismaru/asr_mac/pd"
path_control = "/Users/anacismaru/asr_mac/control"
PX_SIZE_2D = 64
PX_SIZE_3D = 64
RANDOM_SEED = 42

#seed the RNG used by divide_arrays (random.sample) so the sampled test indices are
#the same on every run; every run appends new uuid-named files to the same output
#folders, so an unseeded re-run would mix different splits together.
#(which images those indices select also depends on the order upload_dicom returns)
random.seed(RANDOM_SEED)

#matrices that contain numpy arrays of pixel data
images_pd = upload_dicom(path_pd)
images_control = upload_dicom(path_control)

#normalize images
n_images_pd = normalize(images_pd)
n_images_control = normalize(images_control)

#determine test and train sets
test_pd, train_pd = divide_arrays(n_images_pd)
test_control, train_control = divide_arrays(n_images_control)

#resize 3D images here
threeD_resized_test_pd = resize_3D(test_pd, PX_SIZE_2D, PX_SIZE_2D, PX_SIZE_3D)
threeD_resized_train_pd = resize_3D(train_pd, PX_SIZE_2D, PX_SIZE_2D, PX_SIZE_3D)
threeD_resized_test_control = resize_3D(test_control, PX_SIZE_2D, PX_SIZE_2D, PX_SIZE_3D)
threeD_resized_train_control = resize_3D(train_control, PX_SIZE_2D, PX_SIZE_2D, PX_SIZE_3D)

#split images into slices for 2D portion of algorithm
#(the slices come from the normalized, not yet resized, volumes)
slice_test_pd = slice_dicom(test_pd)
slice_train_pd = slice_dicom(train_pd)
slice_test_control = slice_dicom(test_control)
slice_train_control = slice_dicom(train_control)

#create mean image from slices
mean_test_pd = mean_image(slice_test_pd)
mean_train_pd = mean_image(slice_train_pd)
mean_test_control = mean_image(slice_test_control)
mean_train_control = mean_image(slice_train_control)

#resize 2D images
twoD_resized_test_pd = resize_2D(mean_test_pd, PX_SIZE_2D, PX_SIZE_2D)
twoD_resized_train_pd = resize_2D(mean_train_pd, PX_SIZE_2D, PX_SIZE_2D)
twoD_resized_test_control = resize_2D(mean_test_control, PX_SIZE_2D, PX_SIZE_2D)
twoD_resized_train_control = resize_2D(mean_train_control, PX_SIZE_2D, PX_SIZE_2D)

#add grayscale channel
twoD_test_pd = grayscale_2D(twoD_resized_test_pd)
twoD_train_pd = grayscale_2D(twoD_resized_train_pd)
twoD_test_control = grayscale_2D(twoD_resized_test_control)
twoD_train_control = grayscale_2D(twoD_resized_train_control)

threeD_test_pd = grayscale_3D(threeD_resized_test_pd)
threeD_train_pd = grayscale_3D(threeD_resized_train_pd)
threeD_test_control = grayscale_3D(threeD_resized_test_control)
threeD_train_control = grayscale_3D(threeD_resized_train_control)

#output layout: <cwd>/preprocessed_data_v3/{3D,2D}/{train,test}/{pd,control}
current_directory = os.getcwd()
version_directory = os.path.join(current_directory, r'preprocessed_data_v3')
threeD_directory = os.path.join(version_directory, r'3D')
twoD_directory = os.path.join(version_directory, r'2D')

threeD_train_pd_dir = os.path.join(threeD_directory, r'train/pd')
threeD_test_pd_dir = os.path.join(threeD_directory, r'test/pd')
twoD_train_pd_dir = os.path.join(twoD_directory, r'train/pd')
twoD_test_pd_dir = os.path.join(twoD_directory, r'test/pd')
threeD_train_control_dir = os.path.join(threeD_directory, r'train/control')
threeD_test_control_dir = os.path.join(threeD_directory, r'test/control')
twoD_train_control_dir = os.path.join(twoD_directory, r'train/control')
twoD_test_control_dir = os.path.join(twoD_directory, r'test/control')

#create every output directory; exist_ok=True replaces the separate
#"check, then create" steps and is a no-op when the folder is already there
for output_dir in (
    version_directory,
    threeD_directory,
    twoD_directory,
    threeD_train_pd_dir,
    threeD_test_pd_dir,
    twoD_train_pd_dir,
    twoD_test_pd_dir,
    threeD_train_control_dir,
    threeD_test_control_dir,
    twoD_train_control_dir,
    twoD_test_control_dir,
):
    os.makedirs(output_dir, exist_ok=True)

#Save all numpy images in respective folders (3D sets first, then 2D sets)
for image_set, output_dir in (
    (threeD_test_pd, threeD_test_pd_dir),
    (threeD_test_control, threeD_test_control_dir),
    (threeD_train_pd, threeD_train_pd_dir),
    (threeD_train_control, threeD_train_control_dir),
    (twoD_test_pd, twoD_test_pd_dir),
    (twoD_test_control, twoD_test_control_dir),
    (twoD_train_pd, twoD_train_pd_dir),
    (twoD_train_control, twoD_train_control_dir),
):
    for image in image_set:
        save_file(image, output_dir)
    
    


/d6a0f8e9-50ae-4270-9f38-06884c1ef652
/cd37fdb5-efef-41da-b079-3e473399fd82
/66cfb7f2-961f-4239-8b16-63e8694395e7
/14caa0e1-a890-420c-887b-455b48251b92
/b821e2fb-5fd7-426d-a7b6-984521b63c82
/f0e8130c-5419-418c-960b-a84eda51f5b0
/a7e8ab3c-4b60-437a-9ad7-6315737b26ef
/cbc6da08-2294-403c-bdbf-e913815a611c
/3e6a39ee-21b8-4ec3-b52f-17be75cb5cc4
/5f233024-e94d-4037-a00f-07c5ffcc7b4a
/eb5bda0a-ac27-4e9f-8f3c-a9d0b1fdc904
/865f1a40-809c-42be-923a-8b24b868b36c
/7c828875-a60b-474c-acb9-37df758f65b0
/6681dc3b-8071-4cd8-84e0-6ecf3ed2ddf3
/591709de-ca54-471e-a398-1bec6ebc28ef
/c6a471d4-4de7-4918-ae4a-4f38679eead5
/3c399b41-d9fd-4c82-a72a-c96fd40deb52
/51a440e7-a275-488b-9ab6-9c797ad9b26a
/dcb2fbb6-2b8f-4265-aeea-6279a273175a
/94f657c2-3b0f-4966-bdf0-811c9fd1b721
/342379b0-713a-4ff0-af98-ec41633d1c1f
/473ea747-27d2-477d-9cc8-e73759f02f29
/e45055d0-9878-479d-926e-0513be4f2dd6
/ec005dd0-8909-4489-a0ee-7722f756a848
/58dd6b72-a020-4959-860a-8417d46cdec1
/0aa2cee6-a2a2-4a17-be9c-0dcd9163cc95
/30d604a5-ef

/065230c3-7fa6-4e4a-ba83-f8e199c62cbc
/9d5fbf7d-9d0b-4175-9a67-dd05bbf0403d
/1e887755-f5f6-4ab7-b3be-1b01c99d996e
/a5c136a8-ee4d-4b04-b7aa-0bcf6b1c84cc
/e510b961-d780-41ca-b19e-eb85e91039c2
/4fc1b860-d7ad-48af-acaa-531c08e7a6c5
/9c4ee329-c8d1-456f-a668-29d5b4b0c4cf
/c2319e1b-1c7f-485f-bc24-d085b033db86
/36609a31-8302-4e21-a554-17f936dc26a6
/e714462b-37c5-4e80-9511-151be7572f15
/c2ec25df-0ea9-420c-b1ec-10d78f881d8a
/315e3c91-343c-4691-829e-d9d3341e34c6
/252411c8-be5d-4ca1-83b6-1227f1bfa775
/6b9c0c60-d8cc-46bd-8b69-f4af7ad22220
/b03f1e61-6633-455e-a2bd-6d9b6f7811cc
/c6f9b22f-bb29-4b31-a719-fecfc0bbc16c
/6259d44d-8718-404e-b1ec-58b4eb1c23c6
/a3c033f4-0a77-4ff2-84fc-cd49649a8713
/fb0d9f22-d08e-40d5-a026-bcddc62b4808
/fb515b71-6e9f-464f-9c10-7a715e2e2e6f
/c5f80fae-5b8f-409f-8608-4bca5b2a6299
/5dfee0bb-d70d-4c0e-8617-8524b1a852c3
/418cfdab-425f-4e9b-9cd7-9b3cf4a6db19
/17808cdf-0b83-4740-a5d8-f7e9d7589e2e
/becf4c5f-c063-4663-ba11-be1b219a3eee
/cc280e74-9953-400e-a257-69e567da3779
/ea8904ca-8d

/c77d27ed-36bb-4e56-834b-a5518b1a87aa
/eac38edf-7cb9-4233-8a7e-704e334c3f93
/a4db938a-144d-4f20-a35d-41809825d58c
/732e4d96-df4d-4392-8a20-93d2439783ed
/4c27388f-c595-4233-b5c0-0836f71cd71e
/07ed6d90-6023-48a6-93c1-2d4479a7606d
/f70e09ef-0046-4f22-a160-019b4f503c82
/e5daf482-401d-4b4f-a52f-9c9a2417e5c5
/6128f85a-7ead-451d-8c76-c56683929d47
/e12332be-93fc-450f-b6c5-e4621c6f1549
/7c68ca91-e7c3-4697-8827-6ee871fa894f
/673b46e5-93be-4641-acbc-8b5114f0fe79
/647a1081-c3d8-4e18-a4e1-c6bbf4e674c2
/44f67c85-8ccf-4148-b5ea-35aa6d77ca14
/e7f35a65-59d4-4bc5-a9b1-4bc1ab45c1be
/8cd2ba89-932b-4c4d-b4c0-d01270d80e13
/cca65c41-bc77-4628-9c77-395700e6ce20
/832505b6-fc9e-497c-a523-f36ff5260513
/e1e24c44-fa2f-43e1-98a6-979d155d7dcb
/3f6a4426-b21b-4499-aba0-580fb044f521
/51be5884-8527-4cd6-81c5-cf9e5e0479b1
/3e6a7dce-1a65-47c3-b640-6b46770371f6
/35bc63b8-8a0a-4fdc-b9ab-d68c0d23c902
/2efda3c5-5ce7-45eb-9dbb-e7309179fbff
/9cfd1cb4-f48d-4bf4-985a-829bb2a5625d
/bfbeb5e2-0041-49cd-b57e-7ee9ce34de5e
/a5bf3926-a9

/d6b91496-f1d5-4555-a24f-550a4480171b
/3b36aa06-eb1e-4049-8d3c-7c68d95523ae
/5e7ba2ee-5ba8-4faf-9f06-f379e7cd0ecb
/a4ac7d91-8c73-449a-9824-eb4950ce839c
/ba024469-2d14-416e-83c7-86eb6da74657
/0e772e5a-c04c-458e-97ca-8f9b3b8d2717
/0f23d128-30a7-4bd1-bf67-4c380910363c
/0ad6446a-7894-433b-9a7c-d55a2698f3cb
/fcfa74bf-9a61-47c7-b5a2-b39596298eaf
/23c16314-e356-465d-b65c-8df0782ec662
/36661de6-b7d6-44a0-8c3f-867e61495965
/8e2c676d-caac-4788-852a-a3f9594341b7
/1baeef3c-4ca4-460b-9a07-2bb3cdd03aa2
/b9b25aa3-08ed-441d-a7a3-e250bef2e9b5
/cfd880a3-4ace-4098-90e5-ee9bbcbc9416
/dc2fe703-8b87-485e-a290-8242774091bf
/5a3eea93-8ec9-415b-a971-7f1c935fc324
/3a3ae0e8-7e62-477d-8872-475cf43ab139
/f835b8eb-31c0-4ec9-b802-348dab96fd45
/bb9811f2-c321-4717-87e8-f7f63bd79a19
/664885da-0d34-491c-b1cd-ec3fc9641407
/1776a735-b835-4c69-80d0-1682f35d6042
/7e9f1691-c4ec-414d-a862-02b5ebb91f29
/44093d47-4f18-4d97-afc9-7644528568db
/149d6b57-1efd-474b-b496-8a93de9bdc88
/e879de90-2506-4007-bfe9-382738fa576b
/1d3506ee-d4

/ae040555-3b66-4b39-acec-07f82a2f645b
/829a2f6b-e7ed-47a1-9430-335e4b0d775f
/32177328-f9ce-41cb-a64d-d9999024b2c8
/3e2104e3-2f07-4fac-a9cb-2d358ceb3aa3
/9b3d6169-a486-4a99-867e-db66acd1890d
/4cef0e86-8077-44e8-a200-78413517278d
/1331e9fd-a046-41fc-9bb2-4f8511c84df1
/2717c6ed-f547-4cb2-8dee-70bbf79f69b8
/a95fa02a-123f-4776-9361-a9b8d440a62a
/8eac3b20-15f9-4b69-8770-08066b80b0cb
/64f09f07-9ff2-47f0-a99c-dfe7d9d72623
/b609b5e0-0ff3-4725-a265-eebbc34d2d96
/004710be-bbfa-4222-9a26-1bdfac8ca773
/d141b8b5-4c38-4b06-8926-682ba80bf289
/0cccb451-a2d1-4bea-8039-d532387bd3c7
/093521e8-e6d5-47b9-a43e-13ad89f4a99a
/38262f0e-da7f-41b0-ae6e-84980efc8cfc
/d4ea0de2-87e6-4ec7-a1e7-185bf3b4e587
/275aa2b3-6201-48a8-93f0-5122be36bdb6
/69416ac4-967d-47bf-9165-acbf29e5318b
/a0b75aa2-0c6e-43fb-9e75-de82b5e7e231
/a9b42189-9e9e-4e46-b921-e401656d0f94
/e5f05381-1bdc-460d-82ea-cc3d39dea8a9
/41eec50c-8af2-45ee-9b45-d2ab14dac2f6
/4c76e697-4c94-45e7-974e-8479f5876328
/5c999051-532e-48dd-bd2f-90eeb999fa4b
/bc060113-a5

In [57]:
#give every 2D image a trailing single-channel axis
def grayscale_2D(image_array):
    """Insert a new length-1 axis at position 2 of every image.

    Args:
        image_array: iterable of array-likes; each one is passed to
            ``np.expand_dims(..., axis=2)``.

    Returns:
        list with one expanded array per input, e.g. shape (H, W) -> (H, W, 1).
    """
    return [np.expand_dims(image, axis=2) for image in image_array]

In [53]:
#give every 3D image a trailing single-channel axis
def grayscale_3D(image_array):
    """Insert a new length-1 axis at position 3 of every volume.

    Args:
        image_array: iterable of array-likes; each one is passed to
            ``np.expand_dims(..., axis=3)``.

    Returns:
        list with one expanded array per input, e.g. shape (A, B, C) -> (A, B, C, 1).
    """
    return [np.expand_dims(volume, axis=3) for volume in image_array]

In [58]:
#load the pixel data of every DICOM file below a directory
def upload_dicom(path):
    """Read the pixel arrays of all ``.dcm`` files found under ``path``.

    The directory tree is walked recursively. A file is selected when its
    name ends in ``.dcm`` (case-insensitive); a name that merely contains
    ".dcm" somewhere (e.g. ``scan.dcm.txt``) is skipped. The selected paths
    are sorted so the returned order does not depend on the order in which
    the file system lists directory entries.

    Args:
        path: root directory to search.

    Returns:
        list holding ``pixel_array`` of each file read with
        ``pydicom.dcmread``, in sorted path order. Empty when nothing matches.
    """
    dicom_paths = []
    for dir_name, _subdirs, file_names in os.walk(path):
        for file_name in file_names:
            if file_name.lower().endswith(".dcm"):
                dicom_paths.append(os.path.join(dir_name, file_name))
    dicom_paths.sort()

    # one raw pixel array per DICOM file
    return [pydicom.dcmread(dicom_path).pixel_array for dicom_path in dicom_paths]

In [28]:
#split array into train and test parts (30% test by default)
def divide_arrays(array, test_fraction=0.3):
    """Randomly partition ``array`` into a test list and a train list.

    ``round(len(array) * test_fraction)`` distinct positions are drawn with
    ``random.sample`` from *all* valid indices (the last element is eligible
    too); items at those positions go to the test list, everything else to
    the train list. The relative order of ``array`` is kept in both lists.

    Args:
        array: sequence of items to split.
        test_fraction: share of items placed in the test list (default 0.3).

    Returns:
        tuple ``(test_array, train_array)``.

    Raises:
        ValueError: from ``random.sample`` when the requested test size is
            negative or larger than ``len(array)``.
    """
    item_count = len(array)
    test_count = round(item_count * test_fraction)
    # a set gives O(1) membership checks in the loop below
    test_indices = set(random.sample(range(item_count), test_count))

    test_array = []
    train_array = []
    for index, item in enumerate(array):
        if index in test_indices:
            test_array.append(item)
        else:
            train_array.append(item)
    return test_array, train_array
        
    

In [29]:
#split each 3D image into its 2D slices along the first axis
def slice_dicom(array):
    """Break every volume into a list of 2D slices taken along axis 0.

    All ``image.shape[0]`` slices are returned, including the last one.

    Args:
        array: iterable of 3D numpy arrays (indexed as ``image[z, :, :]``).

    Returns:
        list with, per input volume, a list of its ``image[z, :, :]`` slices.
    """
    return [
        [image[z, :, :] for z in range(image.shape[0])]
        for image in array
    ]
              

In [30]:
#min-max normalize every image to the range [0, 1]
def normalize(array):
    """Rescale each image independently with ``(x - min) / (max - min)``.

    Non-floating inputs are converted to float64 before the arithmetic so
    that ``x - min`` and ``max - min`` cannot overflow a narrow integer
    dtype. An image whose values are all equal has no range to scale by; it
    is returned as all zeros rather than dividing by zero.

    Args:
        array: iterable of numeric array-likes.

    Returns:
        list of floating-point arrays, one per input, each with the input's shape.
    """
    array_minmax = []
    for image in array:
        pixels = np.asarray(image)
        if not np.issubdtype(pixels.dtype, np.floating):
            pixels = pixels.astype(np.float64)
        min_value, max_value = pixels.min(), pixels.max()
        value_range = max_value - min_value
        if value_range == 0:
            # constant image: avoid 0/0 -> NaN
            array_minmax.append(np.zeros_like(pixels))
        else:
            array_minmax.append((pixels - min_value) / value_range)
    return array_minmax

In [31]:
#average slices 35 to 48 (1-based; indices 34..47) of every image into one 2D image
def mean_image(array_images, first_index=34, last_index=47):
    """Build one mean 2D image per volume from a window of its slices.

    The window covers the zero-based indices ``first_index`` through
    ``last_index``, both included. With the defaults that is indices 34..47,
    i.e. the 14 slices numbered 35 to 48 when counting from one.

    Args:
        array_images: iterable of slice sequences (one sequence of 2D arrays
            per volume).
        first_index: zero-based index of the first slice in the window.
        last_index: zero-based index of the last slice in the window (inclusive).

    Returns:
        list with the element-wise mean of the window for every volume.

    Raises:
        ValueError: if the window bounds are negative or out of order.
        IndexError: if a volume has fewer than ``last_index + 1`` slices.
    """
    if first_index < 0 or last_index < first_index:
        raise ValueError("slice window must satisfy 0 <= first_index <= last_index")

    mean_images = []
    for slices in array_images:
        if len(slices) <= last_index:
            # slicing would silently shorten the window, so fail loudly instead
            raise IndexError(
                "image has %d slices, need at least %d" % (len(slices), last_index + 1)
            )
        window = slices[first_index:last_index + 1]
        mean_images.append(np.mean(window, axis=0))
    return mean_images

In [32]:
#draw slices[30:50] on a 4x5 grid in a single figure; pass already-split slices
def plot_slices(slices):
    """Show up to twenty slices, ``slices[30:50]``, as subplots of one figure.

    Args:
        slices: sequence of 2D images that ``imshow`` can draw.
    """
    figure = plt.figure()
    for position, slice_2d in enumerate(slices[30:50], start=1):
        axes = figure.add_subplot(4, 5, position)
        #to resize (if necessary): new_image = cv2.resize(np.array(each_slice.pixel_array),(IMG_PX_SIZE, IMG_PX_SIZE))
        axes.imshow(slice_2d)  #use , cmap='gray' in imshow() to see in grayscale
    plt.show()

In [33]:
#pair every image with a one-hot label: [0,1] for parkinson's (1), [1,0] for control (0)
def label_images(array_images, label):
    """Attach a one-hot class vector to each image.

    Args:
        array_images: iterable of images.
        label: 1 selects ``[0, 1]``, 0 selects ``[1, 0]``; for any other
            value no entries are produced.

    Returns:
        list of ``[image, one_hot]`` pairs, each with its own one-hot list.
    """
    if label == 1:
        one_hot = (0, 1)
    elif label == 0:
        one_hot = (1, 0)
    else:
        one_hot = None
    # list(one_hot) builds a fresh label list for every image
    return [[image, list(one_hot)] for image in array_images if one_hot is not None]

In [34]:
#write one image to disk as a .npy file with a random name
def save_file(image, directory):
    """Save ``image`` with ``np.save`` under a fresh uuid4-based name.

    The name ("/" followed by the uuid) is printed and then appended to
    ``directory`` to form the target path handed to ``np.save``.

    Args:
        image: array to store.
        directory: destination folder path (string), without trailing slash.
    """
    file_name = f"/{uuid.uuid4()}"
    print(file_name)
    target = directory + file_name
    np.save(target, image)

In [35]:
def resize_2D(array_images, pixel_length, pixel_width):
    """Resize every 2D image with ``cv2.resize``.

    Args:
        array_images: iterable of 2D images.
        pixel_length: first element of the size tuple given to ``cv2.resize``.
        pixel_width: second element of the size tuple given to ``cv2.resize``.

    Returns:
        list of resized images, one per input.
    """
    # NOTE(review): OpenCV documents the size tuple as (width, height), so
    # pixel_length acts as the output width - confirm this is intended.
    target_size = (pixel_length, pixel_width)
    return [cv2.resize(image, target_size) for image in array_images]


In [36]:
import skimage.transform
def resize_3D(array_images, pixel_length, pixel_width, pixel_height):
    """Resize every 3D image with ``skimage.transform.resize``.

    Args:
        array_images: iterable of 3D images.
        pixel_length: requested size of the first output axis.
        pixel_width: requested size of the second output axis.
        pixel_height: requested size of the third output axis.

    Returns:
        list of resized volumes, one per input.
    """
    target_shape = (pixel_length, pixel_width, pixel_height)
    return [
        skimage.transform.resize(volume, target_shape)
        for volume in array_images
    ]