# Data curation

### Import some necessary functions

In [1]:
import os, pydicom, sys
sys.path.append('..')
import numpy as np
import SimpleITK as sitk
from Distribute_Patients import Separate_files
from Dicom_RT_and_Images_to_Mask.Image_Array_And_Mask_From_Dicom_RT import Dicom_to_Imagestack
from Plot_ScrollBar_Images import plot_Image_Scroll_Bar_Image
from Make_Single_Images.Make_Single_Images_Class import run_main

## Finding the Data

### Find where we put our data

In [2]:
# Folder whose entries are the individual patients, relative to this notebook.
data_path = os.path.join('..', 'Data', 'Whole_Patients')
# Every entry in that folder is counted as one patient.
print(f'We have {len(os.listdir(data_path))} patients!')

We have 20 patients!


## Ensuring contour fidelity...

### Note that we've set 'get_images_mask' to False. This means we won't be loading any of the image data; we are only looking at the DICOM RT files

In [3]:
# Build the reader with get_images_mask=False so that only the RT structure
# files are inspected and no image data is loaded.
Dicom_Reader = Dicom_to_Imagestack(get_images_mask=False)

In [4]:
# Scan the data folder; the ROI names found are read back from
# Dicom_Reader.all_rois afterwards.
# NOTE(review): presumably walks the patient sub-folders recursively - confirm
# against Dicom_to_Imagestack.down_folder.
Dicom_Reader.down_folder(data_path)

### What ROI names do we have?

#### This will tell us all the unique roi names, hence all_rois

In [5]:
# List every unique ROI name the reader collected, one per line, so that the
# naming variants can be spotted by eye.
for roi in Dicom_Reader.all_rois:
    print(roi)

Liver
bma_liver
best_liver
tried_liver
Liver_BMA_Program_4


## Make contour associations

#### We have quite a few contour names here. We can either change the ROI names in the RT files, or make an associations file

#### The associations file maps each existing contour name to the name we want it to have: {'Current contour': 'Desired name'}

In [6]:
# Map every naming variant found in the RT files onto the single desired
# contour name, in the form {'Current contour': 'Desired name'}.
associations = dict.fromkeys(
    ['Liver_BMA_Program_4', 'bma_liver', 'best_liver', 'tried_liver'],
    'Liver',
)

### Tell the Dicom_Reader that we want to set the associations, and to get the images and mask for contour 'Liver'

In [18]:
# Register the name mapping so the variant ROI names are treated as 'Liver'.
Dicom_Reader.set_associations(associations)
# From now on load the image data and mask as well, not just the RT files.
Dicom_Reader.set_get_images_and_mask(True)
# Request the contour to build the mask from.
# NOTE(review): 'liver' is lower-case here while the associations map to
# 'Liver' - confirm the reader matches contour names case-insensitively.
Dicom_Reader.set_contour_names(['liver'])

### Re-write RTs
#### This is commented out, because if I run it, then the example above won't show any different contour names

In [19]:
# Dicom_Reader.associations = associations
# for RT in Dicom_Reader.all_RTs:
#     Dicom_Reader.rewrite_RT(RT)

## Pulling images and mask

### We'll first do this with one patient

In [20]:
# Folder of the single example patient inside data_path.
patient_data_path = os.path.join(data_path,'ABD_LYMPH_036')
# Load this patient; the resulting images and mask are kept on the reader
# (read back below through Dicom_Reader.ArrayDicom and Dicom_Reader.mask).
Dicom_Reader.Make_Contour_From_directory(patient_data_path)
print('Done!')

Done!


## View images

In [21]:
%matplotlib inline

### The images and mask are saved within the Dicom_Reader class, so we just have to load them

In [22]:
# Fetch the image array and its mask from the reader.
# NOTE(review): these are plain attribute reads, so presumably references to
# the reader's own arrays rather than copies - in-place edits to Images would
# then also change Dicom_Reader.ArrayDicom. Confirm if that matters.
Images = Dicom_Reader.ArrayDicom
mask = Dicom_Reader.mask # This is the mask

#### Threshold

In [12]:
# Clamp the image values in place to the range [-200, 200]: everything below
# -200 becomes -200 and everything above 200 becomes 200.
Images[Images<-200] = -200
Images[Images>200] = 200

In [16]:
# Show the image stack in the interactive viewer (the recorded output is a
# widget with a 'Z' slider and a 'view' text box).
plot_Image_Scroll_Bar_Image(Images)

interactive(children=(IntSlider(value=0, description='Z', max=160), Text(value='2D', description='view'), Outp…

In [14]:
# Add 300, in place, to every element of Images where the mask equals 1.
# NOTE(review): presumably done so the contoured region stands out when the
# images are plotted; re-running this cell adds another 300 each time.
Images[mask==1] += 300

## Recap

### Checking ROI contour names and making associations

### Loading in image and mask from desired contour name

### Viewing images and mask

## Separate into Train/Test/Validation

### This is also important, but I would recommend using the 'Parallel' approach available in https://github.com/brianmanderson/Dicom_Data_to_Numpy_Arrays
### For ease, this has already been done for you

In [28]:
def write_data(data_path, out_path, Dicom_Reader, desc='TCIA_Liver_Patients'):
    """Write images and annotations for every patient found under data_path.

    Each entry of ``data_path`` is treated as one patient and receives an
    iteration number equal to its position in the alphabetically sorted
    directory listing. Sorting keeps the patient-to-iteration mapping stable
    between runs; ``os.listdir`` alone returns entries in arbitrary order,
    which would make the skip check below unreliable.

    A patient is skipped when the marker file ``<desc>_Iteration_<n>.txt``
    already exists inside its folder.
    NOTE(review): the marker is presumably written by
    ``Dicom_Reader.write_images_annotations`` - confirm in the reader class.

    Args:
        data_path: Directory whose entries are the individual patient folders.
        out_path: Output directory passed to
            ``Dicom_Reader.write_images_annotations``; created if missing.
        Dicom_Reader: Reader object providing ``set_description``,
            ``Make_Contour_From_directory``, ``set_iteration`` and
            ``write_images_annotations``.
        desc: Description set on the reader and used as the prefix of the
            per-patient marker file name.

    Returns:
        None
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_path, exist_ok=True)
    print(out_path)
    Dicom_Reader.set_description(desc)
    for iteration, patient in enumerate(sorted(os.listdir(data_path))):
        print(patient)
        patient_data_path = os.path.join(data_path, patient)
        out_file = os.path.join(
            patient_data_path, desc + '_Iteration_' + str(iteration) + '.txt'
        )
        if os.path.exists(out_file):
            # Marker present: this patient was already handled under this
            # description and iteration number.
            continue
        Dicom_Reader.Make_Contour_From_directory(patient_data_path)
        Dicom_Reader.set_iteration(iteration)
        Dicom_Reader.write_images_annotations(out_path)
    return None

In [29]:
# Output folder: same location as data_path with 'Whole_Patients' swapped for
# 'Niftii_Arrays'.
output_path = data_path.replace('Whole_Patients','Niftii_Arrays')
# The three steps below are left commented out because the conversion has
# already been done (the recorded output comes from an earlier run with them
# enabled). Uncomment to regenerate the arrays.
#write_data(data_path,output_path, Dicom_Reader)
#Separate_files(output_path) # Separate into a Training/Validation/Test set
#run_main(output_path,extension=5)

..\..\Data\Niftii_Arrays
ABD_LYMPH_007
ABD_LYMPH_016
ABD_LYMPH_019
ABD_LYMPH_028
ABD_LYMPH_033
ABD_LYMPH_036
ABD_LYMPH_069
MED_LYMPH_002
MED_LYMPH_017
MED_LYMPH_024
MED_LYMPH_029
MED_LYMPH_030
MED_LYMPH_044
MED_LYMPH_055
MED_LYMPH_063
MED_LYMPH_064
MED_LYMPH_067
MED_LYMPH_069
MED_LYMPH_073
MED_LYMPH_075
This is running on 17 threads
Overall_mask_TCIA_Liver_Patients_y0.nii.gz
7.142857142857142
Overall_mask_TCIA_Liver_Patients_y1.nii.gz
14.285714285714285
Overall_mask_TCIA_Liver_Patients_y10.nii.gz
21.428571428571427
Overall_mask_TCIA_Liver_Patients_y11.nii.gz
28.57142857142857
Overall_mask_TCIA_Liver_Patients_y12.nii.gz
35.714285714285715
Overall_mask_TCIA_Liver_Patients_y13.nii.gz
42.857142857142854
Overall_mask_TCIA_Liver_Patients_y14.nii.gz
50.0
Overall_mask_TCIA_Liver_Patients_y15.nii.gz
57.14285714285714
Overall_mask_TCIA_Liver_Patients_y16.nii.gz
64.28571428571429
Overall_mask_TCIA_Liver_Patients_y17.nii.gz
71.42857142857143
Overall_mask_TCIA_Liver_Patients_y18.nii.gz
78.571428571