# Data curation

### Import some necessary functions

In [1]:
import os
def return_sys_path():
    """Locate the directory that contains the 'Pre_Processing' folder.

    Starting at the current working directory ('.'), up to five directories
    are inspected, each one level higher than the previous.

    Returns:
        A relative path ('.', './..', ...) to the first inspected directory
        whose listing contains 'Pre_Processing'. If none of the five does, the
        path one level above the last inspected directory is returned without
        having been checked.
    """
    candidate = '.'
    checked = 0
    while checked < 5:
        if 'Pre_Processing' in os.listdir(candidate):
            return candidate
        # Not here: step one directory up and try again.
        candidate = os.path.join(candidate, '..')
        checked += 1
    return candidate
def return_data_path():
    """Locate the directory that contains the 'Data' folder.

    The search begins in the current working directory and climbs one parent
    at a time, inspecting at most five directories.

    Returns:
        A relative path to the first inspected directory whose listing
        contains 'Data'. When no inspected directory qualifies, the result is
        the (unchecked) path five levels above the working directory.
    """
    current = '.'
    for _level in range(5):
        entries = os.listdir(current)
        if 'Data' not in entries:
            # Keep climbing towards the filesystem root.
            current = os.path.join(current, '..')
            continue
        break
    return current

In [2]:
# Add the directory that contains the 'Pre_Processing' folder to sys.path so
# the package imports below resolve from wherever this notebook is run.
import sys
sys.path.append(return_sys_path())
from Pre_Processing.Distribute_Patients import Separate_files
from Pre_Processing.Dicom_RT_and_Images_to_Mask.Image_Array_And_Mask_From_Dicom_RT import Dicom_to_Imagestack
from Pre_Processing.Dicom_RT_and_Images_to_Mask.Plot_And_Scroll_Images.Plot_Scroll_Images import plot_Image_Scroll_Bar_Image

## Finding the Data

### Find where we put our data

In [4]:
# Patient data is expected under <dir containing 'Data'>/Data/Whole_Patients;
# every entry in that folder is counted as one patient.
data_root = return_data_path()
data_path = os.path.join(data_root, 'Data', 'Whole_Patients')
patient_entries = os.listdir(data_path)
print('We have ' + str(len(patient_entries)) + ' patients!')

We have 20 patients!


## Ensuring contour fidelity...

### Note that we've set `get_images_mask` to False: we won't load any of the image data, we'll just look at the DICOM RT files

In [5]:
# Create the reader with get_images_mask=False; per this notebook's notes that
# means only the DICOM RT files are inspected and no image data is loaded.
Dicom_Reader = Dicom_to_Imagestack(get_images_mask=False)

In [6]:
# Point the reader at the patient folder; the ROI names it collects are read
# back afterwards from Dicom_Reader.all_rois.
# (Presumably this recurses through every patient sub-folder - confirm in down_folder.)
Dicom_Reader.down_folder(data_path)

### What ROI names do we have?

#### This lists every unique ROI name the reader found, hence `all_rois`

In [6]:
# Print each ROI name the reader collected, one per line.
for roi in Dicom_Reader.all_rois:
    print(roi)

bma_liver
Liver_BMA_Program_4
tried_liver
best_liver
Liver


## Make contour associations

#### We have quite a few contour names here. We can either change the ROI names in the RT files or make an associations file

#### The associations file associates a contour name with another one {'Current contour':'Desired name'}

In [7]:
# Map every alternative liver contour name to the single desired name 'Liver'
# ({'Current contour': 'Desired name'}).
associations = dict.fromkeys(
    ['Liver_BMA_Program_4', 'bma_liver', 'best_liver', 'tried_liver'],
    'Liver',
)

### Tell the Dicom_Reader that we want to set the associations, get the images and mask for contour 'Liver'

In [8]:
# Register the name associations, switch image/mask loading on, and request
# the contour to extract.
# NOTE(review): the associations map to 'Liver' but the requested name is
# 'liver' - presumably the reader matches names case-insensitively; confirm.
Dicom_Reader.set_associations(associations)
Dicom_Reader.set_get_images_and_mask(True)
Dicom_Reader.set_contour_names(['liver'])

### Re-write RTs
#### This is commented out because, if I ran it, the RT files would be rewritten and the example above would no longer show the different contour names

In [None]:
# Optional step, intentionally disabled: uncomment to set the associations on
# the reader and call rewrite_RT on every RT file it found.
# (Presumably this changes the ROI names on disk - confirm before running.)
# Dicom_Reader.associations = associations
# for RT in Dicom_Reader.all_RTs:
#     Dicom_Reader.rewrite_RT(RT)

## Pulling images and mask

### We'll first do this with one patient

In [9]:
# Process a single patient folder; the resulting image array and mask are
# read back later from Dicom_Reader.ArrayDicom and Dicom_Reader.mask.
patient_data_path = os.path.join(data_path,'ABD_LYMPH_036')
Dicom_Reader.Make_Contour_From_directory(patient_data_path)
print('Done!')

Done!


## View images

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

### The images and mask are saved within the Dicom_Reader class, so we just have to load them

In [None]:
# Both names are plain references to attributes held by the reader, so the
# in-place edits made to Images further down also affect Dicom_Reader.ArrayDicom.
Images = Dicom_Reader.ArrayDicom  # image array stored on the reader
mask = Dicom_Reader.mask # mask stored on the reader; compared against 1 further down

#### Threshold

In [None]:
# Clamp the image intensities to the window [-200, 200], modifying Images in place.
lower_bound = -200
upper_bound = 200
Images[Images < lower_bound] = lower_bound
Images[Images > upper_bound] = upper_bound

In [None]:
# Display the thresholded image stack with the project's scroll-bar viewer.
plot_Image_Scroll_Bar_Image(Images)

In [None]:
# Raise every voxel where mask == 1 by 300 (in place) so the contoured region
# stands out from the clamped background.
# NOTE(review): nothing in this cell re-plots Images; call
# plot_Image_Scroll_Bar_Image(Images) again to see the overlay.
Images[mask==1] += 300

## Recap

### Checking ROI contour names and making associations

### Loading in image and mask from desired contour name

### Viewing images and mask

## Separate into Train/Test/Validation

### This step is also important; I recommend using the `.write_parallel` function
### This will create the NIfTI files and randomly separate them into Train/Validation/Test folders

In [9]:
# Prepare the reader for a bulk export of every patient.
desc = 'TCIA_Liver_Patients'  # description handed to the reader; also the sub-folder name used by Separate_files below
output_path = data_path.replace('Whole_Patients','Niftii_Arrays')  # same parent folder, 'Niftii_Arrays' instead of 'Whole_Patients'
Dicom_Reader.set_get_images_and_mask(False)
Dicom_Reader.set_description(desc)
# Re-scan the whole data folder; reset=True presumably discards what the
# earlier scan collected - confirm in down_folder.
Dicom_Reader.down_folder(data_path,reset=True)
# The two export steps are left disabled here; uncomment to write the files
# and then distribute them from <output_path>/<desc> into output_path.
# Dicom_Reader.write_parallel(output_path,excel_file=os.path.join('.','patient_dist.xlsx'), thread_count=10)
# Separate_files(input_path=os.path.join(output_path,desc), out_path=output_path)

.\..\..\Data\Whole_Patients\ABD_LYMPH_007.\..\..\Data\Whole_Patients\ABD_LYMPH_016
.\..\..\Data\Whole_Patients\ABD_LYMPH_019
.\..\..\Data\Whole_Patients\ABD_LYMPH_028

.\..\..\Data\Whole_Patients\ABD_LYMPH_033.\..\..\Data\Whole_Patients\ABD_LYMPH_036

.\..\..\Data\Whole_Patients\ABD_LYMPH_069.\..\..\Data\Whole_Patients\MED_LYMPH_002

.\..\..\Data\Whole_Patients\MED_LYMPH_017.\..\..\Data\Whole_Patients\MED_LYMPH_024

.\..\..\Data\Whole_Patients\MED_LYMPH_029
.\..\..\Data\Whole_Patients\MED_LYMPH_030
.\..\..\Data\Whole_Patients\MED_LYMPH_044
.\..\..\Data\Whole_Patients\MED_LYMPH_055
.\..\..\Data\Whole_Patients\MED_LYMPH_063
.\..\..\Data\Whole_Patients\MED_LYMPH_064
.\..\..\Data\Whole_Patients\MED_LYMPH_067
.\..\..\Data\Whole_Patients\MED_LYMPH_069
.\..\..\Data\Whole_Patients\MED_LYMPH_073.\..\..\Data\Whole_Patients\MED_LYMPH_075



<Pre_Processing.Distribute_Patients.Separate_files at 0x2385d1907b8>

## Turn into TFRecords

In [10]:
from Pre_Processing.Make_Single_Images.Make_TFRecord_Class import write_tf_record
# NOTE(review): star import - To_Categorical, Cast_Data, Distribute_into_2D and
# Distribute_into_3D used below presumably come from this module; consider
# importing them explicitly.
from Pre_Processing.Make_Single_Images.Image_Processors_Module.Image_Processors_TFRecord import *
# Train: processor list ending in Distribute_into_2D (paired with is_3D=False below).
image_processors = [To_Categorical(),
                    Cast_Data({"image":"float16",'annotation':'int8'}),
                    Distribute_into_2D()]
path = data_path.replace('Whole_Patients','Niftii_Arrays')
# write_tf_record(os.path.join(path, 'Train'), out_path=os.path.join(path,'Records','Train'), image_processors=image_processors,
#                 is_3D=False, verbose=False, thread_count=14)
# Validation: processor list ending in Distribute_into_3D (paired with is_3D=True below).
image_processors = [To_Categorical(),
                    Cast_Data({"image":"float16",'annotation':'int8'}),
                    Distribute_into_3D()]
# write_tf_record(os.path.join(path, 'Validation'), out_path=os.path.join(path,'Records','Validation'), image_processors=image_processors,
#                 is_3D=True, thread_count=3)
# Test: same processor list as Validation, rebuilt so each call gets fresh instances.
image_processors = [To_Categorical(),
                    Cast_Data({"image":"float16",'annotation':'int8'}),
                    Distribute_into_3D()]
# write_tf_record(os.path.join(path, 'Test'), out_path=os.path.join(path,'Records','Test'), image_processors=image_processors,
#                 is_3D=True, thread_count=3)

.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_0.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_1.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_10.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_11.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_12.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_14.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_15.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_18.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_19.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_2.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_3.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_4.nii.gz
.\..\..\Data\Niftii_Arrays\Train\Overall_Data_TCIA_Liver_Patients_6.nii.gz
.\..\..\Data\Nifti