# PyESAPI: Data Mining
*By Michael.Folkerts@varian.com*

In [25]:
import pyesapi
import atexit
app = pyesapi.CustomScriptExecutable.CreateApplication('python_demo')
atexit.register(app.Dispose);

## Extracting Structure Data with Pandas
* Open a patient
* Use pandas dataframe to display basic structure info
* Save to file
* **Extra Credit:** Loop through patients and save to sqlite database, query all PTV volumes

In [26]:
patient = app.OpenPatientById('TestUpperBody')

In [27]:
import pandas as pd

structure_dataframe = pd.DataFrame(
    [(s.Id, s.Name, s.DicomType, s.Volume, s.IsHighResolution) for s in patient.StructureSetsLot(0).Structures],
    columns = ('Id', 'Name', 'Dicom Type', 'Volume', 'Is High Res')
)

print("Structures:\n", structure_dataframe)

Structures:
            Id        Name Dicom Type       Volume  Is High Res
0       Heart       Heart        PTV    66.711074        False
1        BODY        BODY   EXTERNAL  9515.747656        False
2      R Lung      R Lung  AVOIDANCE   939.143211        False
3      L Lung      L Lung  AVOIDANCE   760.367520        False
4        Cord        Cord      ORGAN    39.258909        False
5  Total Lung  Total Lung  AVOIDANCE  1705.624076        False
6    Lg Tumor    Lg Tumor        PTV   825.509991        False


In [28]:
with open('./StructureData.csv','w') as f:
    f.write(structure_dataframe.to_csv())

## Saving DVH to HDF5 Using [H5Py](http://www.h5py.org)
*HDF5 lets you store huge amounts of numerical data, and easily manipulate that data from NumPy.*

Groups = Folders, Datasets = Files

* Create HDF5 file
* Create group for patient data in H5 file
* Get DVH data for a structure and save as dataset
* loop over patients DVH and save data to HDF5 file

In [66]:
import h5py
# we will reuse this code block to reset the file
try:
    h5file.close()
except NameError:
    pass
finally:
    h5file = h5py.File("DVH.h5",'w')  # truncate if exists


In [67]:
patient_group = h5file.create_group(f'patients/{patient.Id}')

In [68]:
for label in h5file:
    print(label)

patients


In [69]:
# works like dictionary
for label in h5file['patients']:
    print(label)

TestUpperBody


In [70]:
def ls(location,depth=0):
    for key,value in location.items():
        print(' ' * depth + ('\u21b3 ' if depth else '') + key)  # unicode for '↳'
        ls(value,depth+1)

In [71]:
ls(h5file)

patients
 ↳ TestUpperBody


In [72]:
dvh_group = patient_group.create_group('structures/{}')

In [73]:
ls(h5file)

patients
 ↳ TestUpperBody
  ↳ structures
   ↳ {}


In [None]:
dvh = plan.GetDVHCumulativeData(
    structure,
    pyesapi.DoseValuePresentation.Relative,
    pyesapi.VolumePresentation.Relative,
    .01
)
dose_x = [p.DoseValue.Dose for p in dvh.CurveData]
volume_y = [p.Volume for p in dvh.CurveData]