# Converting an HDF5 Dataset to a CloudVolume Image Layer

This module demonstrates how to convert a monolithic 3D HDF5 dataset into a CloudVolume image layer. The dataset is assumed to be in a single .h5 file and is selected within that file by the key passed to the converter (`--key`).

In [None]:
# Make Balsam and the modules it depends on importable by putting its install
# locations at the front of the Python search path.
import sys
sys.path.insert(0,'/soft/datascience/Balsam/0.3.5.1/env/lib/python3.6/site-packages/')
sys.path.insert(0,'/soft/datascience/Balsam/0.3.5.1/')

# The balsam and PostgreSQL executables also need to be on PATH.
# (Misha suggests this may not be necessary.)
import os
# Balsam's bin directory is prepended to PATH; PostgreSQL's is appended.
os.environ['PATH'] ='/soft/datascience/Balsam/0.3.5.1/env/bin/:' + os.environ['PATH']
os.environ['PATH'] +=':/soft/datascience/PostgreSQL/9.6.12/bin/'
try:
    import balsam
except ImportError:
    # The failure is only reported; execution continues with the lines below.
    print('Cannot find balsam, make sure balsam is installed or it is available in Python search paths')
    
# Point Balsam at the project's database directory.
os.environ["BALSAM_DB_PATH"]='/lus/theta-fs0/projects/connectomics_aesp/balsam_database/'

# NOTE(review): the star import hides where names come from; add_app, add_job
# and submit used later in this notebook are presumably provided by
# balsam_helper -- confirm.
from balsam_helper import *

# Import widgets
# NOTE(review): none of these widget names are used in the visible part of
# this notebook -- confirm whether balsam_helper or later cells rely on them.
from ipywidgets import interact, interactive
from ipywidgets import fixed, interact_manual 
from ipywidgets import Textarea, widgets, Layout, Accordion
from ipywidgets import VBox, HBox, Box, Text, BoundedIntText

# Shell script passed as `envscript` when the application is added below.
env_preamble = '/lus/theta-fs0/projects/connectomics_aesp/software/HappyNeuron/macros_theta/theta_build_preamble.sh'

## Set up the Application

Before submitting HDF5 conversion jobs, make sure there is a Balsam application available to you. Here, we set up a HappyNeuron application named `HappyNeuron_h52cv`; the h52cv workflow that uses it is defined in the next section. The application will run the `hdf5_to_cloudvolume.py` conversion script installed with HappyNeuron.

In [None]:
# Add the conversion application under the name 'HappyNeuron_h52cv'; the job
# added later in this notebook refers to the application by this name.
# NOTE(review): the script path below lives under
# /lus/theta-fs0/projects/software/, whereas env_preamble and the data paths
# live under /lus/theta-fs0/projects/connectomics_aesp/ -- confirm that the
# path is correct and not missing the 'connectomics_aesp' component.
add_app(
    'HappyNeuron_h52cv',
    'python /lus/theta-fs0/projects/software/HappyNeuron/happyneuron/io/hdf5_to_cloudvolume.py',  # alternative command: 'hdf5_to_cloudvolume',
    description='Convert images to a CloudVolume layer.',
    envscript=env_preamble
)

## Set up the Workflow and Job

This is where job parameters will be set and added to a workflow in the application.

In [None]:
# Input HDF5 file, destination path of the CloudVolume layer, and the key
# forwarded to the converter via --key (presumably the dataset name inside
# the HDF5 file -- confirm against hdf5_to_cloudvolume.py).
img_path = '/lus/theta-fs0/projects/connectomics_aesp/pipeline_data/h52cv/rafcube.h5'
cv_path = '/lus/theta-fs0/projects/connectomics_aesp/pipeline_data/h52cv/precomputed/image'
key = 'image'

# Command-line arguments handed to the application for this job.
args = f'--input {img_path} --output {cv_path} --key {key}'

# The returned value is kept so the job can be looked up later with
# get_job_info.
job_id = add_job(
    'convert_rafcube',  # Job Name
    'h52cv_rafcube',   # Workflow Name
    'HappyNeuron_h52cv',  # Application name, as given to add_app
    description='Convert the rafcube monolithic h5 to a CloudVolume image layer.',
    args=args,
    num_nodes=2,
    ranks_per_node=4
)

## Run the Conversion

In [None]:
# Submit a launch for the 'h52cv_rafcube' workflow (the workflow name given
# to add_job). The node count matches the num_nodes=2 requested for the job.
# NOTE(review): wf_filter presumably restricts the launch to jobs of that
# workflow -- confirm against balsam_helper.submit.
submit(project='connectomics_aesp',
   queue='debug-cache-quad',
   nodes=2,
   wall_minutes=20,
   wf_filter='h52cv_rafcube'
)

In [None]:
def get_job_info(job_id='', show_output=False):
    """
    Prints verbose job info for a given job id.

    Balsam jobs are filtered with ``job_id__contains=job_id``. When exactly
    one job matches, the job is printed and, if requested, the content of
    ``<working_directory>/<name>.out``. When no job or several jobs match,
    a short message is printed instead.

    Parameters
    ----------
    job_id: str, Partial or full Balsam job id.
    show_output: bool, If True, also print the content of the job's output
        file. A missing output file is reported rather than raised, since
        the file is absent until the job has written it.
    """
    # Imported lazily so that defining this function does not require Balsam.
    from balsam.launcher.dag import BalsamJob as Job

    jobs = Job.objects.all().filter(job_id__contains=job_id)
    num_matches = len(jobs)

    if num_matches == 0:
        print('No matching jobs')
        return
    if num_matches > 1:
        print(f'{num_matches} jobs matched, enter full id.')
        return

    thejob = jobs[0]
    print(thejob)
    if not show_output:
        return

    output = f'{thejob.working_directory}/{thejob.name}.out'
    try:
        with open(output) as f:
            out = f.read()
    except FileNotFoundError:
        # Inspecting a job before it has produced output must not crash.
        print(f'Output file {output} not found; the job may not have produced output yet.')
        return
    print(f'Output file {output} content:')
    print(out)
        
# Look up the job added above (job_id is the value returned by add_job) and
# print its info together with the content of its output file.
get_job_info(job_id=job_id, show_output=True)