In [None]:
# IPython autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports

In [None]:
from dmsbatch import create_batch_client, create_blob_client
import datetime

## First create a batch client from the config file

In [None]:
# The batch client and the blob client are both built from the same
# configuration file, so the path is named once.
config_file = '../tests/data/dmsbatch.config'
client = create_batch_client(config_file)
blob_client = create_blob_client(config_file)

## Application packages
To copy large files and programs it is best to zip (or targz) them and upload them as application packages

Application packages are set up separately, using the Azure management APIs, the web console, or the CLI tool.

These are referenced here by their name and version
e.g. DSM2, python and other programs

One extra field (the last one) is the path within the zip file where the executables can be found. These paths are used later to set up the PATH variable.

In [None]:
# One tuple per application package:
# (package name, package version, path inside the archive holding the executables).
app_pkgs = [
    ('schism', '580', 'opt/schism/5.8.0'),
]

In [None]:
# Unique (name, version) pairs; the in-archive path (third field) is dropped.
list({(pkg_name, pkg_version) for pkg_name, pkg_version, _ in app_pkgs})

### Show vms available

https://docs.microsoft.com/en-us/azure/virtual-machines/fsv2-series

In [None]:
# Optional: uncomment to display what client.skus_available() returns
# (presumably the VM SKUs that can be used for a pool -- confirm against dmsbatch).
#display(client.skus_available())

### Create or resize existing pool
If the pool doesn't exist, it will be created.
If the pool exists, it will be resized to the number of nodes given as the second argument.

#### Use a pool start task to install libraries

In [None]:
# Commands for the pool start task (passed as start_task_cmd when the pool
# is created): dump the environment, register the Intel PSXE runtime
# repository, then install the Intel Fortran runtime packages.
pool_start_cmds = [
    'printenv',
    'rpm --import https://yum.repos.intel.com/2020/setup/RPM-GPG-KEY-intel-psxe-runtime-2020',
    'rpm -Uhv https://yum.repos.intel.com/2020/setup/intel-psxe-runtime-2020-reposetup-1-0.noarch.rpm',
    'yum install -y intel-ifort*-2020.2-14',
]
# Preview the wrapped command produced for a linux shell.
client.wrap_commands_in_shell('linux', pool_start_cmds)

#### Specify VMs and number of cores and hosts

In [None]:
# Core count for each VM size this notebook knows about.
vm_core_map = {
    'standard_hc44rs': 44,
    'standard_hb120rs_v2': 120,
}

In [None]:
# VM size for the pool nodes; must be a key of vm_core_map
# (alternative: 'standard_hb120rs_v2').
vm_size = 'standard_hc44rs'
# Number of nodes (hosts) in the pool.
num_hosts = 2
# Total number of MPI processes. One core per host is left unused
# (presumably reserved for the OS / node agent -- confirm). Deriving the
# value from num_hosts keeps it correct when either setting changes,
# instead of relying on a hard-coded factor of 2.
num_cores = num_hosts * (vm_core_map[vm_size] - 1)

#### Create pool with the OS image and enable internode communication

In [None]:
# Name of the pool that is created (or resized) below.
pool_name = "schismpool"

In [None]:
# Unique (name, version) references of the application packages for the pool.
pool_app_refs = list({(pkg_name, pkg_version) for pkg_name, pkg_version, _ in app_pkgs})
# Shell-wrapped command line for the pool start task.
pool_start_task = client.wrap_commands_in_shell('linux', pool_start_cmds)

# pool_created is checked afterwards: a falsy value triggers a resize of the pool instead.
pool_created = client.create_pool(
    pool_name,
    num_hosts,
    app_packages=pool_app_refs,
    vm_size=vm_size,
    tasks_per_vm=1,
    os_image_data=('openlogic', 'centos-hpc', '7.7'),  # open question: can a custom image be used here?
    start_task_cmd=pool_start_task,
    start_task_admin=True,
    elevation_level='admin',
    enable_inter_node_communication=True,
    wait_for_success=True,
)

In [None]:
# create_pool returned a falsy value (per the notes above: the pool already
# exists), so resize it to the requested number of hosts instead.
if not pool_created:
    client.resize_pool(pool_name, num_hosts)

In [None]:
# Presumably blocks until the nodes of the pool are ready to accept work
# -- confirm against the dmsbatch implementation.
client.wait_for_pool_nodes(pool_name)

### Create job on pool or fail if it exists
Jobs are containers of tasks (things that run on nodes (machines) in the pool). If the job already exists, the next cell will fail.

In [None]:
# The job groups the tasks submitted later and is bound to the pool created above.
job_name = "schismjobs"
client.create_job(job_name, pool_name)

### Create a task
This uses the application package that was set up beforehand. If it has not been set up yet, create one: https://docs.microsoft.com/en-us/azure/batch/batch-application-packages

Create task name (unique) and the command string

In [None]:
# Whole seconds since the epoch make the task name unique for each run.
tsnow = str(int(datetime.datetime.now().timestamp()))
task_name = f'helloschism_version_{tsnow}'

# MPI processes to place on each host (rounded up so that all ranks fit in a
# single pass over the host list). Intel MPI's -ppn option expects this
# per-host count, not the number of hosts.
procs_per_host = (num_cores + num_hosts - 1) // num_hosts

# Environment setup: Intel runtime + MPI, the libraries shipped inside the
# schism application package, fabric settings, then unpack the input archive
# in the task's directory.
env_setup_cmds = """source /opt/intel/psxe_runtime/linux/bin/psxevars.sh intel64;
source /opt/intel/psxe_runtime/linux/mpi/intel64/bin/mpivars.sh;
export LD_LIBRARY_PATH=/opt/intel/psxe_runtime_2020.2.14/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH;
export HDF5_DIR=${AZ_BATCH_APP_PACKAGE_schism_580}/opt/hdf5/1.12.0; 
export NETCDF_C_DIR=${AZ_BATCH_APP_PACKAGE_schism_580}/opt/netcdf-c/4.7.4;
export NETCDF_FORTRAN_DIR=${AZ_BATCH_APP_PACKAGE_schism_580}/opt/netcdf-fortran/4.5.3;
export SZIP_DIR=${AZ_BATCH_APP_PACKAGE_schism_580}/opt/szip/2.1.1;
export PATH=${SZIP_DIR}/bin:${HDF5_DIR}/bin:${NETCDF_C_DIR}/bin:${NETCDF_FORTRAN_DIR}/bin:${AZ_BATCH_APP_PACKAGE_schism_580}/opt/schism/5.8.0/bin:$PATH;
export LD_LIBRARY_PATH=${SZIP_DIR}/lib:${HDF5_DIR}/lib:${NETCDF_C_DIR}/lib:${NETCDF_FORTRAN_DIR}/lib:$LD_LIBRARY_PATH;
echo $AZ_BATCH_HOST_LIST;
echo $PATH;
echo $LD_LIBRARY_PATH;
export FI_PROVIDER=mlx;
export I_MPI_FABRICS=shm:ofi;
export I_MPI_DEBUG=5;
export I_MPI_PIN_DOMAIN=numa;
cd ${AZ_BATCH_TASK_ID};
tar xvzf hello_schism-master.tar.gz;
cd hello_schism;
mkdir outputs;
"""

# Run sizing taken from the notebook variables. NUM_HOSTS is still exported
# for anything that reads it; NUM_PROCS_PER_HOST is what mpirun consumes.
run_size_cmds = f"""export NUM_CORES={num_cores};
export NUM_HOSTS={num_hosts};
export NUM_PROCS_PER_HOST={procs_per_host};
"""

# NOTE(review): the executable is referenced directly under opt/schism/5.8.0
# while PATH above adds opt/schism/5.8.0/bin -- confirm which location the
# application package actually uses.
mpirun_cmd = """mpirun -n $NUM_CORES -ppn $NUM_PROCS_PER_HOST -hosts $AZ_BATCH_HOST_LIST ${AZ_BATCH_APP_PACKAGE_schism_580}/opt/schism/5.8.0/pschism_PREC_EVAP_GOTM_TVD-VL;
"""

cmd_string = client.wrap_cmd_with_app_path(
    env_setup_cmds + run_size_cmds + mpirun_cmd,
    app_pkgs,
    ostype='linux',
)
print(task_name)
print(cmd_string)
# To exercise MPI between the hosts without running SCHISM, the mpirun line
# can be swapped for the Intel MPI Benchmarks ping-pong test:
#mpirun -n $NUM_CORES -ppn $NUM_PROCS_PER_HOST -hosts $AZ_BATCH_HOST_LIST IMB-MPI1 pingpong;

Upload input file and get the spec reference

In [None]:
import os

local_file = '../tests/data/helloschism/hello_schism-master.tar.gz'
fname = os.path.basename(local_file)
# Blob name inside the job's container. Inputs are kept under the task name
# (the task command does `cd ${AZ_BATCH_TASK_ID}` before unpacking; this
# assumes the task id equals task_name -- confirm).
input_blob_path = f'{task_name}/{fname}'

# Upload the archive. The return value is not needed, so it is no longer bound
# to `input_file`, which used to be overwritten on the very next line.
# The last positional argument (30) is passed through unchanged; its meaning
# is not visible here (possibly a timeout or expiry -- confirm).
blob_client.upload_file_to_container(job_name, input_blob_path, local_file, 30)

# Resource-file spec that makes the uploaded blob available to the task.
input_file = client.create_input_file_spec(job_name, blob_prefix=input_blob_path, file_path='.')

Specify output files as spec references

In [None]:
import dmsbatch

# Only WRITE permission is requested for the output container.
permissions = dmsbatch.commands.azureblob.BlobPermissions.WRITE
# Further flags could be OR-ed in if uploads need them (the original note
# mentioned ADD and CREATE) -- confirm they exist on
# dmsbatch.commands.azureblob.BlobPermissions before using them.
output_dir_sas_url = blob_client.get_container_sas_url(job_name, permissions)
# The SAS URL carries a token that grants write access, so only the part
# before the query string is printed; the token itself must not end up in
# saved notebook output.
print(str(output_dir_sas_url).split('?', 1)[0])

# Task stdout/stderr files (matched one directory above the working directory).
std_out_files = client.create_output_file_spec('../std*.txt', output_dir_sas_url, blob_path=task_name)
# Everything written under any `outputs` directory.
output_dir = client.create_output_file_spec('**/outputs/*', output_dir_sas_url, blob_path=task_name)

Create task with all the above information

In [None]:
# One task instance per host; the coordination command echoes the host list
# before the main command line runs.
schism_task = client.create_task(
    task_name,
    cmd_string,
    resource_files=[input_file],
    output_files=[std_out_files, output_dir],
    num_instances=num_hosts,
    coordination_cmdline='echo $AZ_BATCH_HOST_LIST',
)

### Next submit the task and wait 

In [None]:
# Submit the task created above to the job (submit_tasks takes a list of tasks).
client.submit_tasks(job_name,[schism_task])

In [None]:
# Timeout handed to the wait call: two minutes.
wait_timeout = datetime.timedelta(seconds=120)
client.wait_for_tasks_to_complete(job_name, timeout=wait_timeout)

## Finally resize the pool to 0 to save costs

In [None]:
# Resize the pool to zero nodes so that no VMs stay allocated (and billed)
# once the work is done.
client.resize_pool(pool_name,0)