In [None]:
%load_ext autoreload
%autoreload 2

## Imports

In [None]:
import os
import dmsbatch
from dmsbatch import create_batch_client, create_blob_client
import datetime
import logging
#logger = logging.getLogger()
#logger.setLevel(logging.ERROR)

## First create a batch client from the config file

The config file is described in the [README](../README.md)

In [None]:
# Both clients are configured from the same file, so name its path once.
config_file = '../tests/data/dmsbatch.config'
client = create_batch_client(config_file)
blob_client = create_blob_client(config_file)

## Application packages
To copy large files and programs it is best to zip (or targz) them and upload them as application packages

Application packages are set up separately, using the Azure management APIs, the web console, or the CLI tool

These are referenced here by their name and version
e.g. DSM2, python and other programs

One extra field (the last one) is the path within the zip file where the executables can be found. These paths are used later to set up the PATH variable

In [None]:
# Each entry is (package name, package version, path to the executables inside the zip).
dsm2_package = ('dsm2linux', '8.2.8449db2', 'DSM2-8.2.8449db2-Linux/bin')
app_pkgs = [dsm2_package]

### Show vms available

https://docs.microsoft.com/en-us/azure/virtual-machines/fsv2-series

In [None]:
# Optional: uncomment to display what client.skus_available() returns
# (presumably the VM sizes that can be used for the pool -- TODO confirm).
#display(client.skus_available())

### Create or resize existing pool
If the pool doesn't exist, it will be created.
If the pool already exists, it will be resized to the size given as the second argument.

In [None]:
# Batch pool to run on, and the blob container holding inputs and outputs.
pool_name, container_name = 'pydelmodlinuxpool', 'ptmbatch'

In [None]:
# The pool only needs (name, version) for each application package; the third
# field of app_pkgs (path to the executables) is dropped here.
pool_app_packages = [(name, version) for name, version, _bin_dir in app_pkgs]

# The start task just prints the node environment.
start_task_cmd = client.wrap_commands_in_shell('linux', ['printenv'])

client.create_pool(
    pool_name,
    1,  # second argument: the size the pool is created with / resized to
    app_packages=pool_app_packages,
    # 'standard_f2s_v2' was tried, but its disk is too small for the task
    vm_size='Standard_D11_v2',
    tasks_per_vm=1,
    os_image_data=('openlogic', 'centos', '7_8'),
    start_task_cmd=start_task_cmd,
    start_task_admin=True,
    elevation_level='admin',
)

## Autoscaling formula for this pool

This can be added manually via the console or batch explorer in the resizing section.
```
// In this example, the pool size is adjusted based on the number of tasks in the queue. Note that both comments and line breaks are acceptable in formula strings.

// Get pending tasks for the past 15 minutes.
$samples = $ActiveTasks.GetSamplePercent(TimeInterval_Minute * 15);
// If we have fewer than 70 percent data points, we use the last sample point, otherwise we use the maximum of last sample point and the history average.
$tasks = $samples < 70 ? max(0, $ActiveTasks.GetSample(1)) : max( $ActiveTasks.GetSample(1), avg($ActiveTasks.GetSample(TimeInterval_Minute * 15)));
// If number of pending tasks is not 0, set targetVM to pending tasks, otherwise half of current dedicated nodes.
$targetVMs = $tasks > 0 ? $tasks : max(0, $TargetDedicatedNodes / 2);
// The pool size is capped at cappedPoolSize (2 here); if the target VM value is more than that, it is set to the cap. This value should be adjusted according to your use case.
cappedPoolSize = 2;
$TargetLowPriorityNodes = max(0, min($targetVMs, cappedPoolSize));
// Set node deallocation mode - keep nodes active only until tasks finish
$NodeDeallocationOption = taskcompletion;
```

### Create job on pool or fail if it exists
Jobs are containers of tasks (things that run on nodes (machines) in the pool). If a job with this name already exists, the `create_job` call below will fail

In [None]:
# Set to True to (re)upload the input files to the blob container; the upload
# cells are skipped while this is False.
UPLOAD = False

if UPLOAD:
    blob_client.upload_file_to_container(
        container_name,
        'pydelmod-linux.tar.gz',
        '../tests/data/pydelmod-linux.tar.gz',
        30,
        max_connections=10,
    )


In [None]:

# Upload the post-processing script, but only when the UPLOAD flag is switched on.
if UPLOAD:
    # The script lives outside this repository. Rather than editing this cell on
    # every machine, the location can be overridden with the
    # PTM_FATE_POSTPRO_SCRIPT environment variable; the original path is the default.
    # Keep the file name 'ptm_fate_postpro.py': the blob is stored under the
    # file's base name and the task definition refers to it by that name.
    local_file_script = os.environ.get(
        'PTM_FATE_POSTPRO_SCRIPT',
        'd:/dev/ptm_fate_postpro/ptm_fate_postpro.py')
    blob_client.upload_file_to_container(
        container_name,
        os.path.basename(local_file_script),
        local_file_script,
        30)


In [None]:
# Resource file for the job preparation task: the pydelmod archive stored in
# the blob container, placed in the task's working directory.
shared_file = client.create_input_file_spec(
    container_name,
    blob_prefix='pydelmod-linux.tar.gz',
    file_path='.',
)

# Shell commands that unpack pydelmod into the node's shared directory.
# Do not end a command with ';' and do not add an extra line at the end.
commands = [
    "printenv",
    "mkdir -p ${AZ_BATCH_NODE_SHARED_DIR}/pydelmod",
    "mv pydelmod-linux.tar.gz ${AZ_BATCH_NODE_SHARED_DIR}/pydelmod",
    "cd ${AZ_BATCH_NODE_SHARED_DIR}/pydelmod",
    "tar xvzf pydelmod-linux.tar.gz",
    'echo "Done setting up pydelmod!"',
]

startup_task = client.create_prep_task(
    'startup_task',
    commands,
    resource_files=[shared_file],
    ostype='linux',
)

# Create the job on the pool with the preparation task attached.
job_name = 'pydelmodjob'
client.create_job(job_name, pool_name, prep_task=startup_task)

### Create a task
This uses the application package that was set up beforehand. If it has not been set up yet, create one: https://docs.microsoft.com/en-us/azure/batch/batch-application-packages

In [None]:
def create_ptm_fate_postpro_task(task_name, blob_prefix, envvars):
    """Create the task that runs ``ptm_fate_postpro.py`` on one set of model output.

    Uses the notebook-level ``client``, ``blob_client``, ``container_name`` and
    ``app_pkgs``. The task is only created here, not submitted.

    Parameters
    ----------
    task_name
        Prefix of the task name; also used as the blob path for the output files.
    blob_prefix
        Blob prefix of the model output inside the container. The command moves
        the script into the directory of the same name and runs it from there.
    envvars
        Passed to ``client.create_task`` as ``env_settings``.

    Returns
    -------
    Whatever ``client.create_task`` returns for the assembled task.
    """
    # Inputs: the post-processing script and the model output under blob_prefix.
    resource_files = [
        client.create_input_file_spec(
            container_name, 'ptm_fate_postpro.py', file_path='.'),
        client.create_input_file_spec(
            container_name, blob_prefix=blob_prefix, file_path='.'),
    ]

    # Outputs: std*.txt from the parent directory and every *.dat file, sent to
    # the container (via a SAS URL requested with WRITE permission) under the
    # task name.
    write_permission = dmsbatch.commands.azureblob.BlobPermissions.WRITE
    container_sas_url = blob_client.get_container_sas_url(
        container_name, write_permission)
    output_files = [
        client.create_output_file_spec(
            pattern, container_sas_url, blob_path=f'{task_name}')
        for pattern in ('../std*.txt', '**/*.dat')
    ]

    # Put the pydelmod bin directory (under the node's shared dir) first on PATH.
    set_path_string = 'export PATH=${AZ_BATCH_NODE_SHARED_DIR}/pydelmod/bin:$PATH'
    shell_script = f"""
        {set_path_string};
        mv ptm_fate_postpro.py {blob_prefix};
        cd {blob_prefix};
        python ptm_fate_postpro.py --years 1923 2015 --days 92 --months 1 2 3 4 5 6;"""
    cmd_string = client.wrap_cmd_with_app_path(
        shell_script, app_pkgs, ostype='linux')

    # The task name combines the given prefix with the last component of blob_prefix.
    full_task_name = task_name + '_' + os.path.basename(blob_prefix)
    return client.create_task(
        full_task_name,
        cmd_string,
        resource_files=resource_files,
        output_files=output_files,
        env_settings=envvars,
    )


### Create all tasks
This function looks at the insertion location file and the simulation years and months to create an array of tasks

In [None]:
# A single post-processing task for one model-output prefix in the container,
# with no extra environment variables.
postpro_task = create_ptm_fate_postpro_task(
    task_name='ptm_fate_postpro',
    blob_prefix='neutrally_buoyant_particles/ex_2020/psandhu_1639109550',
    envvars={},
)


### Next submit the task and wait 
Azure Batch limits submission to 100 tasks at a time.

In [None]:
# Submit the post-processing task to the job.
tasks_to_submit = [postpro_task]
client.submit_tasks(job_name, tasks_to_submit)

## Finally resize the pool to 0 to save costs

In [None]:
# NOTE(review): presumably left commented out so that running the whole notebook
# does not shrink the pool while tasks may still be running -- confirm.
# Uncomment and run once the work is finished to resize the pool to 0 nodes.
#client.resize_pool(pool_name,0)