In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell runs.
%load_ext autoreload
%autoreload 2

## Imports

In [None]:
from dmsbatch import create_batch_client, create_blob_client
import datetime
import logging
#logger = logging.getLogger()
#logger.setLevel(logging.ERROR)

## First create a batch client from the config file

The config file is described in the [README](../README.md)

In [None]:
# Both clients are built from the same configuration file.
config_file = '../tests/data/dmsbatch.config'
client = create_batch_client(config_file)
blob_client = create_blob_client(config_file)

## Application packages
To copy large files and programs it is best to zip (or targz) them and upload them as application packages

Application packages are set up separately, using the Azure management APIs, the web console, or the CLI tool

These are referenced here by their name and version
e.g. DSM2, python and other programs

One extra field (the last one) is the path within the zip file where the executables can be found. These paths are used later to set up the PATH variable

In [None]:
# Each entry: (package name, version, path to the executables inside the archive)
app_pkgs = [
    (
        'dsm2linux',
        '8.2.8449db2',
        'DSM2-8.2.8449db2-Linux/bin',
    ),
]

### Show vms available

https://docs.microsoft.com/en-us/azure/virtual-machines/fsv2-series

In [None]:
#display(client.skus_available())

### Create or resize existing pool
If the pool doesn't exist it will create it
If the pool exists, it will resize to the second arg

In [None]:
# Name of the Batch pool that is created (or resized) below and that the job runs on.
pool_name = "ptmlinuxpool"

In [None]:
# Commands used as the pool's start task (passed to create_pool as start_task_cmd).
pool_start_cmds = [
    'printenv',
    # optional extra flag for the next command: --setopt=protected_multilib=false
    'yum install -y glibc.i686 libstdc++.i686 glibc.x86_64 libstdc++.x86_64',
    'yum-config-manager --add-repo https://yum.repos.intel.com/2019/setup/intel-psxe-runtime-2019.repo',
    'rpm --import https://yum.repos.intel.com/2019/setup/RPM-GPG-KEY-intel-psxe-runtime-2019',
    'yum install -y intel-icc-runtime-32bit intel-ifort-runtime-32bit',
]
# Show the wrapped command line as the cell output for inspection.
client.wrap_commands_in_shell('linux', pool_start_cmds)

In [None]:
# The pool references application packages by (name, version); the executable path is not needed here.
pool_app_packages = [(name, version) for name, version, _bin_path in app_pkgs]
pool_start_task = client.wrap_commands_in_shell('linux', pool_start_cmds)
client.create_pool(
    pool_name,
    1,
    app_packages=pool_app_packages,
    vm_size='standard_f32s_v2',
    tasks_per_vm=32,
    os_image_data=('openlogic', 'centos', '7_8'),
    start_task_cmd=pool_start_task,
    start_task_admin=True,
    elevation_level='admin',
)

### Create job on pool or fail if it exists
Jobs are containers of tasks (things that run on nodes (machines) in the pool). If the job already exists, the cell that creates it will fail

In [None]:
# This step is only needed because these files were uploaded to Amazon S3. Otherwise only the upload to Azure (next step) is needed
# downloading from aws takes 2 - 3 mins
#!aws s3 cp s3://ca.dwr.dms.dsm2/release82/ptm_runs/input/DCP_EX_EX.h5 .

In [None]:
# slow - 9 mins so use max_connections > 2 (default). Using 12 which seems to be a good fit here
#blob_client.upload_file_to_container('ptmnbjob','DCP_EX.h5','./DCP_EX_EX.h5',max_connections=12)
# much faster - 3 mins upload time
#azcopy.exe copy "D:\dev\azure_dms_batch\notebooks\DCP_EX_EX.h5" "https://dwrmodelingstore.blob.core.windows.net/ptmnbjob/DCP_EX_EX.h5?<SAS_TOKEN>" --overwrite=prompt --from-to=LocalBlob --blob-type Detect --follow-symlinks --put-md5 --follow-symlinks --recursive --trusted-microsoft-suffixes= --log-level=INFO;


In [None]:
#TODO: client.delete_job

In [None]:
#input_tidefile = client.create_input_file_spec('ptmnbjob',blob_prefix='DCP_EX.h5',file_path='.')
# The tidefile copy task is attached to the job as its preparation task.
copy_tidefile_task = client.create_task_copy_file_to_shared_dir(
    'ptmnbjob',
    'DCP_EX.h5',
    file_path='.',
    ostype='linux',
)
client.create_job(
    'ptmlinuxnbjob',
    pool_name,
    prep_task=copy_tidefile_task,
)

In [None]:
#local_dir = 'd:/dev/ptm_batch/neutrally_bouyant_particles/ex'
#input_file=blob_client.zip_and_upload('ptmnbjob',None,local_dir,30)

In [None]:
# Input file spec for blobs prefixed 'ex.zip' in the 'ptmnbjob' container; used as a task resource file.
input_file = client.create_input_file_spec(
    'ptmnbjob',
    blob_prefix='ex.zip',
    file_path='.',
)

In [None]:
import dmsbatch

# Request a container SAS with WRITE permission only; it is passed to the task output file specs.
permissions = dmsbatch.commands.azureblob.BlobPermissions.WRITE
# |helpers.azureblob.BlobPermissions.ADD|helpers.azureblob.BlobPermissions.CREATE
output_dir_sas_url = blob_client.get_container_sas_url('ptmnbjob', permissions)
# The query string of a SAS URL carries the signature, which is a credential.
# Print only the part before '?' so the token is not stored in the saved notebook output.
print(str(output_dir_sas_url).split('?', 1)[0])

### Create a task
This uses the application package that was set up beforehand. If it has not been set up, create one: https://docs.microsoft.com/en-us/azure/batch/batch-application-packages

In [None]:
def submit_ptm_single_task(task_name, envvars):
    """Build the task definition for a single PTM run.

    Relies on the notebook-level names ``client``, ``output_dir_sas_url``,
    ``app_pkgs`` and ``input_file``.

    Parameters
    ----------
    task_name : str
        Task identifier; also used as the blob path under which outputs are stored.
    envvars : dict
        Environment settings handed to the task via ``env_settings``.

    Returns
    -------
    Whatever ``client.create_task`` returns for this task.
    """
    blob_folder = f'{task_name}'
    # Two output specs: the std*.txt files one level up, and everything under any output/ folder.
    stdout_spec = client.create_output_file_spec(
        '../std*.txt', output_dir_sas_url, blob_path=blob_folder)
    results_spec = client.create_output_file_spec(
        '**/output/*', output_dir_sas_url, blob_path=blob_folder)
    path_setup = client.set_path_to_apps(app_pkgs, ostype='linux')
    # Shell script run by the task: load the Intel runtime, unpack the study, run ptm,
    # then delete trace.out from the output folder.
    ptm_script = f"""
        source /opt/intel/psxe_runtime/linux/bin/compilervars.sh ia32;
        {path_setup};
        unzip ex.zip; 
        rm *.zip; 
        export TIDEFILE_LOC=$AZ_BATCH_NODE_SHARED_DIR; 
        cd studies; 
        ptm planning_ptm.inp; 
        cd output; 
        rm trace.out;
        """
    wrapped_cmd = client.wrap_cmd_with_app_path(
        ptm_script, app_pkgs, ostype='linux')
    return client.create_task(
        task_name,
        wrapped_cmd,
        resource_files=[input_file],
        output_files=[stdout_spec, results_spec],
        env_settings=envvars,
    )

### Create all tasks
This function looks at the insertion location file and the simulation years and months to create an array of tasks

In [None]:
import csv
import logging
def create_tasks(insertion_file='run_number_loc.txt',
               simulation_start_year=1923,
               simulation_end_year=2015,
               simulation_start_day=1,
               simulation_month=[1, 2, 3, 4, 5, 6],
               simulation_days=92,
               duration='1485minutes',
               delay='0day',
               study_name='ex',
               study_folder='neutrally_bouyant_particles',
               setup_tidefile=False):
    """Create one PTM task per (insertion row, year, month) combination.

    Parameters
    ----------
    insertion_file : str
        CSV file with a header row containing the columns ``run#``, ``particle#`` and ``node``.
    simulation_start_year, simulation_end_year : int
        Inclusive range of years to simulate.
    simulation_start_day : int
        Day of the month on which each simulation starts.
    simulation_month : iterable of int
        Months (1-12) in which a simulation starts. The default list is only read, never mutated.
    simulation_days : int
        Length of each simulation in days; the end date is start date + this many days.
    duration, delay : str
        Copied verbatim into the particle insertion row.
    study_name, study_folder : str
        Used to build the task name prefix and the DSM2_STUDY_NAME environment variable.
    setup_tidefile : bool
        Currently unused; kept for backward compatibility.

    Returns
    -------
    list
        The objects returned by ``submit_ptm_single_task``, in creation order.
    """
    # Loop-invariant: every task uses the same simulation length.
    sim_days = datetime.timedelta(days=simulation_days)
    tasks = []
    # newline='' is what the csv module expects of its input file; the handle name
    # avoids shadowing the builtin input().
    with open(insertion_file, 'r', newline='') as insertion_fh:
        for row in csv.DictReader(insertion_fh):  # run#,particle#,node
            run_no = row['run#']
            particle_no = row['particle#']
            insertion_node = row['node']
            job_name_prefix = 'ptm-%s-%s-%s' % (
                study_folder[0:5], study_name, run_no)
            # Depends only on the row, so it is built once per row rather than per date.
            particle_insertion_row = '%s %s %s %s' % (
                insertion_node, particle_no, delay, duration)
            for y in range(simulation_start_year, simulation_end_year+1):
                for m in simulation_month:
                    s_day = datetime.date(y, m, simulation_start_day)
                    e_day = s_day + sim_days
                    ptm_start_date = s_day.strftime("%d%b%Y")
                    ptm_end_date = e_day.strftime("%d%b%Y")
                    # A fresh dict per task so tasks never share environment settings.
                    envvars = {'RUN_NO': '%s' % run_no,
                               'PTM_START_DATE': ptm_start_date,
                               'PTM_END_DATE': ptm_end_date,
                               'PARTICLE_INSERTION_ROW': particle_insertion_row,
                               'DSM2_STUDY_NAME': 'DCP_%s_%sP' % (study_name, study_folder[0:1])
                               }
                    task = submit_ptm_single_task(
                        job_name_prefix+'-'+ptm_start_date, envvars)
                    tasks.append(task)
    logging.info('All done!')
    return tasks

In [None]:
# Build the full task list for all insertion rows over the 1923-2015 simulation years.
tasks = create_tasks(
    insertion_file='d:/dev/ptm_batch/run_number_loc.txt',
    simulation_start_year=1923,
    simulation_end_year=2015,
)

In [None]:
# Display how many tasks were created.
len(tasks)

### Next submit the task and wait 
Azure Batch limits task submission to 100 tasks per request, so the tasks are submitted in chunks of 100.

In [None]:
# Submit the tasks in chunks of at most 100.
# Stepping through the list by the chunk size guarantees that the final, partial
# chunk is submitted too, whatever the total number of tasks is.
max_tasks_per_submit = 100
for start in range(0, len(tasks), max_tasks_per_submit):
    client.submit_tasks('ptmlinuxnbjob', tasks[start:start + max_tasks_per_submit])

## Finally resize the pool to 0 to save costs

In [None]:
#client.resize_pool(pool_name,0)