## Building the application

This step follows a similar set of commands to those in the README. It builds the Docker image from the Dockerfile in this directory.

## First, log in by running `az login`

In [None]:
# Run the build-image.sh script from the current directory (see the text above for what it builds).
!./build-image.sh

# Test your code

We mount our app directory on the docker container and run our `generate_ofun.R` code. This generates our optimization function, `ofun.rds`, as a one-time step that we don't wish to repeat in every batch task.
Then we do the same volume mount, and run our `run_optimization.R` script with `K` and `N_0` input params. This runs our objective function using `subplex` to optimize the input parameters. This is what will be run as tasks on batch, with varying input parameter spaces.

In [None]:
# Run generate_ofun.R inside the r-pomp image, with the local ./app directory mounted at /app.
# --rm removes the container once the command exits.
!docker run --rm \
    -v "$(pwd)/app:/app" \
    <azureContainerRegistryName>.azurecr.io/r-pomp:4.4.1 Rscript /app/generate_ofun.R
    
# Same mount, but run run_optimization.R with the two positional arguments 1500 and 200
# (the K and N_0 input parameters described above).
!docker run --rm \
    -v "$(pwd)/app:/app" \
    <azureContainerRegistryName>.azurecr.io/r-pomp:4.4.1 Rscript /app/run_optimization.R 1500 200

## Setup

The following four cells do the required work before we actually start interacting with Batch.
- Install the required libraries in your python environment
- Import all the required libraries. You can view those in the `requirements.txt` in this directory.
- Put the required configuration into memory, pulling any sensitive information out of environment variables
- Create the requisite client and configuration objects 

In [None]:
# Install the packages listed in requirements.txt into the environment used by this kernel.
%pip install -r requirements.txt

In [2]:
import azure.batch as batch
from azure.storage.blob import ContainerClient
from azure.identity import  DefaultAzureCredential
from msrest.authentication import BasicTokenAuthentication
from azure.storage.blob import BlobServiceClient
from datetime import datetime

In [3]:
# Local-time stamp with second resolution; it is appended to the job id below.
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

# ---------------------------------------------------------------------------
# Batch configuration
# Replace every <...> placeholder with the value for your own deployment.
# ---------------------------------------------------------------------------
_BATCH_ACCOUNT_URL = "https://<batchAccountName>.<region>.batch.azure.com"
_BATCH_ACCOUNT_MANAGED_ID_RESOURCE_ID = (
    "/subscriptions/<subscriptionId>"
    "/resourcegroups/<resourceGroupName>"
    "/providers/Microsoft.ManagedIdentity"
    "/userAssignedIdentities/<batchAccountManagedIdentity>"
)

_JOB_ID = "r-pomp-trajectory-mapping-" + timestamp
_POOL_ID = "default"

# ---------------------------------------------------------------------------
# Storage configuration
# ---------------------------------------------------------------------------
_STORAGE_ACCOUNT_NAME = "<storageAccountName>"
_CONTAINER_NAME = "output"
_ACCOUNT_URL = "https://{}.blob.core.windows.net".format(_STORAGE_ACCOUNT_NAME)

# URL of the output container. When SAS keys are enabled and no user-assigned
# managed identity is used, append the SAS token instead:
# _OUTPUT_CONTAINER_URL = f'{_ACCOUNT_URL}/{_CONTAINER_NAME}{_OUTPUT_CONTAINER_SAS}'
_OUTPUT_CONTAINER_URL = "/".join([_ACCOUNT_URL, _CONTAINER_NAME])

# ---------------------------------------------------------------------------
# ACR configuration
# ---------------------------------------------------------------------------
_ACR_SERVER = "<azureContainerRegistryName>.azurecr.io"
_IMAGE = _ACR_SERVER + "/r-pomp:4.4.1"

In [4]:
# --- Credentials -------------------------------------------------------------

# Credential resolved from the environment this notebook runs in.
default_credential = DefaultAzureCredential()

# Reference to the user-assigned managed identity configured above; it is passed
# to Batch wherever a node has to authenticate (registry pull, output upload).
identity_reference = batch.models.ComputeNodeIdentityReference(
    resource_id=_BATCH_ACCOUNT_MANAGED_ID_RESOURCE_ID
)

# The Batch client takes a token-based credential, so request one token for the
# Batch scope and wrap it.
# NOTE(review): the token is fetched once here and never refreshed; re-run this
# cell if Batch calls start failing with authentication errors - confirm lifetime.
batch_scope = 'https://batch.core.windows.net/.default'
token = {'access_token': default_credential.get_token(batch_scope).token}
batch_credentials = BasicTokenAuthentication(token)

# --- Service clients ---------------------------------------------------------

batch_client = batch.BatchServiceClient(
    batch_credentials,
    batch_url=_BATCH_ACCOUNT_URL,
)
blob_service_client = BlobServiceClient(
    account_url=_ACCOUNT_URL,
    credential=default_credential,
)

# Container clients: 'input' receives the files the tasks need, the output
# container is where the tasks' results are listed from later on.
input_container_client = blob_service_client.get_container_client('input')
output_container_client = ContainerClient(
    account_url=_ACCOUNT_URL,
    container_name=_CONTAINER_NAME,
    credential=default_credential,
)

# --- Configuration objects shared by every task -------------------------------

# Pull the task image from the registry using the managed identity.
task_registry = batch.models.ContainerRegistry(
    registry_server=_ACR_SERVER,
    identity_reference=identity_reference,
)
task_container_settings = batch.models.TaskContainerSettings(
    image_name=_IMAGE,
    registry=task_registry,
)

# No environment variables are passed today; add entries to the dict to set some.
environment_settings = [
    batch.models.EnvironmentSetting(name=name, value=value)
    for name, value in {}.items()
]

# Tasks run as a task-scoped auto-user with admin elevation.
auto_user = batch.models.AutoUserSpecification(
    elevation_level=batch.models.ElevationLevel.admin,
    scope=batch.models.AutoUserScope.task,
)
user = batch.models.UserIdentity(auto_user=auto_user)

# Upload the files

In [None]:
import os

def upload_input_files(file_paths):
    """Upload local files to the input container under this job's prefix.

    Every file is uploaded through ``input_container_client`` as the blob
    ``<_JOB_ID>/<file basename>``. An existing blob with the same name is
    overwritten.

    Args:
        file_paths: Iterable of paths to the local files to upload.

    Raises:
        FileNotFoundError: If any of the paths is not an existing file. All
            paths are checked before the first upload, so a missing file
            (for example an ``ofun.rds`` that was never generated) does not
            leave a partially uploaded set of inputs behind.
    """
    file_paths = list(file_paths)

    # Fail fast: validate everything up front instead of discovering a missing
    # file halfway through the upload loop.
    missing = [str(path) for path in file_paths if not os.path.isfile(path)]
    if missing:
        raise FileNotFoundError(f'Input file(s) not found: {", ".join(missing)}')

    print(f'Found {len(file_paths)} files to upload.')

    print(f'Uploading files to container input/{_JOB_ID}...')
    for file_path in file_paths:
        print(f'Uploading {file_path}...')
        with open(file_path, 'rb') as f:
            input_container_client.upload_blob(name=f'{_JOB_ID}/{os.path.basename(file_path)}', data=f, overwrite=True)

    print('Upload complete.')

# Files every task needs: the generated ofun.rds and the optimization script.
args = [
    './app/ofun.rds',
    './app/run_optimization.R',
]

upload_input_files(args)

# Creating the job

The Batch job itself is relatively simple at its core. All it needs is a pool and an id. There are more things that can be configured, such as preparation and completion tasks or behavior when a task fails.

In [6]:
# A job needs nothing more than an id and the pool its tasks will run on.
pool_info = batch.models.PoolInformation(pool_id=_POOL_ID)
job = batch.models.JobAddParameter(id=_JOB_ID, pool_info=pool_info)

batch_client.job.add(job)

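## Creating the tasks

Each task runs `run_optimization.R` inside the container with a different pair of initial `K` and `N_0` values. The files uploaded for this job are attached to every task as resource files, and each task's logs and data files are uploaded to the output container under `<job id>/<task id>/`.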



In [None]:
import azure.batch as batch

# Initial (K, N_0) values; one Batch task is created per entry.
parameter_initial_values = [
    {"K": 100, "N_0": 10},
    {"K": 200, "N_0": 20},
    {"K": 300, "N_0": 30}
]


def _build_output_file(file_pattern, blob_path, upload_condition):
    """Build the OutputFile spec that uploads matching task files to the output container.

    Args:
        file_pattern: Pattern selecting the files on the node to upload.
        blob_path: Virtual directory inside the output container to upload into.
        upload_condition: ``batch.models.OutputFileUploadCondition`` value that
            decides on which task outcome the upload happens.

    Returns:
        A ``batch.models.OutputFile`` targeting ``_OUTPUT_CONTAINER_URL`` and
        authenticating with the managed identity in ``identity_reference``.
    """
    return batch.models.OutputFile(
        file_pattern=file_pattern,
        destination=batch.models.OutputFileDestination(
            container=batch.models.OutputFileBlobContainerDestination(
                path=blob_path,
                # Use the configured container URL so uploads land in the same
                # container (_CONTAINER_NAME) that the download cells read from.
                container_url=_OUTPUT_CONTAINER_URL,
                identity_reference=identity_reference,
                upload_headers=[batch.models.HttpHeader(name="Metadata", value="true")]
            )
        ),
        upload_options=batch.models.OutputFileUploadOptions(upload_condition=upload_condition)
    )


tasks = []

for i, params in enumerate(parameter_initial_values):
    task_name = f"task_{i+1}"
    # Every task uploads below <job id>/<task id>/ so results never collide.
    output_name = f'{_JOB_ID}/{task_name}'
    # The resource files are requested with the '<job id>/' blob prefix, so the
    # command first changes into that directory before running the script.
    command = f'/bin/bash -c "cd {_JOB_ID} && chmod +x run_optimization.R && Rscript ./run_optimization.R  {params["K"]} {params["N_0"]}"'

    task = batch.models.TaskAddParameter(
        id=task_name,
        command_line=command,
        container_settings=task_container_settings,
        environment_settings=environment_settings,
        user_identity=user,
        resource_files=[
            # NOTE(review): this reads from the Batch account's auto-storage
            # 'input' container; assumes that is the same storage account the
            # input files were uploaded to - confirm.
            batch.models.ResourceFile(
                auto_storage_container_name='input',
                blob_prefix=f'{_JOB_ID}/'
            )
        ],
        output_files=[
            # std*.txt one level above the working directory (presumably the
            # task's stdout/stderr); uploaded whenever the task completes,
            # regardless of its outcome.
            _build_output_file(
                file_pattern='../std*.txt',
                blob_path=f'{output_name}/logs',
                upload_condition=batch.models.OutputFileUploadCondition.task_completion
            ),
            # Everything in the job directory; uploaded only when the task succeeds.
            _build_output_file(
                file_pattern=f'./{_JOB_ID}/**/*',
                blob_path=f'{output_name}/data',
                upload_condition=batch.models.OutputFileUploadCondition.task_success
            )
        ]
    )

    tasks.append(task)

print(f'Firing off {len(tasks)} tasks!')

result = batch_client.task.add_collection(_JOB_ID, tasks)

# Patched only after the tasks have been added: the job then terminates on its
# own once all of its tasks have completed.
batch_client.job.patch(_JOB_ID, batch.models.JobPatchParameter(on_all_tasks_complete=batch.models.OnAllTasksComplete.terminate_job))


## Creating an artifact

Once our optimization tasks have all finished (check the job's status before continuing; the cells below only find files that have already been uploaded), we have a bunch of disparate files in Azure Storage. These steps list the job's output blobs and download each of the `.RData` result files within the container path to this notebook host, so that they can be loaded, aggregated, and plotted in R in the final section.

In [None]:
# Names of every blob currently stored in the output container
simulations = [blob.name for blob in output_container_client.list_blobs()]

# Print only the blobs that were written under this job's prefix
for blob_name in (name for name in simulations if name.startswith(_JOB_ID)):
    print(blob_name)

In [None]:

# Get the list of .RData files for this job. The prefix filter is applied by the
# storage service, so blobs belonging to other jobs are never listed.
rdata_files = [
    b.name
    for b in output_container_client.list_blobs(name_starts_with=_JOB_ID)
    if b.name.endswith('.RData')
]

# Download the .RData files (the target directory is created if needed)
os.makedirs('downloaded_data', exist_ok=True)

for rdata_file in rdata_files:
    blob_client = blob_service_client.get_blob_client(container=_CONTAINER_NAME, blob=rdata_file)
    # Files are stored flat, by basename, which is what the R cell below expects.
    file_name = os.path.join('downloaded_data', os.path.basename(rdata_file))
    with open(file_name, "wb") as download_file:
        # Stream straight into the file instead of buffering the whole blob in memory.
        blob_client.download_blob().readinto(download_file)

print(f"Downloaded {len(rdata_files)} .RData files to 'downloaded_data' directory.")

if not rdata_files:
    # Nothing in this notebook waits for the job, so an empty result usually
    # means the tasks have not finished (or failed) yet.
    print(f"No .RData blobs found under '{_JOB_ID}'. Check that the job's tasks have completed successfully, then re-run this cell.")

# Switch to R kernel

Switch your jupyter notebook to an R kernel

In [None]:
# Install the required packages only when they are not already available, so
# re-running this cell does not reinstall them every time.
for (pkg in c('pomp', 'ggplot2')) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

In [None]:
library(ggplot2)

# Directory where the .RData files are stored
data_dir <- "downloaded_data"

# List of RData files with full paths
rdata_files <- list.files(data_dir, pattern = "fit_results_.*\\.RData", full.names = TRUE)
if (length(rdata_files) == 0) {
  warning(sprintf("No fit_results_*.RData files found in '%s'", data_dir))
}

# Zero-row template that fixes the column names and types of the result table
results_template <- data.frame(neg_log_likelihood = numeric(), K = numeric(), N_0 = numeric(), file = character())

# Collect one single-row data frame per file and bind them once at the end
result_rows <- list()

for (file in rdata_files) {
  # Load into a fresh environment so that a file without a `fit` object cannot
  # silently reuse the `fit` left over from a previous file (or an earlier cell)
  fit_env <- new.env()
  load(file, envir = fit_env)
  if (!exists("fit", envir = fit_env, inherits = FALSE)) {
    stop(sprintf("'%s' does not contain an object named 'fit'", file))
  }
  fit <- get("fit", envir = fit_env, inherits = FALSE)

  # Extract the final optimized K and N_0; unname() drops the element names so
  # they are not picked up as row names
  final_K <- unname(fit$par["K"])
  final_N_0 <- unname(fit$par["N_0"])
  neg_log_likelihood <- fit$value

  result_rows[[length(result_rows) + 1]] <- data.frame(
    neg_log_likelihood = neg_log_likelihood,
    K = final_K,
    N_0 = final_N_0,
    file = basename(file)
  )
}

# Binding onto the template keeps the (empty) table well-formed when no files were found
results_df <- do.call(rbind, c(list(results_template), result_rows))
rownames(results_df) <- NULL

results_df