# Deploy a model on WMLA

# 1. Setup and prepare working directory

In [None]:
import os
import json
import shutil
import subprocess

import requests
import urllib3

import wml_sdk_utils as wml_util
import storage_volume_utils as sv
import wml_sdk_utils as wsdk
import wmla_edi_utils as edi
import wmla_utils as wmla_util

# Suppress urllib3's InsecureRequestWarning for the rest of this session.
# NOTE(review): presumably needed because HTTPS calls made by the helper modules
# skip certificate verification — confirm before relying on this in production.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
# Runtime settings handed over through the process environment by
# cli_mlops.py or A_MLOps_Pipeline.ipynb. Each value is None when its
# variable is unset, except KERNEL_FILENAME, which falls back to 'kernel.py'.
WML_SPACE_ID = os.environ.get('WML_SPACE_ID')
MODEL_ASSET_ID = os.environ.get('MODEL_ASSET_ID')
REST_SERVER = os.environ.get('REST_SERVER')
DLIM_PATH = os.environ.get('DLIM_PATH')
KERNEL_FILENAME = os.environ.get('KERNEL_FILENAME', 'kernel.py')
CUSTOM_ARG = os.environ.get('CUSTOM_ARG')

In [None]:
# Initiate WML client
# The client is created for the deployment space given by WML_SPACE_ID.
wml_client = wsdk.get_client(space_id=WML_SPACE_ID)
# Load the 'deployment' metadata and keep only the entry keyed by this model's
# asset id; a missing MODEL_ASSET_ID key raises here.
# NOTE(review): wml_util and wsdk are two aliases of the same wml_sdk_utils module.
metadata_deployment = wml_util.metadata_yml_load(wml_client,'deployment')[MODEL_ASSET_ID]

In [None]:
# From config yaml file
# Required entries: a missing key raises KeyError, which is intentional because
# the deployment cannot be described without them.
WML_SPACE_MODEL = metadata_deployment['model_asset']
DEPLOY_NAME = metadata_deployment['wmla_deployment']['deployment_name']
DEPLOY_DEPENDENCY_FILE = metadata_deployment['wmla_deployment']['dependency_filename']
VOLUME_DISPLAY_NAME = metadata_deployment['wmla_deployment']['volume_display_name']
DEPLOYMENT_URL = metadata_deployment['wmla_deployment']['deployment_url']

# Optional tuning knobs: every resource setting falls back to a default when
# it is omitted from the metadata.
resource_configs = metadata_deployment['wmla_deployment']['resource_configs']
KERNEL_MIN = resource_configs.get('kernel_min',1)
KERNEL_MAX = resource_configs.get('kernel_max',100)
KERNEL_DELAY_RELEASE_TIME = resource_configs.get('kernel_delay_release_time',60)
TASK_EXECUTION_TIMEOUT = resource_configs.get('task_execution_timeout',60)

# GPUs are opt-in: treat an omitted 'enable_gpus' as False instead of failing
# with KeyError, in line with the other optional resource settings.
# NOTE(review): bool() of a non-empty string such as "false" is True — this
# assumes the metadata stores a real boolean; confirm against the yaml.
ENABLE_GPUS = bool(resource_configs.get('enable_gpus', False))
NCPUS = resource_configs.get('n_cpus',8)
MEM = resource_configs.get('memory_allocation',1000)
# The same value is used for both the request and the limit of CPU and memory.
RESOURCES = f"ncpus={NCPUS},ncpus_limit={NCPUS},mem={MEM},mem_limit={MEM}"

In [None]:
# Prepare additional dependencies
# The submission folder and deployment name are exported as environment
# variables so that later cells and shell commands can read them.
DIR_DEPLOY_SUBMISSION = f'/userfs/deploy_submissions/{DEPLOY_NAME}'
os.environ['DIR_DEPLOY_SUBMISSION'] = DIR_DEPLOY_SUBMISSION
os.environ['DEPLOY_NAME'] = DEPLOY_NAME

# Add to path
# DLIM_PATH comes from the environment and may be unset (None); in that case
# PATH is left untouched. Membership is tested per PATH entry rather than by
# substring, so e.g. '/opt/dlim' is not mistaken for present when only
# '/opt/dlim/bin' is on PATH.
if DLIM_PATH:
    _current_path = os.environ.get('PATH', '')
    if DLIM_PATH not in _current_path.split(os.pathsep):
        # Avoid a leading separator (an empty PATH entry) when PATH was empty.
        os.environ['PATH'] = (
            f'{_current_path}{os.pathsep}{DLIM_PATH}' if _current_path else DLIM_PATH
        )

## 1.1 Save misc. model files to working directory

Deployments are submitted using the `dlim` CLI tool, which requires all deployment dependencies be collected in a job submission folder.

In [None]:
# Start from a clean submission folder: delete whatever a previous run left
# behind, then recreate the (empty) directory, including missing parents.
!rm -rf $DIR_DEPLOY_SUBMISSION
!mkdir -p $DIR_DEPLOY_SUBMISSION

In [None]:
# Read the submission folder once instead of querying the environment per call.
submission_dir = os.getenv('DIR_DEPLOY_SUBMISSION')

# Helper modules downloaded for every deployment, followed by the
# model-specific dependency archive named in the deployment metadata.
general_dependencies = [
    'wmla_edi_utils.py',
    'storage_volume_utils.py',
    'cpd_utils.py',
    'wml_sdk_utils.py',
]
files = [*general_dependencies, DEPLOY_DEPENDENCY_FILE]
wsdk.download_batch(files, wml_client, submission_dir)

# Extract dependency file & clean up
# The archive is unpacked inside the submission folder; the contents of the
# sub-folder named after the archive (file name minus its last extension) are
# then merged into the submission folder itself, and both the sub-folder and
# the archive are removed.
# NOTE(review): this assumes the archive unpacks into a top-level folder with
# that name — confirm for multi-suffix archives such as .tar.gz.
DEPLOY_DEPENDENCY = os.path.splitext(DEPLOY_DEPENDENCY_FILE)[0]
archive_path = f"{submission_dir}/{DEPLOY_DEPENDENCY_FILE}"
extracted_dir = f"{submission_dir}/{DEPLOY_DEPENDENCY}"

shutil.unpack_archive(archive_path, extract_dir=submission_dir)
shutil.copytree(src=f"{extracted_dir}/", dst=submission_dir, dirs_exist_ok=True)
shutil.rmtree(extracted_dir)
os.remove(archive_path)

In [None]:
# Optional user-supplied settings for the kernel file.
# CUSTOM_ARG is a whitespace-separated list of NAME=VALUE pairs,
# e.g. "threshold=0.5 mode=fast".
if CUSTOM_ARG is not None:
    # split() without an argument ignores repeated, leading and trailing
    # whitespace, so no empty items reach the parser below.
    custom_arg = CUSTOM_ARG.split()

    variables = {}
    for pair in custom_arg:
        # Split on the first '=' only so that values may themselves contain '='.
        name, separator, value = pair.partition('=')
        if not separator:
            raise ValueError(
                f"CUSTOM_ARG items must have the form NAME=VALUE, got {pair!r}"
            )
        variables[name] = value

    # NOTE(review): kernel_file_prepare presumably rewrites the kernel file
    # using these values — see wmla_utils for the exact substitution rules.
    wmla_util.kernel_file_prepare(f'{DIR_DEPLOY_SUBMISSION}/{KERNEL_FILENAME}',variables)
    print('custom arguments added:',CUSTOM_ARG)

In [None]:
%%writefile $DIR_DEPLOY_SUBMISSION/model.json

{"name": "__PLACEHOLDER__", 
 "kernel_path": "__PLACEHOLDER__", 
 "readme": "__PLACEHOLDER__",
 "tag": "test", 
 "weight_path": "./",  
 "runtime": "dlipy3", 
 "framework": "PyTorch", 
 "schema_version": "1"}

In [None]:
## fill in the information
# Replace the placeholders of the model.json template with the values of this
# deployment. Files are opened through context managers so that every handle
# is closed again (the template is read, rewritten, and read back).
model_json_path = f'{DIR_DEPLOY_SUBMISSION}/model.json'

with open(model_json_path) as f:
    conf = json.load(f)

conf['name'] = DEPLOY_NAME
conf['kernel_path'] = KERNEL_FILENAME
conf['readme'] = 'README.md'
# Name/value pairs stored under 'mk_environments'.
# NOTE(review): presumably exposed to the kernel as environment variables —
# confirm against the dlim model.json schema.
conf['mk_environments'] = [{'name':'WML_SPACE_ID', 'value':WML_SPACE_ID},
                           {'name':'WML_SPACE_MODEL', 'value':WML_SPACE_MODEL}]

with open(model_json_path, 'w') as f:
    json.dump(conf, f)

# Read the file back so that conf reflects what is on disk and a malformed
# write is caught here rather than at deploy time.
with open(model_json_path) as f:
    conf = json.load(f)

## **Edit files before continuing**

# 2. Submit deployment

## 2.1 Remove deployment with same name if it already exists

In [None]:
# Force-stop (-f) any running deployment that already uses this name.
# NOTE(review): the second argument looks like the output text the helper
# expects from dlim — confirm in wmla_edi_utils.
edi.run_subprocess_and_retry(
    f'dlim model stop {DEPLOY_NAME} --rest-server {REST_SERVER} --jwt-token $USER_ACCESS_TOKEN -f',
    f'Stopping model "{DEPLOY_NAME}", run "dlim model view {DEPLOY_NAME} -s" to ensure stop.',
)

In [None]:
# Force-undeploy (-f) the deployment with this name so it can be deployed afresh.
# NOTE(review): delay=10 is presumably the wait in seconds between retries —
# confirm in wmla_edi_utils.
edi.run_subprocess_and_retry(
    f'dlim model undeploy {DEPLOY_NAME} --rest-server {REST_SERVER} --jwt-token $USER_ACCESS_TOKEN -f',
    f'Undeployed model "{DEPLOY_NAME}", run "dlim model list" to ensure deletion.',
    delay=10,
)

## 2.2 Deploy model

In [None]:
# Deploy the contents of the submission folder (-p) as a new model.
# NOTE(review): verification_line=-1 presumably makes the helper compare the
# expected message against the last line of dlim's output — confirm in
# wmla_edi_utils.
edi.run_subprocess_and_retry(
    f'dlim model deploy -p {DIR_DEPLOY_SUBMISSION} --rest-server {REST_SERVER} --jwt-token $USER_ACCESS_TOKEN',
    f'Model <{DEPLOY_NAME}> is deployed successfully',
    delay=5,
    verification_line=-1,
)

## 2.3 Modify configuration

In [None]:
# Request profile as JSON
!dlim model viewprofile $DEPLOY_NAME -j --rest-server $REST_SERVER --jwt-token $USER_ACCESS_TOKEN > $DIR_DEPLOY_SUBMISSION/update_model.json
with open(f"{DIR_DEPLOY_SUBMISSION}/update_model.json",'r') as f:
    update_model = json.load(f)

# Apply changes
update_model['policy']['kernel_min'] = KERNEL_MIN
update_model['policy']['kernel_max'] = KERNEL_MAX
update_model['policy']['kernel_delay_release_time'] = KERNEL_DELAY_RELEASE_TIME
update_model['policy']['task_execution_timeout'] = TASK_EXECUTION_TIMEOUT
update_model['resource_allocation']['kernel']['resources'] = RESOURCES
if ENABLE_GPUS:
    update_model['kernel']['gpu'] = 'exclusive'

# Update profile
with open(f"{DIR_DEPLOY_SUBMISSION}/update_model.json",'w') as f:
    json.dump(update_model, f)
edi.run_subprocess_and_retry(f"dlim model updateprofile {DEPLOY_NAME} -f {DIR_DEPLOY_SUBMISSION}/update_model.json --rest-server {REST_SERVER} --jwt-token $USER_ACCESS_TOKEN",
                         "Model is updated successfully")

# 3. Start deployment

In [None]:
# Start the freshly deployed (and re-profiled) model.
# NOTE(review): the second argument looks like the output text the helper
# expects from dlim — confirm in wmla_edi_utils.
edi.run_subprocess_and_retry(
    f'dlim model start {DEPLOY_NAME} --rest-server {REST_SERVER} --jwt-token $USER_ACCESS_TOKEN',
    f'Starting model "{DEPLOY_NAME}", run "dlim model view {DEPLOY_NAME} -s" to ensure startup.',
)

In [None]:
# Wait for the model started above before moving on to test scoring.
# NOTE(review): presumably polls dlim until the model reports an idle status —
# confirm the polling interval and timeout behavior in wmla_edi_utils.
edi.wait_for_model_idle_status(DEPLOY_NAME)

# 4. Test scoring