<h1><center>MLDSL Demo</center></h1>
<a id="tc"></a>

## Table of Contents
1. [Configuration](#configuration)
2. [Local Source Changes](#files)
3. [Prepare LGBM Signature](#signature)
4. [Train and Tune LGBM Model](#hptuning)
5. [Trained Model Deployment](#deployment)
6. [Batch Prediction](#prediction)

<a id="configuration"></a>
## Configuration
[back to Table Of Contents](#tc)

In [6]:
import os
import json

def get_transition(transition_file):
    """Read a workflow transition file and return its decoded JSON content.

    Args:
        transition_file: Path of the JSON file to read.

    Returns:
        The Python object decoded from the file's JSON text.
    """
    with open(transition_file, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [7]:
# Local filesystem layout of the demo project.
BASE_PATH = '/home/jovyan/work/data'
PROJECT_PATH = f'{BASE_PATH}/demo'
SCRIPT_PATH = f'{PROJECT_PATH}/scripts'

# GCP resources; these values are passed to GCPSessionFactory.build_session
# later in the notebook (job bucket, project id, cluster, job region, ML region).
BUCKET = 'ai4ops'
PROJECT = 'gd-gcp-techlead-experiments'
CLUSTER = 'ai4ops'
REGION = 'global'
AI_PLATFORM_REGION = 'us-central1'

# Name given to the model builders in the training and deployment sections.
AI_PLATFORM_MODEL_NAME = 'ai4ops_lgbm_throughput'
# Misspelled legacy name, kept as an alias because later cells still reference it.
AI_PALTFORM_MODEL_NAME = AI_PLATFORM_MODEL_NAME

In [8]:
from mldsl import *
import importlib
from datetime import datetime
import sys
import pyspark

<a id="files"></a>
## Local Source Changes
[back to Table Of Contents](#tc)

In [35]:
%%py_script --exec True --name gs_utils.py --path demo/scripts
#!/usr/bin/python
from google.cloud import storage

def get_bucket(bucket_id, project):
    """Return the bucket ``bucket_id`` looked up through a storage client created for ``project``."""
    return storage.Client(project).get_bucket(bucket_id)


def upload_to_gs(bct, folder_path_gs, file):
    """Upload the local file ``file`` into ``bct`` as the blob ``<folder_path_gs>/<file>``.

    Args:
        bct: Bucket object exposing ``blob(name)``.
        folder_path_gs: Folder prefix of the blob name inside the bucket.
        file: Local filename; used as the upload source and as the blob-name suffix.
    """
    target_name = '/'.join((folder_path_gs, file))
    bct.blob(target_name).upload_from_filename(file)

def print_text(text):
    """Print ``text`` to standard output."""
    print(text)
    
# Print the attribute dict of the configured bucket when the script is executed.
# NOTE(review): BUCKET and PROJECT are not defined in this script; presumably they
# are resolved from the notebook namespace by `%%py_script --exec True` — confirm.
print_text(get_bucket(BUCKET, PROJECT).__dict__) 

Script output:
 {'name': 'ai4ops', '_properties': {'kind': 'storage#bucket', 'id': 'ai4ops', 'selfLink': 'https://www.googleapis.com/storage/v1/b/ai4ops', 'projectNumber': '713133371133', 'name': 'ai4ops', 'metageneration': '7', 'location': 'US-EAST1', 'storageClass': 'REGIONAL', 'etag': 'CAc=', 'iamConfiguration': {'bucketPolicyOnly': {'enabled': False}, 'uniformBucketLevelAccess': {'enabled': False}}, 'locationType': 'region', 'timeCreated': '2019-04-09T10:30:07.044Z', 'updated': '2019-10-02T12:39:53.418Z'}, '_changes': set(), '_client': <google.cloud.storage.client.Client object at 0x7f94f55709e8>, '_acl': <google.cloud.storage.acl.BucketACL object at 0x7f94f5570908>, '_default_object_acl': <google.cloud.storage.acl.DefaultObjectACL object at 0x7f94f5570f98>, '_label_removals': set(), '_user_project': None}



<mldsl.PyScript at 0x7f94f56199b0>

<a id="signature"></a>
## Prepare LGBM Signature
[back to Table Of Contents](#tc)

In [24]:
builder = DataprocJobBuilder()

# One session carries both the Dataproc job settings and the AI Platform region.
session = GCPSessionFactory.build_session(
    job_bucket=BUCKET,
    job_region=REGION,
    cluster=CLUSTER,
    job_project_id=PROJECT,
    ml_region=AI_PLATFORM_REGION,
)

In [25]:
# Input CSV and output folder handed to the signature job.
SIGNATURE_INPUT = f'gs://{BUCKET}/demo/part-00000-b4a31031-df3f-4d82-945e-7b9b71ea790c-c000.csv'
OUTPUT_PATH = 'demo_models/lgbm/input'

# Signature configuration file that is attached to the job.
CONFIG_FILENAME = 'lgbm_signature_august_throughput.json'
DATA_CONFIG = f'{PROJECT_PATH}/config/{CONFIG_FILENAME}'

# Kept as strings because they are forwarded as job arguments.
USE_POWER_TRANSFORMER = 'True'
WITH_CALENDAR_FEATURES = 'False'
SCALER_NAME = ''  # empty: no --scaler_name argument is added to the job

AI_PLATFORM_MODEL_BASE_VERSION = 'v1'

In [26]:
builder = DataprocJobBuilder()

# The job name and its output folder are made unique by the current timestamp.
TIMESTAMP = int(datetime.now().timestamp())
sign_job_name = f"dsl_ai4ops_demo_signature_{TIMESTAMP}"
SIGNATURE_OUT = f"{OUTPUT_PATH}/{sign_job_name}"

# Command-line arguments handed to the signature script.
arguments = Arguments()
arguments.set_args(**{
    "--input_data_path": SIGNATURE_INPUT,
    "--config": CONFIG_FILENAME,
    "--output_bucket": BUCKET,
    "--output_bucket_project": PROJECT,
    "--output_bucket_path": SIGNATURE_OUT,
    "--workflow_id": str(TIMESTAMP),
    "--with_calendar_features": WITH_CALENDAR_FEATURES,
    "--with_power_transform": USE_POWER_TRANSFORMER,
})

# The scaler argument is optional and only sent when a name is configured.
if SCALER_NAME:
    arguments.set_arg('--scaler_name', SCALER_NAME)

# Main script, helper modules, the generated gs_utils.py script and the config file.
signature_job = (
    builder.files_root(SCRIPT_PATH)
    .job_file('partial_signature_lgbm.py')
    .job_id(sign_job_name)
    .py_file('apigee_ingest_utils.py')
    .py_file('ai4ops_db.py')
    .py_file('yarn_logging.py')
    .py_script('gs_utils.py')
    .file(DATA_CONFIG)
    .arguments(arguments)
    .build_job()
)

signature_executor = DataprocExecutor(signature_job, session)

In [32]:
# Submit the signature job to the cluster and keep the result.
# NOTE(review): run_async=False presumably blocks until the job finishes — confirm against mldsl.
signature_res = signature_executor.submit_job(run_async=False)

Uploading file from dir: jobs-root/dsl_ai4ops_demo_signature_1570198312/partial_signature_lgbm.py
Uploading file from dir: jobs-root/dsl_ai4ops_demo_signature_1570198312/apigee_ingest_utils.py
Uploading file from dir: jobs-root/dsl_ai4ops_demo_signature_1570198312/ai4ops_db.py
Uploading file from dir: jobs-root/dsl_ai4ops_demo_signature_1570198312/yarn_logging.py
Uploading file from dir: jobs-root/dsl_ai4ops_demo_signature_1570198312/lgbm_signature_august_throughput.json
Job with id dsl_ai4ops_demo_signature_1570198312 was submitted to the cluster ai4ops
Job STATUS was set to PENDING at 2019-10-04 14:11:54
Job STATUS was set to SETUP_DONE at 2019-10-04 14:11:54
      Yarn APP /home/jovyan/work/data/demo/scripts/partial_signature_lgbm.py with STATUS ACCEPTED has PROGRESS 0
      Yarn APP /home/jovyan/work/data/demo/scripts/partial_signature_lgbm.py with STATUS RUNNING has PROGRESS 10
Job STATUS was set to RUNNING at 2019-10-04 14:11:54
      Yarn APP /home/jovyan/work/data/demo/scripts/

In [36]:
# Look up the job behind the signature executor.
# NOTE(review): the saved output of this cell is a 404 NotFound for a job id that
# differs from the one in the other outputs; re-run or clear it before sharing.
signature_executor.get_job()

NotFound: 404 Not found: Job projects/gd-gcp-techlead-experiments/regions/global/jobs/dsl_ai4ops_demo_signature_1570457542

In [35]:

# Stop the workflow unless the signature job is finished or still running.
state = signature_executor.get_job_state()
print(f'State : {state}')

if state not in ('DONE', 'RUNNING'):
    raise RuntimeError('Previous workflow step was failed')

State : DONE


In [None]:
# Hand-over record of the signature step: job identity, output locations and
# final state. It is read back by the training section.
transition_signature = {
    "SIGNATURE_JOB_ID": sign_job_name,
    "SIGNATURE_TIMESTAMP": TIMESTAMP,
    "SIGNATURE_BUCKET": BUCKET,
    "SIGNATURE_TRAIN": f"{SIGNATURE_OUT}/LGBM-TRAIN-{TIMESTAMP}.csv",
    "SIGNATURE_VAL": f"{SIGNATURE_OUT}/LGBM-VAL-{TIMESTAMP}.csv",
    "SIGNATURE_TEST": f"{SIGNATURE_OUT}/LGBM-TEST-{TIMESTAMP}.csv",
    "SIGNATURE_SCALER": f"{SIGNATURE_OUT}/LGBM-SCL-{TIMESTAMP}.pkl",
    "SIGNATURE_DROP_KEYS": f"{SIGNATURE_OUT}/LGBM-DROP-KEYS-{TIMESTAMP}.txt",
    "SIGNATURE_STATE": state
}

print(transition_signature)

# Create the target folder first so the write also succeeds on a fresh checkout.
os.makedirs('transitions', exist_ok=True)
with open('transitions/dsl_transition_signature.json', 'w') as transition_file:
    json.dump(transition_signature, transition_file)

<a id="hptuning"></a>
## Train and Tune LGBM Model
[back to Table Of Contents](#tc)

In [10]:
# From here on the job sources are taken from the model folder.
SCRIPT_PATH = f"{PROJECT_PATH}/model"

# Continue only when the signature step recorded the DONE state.
signature_transition = get_transition('transitions/dsl_transition_signature.json')
print(signature_transition)
state = signature_transition.get('SIGNATURE_STATE', '')
print(f'State: {state}')
if state != 'DONE':
    raise RuntimeError('Previous workflow step was failed')

# Locations produced by the signature step (empty string when a key is missing).
SIGNATURE_TRAIN, SIGNATURE_VAL, SIGNATURE_TEST, SIGNATURE_SCALER = (
    signature_transition.get(key, '')
    for key in ('SIGNATURE_TRAIN', 'SIGNATURE_VAL', 'SIGNATURE_TEST', 'SIGNATURE_SCALER')
)

# Column roles forwarded to the training script.
CATEGORICAL_COLUMNS = 'metric_class'
EXCLUDED_COLUMNS = 'time,metric,metric_id'

# Tuning job settings.
TUNING_CONFIG_FILE = 'demo/config/hptuning_config.yaml'
WAIT_DELAY = '60'
WAIT_TRIES = '6'
SCALE_TIER = "custom"

{'SIGNATURE_JOB_ID': 'dsl_ai4ops_demo_signature_1570198312', 'SIGNATURE_TIMESTAMP': 1570198312, 'SIGNATURE_BUCKET': 'ai4ops', 'SIGNATURE_TRAIN': 'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-TRAIN-1570198312.csv', 'SIGNATURE_VAL': 'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-VAL-1570198312.csv', 'SIGNATURE_TEST': 'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-TEST-1570198312.csv', 'SIGNATURE_SCALER': 'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-SCL-1570198312.pkl', 'SIGNATURE_DROP_KEYS': 'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-DROP-KEYS-1570198312.txt', 'SIGNATURE_STATE': 'DONE'}
State: DONE


In [6]:
# The tuning job name and every path derived from it carry the current timestamp.
TIMESTAMP = int(datetime.now().timestamp())
TUNING_JOB_NAME = f"dsl_demo_tuning_lgbm_{TIMESTAMP}"
JOB_DIR = f"gs://{BUCKET}/demo/models/lightgbm/{TUNING_JOB_NAME}"  # TODO (original note): fix path
ERR_LOG_PATH_GS = f"demo/models/lightgbm/{TUNING_JOB_NAME}/output"
TRAINED_MODEL_PATH_GS = f"demo/models/lightgbm/{TUNING_JOB_NAME}/model"

# Training input handed to the AI Platform job builder.
training_input = {
    "region": AI_PLATFORM_REGION,
    "scaleTier": SCALE_TIER,
    "masterType": "large_model",
    "workerType": "large_model",
    "parameterServerType": "large_model",
    "workerCount": "4",
    "parameterServerCount": "3",
    "masterConfig": {
        "imageUri": "gcr.io/gd-gcp-techlead-experiments/ai4ops_lgbm_image"
    },
    "jobDir": JOB_DIR
}

# Arguments of the training script; all values are passed as strings.
args = Arguments()
args.set_args(**{
    '--is_hyperparameters_tuning': 'True',
    '--bucket_id': BUCKET,
    '--train_data_path_gs': SIGNATURE_TRAIN,
    '--val_data_path_gs': SIGNATURE_VAL,
    '--err_log_path_gs': ERR_LOG_PATH_GS,
    '--trained_model_path_gs': TRAINED_MODEL_PATH_GS,
    '--boosting_type': "gbdt",
    '--n_jobs': '-1',
    '--early_stopping_rounds': '10',
    '--importance_type': "split",
    '--categorical_feature': CATEGORICAL_COLUMNS,
    '--target': "var1(t)",
    '--excluded': EXCLUDED_COLUMNS
})

# Model description: training image, name, arguments and source root.
m_builder = ModelBuilder()
model = (
    m_builder.train_image_uri('gcr.io/gd-gcp-techlead-experiments/ai4ops_lgbm_image')
    .name(AI_PALTFORM_MODEL_NAME)
    .train_arguments(args)
    .files_root(SCRIPT_PATH)
    .custom_predictor_path("setup.py")
    .build()
)

# Tuning job: the model plus training input, hyperparameter file and job folder.
ai_job_builder = AIJobBuilder()
ai_tuning_job = (
    ai_job_builder.model(model)
    .train_input(training_input)
    .name(TUNING_JOB_NAME)
    .load_hyperparameters_from_file(TUNING_CONFIG_FILE)
    .job_dir(JOB_DIR)
    .build()
)


In [None]:
# NOTE(review): 60 and 10 are positional executor settings (presumably wait delay and
# number of tries — confirm); WAIT_DELAY / WAIT_TRIES defined earlier are not used here.
tuning_executor = AIPlatformJobExecutor(session,ai_tuning_job, 60, 10)

# Submit the tuning job and keep the state from the response for the transition record.
response = tuning_executor.submit_train_job()
state = response['state']

In [13]:
# Hand-over record of the tuning step: job identity, job folder, final state and
# model location. It is read back by the deployment section.
transition_tuning = {
    "TRAIN_JOB_ID": TUNING_JOB_NAME,
    "TRAIN_JOB_DIR": JOB_DIR,
    "TRAIN_STATE": state,
    "TRAINED_MODEL": f"{JOB_DIR}/model",
    "IS_TUNING": True
}

print(transition_tuning)

# Create the target folder first so the write also succeeds on a fresh checkout.
os.makedirs('transitions', exist_ok=True)
with open('transitions/dsl_transition_tuning.json', 'w') as transition_file:
    json.dump(transition_tuning, transition_file)

{'TRAIN_JOB_ID': 'dsl_demo_tuning_lgbm_1570212734', 'TRAIN_JOB_DIR': 'gs://ai4ops/ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734', 'TRAIN_STATE': 'SUCCEEDED', 'TRAINED_MODEL': 'gs://ai4ops/ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/model', 'IS_TUNING': True}


In [34]:
# Fetch the description of a tuning job through the ML session.
# NOTE(review): the job id is hard-coded to an earlier run instead of using
# TUNING_JOB_NAME — confirm this is intended.
ml_session = session.get_ml_session()
AIPlatformJobExecutor.get_job(ml_session, f'projects/{ml_session.project_id}/jobs/dsl_demo_tuning_lgbm_1570212734')

{'jobId': 'dsl_demo_tuning_lgbm_1570212734',
 'trainingInput': {'scaleTier': 'CUSTOM',
  'masterType': 'large_model',
  'workerType': 'large_model',
  'parameterServerType': 'large_model',
  'workerCount': '4',
  'parameterServerCount': '3',
  'args': ['--is_hyperparameters_tuning',
   'True',
   '--bucket_id',
   'ai4ops',
   '--train_data_path_gs',
   'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-TRAIN-1570198312.csv',
   '--val_data_path_gs',
   'demo_models/lgbm/input/dsl_ai4ops_demo_signature_1570198312/LGBM-VAL-1570198312.csv',
   '--err_log_path_gs',
   'demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/output',
   '--trained_model_path_gs',
   'demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/model',
   '--boosting_type',
   'gbdt',
   '--n_jobs',
   '-1',
   '--early_stopping_rounds',
   '10',
   '--importance_type',
   'split',
   '--categorical_feature',
   'metric_class',
   '--target',
   'var1(t)',
   '--excluded',
   'time,metric,metric_id'],
 

<a id="deployment"></a>
## Trained Model Deployment
See [AI Platform Models](https://console.cloud.google.com/mlengine/models).
<br/>[back to Table Of Contents](#tc)

In [11]:
# The deployment sources live in the model folder.
SCRIPT_PATH = f"{PROJECT_PATH}/model"

# Read back what the tuning step recorded.
train_out = get_transition('transitions/dsl_transition_tuning.json')

AI_PLATFORM_MODEL_BASE_VERSION = 'v1'

# Each value falls back to an empty string when its key is missing.
TRAIN_JOB_ID, TRAIN_JOB_DIR, TRAINED_MODEL, IS_TUNING = (
    train_out.get(key, '')
    for key in ('TRAIN_JOB_ID', 'TRAIN_JOB_DIR', 'TRAINED_MODEL', 'IS_TUNING')
)

In [12]:
# Reload the signature hand-over from disk instead of relying on the variable left
# behind by the training section, so this section also runs on a fresh kernel.
signature_transition = get_transition('transitions/dsl_transition_signature.json')

# Artifact locations recorded by the signature step (empty string when missing).
SIGNATURE_SCALER = signature_transition.get('SIGNATURE_SCALER', '')
SIGNATURE_DROP_KEYS = signature_transition.get('SIGNATURE_DROP_KEYS', '')
SIGNATURE_BUCKET = signature_transition.get('SIGNATURE_BUCKET', '')

DEPLOYMENT_PATH = 'deployment'

MODEL_NAME = AI_PALTFORM_MODEL_NAME
VERSION_NAME = AI_PLATFORM_MODEL_BASE_VERSION
OBJECTIVE_VALUE_IS_MAXIMUM_NEEDED = False

# Staging folder handed to the model artifacts.
artifacts_path = f'gs://{BUCKET}/{DEPLOYMENT_PATH}/{MODEL_NAME}_{VERSION_NAME}/stage'

In [14]:
# Version settings sent with the deployment request.
deploy_job_input = {
    'pythonVersion': "3.5",
    'deploymentUri': TRAIN_JOB_DIR,
    'autoScaling': {'minNodes': 1},
    'runtimeVersion': '1.13',
    'predictionClass': 'custom_predictor.LGBMPredictor'
}

# Model with the custom predictor and the two signature artifacts (scaler, drop keys).
# NOTE(review): is_tuning is hard-coded to True although IS_TUNING is loaded from the
# tuning transition file — confirm which one is intended.
model = (
    ModelBuilder()
    .name(AI_PALTFORM_MODEL_NAME)
    .is_tuning(True)
    .files_root(SCRIPT_PATH)
    .custom_predictor_path("custom_predictor/custom_predictor.py")
    .artifact(Artifact(f'gs://{BUCKET}/{SIGNATURE_SCALER}', "scaler.pkl", artifacts_path))
    .artifact(Artifact(f'gs://{BUCKET}/{SIGNATURE_DROP_KEYS}', "drop_keys.txt", artifacts_path))
    .build()
)

deploy_job_builder = AIJobBuilder()

# Attach the training job id to the built model before it is placed into the job.
model.train_job_id = TRAIN_JOB_ID

deploy_job = (
    deploy_job_builder.model(model)
    .deploy_input(deploy_job_input)
    .name(f'{TRAIN_JOB_ID}_{VERSION_NAME}')
    .job_dir(TRAIN_JOB_DIR)
    .build()
)

In [15]:
# Build the executor for the deployment job.
# NOTE(review): 5 and 5 are positional executor settings (presumably wait delay and
# number of tries — confirm).
deploy_executor = AIPlatformJobExecutor(session, deploy_job, 5,5)
# Deploy version VERSION_NAME for the training job TRAIN_JOB_ID.
# NOTE(review): create_new_model=False presumably reuses an existing model — confirm.
deploy_executor.submit_deploy_model_job(VERSION_NAME, TRAIN_JOB_ID, create_new_model=False)

running sdist
running egg_info
creating custom_predictor.egg-info
writing custom_predictor.egg-info/PKG-INFO
writing dependency_links to custom_predictor.egg-info/dependency_links.txt
writing top-level names to custom_predictor.egg-info/top_level.txt
writing manifest file 'custom_predictor.egg-info/SOURCES.txt'
reading manifest file 'custom_predictor.egg-info/SOURCES.txt'
writing manifest file 'custom_predictor.egg-info/SOURCES.txt'





running check






creating custom_predictor-1.0
creating custom_predictor-1.0/custom_predictor.egg-info
copying files to custom_predictor-1.0...
copying custom_predictor.py -> custom_predictor-1.0
copying setup.py -> custom_predictor-1.0
copying custom_predictor.egg-info/PKG-INFO -> custom_predictor-1.0/custom_predictor.egg-info
copying custom_predictor.egg-info/SOURCES.txt -> custom_predictor-1.0/custom_predictor.egg-info
copying custom_predictor.egg-info/dependency_links.txt -> custom_predictor-1.0/custom_predictor.egg-info
copying custom_predictor.egg-info/top_level.txt -> custom_predictor-1.0/custom_predictor.egg-info
Writing custom_predictor-1.0/setup.cfg
creating dist
Creating tar archive
removing 'custom_predictor-1.0' (and everything under it)
MAX is needed: False
Best trial path:gs://ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/model_trial_8
gs://ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/model_trial_8


{'name': 'projects/gd-gcp-techlead-experiments/models/ai4ops_lgbm_throughput/versions/v1',
 'deploymentUri': 'gs://ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/model_trial_8',
 'createTime': '2019-10-07T13:40:03Z',
 'runtimeVersion': '1.13',
 'autoScaling': {'minNodes': 1},
 'state': 'CREATING',
 'packageUris': ['gs://ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/staging/custom_predictor-1.0.tar.gz'],
 'etag': '6grvXV+tfro=',
 'machineType': 'mls1-c1-m2',
 'pythonVersion': '3.5',
 'predictionClass': 'custom_predictor.LGBMPredictor'}

In [16]:
# Hand-over record of the deployment step: model identity, model and staging
# folders, and the deployment-relative artifact paths.
transition_deployment = {
    "MODEL_NAME": MODEL_NAME,
    "VERSION_NAME": VERSION_NAME,
    "MODEL_DIR": TRAINED_MODEL,
    "STAGING_DIR": artifacts_path,
    "DEPLOYMENT": f"{DEPLOYMENT_PATH}/{MODEL_NAME}_{VERSION_NAME}",
    "SCALER": f"{DEPLOYMENT_PATH}/{MODEL_NAME}_{VERSION_NAME}/scaler.pkl",
    "DROP_KEYS": f"{DEPLOYMENT_PATH}/{MODEL_NAME}_{VERSION_NAME}/drop_keys.txt"
}

print(transition_deployment)

# Create the target folder first so the write also succeeds on a fresh checkout.
os.makedirs('transitions', exist_ok=True)
with open('transitions/dsl_transition_deployment.json', 'w') as transition_file:
    json.dump(transition_deployment, transition_file)

{'MODEL_NAME': 'ai4ops_lgbm_throughput', 'VERSION_NAME': 'v1', 'MODEL_DIR': 'gs://ai4ops/demo/models/lightgbm/dsl_demo_tuning_lgbm_1570212734/model', 'STAGING_DIR': 'gs://ai4ops/deployment/ai4ops_lgbm_throughput_v1/stage', 'DEPLOYMENT': 'deployment/ai4ops_lgbm_throughput_v1', 'SCALER': 'deployment/ai4ops_lgbm_throughput_v1/scaler.pkl', 'DROP_KEYS': 'deployment/ai4ops_lgbm_throughput_v1/drop_keys.txt'}
