# TorQ: Create Everything
This notebook will use the AWS boto3 APIs to create the needed resources for a TorQ based application.

## AWS Resources Created
- Database   
- Changeset to add data to database   
- Scaling Group that will contain all clusters   
- Shared Volume   
- Dataview of database on the shared volume   
- Clusters


In [1]:
%%html
<style>
table {float:left}
</style>

In [2]:
import os
import subprocess
import boto3
import json
import datetime

import pykx as kx

from managed_kx import *
from env import *

from clusters import *

# ----------------------------------------------------------------

TORQ_CODEBASE="TorQ"
TORQ_FINSPACE_CODEBASE="TorQ-Amazon-FinSpace-Starter-Pack"

# Source data directory
SOURCE_DATA_DIR=f"{TORQ_FINSPACE_CODEBASE}/hdb"

# Code directory
CODEBASE="torq_app"

# unique code zipfile name
today=datetime.datetime.now().strftime("%Y%m%d_%H%M")    
CODE_ZIP=f"{CODEBASE}_{today}.zip"

# S3 Destinations
S3_CODE_PATH="code"
S3_DATA_PATH="data"

NODE_TYPE="kx.sg.4xlarge"

DATABASE_CONFIG=[{ 
    'databaseName': DB_NAME,
    'dataviewName': DBVIEW_NAME
    }]
CODE_CONFIG={ 's3Bucket': S3_BUCKET, 's3Key': f'{S3_CODE_PATH}/{CODE_ZIP}' }

NAS1_CONFIG= {
        'type': 'SSD_250',
        'size': 1200
}

#update_type="NO_RESTART"
#update_type="ROLLING"
update_type="FORCE"

In [3]:
# Using credentials and create service client
session = boto3.Session()

# create finspace client
client = session.client(service_name='finspace')

Using Defaults ...


# Stage Fresh Code to S3
Code to be used in this application must be staged to an S3 bucket the service can read from, that code will then be deployed to the clusters as part of their creation workflow.

In [4]:
# zip the code
os.system(f"zip -q -r {CODE_ZIP} TorQ/ TorQ-Amazon-FinSpace-Starter-Pack/ -x '*.ipynb_checkpoints*' -x 'TorQ-Amazon-FinSpace-Starter-Pack/hdb/*' -x '*.git*' -x 'TorQ/tests/*' -x 'TorQ-Amazon-FinSpace-Starter-Pack/terraform-deployment/*'")

# copy code to S3
if AWS_ACCESS_KEY_ID is not None:
    cp = f"""
export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}

aws s3 cp  --exclude .DS_Store {CODE_ZIP} s3://{S3_BUCKET}/code/{CODE_ZIP}
aws s3 ls s3://{S3_BUCKET}/code/{CODE_ZIP}
"""
else:
    cp = f"""
aws s3 cp  --exclude .DS_Store {CODE_ZIP} s3://{S3_BUCKET}/code/{CODE_ZIP}
aws s3 ls s3://{S3_BUCKET}/code/{CODE_ZIP}
"""
    
# execute the S3 copy
os.system(cp)

upload: ./torq_app_20240318_2009.zip to s3://kdb-demo-829845998889-kms/code/torq_app_20240318_2009.zip
2024-03-18 20:09:13    8925181 torq_app_20240318_2009.zip


0

In [5]:
# update clusters in same order as they where created
# this will account for dependencies

for c in clusters:

    # wait for a cluster?
    if c['type'] == "WAIT":
        wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=c['name'], show_wait=True)
        continue

    # cluster must be running to update it
    wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=c['name'], show_wait=True)

    cluster_name = c['name']
    cluster_type = c['type']
    cluster_init = c['init']
    cluster_args = c['args']
    
    print(f"Updating: {cluster_name}")
    
    update_args = {}
    
    update_args['environmentId']=ENV_ID
    update_args['clusterName']=cluster_name 
    update_args['code']= CODE_CONFIG
    update_args['deploymentConfiguration']={
        'deploymentStrategy': update_type
    }
    
    if update_type != "NO_RESTART":
        update_args['initializationScript']=cluster_init
        update_args['commandLineArguments']=cluster_args
 
    display( update_args )
    
    resp = client.update_kx_cluster_code_configuration( **update_args )
 
    display(resp)

Cluster: discovery1 status is now RUNNING, total wait 0:00:00
Updating: discovery1


{'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'clusterName': 'discovery1',
 'code': {'s3Bucket': 'kdb-demo-829845998889-kms',
  's3Key': 'code/torq_app_20240318_2009.zip'},
 'deploymentConfiguration': {'deploymentStrategy': 'FORCE'},
 'initializationScript': 'TorQ-Amazon-FinSpace-Starter-Pack/finspace_torq.q',
 'commandLineArguments': [{'key': 'proctype', 'value': 'discovery'},
  {'key': 'procname', 'value': 'discovery1'},
  {'key': 'jsonlogs', 'value': 'true'},
  {'key': 'noredirect', 'value': 'true'},
  {'key': 's', 'value': '2'}]}

{'ResponseMetadata': {'RequestId': 'a8d305a6-41d0-46a7-9bba-41126cac5319',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 18 Mar 2024 20:09:15 GMT',
   'x-amzn-requestid': 'a8d305a6-41d0-46a7-9bba-41126cac5319',
   'x-amz-apigw-id': 'U13YiFdEIAMEhGQ=',
   'x-amzn-trace-id': 'Root=1-65f89f69-527529b01e61bc6f0a4c6100',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2b74e5ee4d30afba8f9df9907896c5f4.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'pEv0bBpivhb1IhDje_F0Tb-YS5KWFy6vHYNUtBep57uM-7ZI31p4-g=='},
  'RetryAttempts': 0}}

Cluster: rdb1 status is now RUNNING, total wait 0:00:00
Updating: rdb1


{'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'clusterName': 'rdb1',
 'code': {'s3Bucket': 'kdb-demo-829845998889-kms',
  's3Key': 'code/torq_app_20240318_2009.zip'},
 'deploymentConfiguration': {'deploymentStrategy': 'FORCE'},
 'initializationScript': 'TorQ-Amazon-FinSpace-Starter-Pack/finspace_torq.q',
 'commandLineArguments': [{'key': 'proctype', 'value': 'rdb'},
  {'key': 'procname', 'value': 'rdb1'},
  {'key': 'jsonlogs', 'value': 'true'},
  {'key': 'noredirect', 'value': 'true'},
  {'key': 's', 'value': '2'}]}

{'ResponseMetadata': {'RequestId': 'f45d3863-646d-4f83-9d98-ea8afda91fa8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 18 Mar 2024 20:09:15 GMT',
   'x-amzn-requestid': 'f45d3863-646d-4f83-9d98-ea8afda91fa8',
   'x-amz-apigw-id': 'U13Y1G9VoAMEtog=',
   'x-amzn-trace-id': 'Root=1-65f89f6b-6e6a7768359a98e0068e9641',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2b74e5ee4d30afba8f9df9907896c5f4.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'zZMRFQ8zBCbtbCicxgtF0q8tUzg7GHTEZBgKL__FWyuJ5MjB-AL9Dg=='},
  'RetryAttempts': 0}}

Cluster: hdb1 status is now RUNNING, total wait 0:00:00
Updating: hdb1


{'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'clusterName': 'hdb1',
 'code': {'s3Bucket': 'kdb-demo-829845998889-kms',
  's3Key': 'code/torq_app_20240318_2009.zip'},
 'deploymentConfiguration': {'deploymentStrategy': 'FORCE'},
 'initializationScript': 'TorQ-Amazon-FinSpace-Starter-Pack/finspace_torq.q',
 'commandLineArguments': [{'key': 'proctype', 'value': 'hdb'},
  {'key': 'procname', 'value': 'hdb1'},
  {'key': 'jsonlogs', 'value': 'true'},
  {'key': 'noredirect', 'value': 'true'},
  {'key': 's', 'value': '4'}]}

{'ResponseMetadata': {'RequestId': '98f96694-4747-4b3c-b08b-b700d860af05',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 18 Mar 2024 20:09:16 GMT',
   'x-amzn-requestid': '98f96694-4747-4b3c-b08b-b700d860af05',
   'x-amz-apigw-id': 'U13Y7HtSoAMEIAA=',
   'x-amzn-trace-id': 'Root=1-65f89f6c-725b2ba36a7f128c57836a62',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2b74e5ee4d30afba8f9df9907896c5f4.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'peolMkj8HuFrRVzLM_nf9SpVTf0QB3OVJ-txxk16qPWTduv81wBLXw=='},
  'RetryAttempts': 0}}

Cluster: discovery1 status is UPDATING, total wait 0:00:00, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:00:30, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:01:00, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:01:30, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:02:00, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:02:30, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:03:00, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:03:30, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:04:00, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:04:30, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:05:00, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0:05:30, waiting 30 sec ...
Cluster: discovery1 status is UPDATING, total wait 0

{'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'clusterName': 'gateway1',
 'code': {'s3Bucket': 'kdb-demo-829845998889-kms',
  's3Key': 'code/torq_app_20240318_2009.zip'},
 'deploymentConfiguration': {'deploymentStrategy': 'FORCE'},
 'initializationScript': 'TorQ-Amazon-FinSpace-Starter-Pack/finspace_torq.q',
 'commandLineArguments': [{'key': 'proctype', 'value': 'gateway'},
  {'key': 'procname', 'value': 'gateway1'},
  {'key': 'jsonlogs', 'value': 'true'},
  {'key': 'noredirect', 'value': 'true'},
  {'key': 's', 'value': '2'}]}

{'ResponseMetadata': {'RequestId': '37a74256-785d-46c1-a296-b608e0871c2b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 18 Mar 2024 20:22:58 GMT',
   'x-amzn-requestid': '37a74256-785d-46c1-a296-b608e0871c2b',
   'x-amz-apigw-id': 'U15ZBEWJoAMEssw=',
   'x-amzn-trace-id': 'Root=1-65f8a29f-0d7e6937304e16613280fec7',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2b74e5ee4d30afba8f9df9907896c5f4.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'qRzGMC30K7Ky_eGCilckMB2A-0fUuyK1POtxBLH_RYdIHXtV9TdBVg=='},
  'RetryAttempts': 0}}

Cluster: feed1 status is now RUNNING, total wait 0:00:00
Updating: feed1


{'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'clusterName': 'feed1',
 'code': {'s3Bucket': 'kdb-demo-829845998889-kms',
  's3Key': 'code/torq_app_20240318_2009.zip'},
 'deploymentConfiguration': {'deploymentStrategy': 'FORCE'},
 'initializationScript': 'TorQ-Amazon-FinSpace-Starter-Pack/finspace_torq.q',
 'commandLineArguments': [{'key': 'proctype', 'value': 'tradeFeed'},
  {'key': 'procname', 'value': 'tradeFeed1'},
  {'key': 'jsonlogs', 'value': 'true'},
  {'key': 'noredirect', 'value': 'true'},
  {'key': 's', 'value': '2'}]}

{'ResponseMetadata': {'RequestId': '6dea9835-f97d-43e0-935f-e6dcf0e7fe04',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 18 Mar 2024 20:22:59 GMT',
   'x-amzn-requestid': '6dea9835-f97d-43e0-935f-e6dcf0e7fe04',
   'x-amz-apigw-id': 'U15ZdHmsoAMEnZg=',
   'x-amzn-trace-id': 'Root=1-65f8a2a2-04ef600d262f3777688ccd06',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2b74e5ee4d30afba8f9df9907896c5f4.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'eXO4gnRoyFeH0DyznIqLAUJmwwCAK19mmg8q_d0IBUzpmFHXldor-A=='},
  'RetryAttempts': 0}}

Cluster: monitor1 status is now RUNNING, total wait 0:00:00
Updating: monitor1


{'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'clusterName': 'monitor1',
 'code': {'s3Bucket': 'kdb-demo-829845998889-kms',
  's3Key': 'code/torq_app_20240318_2009.zip'},
 'deploymentConfiguration': {'deploymentStrategy': 'FORCE'},
 'initializationScript': 'TorQ-Amazon-FinSpace-Starter-Pack/finspace_torq.q',
 'commandLineArguments': [{'key': 'proctype', 'value': 'monitor'},
  {'key': 'procname', 'value': 'monitor1'},
  {'key': 'jsonlogs', 'value': 'true'},
  {'key': 'noredirect', 'value': 'true'},
  {'key': 's', 'value': '1'}]}

{'ResponseMetadata': {'RequestId': '51a9e455-16a3-45d9-9749-f427896382a9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 18 Mar 2024 20:22:59 GMT',
   'x-amzn-requestid': '51a9e455-16a3-45d9-9749-f427896382a9',
   'x-amz-apigw-id': 'U15ZkECAIAMEOeg=',
   'x-amzn-trace-id': 'Root=1-65f8a2a3-7633f690205a045c75e90545',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2b74e5ee4d30afba8f9df9907896c5f4.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': '083QlkPfr89pPOg1UVE9LzDRHDrEMTrBPci6RCB-28hgnOUTHF8FZA=='},
  'RetryAttempts': 0}}

## Wait for all clusters to finish updating

In [6]:
# Wait for all clusters to start
for c in clusters:
    cluster_name = c['name']
    wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=cluster_name, show_wait=True)

print("** ALL DONE **")

Cluster: discovery1 status is now RUNNING, total wait 0:00:00
Cluster: rdb1 status is now RUNNING, total wait 0:00:00
Cluster: hdb1 status is now RUNNING, total wait 0:00:00
Cluster: discovery1 status is now RUNNING, total wait 0:00:00
Cluster: rdb1 status is now RUNNING, total wait 0:00:00
Cluster: gateway1 status is UPDATING, total wait 0:00:00, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:00:30, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:01:00, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:01:30, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:02:00, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:02:30, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:03:00, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:03:30, waiting 30 sec ...
Cluster: gateway1 status is UPDATING, total wait 0:04:00, waiting 30 sec ...
Cluster: gatewa

# List Clusters

In [7]:
cdf = get_clusters(client, environmentId=ENV_ID)

all_clusters = []

for c in clusters:
    all_clusters.append(c['name'])

if cdf is not None:
    cdf = cdf[cdf['clusterName'].isin(all_clusters)]

display(cdf)

Unnamed: 0,clusterName,status,clusterType,capacityConfiguration,commandLineArguments,clusterDescription,lastModifiedTimestamp,createdTimestamp,databaseName,cacheConfigurations
0,discovery1,RUNNING,GP,,"[{'key': 's', 'value': '2'}, {'key': 'jsonlogs', 'value': 'true'}, {'key': 'noredirect', 'value': 'true'}, {'key': 'procname', 'value': 'discovery1'}, {'key': 'proctype', 'value': 'discovery'}]",Created with create_all notebook,2024-03-18 20:22:32.476000+00:00,2024-03-18 19:34:28.565000+00:00,finspace-database,
1,feed1,RUNNING,GP,,"[{'key': 's', 'value': '2'}, {'key': 'jsonlogs', 'value': 'true'}, {'key': 'noredirect', 'value': 'true'}, {'key': 'procname', 'value': 'tradeFeed1'}, {'key': 'proctype', 'value': 'tradeFeed'}]",Created with create_all notebook,2024-03-18 20:36:18.125000+00:00,2024-03-18 19:51:26.204000+00:00,finspace-database,
2,gateway1,RUNNING,GP,,"[{'key': 's', 'value': '2'}, {'key': 'jsonlogs', 'value': 'true'}, {'key': 'noredirect', 'value': 'true'}, {'key': 'procname', 'value': 'gateway1'}, {'key': 'proctype', 'value': 'gateway'}]",Created with create_all notebook,2024-03-18 20:36:17.656000+00:00,2024-03-18 19:51:22.794000+00:00,finspace-database,
3,hdb1,RUNNING,GP,,"[{'key': 's', 'value': '4'}, {'key': 'jsonlogs', 'value': 'true'}, {'key': 'noredirect', 'value': 'true'}, {'key': 'procname', 'value': 'hdb1'}, {'key': 'proctype', 'value': 'hdb'}]",Created with create_all notebook,2024-03-18 20:22:31.186000+00:00,2024-03-18 19:34:34.442000+00:00,finspace-database,
4,monitor1,RUNNING,GP,,"[{'key': 's', 'value': '1'}, {'key': 'jsonlogs', 'value': 'true'}, {'key': 'noredirect', 'value': 'true'}, {'key': 'procname', 'value': 'monitor1'}, {'key': 'proctype', 'value': 'monitor'}]",Created with create_all notebook,2024-03-18 20:36:20.202000+00:00,2024-03-18 19:51:29.359000+00:00,finspace-database,
5,rdb1,RUNNING,GP,,"[{'key': 's', 'value': '2'}, {'key': 'jsonlogs', 'value': 'true'}, {'key': 'noredirect', 'value': 'true'}, {'key': 'procname', 'value': 'rdb1'}, {'key': 'proctype', 'value': 'rdb'}]",Created with create_all notebook,2024-03-18 20:22:32.112000+00:00,2024-03-18 19:34:31.625000+00:00,finspace-database,


# Logs
You can use AWS contols and log trail to monitor logs, below are terminal based ways to tail the log as well for each cluster.

In [8]:
def get_log_group(environmentId: str, clusterName: str):
    return f"/aws/vendedlogs/finspace/{environmentId}/{clusterName}"

#log_format="detailed"
#log_format="short"
log_format="json"

for c in clusters:
    cluster_name = c['name']

    lg = get_log_group(environmentId=ENV_ID, clusterName=cluster_name)
    
    print( f"aws logs tail {lg} --follow --format {log_format}" )

aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/discovery1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/rdb1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/hdb1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/discovery1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/rdb1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/gateway1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/feed1 --follow --format json
aws logs tail /aws/vendedlogs/finspace/jlcenjvtkgzrdek2qqv7ic/monitor1 --follow --format json


# All Processes Running

In [9]:
print( f"Last Run: {datetime.datetime.now()}" )

Last Run: 2024-03-18 20:36:44.265616
