# BasicTick: Create Everything
This notebook will use the AWS boto3 APIs to create the needed resources for the basic tick application.

## AWS Resources Created
- Database   
- Changeset to add data to database   
- Scaling Group that will contain all clusters   
- Shared Volume   
- Dataview of database on the shared volume   
- Clusters: TP, HDB, Gateway, and RDB   

### Non AWS
- Feedhandler (run locally) to push data to TP   

In [1]:
%%html
<style>
table {float:left}
</style>

In [2]:
import os
import subprocess
import boto3
import json
import datetime

import pykx as kx

from managed_kx import *
from env import *

# Cluster names and database
from basictick_setup import *

# ----------------------------------------------------------------

# Source data directory
SOURCE_DATA_DIR="hdb"

# Code directory
CODEBASE="basictick"

# S3 Destinations
S3_CODE_PATH="code"
S3_DATA_PATH="data"

NODE_TYPE="kx.sg.4xlarge"

DATABASE_CONFIG=[{ 
    'databaseName': DB_NAME,
    'dataviewName': DBVIEW_NAME
    }]
CODE_CONFIG={ 's3Bucket': S3_BUCKET, 's3Key': f'{S3_CODE_PATH}/{CODEBASE}.zip' }

NAS1_CONFIG= {
        'type': 'SSD_250',
        'size': 1200
}

RDB_INIT_SCRIPT='rdbmkdb.q'
RDB_CMD_ARGS=[
    { 'key': 's', 'value': '2' }, 
    { 'key': 'dbname', 'value': DB_NAME}, 
    { 'key': 'tp', 'value': TP_CLUSTER_NAME }, 
    { 'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6' },
]

TP_INIT_SCRIPT='tpmkdb.q'
TP_CMD_ARGS=[
    { 'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6' },
]

HDB_INIT_SCRIPT='hdbmkdb.q'
HDB_CMD_ARGS=[
    { 'key': 's', 'value': '2' }, 
    { 'key': 'dbname', 'value': DB_NAME}, 
    { 'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6' },
]

GW_INIT_SCRIPT='gwmkdb.q'
GW_CMD_ARGS=[
    { 'key': 's', 'value': '2' }, 
    { 'key': 'rdb_name', 'value': RDB_CLUSTER_NAME}, 
    { 'key': 'hdb_name', 'value': HDB_CLUSTER_NAME}, 
]


In [3]:
# triggers credential get
session=None

try:
    # aws: use ada for credentials
    os.system(["which", "ada"])
    os.system(f"ada credentials update --account={ACCOUNT_ID} --provider=isengard --role=Admin --once")
except: 
    None

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: using access variables
    session = boto3.Session()
else:
    print("Using variables ...")
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Create the Database
Create a database from the supplied data in hdb.tar.gz.  

## Untar HDB Data in hdb.tar.gz
Data will be found in hdb directory

In [4]:
!tar -xf hdb.tar.gz

In [5]:
!ls -la hdb

total 24
drwxr-xr-x 12 ec2-user ec2-user   245 Apr 24  2023 .
drwxrwxr-x  6 ec2-user ec2-user  4096 Feb 13 16:02 ..
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.14
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.15
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.16
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.17
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.18
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.19
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.20
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.21
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.22
drwxr-xr-x  3 ec2-user ec2-user    29 Apr 24  2023 2023.04.23
-rw-r--r--  1 ec2-user ec2-user 16392 Apr 24  2023 sym


## Stage HDB Data on S3
Using AWS cli, copy hdb to staging bucket

In [6]:
S3_DEST=f"s3://{S3_BUCKET}/{S3_DATA_PATH}/{SOURCE_DATA_DIR}/"

if AWS_ACCESS_KEY_ID is not None:
    cp = f"""
export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}

aws s3 sync  --exclude .DS_Store {SOURCE_DATA_DIR} {S3_DEST}
aws s3 ls {S3_DEST}
"""
else:
    cp = f"""
aws s3 sync  --exclude .DS_Store {SOURCE_DATA_DIR} {S3_DEST}
aws s3 ls {S3_DEST}
"""
    
# execute the S3 copy
os.system(cp)

                           PRE 2023.04.14/
                           PRE 2023.04.15/
                           PRE 2023.04.16/
                           PRE 2023.04.17/
                           PRE 2023.04.18/
                           PRE 2023.04.19/
                           PRE 2023.04.20/
                           PRE 2023.04.21/
                           PRE 2023.04.22/
                           PRE 2023.04.23/
2023-10-18 17:14:47      16392 sym


0

## Create Managed Database
Using the AWS APIs, create a managed database in Managed kdb Insights.

In [7]:
# assume it exists
create_db=False

try:
    resp = client.get_kx_database(environmentId=ENV_ID, databaseName=DB_NAME)
    resp.pop('ResponseMetadata', None)
except:
    # does not exist, will create
    create_db=True

if create_db:
    print(f"CREATING Database: {DB_NAME}")
    resp = client.create_kx_database(environmentId=ENV_ID, databaseName=DB_NAME, description="Basictick kdb database")
    resp.pop('ResponseMetadata', None)

    print(f"CREATED Database: {DB_NAME}")

print(json.dumps(resp,sort_keys=True,indent=4,default=str))

CREATING Database: basictickdb
CREATED Database: basictickdb
{
    "createdTimestamp": "2024-02-13 16:04:27.175000+00:00",
    "databaseArn": "arn:aws:finspace:us-east-1:829845998889:kxEnvironment/jlcenjvtkgzrdek2qqv7ic/kxDatabase/basictickdb",
    "databaseName": "basictickdb",
    "description": "Basictick kdb database",
    "environmentId": "jlcenjvtkgzrdek2qqv7ic",
    "lastModifiedTimestamp": "2024-02-13 16:04:27.175000+00:00"
}


## Add HDB Data to Database
Add the data in the local hdb directory to the managed database using the changeset mechanism. The Data will be copied to S3 then ingested with the create-kx-changeset API.

In [8]:
changes=[]

for f in os.listdir(f"{SOURCE_DATA_DIR}"):
    if os.path.isdir(f"{SOURCE_DATA_DIR}/{f}"):
        changes.append( { 'changeType': 'PUT', 's3Path': f"{S3_DEST}{f}/", 'dbPath': f"/{f}/" } )
    else:
        changes.append( { 'changeType': 'PUT', 's3Path': f"{S3_DEST}{f}", 'dbPath': f"/" } )
        
resp = client.create_kx_changeset(environmentId=ENV_ID, databaseName=DB_NAME, 
    changeRequests=changes)

resp.pop('ResponseMetadata', None)
changeset_id = resp['changesetId']

print("Changeset...")
print(json.dumps(resp,sort_keys=True,indent=4,default=str))

Changeset...
{
    "changeRequests": [
        {
            "changeType": "PUT",
            "dbPath": "/2023.04.23/",
            "s3Path": "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.23/"
        },
        {
            "changeType": "PUT",
            "dbPath": "/2023.04.15/",
            "s3Path": "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.15/"
        },
        {
            "changeType": "PUT",
            "dbPath": "/2023.04.14/",
            "s3Path": "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.14/"
        },
        {
            "changeType": "PUT",
            "dbPath": "/2023.04.22/",
            "s3Path": "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.22/"
        },
        {
            "changeType": "PUT",
            "dbPath": "/2023.04.18/",
            "s3Path": "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.18/"
        },
        {
            "changeType": "PUT",
            "dbPath": "/2023.04.20/",
            "s3Path": "s3://kdb-demo-829

In [9]:
wait_for_changeset_status(client, environmentId=ENV_ID, databaseName=DB_NAME, changesetId=changeset_id, show_wait=True)
print("**Done**")

Status is IN_PROGRESS, total wait 0:00:00, waiting 10 sec ...
Status is IN_PROGRESS, total wait 0:00:10, waiting 10 sec ...
**Done**


In [10]:
note_str = ""

c_set_list = list_kx_changesets(client, environmentId=ENV_ID, databaseName=DB_NAME)

if len(c_set_list) == 0:
    note_str = "<<Could not get changesets>>"
    
print(100*"=")
print(f"Database: {DB_NAME}, Changesets: {len(c_set_list)} {note_str}")
print(100*"=")

# sort by create time
c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp']) 

for c in c_set_list:
    c_set_id = c['changesetId']
    print(f"  Changeset: {c_set_id}: Created: {c['createdTimestamp']} ({c['status']})")
    c_rqs = client.get_kx_changeset(environmentId=ENV_ID, databaseName=DB_NAME, changesetId=c_set_id)['changeRequests']

    chs_pdf = pd.DataFrame.from_dict(c_rqs).style.hide(axis='index')
    display(chs_pdf)

Database: basictickdb, Changesets: 1 
  Changeset: GMbRmzL61rRX1jXmU2zGlg: Created: 2024-02-13 16:04:27.768000+00:00 (COMPLETED)


changeType,s3Path,dbPath
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.23/,/2023.04.23/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.15/,/2023.04.15/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.14/,/2023.04.14/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.22/,/2023.04.22/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.18/,/2023.04.18/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.20/,/2023.04.20/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.16/,/2023.04.16/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.17/,/2023.04.17/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.21/,/2023.04.21/
PUT,s3://kdb-demo-829845998889-kms/data/hdb/2023.04.19/,/2023.04.19/


# Create Scaling Group
The scaling group represents the total compute avilable to the application. All clusters will be placed into the scaling group ans share the compute and memory of the scaling group.

In [11]:
resp = client.create_kx_scaling_group(
    environmentId = ENV_ID, 
    scalingGroupName = SCALING_GROUP_NAME,
    hostType=NODE_TYPE,
    availabilityZoneId = AZ_ID
)

In [12]:
resp

{'ResponseMetadata': {'RequestId': '056438c6-9a08-423d-bafc-de227f027697',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '245',
   'connection': 'keep-alive',
   'date': 'Tue, 13 Feb 2024 16:04:51 GMT',
   'x-amzn-requestid': '056438c6-9a08-423d-bafc-de227f027697',
   'x-amz-apigw-id': 'TFPtUGx5oAMEGTw=',
   'x-amzn-trace-id': 'Root=1-65cb9321-78b3a3f80bbc8eef198171e3',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d5710f445906ae917df909d01c495c9e.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'qo0LdQR8UVy9bg0Mnink-YBq3A5-xCMPcKYN-zhd-QDoGGGq4EceBw=='},
  'RetryAttempts': 0},
 'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'scalingGroupName': 'SCALING_GROUP_basictickdb',
 'hostType': 'kx.sg.4xlarge',
 'availabilityZoneId': 'use1-az6',
 'status': 'CREATING',
 'lastModifiedTimestamp': datetime.datetime(2024, 2, 13, 16, 4, 50, 803000, tzinfo=tzlocal()),
 'createdTimestamp': datetime.datetime(2024, 2,

# Create Shared Volume
The shared volume is a common storage device for the application. Every cluster using the shared volume will have a writable directory named after the cluster, can read the directories named after other clusters in the application using the volume. Also, there is a common 

In [13]:
resp = client.create_kx_volume(
    environmentId = ENV_ID, 
    volumeType = 'NAS_1',
    volumeName = VOLUME_NAME,
    description = 'Shared volume between TP and RDB',
    nas1Configuration = NAS1_CONFIG,
    azMode='SINGLE',
    availabilityZoneIds=[ AZ_ID ]    
)

In [14]:
resp

{'ResponseMetadata': {'RequestId': '6e2fd5f2-4334-49c2-bce6-9de7155ece56',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '435',
   'connection': 'keep-alive',
   'date': 'Tue, 13 Feb 2024 16:04:52 GMT',
   'x-amzn-requestid': '6e2fd5f2-4334-49c2-bce6-9de7155ece56',
   'x-amz-apigw-id': 'TFPtkEvNIAMEZFA=',
   'x-amzn-trace-id': 'Root=1-65cb9323-67e0fd8155d27a9850420e6b',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d5710f445906ae917df909d01c495c9e.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'JP8G3ADX1HJ9IIDJlsUigbhKgcbRi_y3yqfr2NkzQV32F_0AbzB59Q=='},
  'RetryAttempts': 0},
 'environmentId': 'jlcenjvtkgzrdek2qqv7ic',
 'volumeName': 'RDB_TP_SHARED',
 'volumeType': 'NAS_1',
 'volumeArn': 'arn:aws:finspace:us-east-1:829845998889:kxEnvironment/jlcenjvtkgzrdek2qqv7ic/kxVolume/RDB_TP_SHARED',
 'nas1Configuration': {'type': 'SSD_250', 'size': 1200},
 'status': 'CREATING',
 'azMode': 'SINGLE',
 'desc

# Wait for Volume and Scaling Group
Before proceeding to use Volumes and Scaling groups, wait for their creation to complete.

In [15]:
# wait for the scaling group to create
wait_for_scaling_group_status(client=client, environmentId=ENV_ID, scalingGroupName=SCALING_GROUP_NAME, show_wait=True)
print("** DONE **")

# wait for the volume to create
wait_for_volume_status(client=client, environmentId=ENV_ID, volumeName=VOLUME_NAME, show_wait=True)
print("** DONE **")

Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:00:00, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:00:30, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:01:00, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:01:30, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:02:00, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:02:30, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:03:00, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:03:30, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:04:00, waiting 30 sec ...
Scaling Group: SCALING_GROUP_basictickdb status is CREATING, total wait 0:04:30, waiting 30 sec ...


# Create Dataview
Create a dataview, for a specific (static) version of the database and have all of its data cached using the shared volume.

In [16]:
# sort by create time
c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp']) 

resp = client.create_kx_dataview(
    environmentId = ENV_ID, 
    databaseName=DB_NAME, 
    dataviewName=DBVIEW_NAME,
    azMode='SINGLE',
    availabilityZoneId=AZ_ID,
    changesetId=c_set_list[-1]['changesetId'],
    segmentConfigurations=[
        { 
            'dbPaths': ['/*'],
            'volumeName': VOLUME_NAME
        }
    ],
    autoUpdate=False,
    description = f'Dataview of database'
)

In [17]:
# wait for the view to create
wait_for_dataview_status(client=client, environmentId=ENV_ID, databaseName=DB_NAME, dataviewName=DBVIEW_NAME, show_wait=True)
print("** DONE **")

Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:00:00, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:00:30, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:01:00, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:01:30, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:02:00, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:02:30, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:03:00, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:03:30, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:04:00, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:04:30, waiting 30 sec ...
Dataview: basictickdb_DBVIEW status is CREATING, total wait 0:05:00, waiting 30 sec ...
Dataview: basictickdb_DBVIEW sta

# Create Clusters
With foundation resources now completed, create the needed clusters for the application.

## Stage Code to S3
Code to be used in this application must be staged to an S3 bucket the service can read from, that code will then be deployed to the clusters as part of their creation workflow.

In [18]:
# zip the code
os.system(f"cd {CODEBASE}; zip -r -X ../{CODEBASE}.zip . -x '*.ipynb_checkpoints*';")

# copy code to S3
if AWS_ACCESS_KEY_ID is not None:
    cp = f"""
export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}

aws s3 cp  --exclude .DS_Store {CODEBASE}.zip s3://{S3_BUCKET}/code/{CODEBASE}.zip
aws s3 ls s3://{S3_BUCKET}/code/
"""
else:
    cp = f"""
aws s3 cp  --exclude .DS_Store {CODEBASE}.zip s3://{S3_BUCKET}/code/{CODEBASE}.zip
aws s3 ls s3://{S3_BUCKET}/code/
"""
    
# execute the S3 copy
os.system(cp)

  adding: connectmkdb.q (deflated 63%)
  adding: example.schema.q (deflated 12%)
  adding: feedmkdb.q (deflated 53%)
  adding: funcDownHandle.q (deflated 33%)
  adding: gwmkdbcluster.q (deflated 61%)
  adding: hdbmkdb.q (deflated 44%)
  adding: loadDep.q (deflated 58%)
  adding: query.q (deflated 19%)
  adding: rdbmkdb.q (deflated 55%)
  adding: tp.q (deflated 52%)
  adding: tpmkdb.q (deflated 52%)
  adding: gwmkdb.q (deflated 61%)
  adding: aws.q (deflated 73%)
upload: ./basictick.zip to s3://kdb-demo-829845998889-kms/code/basictick.zip
2023-06-05 21:25:21          0 
2024-02-13 16:19:33      13775 basictick.zip
2024-01-09 20:42:41        542 code.zip
2023-12-21 19:47:37        574 codebundle.zip
2024-02-02 21:34:56        582 codebundle1.zip
2023-12-21 21:26:00        582 codebundle2.zip
2023-11-22 14:58:53       1530 jpmc_code.zip
2024-01-01 19:57:08      33781 kdb-tick-flat-largetable.zip
2023-12-30 22:56:33      38867 kdb-tick-flat.zip
2024-01-08 13:05:33      28741 kdb-tick.zip
2

0

## Create Tickerplant (TP) Cluster
Tickerplant will deliver data from feedhandler to subscribing RDB.

In [19]:
resp = client.create_kx_cluster(
    environmentId=ENV_ID, 
    clusterName=TP_CLUSTER_NAME,
    clusterType='TICKERPLANT',
    releaseLabel = '1.0',
    executionRole=EXECUTION_ROLE,
    scalingGroupConfiguration={
#        'cpu': 1,
#        'memoryLimit': 6,
        'memoryReservation': 6,
        'nodeCount': 1,
        'scalingGroupName': SCALING_GROUP_NAME,
    },
#    savedownStorageConfiguration ={ 'volumeName': VOLUME_NAME },
    tickerplantLogConfiguration ={ 'tickerplantLogVolumes': [ VOLUME_NAME ] },
    clusterDescription="Created with create_all notebook",
    code=CODE_CONFIG,
    initializationScript=TP_INIT_SCRIPT,
    commandLineArguments=TP_CMD_ARGS,
    azMode=AZ_MODE,
    availabilityZoneId=AZ_ID,
    vpcConfiguration={ 
        'vpcId': VPC_ID,
        'securityGroupIds': SECURITY_GROUPS,
        'subnetIds': SUBNET_IDS,
        'ipAddressType': 'IP_V4' }
)

In [20]:
resp

{'ResponseMetadata': {'RequestId': '53cc4b44-70ea-43be-83eb-127613cb7f73',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '1179',
   'connection': 'keep-alive',
   'date': 'Tue, 13 Feb 2024 16:19:38 GMT',
   'x-amzn-requestid': '53cc4b44-70ea-43be-83eb-127613cb7f73',
   'x-amz-apigw-id': 'TFR3UGiGoAMEc2g=',
   'x-amzn-trace-id': 'Root=1-65cb9694-581924657a93fb96117a19fd',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d5710f445906ae917df909d01c495c9e.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': '2Suq522Z8ILXJh-GBObBsk9vRUD8t4v05IvYhEGd6A02f1nkKHdREw=='},
  'RetryAttempts': 0},
 'status': 'PENDING',
 'clusterName': 'TP_basictickdb',
 'clusterType': 'TICKERPLANT',
 'tickerplantLogConfiguration': {'tickerplantLogVolumes': ['RDB_TP_SHARED']},
 'volumes': [{'volumeName': 'RDB_TP_SHARED', 'volumeType': 'NAS_1'}],
 'clusterDescription': 'Created with create_all notebook',
 'releaseLabel': '1.0',
 'vpc

## Create historical Database (HDB) Cluster
A 3 node HDB cluster will serve up queries for T+1 and older data.

In [21]:
resp = client.create_kx_cluster(
    environmentId=ENV_ID, 
    clusterName=HDB_CLUSTER_NAME,
    clusterType='HDB',
    releaseLabel = '1.0',
    executionRole=EXECUTION_ROLE,
    databases=DATABASE_CONFIG,
    scalingGroupConfiguration={
#        'cpu': 1,
#        'memoryLimit': 6,
        'memoryReservation': 6,
        'nodeCount': 3,
        'scalingGroupName': SCALING_GROUP_NAME,
    },
    clusterDescription="Created with create_all notebook",
    code=CODE_CONFIG,
    initializationScript=HDB_INIT_SCRIPT,
    commandLineArguments=HDB_CMD_ARGS,
    azMode=AZ_MODE,
    availabilityZoneId=AZ_ID,
    vpcConfiguration={ 
        'vpcId': VPC_ID,
        'securityGroupIds': SECURITY_GROUPS,
        'subnetIds': SUBNET_IDS,
        'ipAddressType': 'IP_V4' }
)

In [22]:
resp

{'ResponseMetadata': {'RequestId': 'f5a2f194-a2d7-4a53-8cc8-a382badaea59',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '1542',
   'connection': 'keep-alive',
   'date': 'Tue, 13 Feb 2024 16:19:41 GMT',
   'x-amzn-requestid': 'f5a2f194-a2d7-4a53-8cc8-a382badaea59',
   'x-amz-apigw-id': 'TFR4MGCHIAMEAoQ=',
   'x-amzn-trace-id': 'Root=1-65cb969a-5f7cf4f8116d848761f09918',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d5710f445906ae917df909d01c495c9e.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'FSiMIxb9cmx8ox8_QqF7EG9AUbmFvoFOTL42pQaKERPdhOgnLWOlzw=='},
  'RetryAttempts': 0},
 'status': 'PENDING',
 'clusterName': 'HDB_basictickdb',
 'clusterType': 'HDB',
 'volumes': [{'volumeName': 'RDB_TP_SHARED', 'volumeType': 'NAS_1'}],
 'databases': [{'databaseName': 'basictickdb',
   'cacheConfigurations': [],
   'dataviewConfiguration': {'dataviewName': 'basictickdb_DBVIEW',
    'dataviewVersionId': 'rMb

## Create Gateway
The Gateway will handle client queries for data in the RDB and HDB. Gateways act as single API access points for data queries and query both the RDB and HDB for data and aggregate results back to requestor.

In [23]:
resp = client.create_kx_cluster(
    environmentId=ENV_ID, 
    clusterName=GW_CLUSTER_NAME,
    clusterType='GATEWAY',
    releaseLabel = '1.0',
    scalingGroupConfiguration={
#        'cpu': 2,
#        'memoryLimit': 6,
        'memoryReservation': 6,
        'nodeCount': 1,
        'scalingGroupName': SCALING_GROUP_NAME,
    },
#    savedownStorageConfiguration ={ 'volumeName': VOLUME_NAME },
    clusterDescription="Created with create_all notebook",
    executionRole=EXECUTION_ROLE,
    code=CODE_CONFIG,
    initializationScript=GW_INIT_SCRIPT,
    commandLineArguments=GW_CMD_ARGS,
    azMode=AZ_MODE,
    availabilityZoneId=AZ_ID,
    vpcConfiguration={ 
        'vpcId': VPC_ID,
        'securityGroupIds': SECURITY_GROUPS,
        'subnetIds': SUBNET_IDS,
        'ipAddressType': 'IP_V4' }
)

In [24]:
resp

{'ResponseMetadata': {'RequestId': '8ad41437-c89b-4209-9391-d5829ea0140e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '1161',
   'connection': 'keep-alive',
   'date': 'Tue, 13 Feb 2024 16:19:44 GMT',
   'x-amzn-requestid': '8ad41437-c89b-4209-9391-d5829ea0140e',
   'x-amz-apigw-id': 'TFR4rFMdIAMEPMQ=',
   'x-amzn-trace-id': 'Root=1-65cb969d-157daa614e5986e70a579574',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d5710f445906ae917df909d01c495c9e.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'BY3yUPUfWvE8IxmQynTnI6U6VTTeesTjFYbe2eWZLrXYwSblWhyvWw=='},
  'RetryAttempts': 0},
 'status': 'PENDING',
 'clusterName': 'GATEWAY_basictickdb',
 'clusterType': 'GATEWAY',
 'volumes': [],
 'clusterDescription': 'Created with create_all notebook',
 'releaseLabel': '1.0',
 'vpcConfiguration': {'vpcId': 'vpc-0fe2b9c50f3ad382f',
  'securityGroupIds': ['sg-0c99f1cfb9c3c7fd9'],
  'subnetIds': ['subnet-04052219

## Create Realtime Database (RDB)
The RDB will subscribe to the tickerplant and capture real time data published by the tickerplant (as published by the feedhandler).

### Wait for TP before creating RDB

In [25]:
wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=TP_CLUSTER_NAME, show_wait=True)
print("TP is running")

Cluster: TP_basictickdb status is PENDING, total wait 0:00:00, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:00:30, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:01:00, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:01:30, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:02:00, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:02:30, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:03:00, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:03:30, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:04:00, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:04:30, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:05:00, waiting 30 sec ...
Cluster: TP_basictickdb status is CREATING, total wait 0:05:30, waiting 30 sec ...
Clust

In [26]:
resp = client.create_kx_cluster(
    environmentId=ENV_ID, 
    clusterName=RDB_CLUSTER_NAME,
    clusterType='RDB',
    releaseLabel = '1.0',
    executionRole=EXECUTION_ROLE,
    databases=DATABASE_CONFIG,
    scalingGroupConfiguration={
#        'cpu': 1,
#        'memoryLimit': 6,
        'memoryReservation': 6,
        'nodeCount': 1,
        'scalingGroupName': SCALING_GROUP_NAME,
    },
    savedownStorageConfiguration ={ 'volumeName': VOLUME_NAME },
    clusterDescription="Created with create_all notebook",
    code=CODE_CONFIG,
    initializationScript=RDB_INIT_SCRIPT,
    commandLineArguments=RDB_CMD_ARGS,
    azMode=AZ_MODE,
    availabilityZoneId=AZ_ID,
    vpcConfiguration={ 
        'vpcId': VPC_ID,
        'securityGroupIds': SECURITY_GROUPS,
        'subnetIds': SUBNET_IDS,
        'ipAddressType': 'IP_V4' }
)

In [27]:
resp

{'ResponseMetadata': {'RequestId': '2e375d13-18f6-4e58-aebe-d855f08731c8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '1631',
   'connection': 'keep-alive',
   'date': 'Tue, 13 Feb 2024 16:35:55 GMT',
   'x-amzn-requestid': '2e375d13-18f6-4e58-aebe-d855f08731c8',
   'x-amz-apigw-id': 'TFUQRHARIAMEf4g=',
   'x-amzn-trace-id': 'Root=1-65cb9a67-799aa87a7a3e6ca7357c968c',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d5710f445906ae917df909d01c495c9e.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD50-C2',
   'x-amz-cf-id': 'nrFg1CxHQ7NdDm5AiFD3muCy9z2iD3PVtduqyU1xfPb89D6aAv800g=='},
  'RetryAttempts': 0},
 'status': 'PENDING',
 'clusterName': 'RDB_basictickdb',
 'clusterType': 'RDB',
 'volumes': [{'volumeName': 'RDB_TP_SHARED', 'volumeType': 'NAS_1'}],
 'databases': [{'databaseName': 'basictickdb',
   'cacheConfigurations': [],
   'dataviewConfiguration': {'dataviewName': 'basictickdb_DBVIEW',
    'dataviewVersionId': 'rMb

## Wait for all clusters to finish creating

In [28]:
# Wait for all clusters to start
for c in all_clusters.values():
    wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=c, show_wait=True)

print("** ALL DONE **")

Cluster: TP_basictickdb status is now RUNNING, total wait 0:00:00
Cluster: RDB_basictickdb status is PENDING, total wait 0:00:00, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:00:30, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:01:00, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:01:30, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:02:00, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:02:30, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:03:00, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:03:30, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:04:00, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:04:30, waiting 30 sec ...
Cluster: RDB_basictickdb status is CREATING, total wait 0:05:00, waiting 30 sec ...
Cluster: RD

# List Clusters

In [29]:
cdf = get_clusters(client, environmentId=ENV_ID)

if cdf is not None:
    cdf = cdf[cdf['clusterName'].isin(all_clusters.values())]

display(cdf)

Unnamed: 0,clusterName,status,clusterType,capacityConfiguration,commandLineArguments,clusterDescription,lastModifiedTimestamp,createdTimestamp,databaseName,cacheConfigurations
4,GATEWAY_basictickdb,RUNNING,GATEWAY,,"[{'key': 's', 'value': '2'}, {'key': 'rdb_name', 'value': 'RDB_basictickdb'}, {'key': 'hdb_name', 'value': 'HDB_basictickdb'}]",Created with create_all notebook,2024-02-13 16:36:33.229000+00:00,2024-02-13 16:19:43.813000+00:00,,
5,HDB_basictickdb,RUNNING,HDB,,"[{'key': 's', 'value': '2'}, {'key': 'dbname', 'value': 'basictickdb'}, {'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6'}]",Created with create_all notebook,2024-02-13 16:36:24.568000+00:00,2024-02-13 16:19:41.460000+00:00,basictickdb,
9,RDB_basictickdb,RUNNING,RDB,,"[{'key': 's', 'value': '2'}, {'key': 'dbname', 'value': 'basictickdb'}, {'key': 'tp', 'value': 'TP_basictickdb'}, {'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6'}]",Created with create_all notebook,2024-02-13 16:49:01.534000+00:00,2024-02-13 16:35:55.246000+00:00,basictickdb,
10,TP_basictickdb,RUNNING,TICKERPLANT,,"[{'key': 'AWS_ZIP_DEFAULT', 'value': '17,2,6'}]",Created with create_all notebook,2024-02-13 16:35:24.743000+00:00,2024-02-13 16:19:37.947000+00:00,,


# Start FeedHandler
All infrastructure is now running, You can start a feedhandler to send data into the running tickerplant (TP).


## From the console
```
$ TP_CONN="<connection string to cluster>"
$ cd basictick
$ q feedmkdb.q -p 5030 -tp $TP_CONN
```

Here we use Python to get the connection string, set environment variables, and run the feedhandler.

In [30]:
# get the connection string
conn_str = get_kx_connection_string(client, environmentId=ENV_ID, clusterName=TP_CLUSTER_NAME, userName=KDB_USERNAME, boto_session=session)

# populate the environment variable with connection string
os.putenv("CONN_STR", conn_str)
os.putenv("FH_PORT", f"{FH_PORT}")

# start q process feedmkdb to connect to the TP at $TP_CONN
subprocess.Popen(f"cd {CODEBASE}; nohup q feedmkdb.q -p $FH_PORT -tp $CONN_STR", shell=True)

# wait for feedhandler to start doing its thing
time.sleep(2)

"connected to tp"
"connected to tp"


In [31]:
# Check Feedhandler connections, should show connected to the 
fh=kx.QConnection(port=FH_PORT)

display( fh ("select process,connected,handle,address from .conn.procs") )

Unnamed: 0,process,connected,handle,address
,,,,
0.0,tp,1b,5i,:tcps://vpce-03780042a641200b8-0y7p4ih5.vpce-svc-0be765235870753a5.us-east-1.vpce.amazonaws.com:443:bob:Host=vpce-03780042a641200b8-0y7p4ih5.vpce-svc-0be765235870753a5.us-east-1.vpce.amazonaws.com&Port=443&User=bob&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEKH%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQDNHqDzDk3vs9Bv01G%2Bj0IgYwAHG8fOSXRtayUJTZ11jAIgWAoxGCGWV0bTPTYx7M3o7Qxn4muwky8FPfW5l1YhHH0q9wIIehAAGgw4Mjk4NDU5OTg4ODkiDOoo2Z3tNDAz%2FOKW4irUAr8JHJ591G%2BXMNpbAUyGEj9oNV9CrhbYG5%2BIAwvujFjcai0NcYnuMo9djbqFyLscv%2BCOxDHTX3pd%2FkLj9muuS6kbTNoyrhXVDPshQ9mGqqUkGVdpnvFbxay4Ky%2F3QckgwA4NHUm5i4yXGYpSX5Gfw6rFOLkhTs0JPfNFhxoFBGYayBMawDz6b2t0X9C4aIe6LxOkhZb7xns%2FMznk02xkDSmkANSM9AK%2B1FNm8qhHM7xDcJmQUVyhC84koVTnPngnI81jdGHIKKOh0EyhVd3YVlf%2FSjFDHw%2BjocotuDntWYzHV4ImIQeUo4IhuVh4qX0YszHe4XDY7e6WI0gt45j0VMG4JL1zV5P0lnx0kLTkSbpBo0v90PDeoCO8yW5G1tea09o0tNUFd3JBnw6u%2FKGCkgq0UmKZqf4ViooWXtDiFiG1WvHQgDcHjotlXYXqwJqyKqqkMmUwgruurgY6vwGOnE4FBbZRs5diqIhBPdCj7TvnHdSldSnO8JUEDW2lPw0GACZSxfgmu9Y%2Bu2Jvc490gkjkKmKK3w60x3j1C9NJhMPPiiadia0zw11eRknsD97nv5pQSBPmfd7jiyeJtWpXu%2BSjAFC7YLmXfd2dIJ%2Fqn4vb7WYGi5Ju7V7%2B7ekdkRxTkmyH3RiTVaZXg4seZQeM3qcwO1tKwV0LtdpsMP%2FdOMN9tZ%2F0rQZqFLC2hVVcaPHent8rZ%2BataVep7qWSpg%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20240213T164906Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIA4CNVNBUU476M7IM4%2F20240213%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=a115b2ab31acc1e57d570bf11a26f54b6e80316ba5cab6535862be8dfec636e8


# All Processes Running

In [32]:
print( f"Last Run: {datetime.datetime.now()}" )

Last Run: 2024-02-13 16:49:08.091687
