# Create Cluster: HDB
This notebook will create (start) an HDB cluster on a named database.

In [1]:
%%html
<style>
/* Float every rendered <table> to the left of its container. */
table {float:left}
</style>

## Setup

### Node Types
|Type|Mem (GB)|vCPUs|
|:---|---:|---:|
|kx.s.large|12|2|
|kx.s.xlarge|27|4|
|kx.s.2xlarge|54|8|
|kx.s.4xlarge|108|16|
|kx.s.8xlarge|216|32|
|kx.s.16xlarge|432|64|
|kx.s.32xlarge|864|128|


In [2]:
import datetime
import json
import os
import shutil
import subprocess
import time

import boto3
import pandas as pd

from managed_kx import *
from env2 import *

# ---------------------------------------------------------------------------
# Names: the Managed KX database to serve and the cluster created for it
# ---------------------------------------------------------------------------
DB_NAME = "welcomedb"

# Kept as a string: it is passed verbatim as the value of the 's' argument below
SEC_THREADS = '4'
CLUSTER_NAME = f"HDB_{DB_NAME}"

# ---------------------------------------------------------------------------
# Code package: name of the zip (without extension) and its S3 prefix
# ---------------------------------------------------------------------------
CODEBASE = "code"
S3_CODE_PATH = "code"

# Database paths to place in the cluster cache
DB_PATHS = ['/']

# ---------------------------------------------------------------------------
# Request fragments handed to create_kx_cluster
# ---------------------------------------------------------------------------
CAPACITY_CONFIG = {
    'nodeCount': 3,
    'nodeType': 'kx.s.2xlarge',
}

DATABASE_CONFIG = [
    {
        'databaseName': DB_NAME,
        'cacheConfigurations': [
            {'dbPaths': DB_PATHS, 'cacheType': 'CACHE_1000'},
        ],
    },
]

CACHE_CONFIG = [
    {'type': 'CACHE_1000', 'size': 1200},
]

# S3_BUCKET is not defined in this cell; it must already be in scope
CODE_CONFIG = {
    's3Bucket': S3_BUCKET,
    's3Key': f'{S3_CODE_PATH}/{CODEBASE}.zip',
}

# Script inside the code package, and the key/value arguments passed to the cluster
INIT_SCRIPT = f'{CODEBASE}/init.q'
CMD_ARGS = [
    {'key': 's', 'value': SEC_THREADS},
    {'key': 'dbname', 'value': DB_NAME},
    {'key': 'codebase', 'value': CODEBASE},
]

In [3]:
# Refresh local AWS credentials (best effort), then build the boto3 session
# and the FinSpace client used by the rest of the notebook.
session = None

# `ada` is an optional credential helper. Only invoke it when it is actually
# on PATH, pass the arguments as a list (no shell string interpolation), and
# never let a failure here stop the notebook: a session can still be built
# from the explicit variables or boto3's defaults below.
if shutil.which("ada") is not None:
    try:
        subprocess.run(
            [
                "ada", "credentials", "update",
                f"--account={ACCOUNT_ID}",
                "--provider=isengard",
                "--role=Admin",
                "--once",
            ],
            check=False,
        )
    except OSError as err:
        print(f"Skipping ada credential refresh: {err}")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: no explicit keys, boto3 resolves credentials itself
    session = boto3.Session()
else:
    print("Using variables ...")
    # create AWS session: using access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


## Check Database

In [4]:
# List the database's changesets, oldest first, and display the change
# requests recorded in each one.
note_str = ""

c_set_list = []

try:
    c_set_list = client.list_kx_changesets(environmentId=ENV_ID, databaseName=DB_NAME)['kxChangesets']
except Exception as err:
    # Best effort: keep the notebook running with an empty list, but report
    # why the lookup failed instead of hiding it. Catching Exception (not a
    # bare except) lets KeyboardInterrupt still stop the cell.
    note_str = "<<Could not get changesets>>"
    print(f"list_kx_changesets failed: {err!r}")

print(100*"=")
print(f"Database: {DB_NAME}, Changesets: {len(c_set_list)} {note_str}")
print(100*"=")

# sort by create time
c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp'])

for c in c_set_list:
    c_set_id = c['changesetId']
    print(f"Changeset ({c['status']}): {c_set_id}: Created: {c['createdTimestamp']}")

    # The listing does not supply the change requests; fetch them per changeset
    c_rqs = client.get_kx_changeset(environmentId=ENV_ID, databaseName=DB_NAME, changesetId=c_set_id)['changeRequests']

    # Render as a table with the row index hidden
    chs_pdf = pd.DataFrame.from_dict(c_rqs).style.hide(axis='index')
    display(chs_pdf)

Database: welcomedb, Changesets: 1 
Changeset (COMPLETED): VsQ3bsYloWVaDQSZp7fWmw: Created: 2023-05-30 22:53:13.165000+00:00


changeType,s3Path,dbPath
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.23/,/2023.04.23/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.15/,/2023.04.15/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.14/,/2023.04.14/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.22/,/2023.04.22/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.18/,/2023.04.18/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.20/,/2023.04.20/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.16/,/2023.04.16/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.17/,/2023.04.17/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.21/,/2023.04.21/
PUT,s3://kdb-demo-612841383594/data/hdb/2023.04.19/,/2023.04.19/


## Create Cluster

In [5]:
# Submit the cluster creation request; the response is kept in `resp`.
resp = client.create_kx_cluster(
    # -- identity --
    environmentId=ENV_ID,
    clusterName=CLUSTER_NAME,
    clusterType='HDB',
    clusterDescription="Created with create_cluster_HDB notebook",
    releaseLabel='1.0',
    # -- sizing, database and cache --
    capacityConfiguration=CAPACITY_CONFIG,
    databases=DATABASE_CONFIG,
    cacheStorageConfigurations=CACHE_CONFIG,
    # -- code package and start-up --
    code=CODE_CONFIG,
    initializationScript=INIT_SCRIPT,
    commandLineArguments=CMD_ARGS,
    # -- placement and networking --
    azMode=AZ_MODE,
    availabilityZoneId=AZ_ID,
    vpcConfiguration={
        'vpcId': VPC_ID,
        'securityGroupIds': SECURITY_GROUPS,
        'subnetIds': SUBNET_IDS,
        'ipAddressType': 'IP_V4',
    },
)

In [6]:
# Echo the raw create_kx_cluster response as the cell output
resp

{'ResponseMetadata': {'RequestId': '110c62dc-a686-4c80-b497-236d71f15967',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '1102',
   'connection': 'keep-alive',
   'date': 'Tue, 30 May 2023 22:56:12 GMT',
   'x-amzn-requestid': '110c62dc-a686-4c80-b497-236d71f15967',
   'x-amz-apigw-id': 'FwjI2FamoAMFzfQ=',
   'x-amzn-trace-id': 'Root=1-64767f05-77dc6b8867b0a5922dfba3bc',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 2174e600dd54879ba9f49d0337eeb2dc.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD55-P1',
   'x-amz-cf-id': 'ZFuiAQQJr3DaoSTHPT23d4znMeMdFGjcPkW5CEZj0wMr0Lc3TAB-gg=='},
  'RetryAttempts': 0},
 'status': 'PENDING',
 'clusterName': 'HDB_welcomedb',
 'clusterType': 'HDB',
 'databases': [{'databaseName': 'welcomedb',
   'cacheConfigurations': [{'cacheType': 'CACHE_1000', 'dbPaths': ['/']}],
   'changesetId': 'VsQ3bsYloWVaDQSZp7fWmw'}],
 'cacheStorageConfigurations': [{'type': 'CACHE_1000', 'size': 1200}],
 'cluste

In [7]:
# Wait on the new cluster; show_wait=True is passed so the helper reports
# progress while waiting (helper is defined outside this notebook).
wait_for_cluster_status(
    client,
    environmentId=ENV_ID,
    clusterName=CLUSTER_NAME,
    show_wait=True,
)

# Blank line, then the completion banner
print("\n** DONE **")

Cluster: HDB_welcomedb status is PENDING, total wait 0:00:00, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:00:30, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:01:00, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:01:30, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:02:00, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:02:30, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:03:00, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:03:30, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:04:00, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:04:30, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:05:00, waiting 30 sec ...
Cluster: HDB_welcomedb status is CREATING, total wait 0:05:30, waiting 30 sec ...
Cluster: HDB_welc

In [8]:
# Allow time for permissions to propagate after cluster creation
time.sleep(20)

# NOTE(review): the saved output of this cell shows an X-Amz-Security-Token
# embedded in the string; clear the cell output before sharing the notebook.
conn_str = get_kx_connection_string(
    client,
    environmentId=ENV_ID,
    clusterName=CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)
print(conn_str)

:tcps://vpce-0dd6405d5f17bd0b1-csyy0sfn.vpce-svc-053c2d828f26cfa5e.us-east-1.vpce.amazonaws.com:443:bob:Host=vpce-0dd6405d5f17bd0b1-csyy0sfn.vpce-svc-053c2d828f26cfa5e.us-east-1.vpce.amazonaws.com&Port=5000&User=bob&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEGAaCXVzLWVhc3QtMSJGMEQCIBleACTxr8AEhHyjon0s%2BhlDIVs85BZvgH%2FPaGo9JfT0AiA1siPgqh0JNo9HbTNenqnK%2Bk%2F%2BMR6IUWxs%2Bo%2Fwbh7uOyqAAwiY%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAAaDDYxMjg0MTM4MzU5NCIMYI6ewMqpjULxcHlaKtQCbCXKkCp6SkXKfYxOu6vQxbRsqFx1%2FASpzHqlGMIZrjhhJ3WBlGZJJXpgD82CFZM%2F0dQKSyVqeqHYze3dHTjAlGr39aSEN1cu5Bmo6kDUS6Fpjo4pKjQ%2Bvt2E1ZUUG%2BleywNMQWm3hmUjntqQCy38kOANfd5FS4D3v9WTM2iNxh79LG%2F016kxKFCL1Vcch2dnxMaz2pAOoxkGxHVty3VSTCa%2BtGZeY2viqBnixqpqVevMXBVSLhEqHmke5EpSyCu53nSFIfucTRAx1sdAWMQHdWoyAcG2MZLQaNbe1ZwbGIFDsXKxeoE7KF605P6q0yDMU3meczQu7OJmBHYiW2bckO0X5aq2ELstuWBdMJfb6Ubfl8%2BR9a%2BAAS478opGvIcj7qjjuvG8ygnXRdG%2FDl4vd0evE7Wm5zOgkRHuL1Xu4V8kPE4Vh1gBb5sqkLbPy3GxvtxftzC2htqjBjrAAXll4gbH0SBe%2FtD9aX

In [9]:
# Fetch the clusters in this environment (presumably as a DataFrame, given
# the tabular output; confirm against the get_clusters helper) and display them
cdf = get_clusters(client, ENV_ID)

display(cdf)

clusterName,status,clusterType,capacityConfiguration,commandLineArguments,clusterDescription,lastModifiedTimestamp,createdTimestamp,databaseName,cacheConfigurations
HDB_TAQ_2021H1,CREATING,HDB,"{'nodeType': 'kx.s.32xlarge', 'nodeCount': 2}","[{'key': 's', 'value': '32'}, {'key': 'dbname', 'value': 'TAQ_2021H1'}, {'key': 'codebase', 'value': 'taqcode'}]",Created with create_cluster_TAQ_H1 notebook,2023-05-30 23:11:36.953000+00:00,2023-05-30 23:11:20.394000+00:00,TAQ_2021H1,"[{'cacheType': 'CACHE_1000', 'dbPaths': ['/2021.01.04/', '/2021.01.05/', '/2021.01.06/', '/2021.01.07/', '/2021.01.08/', '/2021.01.11/', '/2021.01.12/', '/2021.01.13/', '/2021.01.14/', '/2021.01.15/', '/2021.01.19/', '/2021.01.20/', '/2021.01.21/', '/2021.01.22/', '/2021.01.25/', '/2021.01.26/', '/2021.01.27/', '/2021.01.28/', '/2021.01.29/', '/2021.02.01/', '/2021.02.02/', '/2021.02.03/', '/2021.02.04/', '/2021.02.05/']}]"
HDB_TAQ_2021_2D,CREATING,HDB,"{'nodeType': 'kx.s.32xlarge', 'nodeCount': 2}","[{'key': 's', 'value': '8'}, {'key': 'dbname', 'value': 'TAQ_2021_2D'}, {'key': 'codebase', 'value': 'taqcode'}]",Created with create_cluster_TAQ_2D notebook,2023-05-30 23:09:47.415000+00:00,2023-05-30 23:09:30.508000+00:00,TAQ_2021_2D,"[{'cacheType': 'CACHE_1000', 'dbPaths': ['/2021.01.04/', '/2021.01.05/']}]"
HDB_welcomedb,RUNNING,HDB,"{'nodeType': 'kx.s.2xlarge', 'nodeCount': 3}","[{'key': 's', 'value': '4'}, {'key': 'dbname', 'value': 'welcomedb'}, {'key': 'codebase', 'value': 'code'}]",Created with create_cluster_HDB notebook,2023-05-30 23:13:20.948000+00:00,2023-05-30 22:56:11.773000+00:00,welcomedb,"[{'cacheType': 'CACHE_1000', 'dbPaths': ['/']}]"
RDB_welcomedb,RUNNING,RDB,"{'nodeType': 'kx.s.2xlarge', 'nodeCount': 1}","[{'key': 's', 'value': '8'}, {'key': 'dbname', 'value': 'welcomedb'}, {'key': 'codebase', 'value': 'code'}]",Created with create_cluster_RDB notebook,2023-05-30 23:10:28.477000+00:00,2023-05-30 22:56:52.812000+00:00,welcomedb,[]
cluster_create_delete_db,DELETING,HDB,"{'nodeType': 'kx.s.xlarge', 'nodeCount': 3}","[{'key': 's', 'value': '4'}, {'key': 'dbname', 'value': 'create_delete_db'}, {'key': 'codebase', 'value': 'welcomedb'}]",Demo Cluster for database create_delete_db,2023-05-30 22:48:35.185000+00:00,2023-05-30 22:17:04.188000+00:00,create_delete_db,"[{'cacheType': 'CACHE_1000', 'dbPaths': ['/']}]"
cluster_welcomedb,RUNNING,HDB,"{'nodeType': 'kx.s.xlarge', 'nodeCount': 3}","[{'key': 's', 'value': '4'}, {'key': 'dbname', 'value': 'welcomedb'}, {'key': 'codebase', 'value': 'welcomedb'}]",Demo Cluster for database welcomedb,2023-05-30 23:10:58.551000+00:00,2023-05-30 22:53:31.356000+00:00,welcomedb,"[{'cacheType': 'CACHE_1000', 'dbPaths': ['/']}]"


In [10]:
# Record when the notebook was last executed (local clock, naive datetime)
print(f"Last Run: {datetime.datetime.now()}")

Last Run: 2023-05-30 23:14:00.803892
