# BasicTick: End of Day (EOD) Processing
This notebook is an example of an end of day (EOD) process that adds the contents of an RDB as a changeset to an HDB.

Instead of moving between Python and q code, this notebook makes use of PyKX to do all its work with the RDB, HDB, and Gateway for end of day processing.

**RDB: Save Day's Data**
1. Save table and sym locally    
2. Savedown: add changeset to database    

**HDB: Update**
1. Update the Cluster's Database to New Changeset ID

**Gateway: Re-Connect**
1. Update the Database Connections

In [1]:
# --- Notebook parameters ---

# Clusters taking part in the end-of-day flow
RDB_CLUSTER_NAME = "RDB_basictickdb_20230606"
HDB_CLUSTER_NAME = "HDB_basictickdb_20230606"
GW_CLUSTER_NAME = "GATEWAY_20230606"

# Database that receives the day's data as a changeset
DB_NAME = "basictickdb"

# Directory the RDB writes the table and sym file to before the changeset is created
scratch_path = "/opt/kx/app/scratch"

# When True, the scratch files and the RDB's example table are cleared after the savedown
clear_rdb = True

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import datetime
import json
import os
import shutil
import subprocess

import boto3
import pykx as kx

from managed_kx import *
from env_2 import *


In [3]:
# Refresh credentials where possible, then build the boto3 session and FinSpace client
session = None

# Best effort: if the `ada` CLI is on PATH, have it refresh credentials for ACCOUNT_ID.
# Any failure is reported and otherwise ignored so the session setup below still runs.
try:
    ada_path = shutil.which("ada")
    if ada_path is not None:
        # argument list (no shell) so ACCOUNT_ID is never interpreted by a shell
        subprocess.run(
            [
                ada_path, "credentials", "update",
                f"--account={ACCOUNT_ID}",
                "--provider=isengard",
                "--role=Admin",
                "--once",
            ],
            check=False,
        )
except Exception as err:
    print(f"Skipping ada credential refresh: {err}")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # no explicit keys supplied: let boto3 resolve credentials on its own
    session = boto3.Session()
else:
    print("Using variables ...")
    # create AWS session: using access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [4]:
# Open a PyKX connection to the HDB cluster
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Snapshot taken before the update: row count per date ...
before_update_pdf = hdb("select counts:count i by date from example").pd()

# ... and the total number of rows in the example table
before_rows = hdb("count example").py()

# RDB: Save Day's Data
1. Save table and sym locally   
2. Savedown: add changeset to database

In [5]:
# Open a PyKX connection to the RDB cluster
rdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=RDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

In [6]:
# Last five rows of the RDB's example table, as a pandas DataFrame
rdb_sample_pdf = rdb("select [-5] from example").pd()

# Total number of rows currently held in the RDB
rdb_rows = rdb("count example").py()

In [7]:
# Show the sampled rows, then the row count with thousands separators
display(rdb_sample_pdf)

print(f"Rows: {rdb_rows:,}")

Unnamed: 0,sym,time,number
0,eep,2023-06-09 14:34:56.914024044,83
1,hhc,2023-06-09 14:34:56.914024044,55
2,mef,2023-06-09 14:34:56.914024044,15
3,kph,2023-06-09 14:34:56.914024044,54
4,bcd,2023-06-09 14:34:56.914024044,67


Rows: 6,142,270


## Step 1: Save table and sym locally

In [8]:
# Partition directory is named after the notebook host's current date (yyyy.mm.dd)
today = datetime.date.today()
date_dir = f"{today:%Y.%m.%d}"

print( f"Saving to: {date_dir}" )

# Have the RDB write the example table as that date partition under the scratch path
rdb( f".Q.dpfts[`:{scratch_path};{date_dir};`sym;`example;`sym]" )

Saving to: 2023.06.09


pykx.SymbolAtom(pykx.q('`example'))

## Step 2: Savedown: add changeset to database

a. Table of changes for the changeset  
b. Create Changeset  
c. Wait for Changeset to be added

In [9]:
# Define c_r on the RDB: a two-row table of change requests for the changeset.
# Row 1 PUTs the saved date directory from scratch into /<date_dir>/ of the database;
# row 2 PUTs the sym file from scratch into the database root.
# The trailing backslash continues the same Python string onto the next line.
rdb(f"c_r:(`input_path`database_path`change_type!(\"{scratch_path}/{date_dir}\";\"/{date_dir}/\";\"PUT\");\
    `input_path`database_path`change_type!(\"{scratch_path}/sym\";\"/\";\"PUT\"));")

# Read c_r back from the RDB and show it as a pandas DataFrame for inspection
rdb("c_r").pd()

Unnamed: 0,input_path,database_path,change_type
0,b'/opt/kx/app/scratch/2023.06.09',b'/2023.06.09/',b'PUT'
1,b'/opt/kx/app/scratch/sym',b'/',b'PUT'


In [10]:
# Ask the RDB to create a changeset in DB_NAME from the c_r change requests
print(f"Creating changeset for: {DB_NAME}")
res = rdb(f".aws.create_changeset[\"{DB_NAME}\";c_r]")

# Keep the id from the response as a plain string; later cells pass it to the FinSpace API
CHANGESET_ID = str(res.get("id"))
print(f"ChangesetID: {CHANGESET_ID}")

Creating changeset for: basictickdb
ChangesetID: iMRQSnX1uK1uOXYhtzeqzg


In [11]:
# Block until the helper reports that the changeset is no longer in progress
wait_for_changeset_status(
    client,
    environmentId=ENV_ID,
    databaseName=DB_NAME,
    changesetId=CHANGESET_ID,
    show_wait=True,
)
print("** Done **")

Status is IN_PROGRESS, total wait 0:00:00, waiting 10 sec ...
** Done **


### Optional: Clean up RDB
Optionally clean up by deleting files created and clear the example table.

In [12]:
# Optionally clear the RDB: remove the scratch files and the data that was just saved
if clear_rdb:
    # Guard the remote `rm -rf`: an empty or root-only path would expand to `rm -rf /*`
    if not scratch_path.strip().strip("/"):
        raise ValueError(f"Refusing to clean unsafe scratch_path: {scratch_path!r}")

    print(f"Cleaning: {scratch_path}")

    # delete everything under the scratch directory on the RDB host
    rdb(f"system \" rm -rf {scratch_path}/*\"")

    # empty the example table and drop the c_r change-request table
    rdb("delete from `example")
    rdb("delete c_r from `.")

Cleaning: /opt/kx/app/scratch


# HDB: Update
Update the cluster's database to new changeset.


In [13]:
# Database configuration for the HDB: same database, now pinned to the new changeset
DB_CONFIG = [
    {
        'databaseName': DB_NAME,
        'cacheConfigurations': [
            {'cacheType': 'CACHE_1000', 'dbPaths': ['/']},
        ],
        'changesetId': CHANGESET_ID,
    },
]

# Request the update on the HDB cluster and show the raw API response
resp = client.update_kx_cluster_databases(
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    databases=DB_CONFIG,
)
resp

{'ResponseMetadata': {'RequestId': 'a45f574e-b7d7-43a2-9707-ef2d0a835662',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Fri, 09 Jun 2023 14:35:13 GMT',
   'x-amzn-requestid': 'a45f574e-b7d7-43a2-9707-ef2d0a835662',
   'x-amz-apigw-id': 'GQXIzFJ6oAMFbTg=',
   'x-amzn-trace-id': 'Root=1-6483389e-0f5ec2225e84917a576f128e',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 eb30892e0d329aead17e943e1b1cc5ae.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD55-P1',
   'x-amz-cf-id': 'lFcnKBw0bJOAhgiVgu59UQNMOOdf0LNa8DOXt73_zFi36xgSM4E1Uw=='},
  'RetryAttempts': 0}}

In [14]:
# Block until the helper reports that the HDB cluster has finished updating
wait_for_cluster_status(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    show_wait=True,
)
print("** Done **")

Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:00:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:00:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:01:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:01:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:02:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:02:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:03:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:03:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:04:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:04:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:05:

# Gateway: Re-Connect
Using PyKX, connect to the Gateway cluster and have it re-connect to its Databases. Connection to the HDB would have been lost during the HDB update process.


In [15]:
# Open a PyKX connection to the Gateway cluster
gw = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=GW_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

In [16]:
# reinit the gateway, will re-connect to databases
# hdb_name and rdb_name sit inside the q string, so they are resolved on the gateway
# process, not in this notebook.
# NOTE(review): reinit is presumably defined by the gateway's own code — confirm.
gw("reinit[hdb_name; rdb_name]")

pykx.Identity(pykx.q('::'))

In [17]:
# Updated state of connected processes
procs_pdf = gw("select process, handle, connected, address from .conn.procs").pd()

# Each address has the form ":host:port:user:<presigned connection string>"; the last
# field carries temporary AWS credentials (security token and signature). Keep only
# ":host:port:user" so the secrets never reach the saved notebook output.
procs_pdf["address"] = (
    procs_pdf["address"]
    .astype(str)
    .str.split(":")
    .str[:4]
    .str.join(":")
)

display(procs_pdf)

Unnamed: 0,process,handle,connected,address
0,rdb,27,True,:ip-192-168-2-98.ec2.internal:5000:GATEWAY_20230606:[REDACTED presigned connection string]
1,hdb,28,True,:ip-192-168-11-223.ec2.internal:5000:GATEWAY_20230606:[REDACTED presigned connection string]
2,hdb,29,True,:ip-192-168-6-102.ec2.internal:5000:GATEWAY_20230606:[REDACTED presigned connection string]


# HDB: Before and After
Dates and counts of the HDB before update and after.

In [18]:
# Open a fresh PyKX connection to the HDB cluster for the "after" queries
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

In [19]:
# Snapshot taken after the update: row count per date ...
after_update_pdf = hdb("select counts:count i by date from example").pd()

# ... and the total number of rows in the example table
after_rows = hdb("count example").py()

### Before

In [20]:
# HDB state captured before the update: per-date counts, then the total row count
display(before_update_pdf)

print(f"Rows: {before_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 37,760,527


### After

In [21]:
# HDB state captured after the update: per-date counts, then the total row count
# NOTE(review): the saved output of this run shows fewer total rows after the update
# than before it — worth confirming why.
display(after_update_pdf)

print(f"Rows: {after_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 26,111,782


In [22]:
# Record when the notebook was last executed (local time of the notebook host)
print(f"Last Run: {datetime.datetime.now()}")

Last Run: 2023-06-09 14:49:02.387894
