# BasicTick: End of Day (EOD) Processing
This notebook an example of an end of day (EOD) process that adds the contents of an RDB as a changeset to an HDB.

Instead of moving between python and q code this notebokk makes use of PyKX to do all its work with the RDB, HDB, and Gateway for end of day processing.

**RDB: Save Day's Data**
1. Save table and sym locally    
2. Savedown: add changeset to database    

**HDB: Update**
1. Update the Cluster's Database to New Changeset ID

**Gateway: Re-Connect**
1. Update the Database Connections

In [1]:
# clusters
RDB_CLUSTER_NAME="RDB_basictickdb_20230606"
HDB_CLUSTER_NAME="HDB_basictickdb_20230606"
GW_CLUSTER_NAME ="GATEWAY_20230606"

# database
DB_NAME="basictickdb"

# scratch location 
scratch_path = "/opt/kx/app/scratch"

# clean rdb?
clear_rdb = True

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import boto3
import json
import datetime

import pykx as kx

from managed_kx import *
from env_2 import *


In [3]:
# triggers credential get
session=None

try:
    # aws: use ada for credentials
    subprocess.call(["which", "ada"])
    os.system(f"ada credentials update --account={ACCOUNT_ID} --provider=isengard --role=Admin --once")
except: 
    None

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: using access variables
    session = boto3.Session()
else:
    print("Using variables ...")
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [4]:
# Query the HDB
hdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

# Dates and Counts
before_update_pdf = hdb("select counts:count i by date from example").pd()

# Number of Rows
before_rows = hdb("count example").py()

# RDB: Save Day's Data
1. Save table and sym locally   
2. Savedown: add changeset to database

In [5]:
# Connect to the RDB
rdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=RDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

In [6]:
# Dates and Counts
rdb_sample_pdf = rdb("select [-5] from example").pd()
rdb_rows = rdb("count example").py()

In [7]:
display(rdb_sample_pdf)
print(f"Rows: {rdb_rows:,}")

Unnamed: 0,sym,time,number
0,ekk,2023-06-12 23:21:34.904022459,12
1,kpn,2023-06-12 23:21:34.914025162,17
2,lac,2023-06-12 23:21:34.914025162,28
3,knk,2023-06-12 23:21:34.914025162,86
4,kjc,2023-06-12 23:21:34.914025162,24


Rows: 4,987,185


## Step 1: Save table and sym locally

In [8]:
# date directory
today = datetime.date.today()

date_dir = today.strftime("%Y.%m.%d")

print( f"Saving to: {date_dir}" )
rdb( f".Q.dpfts[`:{scratch_path};{date_dir};`sym;`example;`sym]" )

Saving to: 2023.06.12


pykx.SymbolAtom(pykx.q('`example'))

## Step 2: Savedown: add changeset to database

a. Table of changes for the changset  
b. Create Changeset  
c. Wait for Changeset to be added

In [9]:
rdb(f"c_r:(`input_path`database_path`change_type!(\"{scratch_path}/{date_dir}\";\"/{date_dir}/\";\"PUT\");\
    `input_path`database_path`change_type!(\"{scratch_path}/sym\";\"/\";\"PUT\"));")

rdb("c_r").pd()

Unnamed: 0,input_path,database_path,change_type
0,b'/opt/kx/app/scratch/2023.06.12',b'/2023.06.12/',b'PUT'
1,b'/opt/kx/app/scratch/sym',b'/',b'PUT'


In [10]:
print(f"Creating changeset for: {DB_NAME}")

res = rdb(f".aws.create_changeset[\"{DB_NAME}\";c_r]")

CHANGESET_ID = str(res.get("id"))
print(f"ChangesetID: {CHANGESET_ID}")

Creating changeset for: basictickdb
ChangesetID: 5sRY9RwOgE5ekO2NqDfe7A


In [11]:
# wait for ingestion
wait_for_changeset_status(client, environmentId=ENV_ID, databaseName=DB_NAME, changesetId=CHANGESET_ID, show_wait=True)
print("** Done **")

Status is IN_PROGRESS, total wait 0:00:00, waiting 10 sec ...
** Done **


### Optional: Clean up RDB
Optionally clean up by deleting files created and clear the example table.

In [12]:
# clear the RDB
if clear_rdb:
    print(f"Cleaning: {scratch_path}")

    rdb(f"system \" rm -rf {scratch_path}/*\"")

    # remove tables
    rdb("delete from `example")
    rdb("delete c_r from `.")

Cleaning: /opt/kx/app/scratch


# HDB: Update
Update the cluster's database to new changeset.


In [13]:
DB_CONFIG=[{'databaseName': DB_NAME,
   'cacheConfigurations': [{'cacheType': 'CACHE_1000', 'dbPaths': ['/']}],
   'changesetId': CHANGESET_ID}]

resp=client.update_kx_cluster_databases(environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, databases=DB_CONFIG)
resp

{'ResponseMetadata': {'RequestId': '2593441c-906b-4022-b1a8-8f96d4d68000',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 12 Jun 2023 23:21:50 GMT',
   'x-amzn-requestid': '2593441c-906b-4022-b1a8-8f96d4d68000',
   'x-amz-apigw-id': 'GbdF7GMtIAMFTKA=',
   'x-amzn-trace-id': 'Root=1-6487a88c-1e99523832a588af55cadd82',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 d8778dbc6e81818135a7305a388b2974.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD55-P1',
   'x-amz-cf-id': 'f3sPP4_r01ajpS3eo9G1WwANcYCj5ajlsCr4VmkGE94FtQgdU7-1WA=='},
  'RetryAttempts': 0}}

In [14]:
wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, show_wait=True)
print("** Done **")

Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:00:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:00:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:01:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:01:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:02:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:02:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:03:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:03:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:04:00, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:04:30, waiting 30 sec ...
Cluster: HDB_basictickdb_20230606 status is UPDATING, total wait 0:05:

# Gateway: Re-Connect
Using PyKX, connect to the Gateway cluster and have it re-connect to its Databases. Connection to the HDB would have been lost during the HDB update process.


In [15]:
# Connect to the Gateway with PyKX
gw = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=GW_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

In [16]:
# reinit the gateway, will re-connect to databases
gw("reinit[hdb_name; rdb_name]")

pykx.Identity(pykx.q('::'))

In [17]:
# Updated state of connected processes
display( gw("select process, handle, connected, address from .conn.procs").pd() )

Unnamed: 0,process,handle,connected,address
0,rdb,28,True,:ip-192-168-2-98.ec2.internal:5000:GATEWAY_20230606:Host=ip-192-168-2-98.ec2.internal&Port=5000&User=GATEWAY_20230606&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJj%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQDV80hVLhSVbKAQhVdybfafHGGrIIzVVMeOZk0mJVhTsAIgEHSlGpwzX4rsJPrfRrwAYNkq2J4gclNvyVHFbVu7%2BvwqgQMI4f%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgw2MTI4NDEzODM1OTQiDBHpY4TXtcatmwHxMirVAmofnjZWZNqonPPTO2X0%2BVrFhxSB0XJsLT1%2Bacm%2F%2Fahgu0ppM8PYgjeo9pkGqsVAqM8%2BSx4KlB12UjzwQIvqPyogwgVfH5PfZTQ7WULspPo%2BlJwaHLYXR3BPKIPlEGupOiLj%2B3MTt4mv%2BD1hMXxnwh1wYDcu7HF%2FteunZirnZvqWiBim81924NkGuYAzeOWx39fSF80H5n4JbvbG4n7GzaUrk3uS4DlFFHrc4KhmI51AG0ld7cgYI%2F8wlLbwOPPTno7fmS8U3AzqtArjweDE10IojYRcB90FGdloL1g1CrvEIr9m%2FIf2xiMmAYPeHs4Su2O2vaHUTZIzxVYx9KHvnc6XjQAnqmW5159Cbk77zzHbwvUXpm0iJRJHZWMraRensdsF2RElyyrKE19vlEBWjEkav%2Bz6Ay%2FgGJ7xil%2FKndjSiQYz6ANlDKoWz5SKV1CNBGhE6p7yMIXYnqQGOr8BX0%2BDnDtuQWdEBFP1TpgZEk8QlhrnY7OKYvCbK8fQTyfPAFNatDdKq9KhscoajgRQFE66xAZlM%2FswD5VdR%2FbhwSIsqKdLELVSKYIcLRAVP3Z81ingmvB3NNPF1IfqvIjTnFuYf1RA4GYTvUuy2IDjjbiYHNZbbG7KXwsfwzFDZt2ZWomSLFFJRXI8DzRMgewS0YveLiYSREEP3A%2BSXsXOCdQ7lLvrjwIB6mgIzE5ZtitvDcF9a9HRydR%2BQMM5lwY%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230612T233637Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIAY5MBRM2VG3GBJP5U%2F20230612%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=b617ea7052e6a527195a9dd080ca9e71ac470ab3fd3a304b6eec59e6f37c9230
1,hdb,32,True,:ip-192-168-8-225.ec2.internal:5000:GATEWAY_20230606:Host=ip-192-168-8-225.ec2.internal&Port=5000&User=GATEWAY_20230606&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJj%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIHOaEnFZ%2Bunh0CIrlvHjjA2on3A28jMj6K60HmXvvnITAiEApLHA9A1o5xIzJv6Tzf9TK8ReH6bidhEYyA7EhTdBzJsqgQMI4f%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgw2MTI4NDEzODM1OTQiDAKcWKSlF9iKeuIKJSrVAhVM8NN79QLNiv70xMkOvkimXZdeE9xRz2l6wNCPS7mnetJQmp70mR7y1uOglMCF%2B%2B4YGbRttpqP6AWL4zf3vn0KZs4uUYmBModDX28r9OxECkb2FsyUQleMr0U3nUvlkSN79g2GW%2FAlr9SY6Yvgbiqa5lIKYDTKRYjTS3hYaK%2BJH7xp0DjWNEuGKp4D54FnEm5Ftroxvnp4c5ae%2Fijckt3p9JbEOEXDyEoU2DtRonMwAUfZJnotJLYbNnkoQCzykmGmiPQhmAHdAcqrtEUgWCBIb%2BZZYlCtCbXelwICyrmoJWzNLHskYBkEfCFyiGRcnpUuPfx8OIH4ysPz%2FHeFP7t2NLtoN1ILrvX67iyk8itIlh93uyIoJXzw7fAvhQXnw8hV9D4IMdqdxyPbXJu3oaS0rRnNasLOnUK%2FcCgBtyLsfPh%2BSYYIC9a0M5Gt7Dhaoss4f84OMIXYnqQGOr8BK921xEc%2BDhAIECitwujz59d3UFqngKeMau5ih8SX7LgSLVNrMVfuJXSOFor5SRj2Npg0m9twemZCYhJUuz70cQXZpgr5nmRaF4ijdHFhDDZF9n9BL1E8u0u9aRlmFrRjcboSuwUyhm54Yj7nZrrpooRehEwP3TE4lFVh8cP2zVmQg494Z3Cb0Ow1GQ%2FK%2FC8wF7%2FUyKyW5YneeIeq1kWrXj5Vjm61HrMkU5kxp0j2KJCemibJzJSkhKs032Mf%2FgM%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230612T233637Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIAY5MBRM2VIXAJUFH2%2F20230612%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=bc43b081dfc5d67b580b99cba6e7e290ced36fb6d9381b61d6f360e446f48ef7
2,hdb,33,True,:ip-192-168-2-166.ec2.internal:5000:GATEWAY_20230606:Host=ip-192-168-2-166.ec2.internal&Port=5000&User=GATEWAY_20230606&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJj%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJGMEQCIA%2FvT2OqNuEQiEgpicXeL%2FHjZVBOcSc%2FicjhnmPOFTMAAiBD%2BjaEc8l54cMKa%2Fr9PSnzIGC%2BSmjpwA1RGApXeT6uSyqAAwjh%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAAaDDYxMjg0MTM4MzU5NCIM8h6iojQKNmpiQCxOKtQC4n8JmYMHQOpSYkaxmmoUykvkJF8KYaTtbAKkTx9GvuycajdIcqAADn6LjFGwvbHoTMe6Vq5l%2FCIXYDDHkbNetntkn39byEAqIO%2BqixW2VOiUZCnEMzjxtpFPL1ZvVBDgiN8ZwHWA5YsvwnzhXpxWrobaYbPDsdJW2z4ki3daxd5im5O5SbwAdRLbW6ra3EPwB48vo%2FbvZUkeM%2BlznKaq3wg6Ix9ZgjB2b1yott0tv%2FECsL6rsIds8tdfmz530XvwaQ8pRooBeVohyrcC7peezaABFZ%2FyeXP3wa83Z2aj9zLcelByTgVEuJa6HGW6RhslOfthCGQixuoHS2cHKEXsCzKDYkClw0GMOfHtgAMBDWdovzsQAmjO9xQNQOhf1pex3asqhlqdMr3JPAbG5VUHHUIrjJrWOsDxD4d4Duxr%2BpPSKBCDWY9HwbsSxKTb026RsgKSnDCF2J6kBjrAARFpY00Yigd0nwxl0FyhfAo9U9CTwRPc7pmYOznmEfGMqw1SCJ2c4x7uO6RsH24NRZjb5yCO4uknKBK9jq%2FVA%2BVbput5blrgZT3JVWqRB1CHFu7xKI9KskrzRlWobjQGYFoAvmApiXW6hIB7vIZ5xdhy5VdZO%2BAaBeCCJH6rhrMnmKOuADZSN8eG4%2B0ljtXAwN8q3luWQxRJgtD6wRduiySXflvQvuyFygFVWeempxcbx5UOMLyUv6MFiIVohRYafw%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230612T233637Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIAY5MBRM2VIN4LBU2N%2F20230612%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=83e8cfc91bae0cacfe122041bd959ade7d73e32071a23a35623fea1d5230ab94


# HDB: Before and After
Dates and counts of the HDB before update and after.

In [18]:
# Query the HDB for after state
hdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

In [19]:
# Latest Dates and Counts
after_update_pdf = hdb("select counts:count i by date from example").pd()
after_rows = hdb("count example").py()

### Before

In [20]:
display(before_update_pdf)

# Number of Rows
print(f"Rows: {before_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 34,755,675


### After

In [21]:
display(after_update_pdf)

# Number of Rows
print(f"Rows: {after_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 30,985,368


In [22]:
print( f"Last Run: {datetime.datetime.now()}" )

Last Run: 2023-06-12 23:36:38.717077
