# BasicTick: End of Day (EOD) Processing
This notebook an example of an end of day (EOD) process that adds the contents of an RDB as a changeset to an HDB.

Instead of moving between python and q code this notebokk makes use of PyKX to do all its work with the RDB, HDB, and Gateway for end of day processing.

**RDB: Save Day's Data**
1. Save table and sym locally    
2. Savedown: add changeset to database    

**HDB: Update**
1. Update the Cluster's Database to New Changeset ID

**Gateway: Re-Connect**
1. Update the Database Connections

In [1]:
# scratch location 
scratch_path = "/opt/kx/app/scratch"

# clean rdb?
clear_rdb = True

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import boto3
import json
import datetime

import pykx as kx

from managed_kx import *
from env_kdb_1 import *

from basictick_setup import *

In [3]:
# triggers credential get
session=None

try:
    # aws: use ada for credentials
    subprocess.call(["which", "ada"])
    os.system(f"ada credentials update --account={ACCOUNT_ID} --provider=isengard --role=Admin --once")
except: 
    None

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: using access variables
    session = boto3.Session()
else:
    print("Using variables ...")
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [4]:
# Query the HDB
hdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

# Dates and Counts
before_update_pdf = hdb("select counts:count i by date from example").pd()

# Number of Rows
before_rows = hdb("count example").py()

# RDB: Save Day's Data
1. Save table and sym locally   
2. Savedown: add changeset to database

In [5]:
# Connect to the RDB
rdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=RDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

In [6]:
# Dates and Counts
rdb_sample_pdf = rdb("select [-5] from example").pd()
rdb_rows = rdb("count example").py()

In [7]:
display(rdb_sample_pdf)
print(f"Rows: {rdb_rows:,}")

Unnamed: 0,sym,time,number
0,pag,2023-06-19 19:40:14.863037793,40
1,cib,2023-06-19 19:40:14.863037793,61
2,lgp,2023-06-19 19:40:14.863037793,22
3,knd,2023-06-19 19:40:14.863037793,35
4,lib,2023-06-19 19:40:14.863037793,71


Rows: 984,521


## Step 1: Save table and sym locally

In [8]:
# date directory
today = datetime.date.today()

date_dir = today.strftime("%Y.%m.%d")

print( f"Saving to: {date_dir}" )
rdb( f".Q.dpfts[`:{scratch_path};{date_dir};`sym;`example;`sym]" )

Saving to: 2023.06.19


pykx.SymbolAtom(pykx.q('`example'))

## Step 2: Savedown: add changeset to database

a. Table of changes for the changset  
b. Create Changeset  
c. Wait for Changeset to be added

In [9]:
rdb(f"c_r:(`input_path`database_path`change_type!(\"{scratch_path}/{date_dir}\";\"/{date_dir}/\";\"PUT\");\
    `input_path`database_path`change_type!(\"{scratch_path}/sym\";\"/\";\"PUT\"));")

rdb("c_r").pd()

Unnamed: 0,input_path,database_path,change_type
0,b'/opt/kx/app/scratch/2023.06.19',b'/2023.06.19/',b'PUT'
1,b'/opt/kx/app/scratch/sym',b'/',b'PUT'


In [10]:
print(f"Creating changeset for: {DB_NAME}")

res = rdb(f".aws.create_changeset[\"{DB_NAME}\";c_r]")

CHANGESET_ID = str(res.get("id"))
print(f"ChangesetID: {CHANGESET_ID}")

Creating changeset for: basictickdb
ChangesetID: AMRqlgq0XqrDXV2P8i69CQ


In [11]:
# wait for ingestion
wait_for_changeset_status(client, environmentId=ENV_ID, databaseName=DB_NAME, changesetId=CHANGESET_ID, show_wait=True)
print("** Done **")

Status is IN_PROGRESS, total wait 0:00:00, waiting 10 sec ...
** Done **


### Optional: Clean up RDB
Optionally clean up by deleting files created and clear the example table.

In [12]:
# clear the RDB
if clear_rdb:
    print(f"Cleaning: {scratch_path}")

    rdb(f"system \" rm -rf {scratch_path}/*\"")

    # remove tables
    rdb("delete from `example")
    rdb("delete c_r from `.")

Cleaning: /opt/kx/app/scratch


# HDB: Update
Update the cluster's database to new changeset.


In [13]:
DB_CONFIG=[{'databaseName': DB_NAME,
   'cacheConfigurations': [{'cacheType': 'CACHE_1000', 'dbPaths': ['/']}],
   'changesetId': CHANGESET_ID}]

resp=client.update_kx_cluster_databases(environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, databases=DB_CONFIG)
resp

{'ResponseMetadata': {'RequestId': '15321d47-8b61-4040-a6b8-af8aa90eb098',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'date': 'Mon, 19 Jun 2023 19:40:29 GMT',
   'x-amzn-requestid': '15321d47-8b61-4040-a6b8-af8aa90eb098',
   'x-amz-apigw-id': 'GyBO1HHrIAMFSIw=',
   'x-amzn-trace-id': 'Root=1-6490af2b-42ed6864163c17291b890fb0',
   'x-cache': 'Miss from cloudfront',
   'via': '1.1 aef197034a978e986954f2826c90b090.cloudfront.net (CloudFront)',
   'x-amz-cf-pop': 'IAD55-P1',
   'x-amz-cf-id': 'OvaT5rtr1ocmDbc4dKVNdRxcMN2tIjm9LyNfz6v9LEt59Yxm2v7FbA=='},
  'RetryAttempts': 0}}

In [14]:
wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, show_wait=True)
print("** Done **")

Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:00:00, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:00:30, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:01:00, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:01:30, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:02:00, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:02:30, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:03:00, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:03:30, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:04:00, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:04:30, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC status is UPDATING, total wait 0:05:00, waiting 30 sec ...
Cluster: HDB_basictickdb_TFC sta

# Gateway: Re-Connect
Using PyKX, connect to the Gateway cluster and have it re-connect to its Databases. Connection to the HDB would have been lost during the HDB update process.


In [15]:
# Connect to the Gateway with PyKX
gw = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=GW_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

In [16]:
# reinit the gateway, will re-connect to databases
gw("reinit[hdb_name; rdb_name]")

pykx.Identity(pykx.q('::'))

In [17]:
# Updated state of connected processes
display( gw("select process, handle, connected, address from .conn.procs").pd() )

Unnamed: 0,process,handle,connected,address
0,rdb,18,True,:ip-192-168-0-211.ec2.internal:5000:GATEWAY_TFC:Host=ip-192-168-0-211.ec2.internal&Port=5000&User=GATEWAY_TFC&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEDwaCXVzLWVhc3QtMSJHMEUCIQDiN2QGsRjfKDNgjRbgxedqHSH18ulEyDfZTJH8zBBCsgIgHBH48lx2U7jBuKvyY0T25Ro%2FISFc9F%2BGY6q83lFvYRMqgAMIlf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgw4Mjk4NDU5OTg4ODkiDOJih5zNbdwlXvGy1CrUAnZv9dSf7rCI%2BKnJ0hNnhYDwis9J%2BRC%2Bj1a2nd9iwx3N7vnqv2eL5M9kX3gJ%2FmsuiQWHEXmhZD7rJUX631YmCj8cHpFh8%2Bd1vhDIn8k2X%2F%2FMf%2Ft3yKKjbFg700Drg2vbGKXmNOb8kXEBHS5vA%2FTwRxJI8Yftr6HEl9EggwBO%2B4AeTZt34itlac6R9Zo93wsoen7bCW6cVNKWMiw6C7SCqwvQs9D%2BN%2FXDKpzjT4P%2B%2Fi84lQN%2BVzM11zH8wXVXoem%2B4jqsB%2Fq9UL%2FztvvTLWtcOiS0uVKjWmH%2BiidTyLrzGH9CsbaFAm4PJmQcGPWOHynhyo8P%2FZm4Q%2BFxHQAgtA6s0U1xzg6lmZFnknF%2ByJRYUqXYF2hIrT12ywn%2BFiPRNFSDhMwgxsoZOG%2FdEDIGf%2FAOAwVIDNNpSTn3JhK52AcM5XhdTOGihhksYkfztvxuA0ERyr20OEMw6OTCpAY6vwFAf2Gi5xGyFdpKhk8D7YHBqxcpAHwd4E9bQoxnrIjkC8w%2BO55zZbuwW5Rt%2B%2F9bT8aK9wxV5aV3tGccCCeB6vGAAUyfX8p6HFIu40lVT74LHTpwXj2WKOqHgwn1lvKuJIVfoR1PviJbS%2Bfesr3rLxlObqWu937DNFWjetqL3MBoKF2iLBNVvjvKnJulMk5c8ddWm3JVMdHlPxpfubezCUFcjNbxBIkcvIArtRzr7Bz%2BuG8QbjuHs0YsVBflJIwTBQ%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230619T195416Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIA4CNVNBUU5I3TZOET%2F20230619%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=560e5b8c76ee57441b4c90dff74b5d2e283ad7304bdd451c1db170c75d16b343
1,hdb,19,True,:ip-192-168-14-52.ec2.internal:5000:GATEWAY_TFC:Host=ip-192-168-14-52.ec2.internal&Port=5000&User=GATEWAY_TFC&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEDwaCXVzLWVhc3QtMSJHMEUCIBNXiVV%2BP3Cdbkd0aQ1NLS%2BZ%2FYnt4YW3Fjp%2FKIqAJ4iNAiEA%2BZSc2%2BprycgoW7JF6SysgPlMXSl2tid5wob2zdaewecqgAMIlf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgw4Mjk4NDU5OTg4ODkiDNMXZBlrbrT8u6EJfirUAqUhZhJr59c9zCzLSX8FomVHDDD1RkUATGQ%2F%2FQ1wQVXzeJ6MYEVNVx5TOS2FtfnGGP6Ss2CcNDbnRrxVhMeJk0f0gJoabppaY60JYSQ5IONW7LMC8ClcC5mTSqY8kbwsiu3jzeKLMVVkFyB3Gw1wtVaXcC1umjHtRlIaqO5fecSJG6bn%2BEsLbxK0z8wMeC80oW%2FCPC%2BDuYcITK5hjQuJqqhHPFCjdYKqlpNZVLb%2F24iz02IXQVEe%2FbBWT0iQtpp3OKAqajAHBvAorRMcaaYIZ%2FFYOnHZMBZF9Dpg%2FKJVePBij093aPbJojsxIjNTzHh7xXnWyKlwlM%2FAeH%2BtB%2F8dJ9cppEM%2FDOGRLZXf%2F6syvtrAG9mDNKQZ3WOzmBJ7RatPdSlhKLhF4oVqWHSK7xWwW9B66xyc499NzAW0QG6Et8ZXUSCTjmJwaiqcRTF%2BZySAnsxnghAw6OTCpAY6vwGi9V0WR%2F8pVx0FTdWVM%2FzcpV0kHTmzdLIr9kMEE57wQ%2BbQOJM%2BCQC9fadlgFzPuRVtrpA2B2axdNOyvvZwWOVKYl%2Bm5g7mwB1nvs1r85zv5tR8whpBgL0nrOwtDwUOhnK55qG%2Bx41STlzF4rMFNYOVkCD5o627teXDtYWLgZ8PgMUopTLcU%2F9ly0igilwr8WEN0PAMtp0aUAcCZ2Hs36fcnEN4cfE4GaeVHcIHb%2FTh1ANJAzrLRbINRtAmpCjO3g%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230619T195416Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIA4CNVNBUUWMSI2ROX%2F20230619%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=4e09e5fe49fe3731535871c3ff629f92a257a3615031f71c58d944581ddd5a2d
2,hdb,20,True,:ip-192-168-11-244.ec2.internal:5000:GATEWAY_TFC:Host=ip-192-168-11-244.ec2.internal&Port=5000&User=GATEWAY_TFC&Action=finspace%3AConnectKxCluster&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEDwaCXVzLWVhc3QtMSJGMEQCID9q13RELa%2FbR8cxVlRZc3VduTpafzSVO7cIHimoWQjrAiAuY4J2ofCoP%2BP514c345qTWhLON9PkmMYXYBWETxex3SqAAwiV%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAAaDDgyOTg0NTk5ODg4OSIMmL52nrGRi%2BTUsqcvKtQCrMbhG%2FxDDItfVFUt8aM8Um2VAaEWOMjN3ahfuCu8NEy8K8oYucG8MuI%2FtdD1WTB0%2Fhy8VFHXDRSbb%2Fl%2F5KdGwyczU6ZKAPtk7%2Fw2aY72QxXePEk6XXt5NLhoHCsJzTkEjYaRW4YTM8mvt21If%2BYrL1XIi6BTY09sSLO2D9DE%2BMG85MDb00jOxrGYmPoMYghxRtbfRBR27K%2FqPt%2BqkoT9L1%2FfA%2FcyWmp7Hvy9MNmiRl%2FZaO2YwOLQIft9we2zQuVduTW3%2BLLO1K%2FTo7ykHMtzip9WD22VlDgp2SD%2FrBbrcOuiJwbX9QAEZp3umbUcFzdbY1MT7XOMDdyWlXahVvaeXFPeCQ6C5pAvFHsS9HTILyYeXEjQD5NS4rl3I3dNZcZmgHQRlBp5cQ%2BODoVoyU8T%2BEL%2BGqWj2Qqr6VK1TUZoGKBtX%2FmkXFhA1Y0HBCWEW3VK0km6bzDp5MKkBjrAAc9MPhkzE6jK%2FQo5pbxTraOUoc8otYK5qAIIqS8mHbyZfhC%2F6A3Xwd%2FOS%2Fjrx687yN5wbByT5iqqRzDJy51XFe6wI6kpTWqyRVSPrTabEyfyxLQwRv8ABghQv1fVCQSlyFhV4dZMzIIjv9IcEvbL6UgLQk%2BlyEuTxRFq%2BUevI%2FDel0YQqZP8sMMOcB5%2BbygsEXJ7tqAQYt3%2BRT8oZH%2B2PPGa%2BxLjfFsQi%2FXZzOXgvNaqXu2f8LDtSxFq2rqciR0hnA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20230619T195417Z&X-Amz-SignedHeaders=host&X-Amz-Expires=900&X-Amz-Credential=ASIA4CNVNBUUYUPJNN6T%2F20230619%2Fus-east-1%2Ffinspace-apricot%2Faws4_request&X-Amz-Signature=e3652473268819f91621270b2d5fc65c35b99148912313d78fef10e410f1b1ce


# HDB: Before and After
Dates and counts of the HDB before update and after.

In [18]:
# Query the HDB for after state
hdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

In [19]:
# Latest Dates and Counts
after_update_pdf = hdb("select counts:count i by date from example").pd()
after_rows = hdb("count example").py()

### Before

In [20]:
display(before_update_pdf)

# Number of Rows
print(f"Rows: {before_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 10,000,000


### After

In [21]:
display(after_update_pdf)

# Number of Rows
print(f"Rows: {after_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 10,984,521


In [22]:
print( f"Last Run: {datetime.datetime.now()}" )

Last Run: 2023-06-19 19:54:18.515521
