# BasicTick: End of Day (EOD) Processing
This notebook is an example of an end of day (EOD) process that adds the contents of an RDB as a changeset to an HDB.

Instead of moving between Python and q code, this notebook uses PyKX to do all of its work with the RDB, HDB, and Gateway for end of day processing.

**RDB: Save Day's Data**
1. Save table and sym locally    
2. Savedown: add changeset to database    

**HDB: Update**
1. Update the Cluster's Database to New Changeset ID

**Gateway: Re-Connect**
1. Update the Database Connections

In [1]:
# Scratch directory on the RDB: the day's partition and the sym file are
# written here and then referenced as the input paths of the changeset.
scratch_path = "/opt/kx/app/scratch"

# When True, the optional clean-up cell removes everything under scratch_path,
# deletes the rows of the `example` table, and drops the `c_r` variable on the RDB.
clear_rdb = True

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import datetime
import json
import os
import shutil
import subprocess

import boto3
import pykx as kx

from managed_kx import *
from env_kdb_1 import *

from basictick_setup import *

In [3]:
# Establish AWS credentials, then build the boto3 session and FinSpace client.
session = None

# Best-effort credential refresh through the `ada` CLI.
# The refresh is only attempted when `ada` is actually on PATH, and a failure
# to launch it is reported instead of being silently swallowed. It is never
# fatal: the session below is created either way.
if shutil.which("ada") is not None:
    try:
        # Argument list (no shell) so ACCOUNT_ID is passed through verbatim.
        subprocess.run(
            [
                "ada", "credentials", "update",
                f"--account={ACCOUNT_ID}",
                "--provider=isengard",
                "--role=Admin",
                "--once",
            ],
            check=False,
        )
    except OSError as err:
        print(f"Skipping ada credential update: {err}")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: let boto3 resolve credentials on its own
    session = boto3.Session()
else:
    print("Using variables ...")
    # create AWS session: using the explicit access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [4]:
# Open a PyKX connection to the HDB cluster to record its state before the update
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Row counts of `example` grouped by date (converted with .pd())
before_update_pdf = hdb("select counts:count i by date from example").pd()

# Total number of rows in `example` (converted with .py())
before_rows = hdb("count example").py()

# RDB: Save Day's Data
1. Save table and sym locally   
2. Savedown: add changeset to database

In [5]:
# Open a PyKX connection to the RDB cluster
rdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=RDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Last five rows of `example` on the RDB, plus its total row count
rdb_sample_pdf = rdb("select [-5] from example").pd()
rdb_rows = rdb("count example").py()

display(rdb_sample_pdf)
print(f"Rows: {rdb_rows:,}")

Unnamed: 0,sym,time,number
0,aoe,2023-07-26 19:23:44.675384594,50
1,igl,2023-07-26 19:23:44.675384594,79
2,nfp,2023-07-26 19:23:44.675384594,58
3,oop,2023-07-26 19:23:44.685372093,7
4,fac,2023-07-26 19:23:44.685372093,69


Rows: 3,258,921


## Step 1: Save table and sym locally

In [6]:
# Name of the date partition directory, formatted as YYYY.MM.DD
today = datetime.date.today()
date_dir = f"{today:%Y.%m.%d}"

# Have the RDB write the `example` table into the date partition under
# scratch_path using .Q.dpfts
print(f"Saving to: {date_dir}")
rdb(f".Q.dpfts[`:{scratch_path};{date_dir};`sym;`example;`sym]")
print(f"Saved to: {scratch_path}")


Saving to: 2023.07.26
Saved to: /opt/kx/app/scratch


## Step 2: Savedown: add changeset to database

a. Table of changes for the changeset  
b. Create Changeset  
c. Wait for Changeset to be added

In [7]:
# Change requests (a list of dicts) cataloguing the changeset's contents:
# the date partition and the sym file, both taken from the scratch location.
cr = [
    {
        'input_path': f'{scratch_path}/{date_dir}',
        'database_path': f'/{date_dir}/',
        'change_type': 'PUT',
    },
    {
        'input_path': f'{scratch_path}/sym',
        'database_path': '/',
        'change_type': 'PUT',
    },
]

display(cr)

# Publish the change requests to the RDB as the variable `c_r`
rdb['c_r'] = cr

[{'input_path': '/opt/kx/app/scratch/2023.07.26',
  'database_path': '/2023.07.26/',
  'change_type': 'PUT'},
 {'input_path': '/opt/kx/app/scratch/sym',
  'database_path': '/',
  'change_type': 'PUT'}]

In [8]:
print(f"Creating changeset for: {DB_NAME}")

# Ask the RDB to create the changeset from the `c_r` change requests set earlier
res = rdb(f'.aws.create_changeset["{DB_NAME}";c_r]')

# Keep the new changeset's id as a plain string for the later API calls
CHANGESET_ID = str(res.get("id"))
print(f"ChangesetID: {CHANGESET_ID}")

Creating changeset for: basictickdb
ChangesetID: OMTJ1CPJNAvq0RQF3o7xFQ


In [9]:
# Wait on the changeset's status, showing progress while waiting
wait_for_changeset_status(
    client,
    environmentId=ENV_ID,
    databaseName=DB_NAME,
    changesetId=CHANGESET_ID,
    show_wait=True,
)
print("** Done **")

Status is IN_PROGRESS, total wait 0:00:00, waiting 10 sec ...
Status is IN_PROGRESS, total wait 0:00:10, waiting 10 sec ...
Status is IN_PROGRESS, total wait 0:00:20, waiting 10 sec ...
Status is IN_PROGRESS, total wait 0:00:30, waiting 10 sec ...
Status is IN_PROGRESS, total wait 0:00:40, waiting 10 sec ...
** Done **


### Optional: Clean up RDB
Optionally clean up by deleting the files created in the scratch location and clearing the example table.

In [10]:
# Optional clean-up of the RDB, controlled by the clear_rdb flag
if clear_rdb:
    print(f"Cleaning: {scratch_path}")

    # remove everything that was written under the scratch location
    rdb(f'system " rm -rf {scratch_path}/*"')

    # empty the example table and drop the c_r variable sent earlier
    rdb("delete from `example")
    rdb("delete c_r from `.")

Cleaning: /opt/kx/app/scratch


# HDB: Update
Update the cluster's database to new changeset.


In [None]:
# Database configuration for the HDB cluster, pointing it at the new changeset
DB_CONFIG = [
    {
        'databaseName': DB_NAME,
        'cacheConfigurations': [
            {'cacheType': 'CACHE_1000', 'dbPaths': ['/']},
        ],
        'changesetId': CHANGESET_ID,
    },
]

# Apply the configuration to the HDB cluster
client.update_kx_cluster_databases(
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    databases=DB_CONFIG,
)

# Wait on the HDB cluster's status before continuing
wait_for_cluster_status(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
)
print("** Done **")

# Gateway: Re-Connect
Using PyKX, connect to the Gateway cluster and have it re-connect to its Databases. Connection to the HDB would have been lost during the HDB update process.


In [None]:
# Open a PyKX connection to the Gateway cluster
gw = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=GW_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Re-initialise the gateway so that it re-connects to its databases
gw("reinit[hdb_name; rdb_name]")

# Show the state of the gateway's connected processes
display(gw("select process, handle, connected, address from .conn.procs").pd())

# HDB: Before and After
Dates and counts of the HDB before update and after.

In [None]:
# Re-open the PyKX connection to the HDB cluster to capture its state after the update
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Row counts of `example` grouped by date, and the total row count
after_update_pdf = hdb("select counts:count i by date from example").pd()
after_rows = hdb("count example").py()

### Before

In [None]:
# Per-date row counts captured from the HDB at the start of the notebook
display(before_update_pdf)

# Total number of rows in `example` at that time
print(f"Rows: {before_rows:,}")

### After

In [None]:
# Per-date row counts queried from the HDB after the cluster database update
display(after_update_pdf)

# Total number of rows in `example` after the update
print(f"Rows: {after_rows:,}")

In [None]:
# Stamp the notebook with the local time at which this cell ran
print(f"Last Run: {datetime.datetime.now()}")