# PyKX Query Components
Query all components of the deployment: the RDB, the HDB, and the Gateway (GW).


In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import datetime
import json
import os
import shutil
import subprocess

import boto3

import pykx as kx

from managed_kx import *
from env_kdb_1 import *

from basictick_setup import *

In [2]:
# Refresh AWS credentials (best effort) and build the boto3 session and
# FinSpace client used by every cell below.
session = None

# aws: use ada for credentials.
# Only attempt the refresh when the `ada` CLI is actually on PATH, and pass the
# command as an argument list so ACCOUNT_ID is never interpreted by a shell.
if shutil.which("ada") is not None:
    try:
        subprocess.run(
            [
                "ada", "credentials", "update",
                f"--account={ACCOUNT_ID}",
                "--provider=isengard",
                "--role=Admin",
                "--once",
            ],
            check=False,
        )
    except Exception as err:
        # The refresh stays best effort: report the problem instead of
        # silently swallowing it, then carry on with whatever credentials exist.
        print(f"ada credentials update skipped: {err}")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: let boto3 resolve credentials through its default chain
    session = boto3.Session()
else:
    print("Using variables ...")
    # create AWS session: using the explicit access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [3]:
# Open a PyKX connection to the HDB cluster
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Row counts of `example` grouped by date, converted to a pandas DataFrame
hdb_pdf = hdb("select counts:count i by date from example").pd()

# Total number of rows in `example`, converted to a Python value
hdb_rows = hdb("count example").py()

# Current State of RDB

In [4]:
# Open a PyKX connection to the RDB cluster
rdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=RDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# First five and last five rows of `example`, as pandas DataFrames
rdb_head_pdf = rdb("select [5] from example").pd()
rdb_tail_pdf = rdb("select [-5] from example").pd()

# Total number of rows in `example` at the time of this query
rdb_rows = rdb("count example").py()

# Current State of GW

In [5]:
# Open a PyKX connection to the Gateway (GW) cluster
gw = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=GW_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

In [6]:
# Show the gateway's view of its connected processes, re-initialising the
# gateway first if any of them is reported as not connected.
PROCS_QUERY = "select process, handle, connected, address from .conn.procs"
MAX_ADDRESS_CHARS = 110  # addresses longer than this are shortened for display

proc_pdf = gw(PROCS_QUERY).pd()

# are any processes not connected?
if proc_pdf["connected"].eq(False).any():
    print("reinit Gateway")
    # hdb_name / rdb_name are resolved on the gateway side, not in this notebook
    gw("reinit[hdb_name; rdb_name]")
    proc_pdf = gw(PROCS_QUERY).pd()

# truncate with ellipsis -- only addresses that were actually cut get the "..."
addresses = proc_pdf["address"]
too_long = addresses.str.len() > MAX_ADDRESS_CHARS
proc_pdf["address"] = addresses.where(
    ~too_long,
    addresses.str.slice(0, MAX_ADDRESS_CHARS) + "...",
)

display(proc_pdf)

Unnamed: 0,process,handle,connected,address
0,rdb,7,True,:ip-192-168-12-16.ec2.internal:5000:GATEWAY_basictickdb:Host=ip-192-168-12-16.ec2.internal&Port=5000&User=GATE...
1,hdb,9,True,:ip-192-168-5-194.ec2.internal:5000:GATEWAY_basictickdb:Host=ip-192-168-5-194.ec2.internal&Port=5000&User=GATE...
2,hdb,10,True,:ip-192-168-4-43.ec2.internal:5000:GATEWAY_basictickdb:Host=ip-192-168-4-43.ec2.internal&Port=5000&User=GATEWA...


In [7]:
# Run the gateway query once and keep the time-sorted result on the gateway
# as `res`. The return value of this call is not used, so it is not converted
# to pandas here; the data is fetched explicitly below.
gw("res: `time xasc queryData[`example;`]")

# Full result, plus its first five and last five rows, as pandas DataFrames
gw_pdf = gw("res").pd()
gw_head_pdf = gw("select [5] from res").pd()
gw_tail_pdf = gw("select [-5] from res").pd()

# Number of Rows
gw_rows = len(gw_pdf.index)

# HDB Contents

In [8]:
# Per-date row counts gathered from the HDB earlier in the notebook
display(hdb_pdf)

# Number of Rows (formatted with thousands separators)
print(f"Rows: {hdb_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 10,000,000


# RDB Contents

In [9]:
# First and last rows gathered from the RDB earlier in the notebook
display(rdb_head_pdf, rdb_tail_pdf)

# Total RDB row count, formatted with thousands separators
print(f"Rows: {rdb_rows:,}")

Unnamed: 0,sym,time,number
0,ikc,2023-06-19 23:36:40.873033934,4
1,pep,2023-06-19 23:36:40.873033934,13
2,pmh,2023-06-19 23:36:40.873033934,37
3,akd,2023-06-19 23:36:40.873033934,5
4,dio,2023-06-19 23:36:40.873033934,50


Unnamed: 0,sym,time,number
0,dgh,2023-06-20 00:04:31.843041421,46
1,kcf,2023-06-20 00:04:31.843041421,8
2,pdm,2023-06-20 00:04:31.853059579,78
3,nno,2023-06-20 00:04:31.853059579,72
4,gfl,2023-06-20 00:04:31.863055501,12


Rows: 752,817


# GW Contents

In [10]:
# First and last rows of the combined result returned by the gateway
display(gw_head_pdf, gw_tail_pdf)

# Total row count of the gateway result, formatted with thousands separators
print(f"Rows: {gw_rows:,}")

# Note: the GW total can be larger than the HDB and RDB totals printed above
# added together, because data is always arriving at the RDB: the RDB was
# queried on its own first and then again, later, as part of the GW query.

Unnamed: 0,sym,time,number
0,aaa,2023-04-14 23:17:28.473,53231
1,aaa,2023-04-14 23:17:28.473,153560
2,aaa,2023-04-14 23:17:28.473,449428
3,aaa,2023-04-14 23:17:28.473,631966
4,aaa,2023-04-14 23:17:28.473,941566


Unnamed: 0,sym,time,number
0,ieh,2023-06-20 00:04:32.843016875,27
1,epm,2023-06-20 00:04:32.843016875,21
2,dng,2023-06-20 00:04:32.853017038,2
3,dhe,2023-06-20 00:04:32.853017038,41
4,edm,2023-06-20 00:04:32.853017038,30


Rows: 10,753,291


In [11]:
# Record when the notebook was last executed
last_run = datetime.datetime.now()
print(f"Last Run: {last_run}")

Last Run: 2023-06-20 00:04:36.788252
