# PyKX Query Components
Query all components: the RDB, the HDB, and the Gateway.


In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import datetime
import json
import os
import shutil
import subprocess

import boto3

import pykx as kx

from managed_kx import *
from env import *

from basictick_setup import *

In [2]:
# Refresh credentials (best effort), then build the boto3 session and the
# FinSpace client used by every later cell.

# `ada` is an optional credential helper: only invoke it when it is on PATH.
# The command is passed as an argument list (no shell), so ACCOUNT_ID is never
# interpreted by a shell.
ada_path = shutil.which("ada")
if ada_path is not None:
    try:
        subprocess.run(
            [
                ada_path, "credentials", "update",
                f"--account={ACCOUNT_ID}",
                "--provider=isengard",
                "--role=Admin",
                "--once",
            ],
            check=False,  # a non-zero exit must not stop the notebook
        )
    except OSError as err:
        # Best effort: continue with whatever credentials are already configured.
        print(f"ada credentials update failed: {err}")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # no explicit keys supplied: create the session without arguments
    session = boto3.Session()
else:
    print("Using variables ...")
    # create AWS session: using access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [3]:
# Open a PyKX connection to the HDB cluster
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Row count per date, as a pandas DataFrame
hdb_pdf = hdb("select counts:count i by date from example").pd()

# Rows with index below 5 for the earliest date, as a pandas DataFrame
hdb_head_pdf = hdb("select from example where date = min date, i<5").pd()

# Total row count of the table, as a Python value
hdb_rows = hdb("count example").py()

# Current State of RDB

In [4]:
# Open a PyKX connection to the RDB cluster
rdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=RDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# First five and last five rows of the table, as pandas DataFrames
rdb_head_pdf = rdb("select [5] from example").pd()
rdb_tail_pdf = rdb("select [-5] from example").pd()

# Total row count of the table, as a Python value
rdb_rows = rdb("count example").py()

# Current State of GW

In [5]:
# Open a PyKX connection to the Gateway cluster
gw = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=GW_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

# Query for the gateway's process connection table (used before and after reinit)
PROCS_QUERY = "select process, handle, connected, address from .conn.procs"
# Maximum number of address characters kept for display
ADDRESS_DISPLAY_WIDTH = 110

proc_pdf = gw(PROCS_QUERY).pd()

# Re-initialise the gateway when no processes are listed or any is not connected
if (len(proc_pdf) == 0) or proc_pdf["connected"].eq(False).any():
    print("reinit Gateway")
    gw("reinit[hdb_name; rdb_name]")
    proc_pdf = gw(PROCS_QUERY).pd()

# Truncate long addresses for display; the ellipsis is appended only when
# characters were actually cut off, so short addresses are left unchanged.
address = proc_pdf["address"]
proc_pdf["address"] = address.where(
    address.str.len() <= ADDRESS_DISPLAY_WIDTH,
    address.str.slice(0, ADDRESS_DISPLAY_WIDTH) + "...",
)

# display(proc_pdf)  # uncomment to inspect the connection table

# Run the gateway query and keep the time-sorted result in `res` on the gateway.
# NOTE(review): the trailing `;` is intended to make the q expression return
# null so the full table is not shipped back and converted only to be thrown
# away; confirm against the gateway's message handler.
gw("res: `time xasc queryData[`example;`];")

# Pull only the first and last five rows of the stored result
gw_head_pdf = gw("select [5] from res").pd()
gw_tail_pdf = gw("select [-5] from res").pd()

# Number of Rows
gw_rows = gw("count res").py()

reinit Gateway


# HDB Contents

In [6]:
# Render the per-date counts followed by the sample rows from the HDB
display(hdb_pdf, hdb_head_pdf)

# Total HDB row count, formatted with thousands separators
print(f"Rows: {hdb_rows:,}")

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Unnamed: 0,date,sym,time,number
0,2023-04-14,aaa,2023-04-14 23:17:28.473,53231
1,2023-04-14,aaa,2023-04-14 23:17:28.473,153560
2,2023-04-14,aaa,2023-04-14 23:17:28.473,449428
3,2023-04-14,aaa,2023-04-14 23:17:28.473,631966
4,2023-04-14,aaa,2023-04-14 23:17:28.473,941566


Rows: 10,000,000


# RDB Contents

In [7]:
# Render the first and last rows fetched from the RDB
display(rdb_head_pdf, rdb_tail_pdf)

# Total RDB row count, formatted with thousands separators
print(f"Rows: {rdb_rows:,}")

Unnamed: 0,sym,time,number
0,gjn,2024-02-13 17:24:20.059043624,30
1,onb,2024-02-13 17:24:20.059043624,98
2,mdg,2024-02-13 17:24:20.059043624,46
3,lgo,2024-02-13 17:24:20.059043624,92
4,nmj,2024-02-13 17:24:20.069042753,78


Unnamed: 0,sym,time,number
0,lpn,2024-02-13 17:28:32.029042235,70
1,ilf,2024-02-13 17:28:32.029042235,45
2,kbm,2024-02-13 17:28:32.029042235,99
3,mho,2024-02-13 17:28:32.029042235,19
4,bep,2024-02-13 17:28:32.029042235,32


Rows: 113,944


# GW Contents

In [8]:
# Render the first and last rows of the result fetched through the gateway
display(gw_head_pdf, gw_tail_pdf)

# Row totals: the gateway result versus the sum of the separate HDB and RDB counts
print(f"  Gateway Rows: {gw_rows:,}")
print(f"HDB + RDB Rows: {hdb_rows+rdb_rows:,}")

# Note: the HDB + RDB total is expected to be lower than the gateway count because
# data keeps arriving at the RDB; the RDB was counted on its own first and was
# then queried again, later, as part of the gateway query.

Unnamed: 0,sym,time,number
0,aaa,2023-04-14 23:17:28.473,53231
1,aaa,2023-04-14 23:17:28.473,153560
2,aaa,2023-04-14 23:17:28.473,449428
3,aaa,2023-04-14 23:17:28.473,631966
4,aaa,2023-04-14 23:17:28.473,941566


Unnamed: 0,sym,time,number
0,bpj,2024-02-13 17:28:37.039045673,83
1,fam,2024-02-13 17:28:37.039045673,60
2,pnd,2024-02-13 17:28:37.039045673,54
3,hml,2024-02-13 17:28:37.039045673,53
4,pgg,2024-02-13 17:28:37.039045673,46


  Gateway Rows: 10,116,147
HDB + RDB Rows: 10,113,944
