# PyKX Query Components
Query all components of the application.

## Architecture
<img src="images/Deepdive Diagrams-BasicTick V3.drawio.png"  width="80%">


In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import boto3
import json
import datetime

import pykx as kx

from env import *
from managed_kx import *

# Cluster names and database
from basictick_setup import *

In [2]:
# triggers credential get
session=None

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: using access variables
    session = boto3.Session()
else:
    print("Using variables ...")
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using Defaults ...


# Connections to Clusters

In [3]:
hdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)
tp  = get_pykx_connection(client, 
                           environmentId=ENV_ID, clusterName=TP_CLUSTER_NAME, 
                           userName=KDB_USERNAME, boto_session=session)
rdb = get_pykx_connection(client, 
                           environmentId=ENV_ID, clusterName=RDB_CLUSTER_NAME, 
                           userName=KDB_USERNAME, boto_session=session)
rts = get_pykx_connection(client, 
                           environmentId=ENV_ID, clusterName=RTS_CLUSTER_NAME, 
                           userName=KDB_USERNAME, boto_session=session)
gw = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=GW_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

# Query the HDB
With the HDB connection, query its data.

In [4]:
# ensure database is loaded
hdb('.Q.lo[hsym`$.aws.akdbp,"/",.aws.akdb,"/";0b;0b]')

# inventory of tables in the database and rows in each
print("All Tables and Counts")
display( hdb("tables[]!count each value each tables[]") )
print(40*'=')

# Dates and Counts of one table
tables = hdb('tables[]').py()

for t in tables:
    # anything to display?
    tt = hdb(f"select {t}s:count i by date from {t} where date in 10#desc date").pd()
    r = rdb(f'count {t}').py()

    if r == 0: 
        continue

    print(f'{t}: {r:,}')
    print(40*'-')
    display(tt)
    print(40*'=')

All Tables and Counts


quote: 361,558
----------------------------------------


Unnamed: 0_level_0,quotes
date,Unnamed: 1_level_1
2024-09-23,4323449
2024-09-24,4440838
2024-09-25,4446429
2024-09-26,4422176
2024-09-27,4447795
2024-09-30,4424949


trade: 72,309
----------------------------------------


Unnamed: 0_level_0,trades
date,Unnamed: 1_level_1
2024-09-23,866361
2024-09-24,888436
2024-09-25,888187
2024-09-26,883938
2024-09-27,889931
2024-09-30,884716




# Query the RBD
With the RDB connection, query its data. Will use a q magic cell to send a function to the RDB and run it from Python as well.


In [5]:
# inventory of tables in the database and rows in each
print("Counts")
display( rdb("tables[]!count each value each tables[]") )

# last timestamps in each table
print("Last Times")
display( rdb("tables[]!{exec first max `time$time from x}each tables[]") )
print()
print(40*'=')


# Dates and Counts of one table
tables = rdb('tables[]').py()

for t in tables:
    r = rdb(f'count {t}').py()

    if r == 0: 
        continue

    print(f'{t}: {r:,}')
    print(40*'-')
    # Summarize table by hour
    display( rdb(f"select {t}s:count i by hour:`hh$time from {t}") )
    print(40*'=')

Counts


Last Times



quote: 361,558
----------------------------------------


Unnamed: 0_level_0,quotes
hour,Unnamed: 1_level_1
22i,361558


trade: 72,309
----------------------------------------


Unnamed: 0_level_0,trades
hour,Unnamed: 1_level_1
22i,72309




In [6]:
# get the RDB conneciton string
rdb_conn_str = get_kx_connection_string(client, 
                                  environmentId=ENV_ID, clusterName=RDB_CLUSTER_NAME, 
                                   userName=KDB_USERNAME, boto_session=session)

# parse the RDB connection string to its components
rdb_host, rdb_port, rdb_username, rdb_password = parse_connection_string(rdb_conn_str)

## Define a function on the RDB
Using a q magic cell, define a function on the RDB.

In [7]:
%%q --host $rdb_host --port $rdb_port --user $rdb_username --pass $rdb_password

/ define a function to calculate TWAP
generateTWAP:{[syms;st;et] 
    if[syms~`;syms:exec distinct sym from trade];
    // Calculate statistics from trade and quote tables, join the tables with 
    // appropriate join function in this case a union join 
    quoteMetrics:select avg_spread:avg (ask-bid),twa_spread:(next[time]- time) wavg (ask-bid), avg_size:0.5*avg (asize+bsize),avg_duration:"t"$avg next[time]-time by sym from quote where sym in syms,time within(st;et); 
    tradeMetrics:select std_dev:2*dev price, twap:(next[time]-time) wavg price,max_price:max price, min_price:min price,vwap:size wavg price by sym from trade where sym in syms,time within(st;et); 
    quoteMetrics uj tradeMetrics 
 }


## Call Function on RDB and Display Results
Function is called on the RDB, results are then returned as a Pandas DataFrame using PyKX and display the results as a table in the notebook.

In [8]:
# Call the function for all tickers and a time range
display( rdb("generateTWAP[`;00:00:00.040; 23:59:59.999]") )

# call the function for some tickers and another range
display( rdb("generateTWAP[`AAPL`IBM;00:00:00.040; 23:59:59.999]") )

Unnamed: 0_level_0,avg_spread,twa_spread,avg_size,avg_duration,std_dev,twap,max_price,min_price,vwap
sym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,1.001146,1.109132,54.53156,02:58:08.439,1.472948,82.51456,84.83,80.22,82.60819
AIG,0.9989489,0.9811025,54.30372,07:29:15.978,0.2922466,27.08762,27.51,26.65,27.08464
AMD,0.9990165,1.088911,54.52227,04:58:10.249,0.4834961,33.71478,34.28,32.98,33.6965
DELL,1.000617,1.083906,54.3466,07:23:05.124,0.1452164,12.37196,12.56,12.16,12.34309
DOW,1.004468,0.9654454,54.54825,14:49:39.193,0.1769679,20.29217,20.51,20.03,20.26313
GOOG,0.9986624,1.028908,54.55306,02:30:27.655,1.379684,72.63063,74.35,70.61,72.56474
HPQ,0.9957252,0.957597,54.62613,07:27:06.021,0.427945,36.10174,36.68,35.46,36.08956
IBM,1.000524,0.9841408,54.38419,07:18:17.505,0.460618,42.86065,43.56,42.2,42.83483
INTC,0.9995656,0.9539038,54.53256,04:58:35.232,0.7186708,51.15608,52.34,49.92,51.11621
MSFT,0.9969269,1.013237,54.45091,04:59:26.108,0.3976977,29.85848,30.34,29.13,29.83608


Unnamed: 0_level_0,avg_spread,twa_spread,avg_size,avg_duration,std_dev,twap,max_price,min_price,vwap
sym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,1.001146,1.109132,54.53156,02:58:08.439,1.472948,82.51456,84.83,80.22,82.60819
IBM,1.000524,0.9841408,54.38419,07:18:17.505,0.460618,42.86065,43.56,42.2,42.83483


# Query the GW

In [9]:
# Query the GW for its connected processes, are all connected?
proc_pdf = gw("select process, handle, connected, address from .conn.procs").pd()

# are any processes not connected? if so-reconnect
if (len(proc_pdf) == 0) or len(proc_pdf[proc_pdf.connected == False].index) > 0:
    print("reinit Gateway")
    gw("reinit[hdb_name; rdb_name]")
    proc_pdf = gw("select process, handle, connected, address from .conn.procs").pd()

# truncate address with elipsis
proc_pdf['address'] = proc_pdf['address'].str.slice(0,110)+"..."

# display table
display(proc_pdf)

Unnamed: 0,process,handle,connected,address
0,rdb,13,True,:tcps://ip-192-168-15-202.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-15-202.ec2.internal&Port=443&Us...
1,hdb,14,True,:tcps://ip-192-168-9-254.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-9-254.ec2.internal&Port=443&User...
2,hdb,15,True,:tcps://ip-192-168-3-131.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-3-131.ec2.internal&Port=443&User...


In [10]:
# query GW using queryData function on gateway

# query and sample specific table for date range (today -3 days to tomorrow)
gw("res: `time xasc queryData[`trade;`;.z.D-3;.z.D+1]").pd()

# first/last 5 rows from res
display( gw("select [3] from res").pd() )
display( gw("select [-3] from res").pd() )

# select for a specific table from ticker with time range (5 days ago to now)
display( hdb(".query.data[`trade;`IBM;.z.P-5D;.z.P]") )

Unnamed: 0,sym,time,price,size,source
0,SBUX,2024-09-30 09:30:00.000003557,64.83,93,HDB
1,HPQ,2024-09-30 09:30:00.000046186,37.39,71,HDB
2,GOOG,2024-09-30 09:30:00.000050642,76.4,20,HDB


Unnamed: 0,sym,time,price,size,source
0,AAPL,2024-10-01 22:17:51.464809234,82.64,65,RDB
1,GOOG,2024-10-01 22:17:51.464809234,72.7,26,RDB
2,PEP,2024-10-01 22:17:51.464809234,22.19,12,RDB


Unnamed: 0,time,sym,price,size,source
,,,,,
0,2024.09.27D09:30:00.000080646,IBM,46.36,51,HDB
1,2024.09.27D09:30:00.000328268,IBM,46.39,69,HDB
2,2024.09.27D09:30:00.003756904,IBM,46.29,51,HDB
3,2024.09.27D09:30:00.011875479,IBM,46.29,80,HDB
4,2024.09.27D09:30:00.017520178,IBM,46.33,17,HDB
5,2024.09.27D09:30:00.026632638,IBM,46.38,57,HDB
6,2024.09.27D09:30:00.027952730,IBM,46.36,43,HDB
7,2024.09.27D09:30:00.029033798,IBM,46.36,21,HDB
8,2024.09.27D09:30:00.029682616,IBM,46.36,88,HDB


# Query the RTS Cluster
The RTS cluster is subscribing to the tickerplant and maintaining another set of tables. Connect to the RTS and show the contents of its tables.

In [11]:
# inventory of tables in the database and rows in each
print("All Tables and Counts")
display( rts("tables[]!count each value each tables[]") )

tables = rts('tables[]').py()

# show contents of tables
for t in tables:
    # anything to display?
    tt = rts(f"select from {t}").pd()
    r = len(tt.index)

    # nothing in table
    if r == 0: 
        continue

    # print table contents
    print(f'{t}: {r:,}')
    print(100*'=')

    # Contents of table, transponse if small
    if r < 20:
        display(tt.T)
    else:
        display(tt)
    print(100*'-')


All Tables and Counts


trade_hlcv: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
high,84.83,27.51,34.28,12.56,20.51,74.35,36.68,43.56,52.34,30.34,36.16,22.39,60.47,63.92,18.06
low,80.22,26.65,32.98,12.16,20.03,70.61,35.46,42.2,49.92,29.13,35.08,21.53,58.39,61.52,17.63
close,82.64,27.09,33.58,12.26,20.25,72.7,36.38,42.66,50.99,30.26,35.63,22.19,59.75,62.28,17.79
volume,470232.0,183547.0,272317.0,191349.0,94989.0,556974.0,193491.0,192599.0,277620.0,281621.0,188796.0,379642.0,187636.0,383573.0,91837.0


----------------------------------------------------------------------------------------------------
trade_last: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
time,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234,2024-10-01 22:17:51.464809234
price,82.64,27.09,33.58,12.26,20.25,72.7,36.38,42.66,50.99,30.26,35.63,22.19,59.75,62.28,17.79
size,65,57,82,21,97,26,60,65,43,60,21,12,28,96,33


----------------------------------------------------------------------------------------------------
trade_vwap: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
vwap,3882.523587,1272.827758,1583.945955,580.038548,952.320515,3410.400361,1696.4696,2013.413749,2402.364924,1402.194321,1675.430814,1033.168567,2792.754534,2949.820614,838.715982
volume,470232.0,183547.0,272317.0,191349.0,94989.0,556974.0,193491.0,192599.0,277620.0,281621.0,188796.0,379642.0,187636.0,383573.0,91837.0


----------------------------------------------------------------------------------------------------


In [12]:
print( f"Last Run: {datetime.datetime.now()}" )

Last Run: 2024-10-01 22:17:59.140098
