# PyKX Query Components
Query all components of the application.

## Architecture
<img src="images/Deepdive Diagrams-BasicTick V3.drawio.png"  width="80%">


In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import boto3
import json
import datetime

import pykx as kx

from env import *
from managed_kx import *

# Cluster names and database
from basictick_setup import *

In [2]:
# triggers credential get
session=None

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: using access variables
    session = boto3.Session()
else:
    print("Using variables ...")
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using Defaults ...


# Connections to Clusters

In [3]:
hdb = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)
tp  = get_pykx_connection(client, 
                           environmentId=ENV_ID, clusterName=TP_CLUSTER_NAME, 
                           userName=KDB_USERNAME, boto_session=session)
rdb = get_pykx_connection(client, 
                           environmentId=ENV_ID, clusterName=RDB_CLUSTER_NAME, 
                           userName=KDB_USERNAME, boto_session=session)
rts = get_pykx_connection(client, 
                           environmentId=ENV_ID, clusterName=RTS_CLUSTER_NAME, 
                           userName=KDB_USERNAME, boto_session=session)
gw = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=GW_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

# Query the HDB
With the HDB connection, query its data.

In [4]:
# ensure database is loaded
hdb('.Q.lo[hsym`$.aws.akdbp,"/",.aws.akdb,"/";0b;0b]')

# inventory of tables in the database and rows in each
print("All Tables and Counts")
display( hdb("tables[]!count each value each tables[]") )
print(40*'=')

# Dates and Counts of one table
tables = hdb('tables[]').py()

for t in tables:
    # anything to display?
    tt = hdb(f"select {t}s:count i by date from {t} where date in 10#desc date").pd()
    r = rdb(f'count {t}').py()

    if r == 0: 
        continue

    print(f'{t}: {r:,}')
    print(40*'-')
    display(tt)
    print(40*'=')

All Tables and Counts


quote: 1,500,840
----------------------------------------


Unnamed: 0_level_0,quotes
date,Unnamed: 1_level_1
2024-07-30,4356637
2024-07-31,4495478
2024-08-01,4401306
2024-08-02,4471510
2024-08-05,4711942
2024-08-06,4619618
2024-08-07,4544274
2024-08-08,4333345


trade: 299,185
----------------------------------------


Unnamed: 0_level_0,trades
date,Unnamed: 1_level_1
2024-07-30,872530
2024-07-31,899400
2024-08-01,879672
2024-08-02,894169
2024-08-05,941313
2024-08-06,924403
2024-08-07,907938
2024-08-08,867065




# Query the RBD
With the RDB connection, query its data.

In [5]:
# inventory of tables in the database and rows in each
print("All Tables and Counts")
display( rdb("tables[]!count each value each tables[]") )
print(40*'=')

# Dates and Counts of one table
tables = rdb('tables[]').py()

for t in tables:
    r = rdb(f'count {t}').py()

    if r == 0: 
        continue
    
    print(f'{t}: {r:,}')
    print(40*'-')
    # Summarize table by hour
    display( rdb(f"select {t}s:count i by hour:`hh$time from {t}") )
    print(40*'=')

All Tables and Counts


quote: 1,500,840
----------------------------------------


Unnamed: 0_level_0,quotes
hour,Unnamed: 1_level_1
18i,1472949
19i,27891


trade: 299,185
----------------------------------------


Unnamed: 0_level_0,trades
hour,Unnamed: 1_level_1
18i,293564
19i,5621




In [6]:
# define a function on the RDB
rdb("""
    generateTWAP:{[syms;st;et] 
        if[syms~`;syms:exec distinct sym from trade];
        // Calculate statistics from trade and quote tables, join the tables with 
        // appropriate join function in this case a union join 
        quoteMetrics:select avg_spread:avg (ask-bid),twa_spread:(next[time]- time) wavg (ask-bid), avg_size:0.5*avg (asize+bsize),avg_duration:"t"$avg next[time]-time by sym from quote where sym in syms,time within(st;et); 
        tradeMetrics:select std_dev:2*dev price, twap:(next[time]-time) wavg price,max_price:max price, min_price:min price,vwap:size wavg price by sym from trade where sym in syms,time within(st;et); 
        quoteMetrics uj tradeMetrics 
     }
""")

# Call the function for all tickers and a time range
display( rdb("generateTWAP[`;08:00:00.040; 19:00:00.000]") )

# call the function for some tickers and another range
display( rdb("generateTWAP[`AAPL`IBM;08:00:00.040; 19:00:00.000]") )

Unnamed: 0_level_0,avg_spread,twa_spread,avg_size,avg_duration,std_dev,twap,max_price,min_price,vwap
sym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,1.000881,0.9739222,54.4808,04:14:53.595,2.067426,88.3951,92.75,85.15,88.1949
AIG,1.001951,0.999587,54.52951,10:36:19.954,0.4259227,27.9112,28.72,27.09,27.86678
AMD,1.000812,1.024824,54.45056,07:07:21.552,0.5857266,32.85027,33.81,31.87,32.86314
DELL,0.9988839,0.9633419,54.58787,10:37:53.739,0.1709594,12.21328,12.48,11.94,12.20219
DOW,1.003827,1.019948,54.52811,21:05:55.213,0.2150786,19.95104,20.25,19.6,19.95345
GOOG,1.000692,1.023021,54.5172,03:31:57.170,1.918032,71.53865,74.46,68.53,71.58962
HPQ,1.001359,1.00159,54.52843,10:36:16.147,0.5750852,36.70754,37.54,35.84,36.67851
IBM,0.9989498,0.9729474,54.40382,10:32:26.955,0.6119523,41.56243,42.54,40.27,41.58269
INTC,0.9990436,1.001117,54.48641,07:03:59.531,1.068531,52.10804,54.03,49.97,52.05869
MSFT,1.00151,1.007583,54.48041,07:03:00.746,0.5531491,29.25257,30.39,28.4,29.23613


Unnamed: 0_level_0,avg_spread,twa_spread,avg_size,avg_duration,std_dev,twap,max_price,min_price,vwap
sym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,1.000881,0.9739222,54.4808,04:14:53.595,2.067426,88.3951,92.75,85.15,88.1949
IBM,0.9989498,0.9729474,54.40382,10:32:26.955,0.6119523,41.56243,42.54,40.27,41.58269


# Query the GW

In [7]:
# Query the GW for its connected processes, are all connected?
proc_pdf = gw("select process, handle, connected, address from .conn.procs").pd()

# are any processes not connected? if so-reconnect
if (len(proc_pdf) == 0) or len(proc_pdf[proc_pdf.connected == False].index) > 0:
    print("reinit Gateway")
    gw("reinit[hdb_name; rdb_name]")
    proc_pdf = gw("select process, handle, connected, address from .conn.procs").pd()
    
# truncate address with elipsis
proc_pdf['address'] = proc_pdf['address'].str.slice(0,110)+"..."

# display table
display(proc_pdf)

Unnamed: 0,process,handle,connected,address
0,rdb,13,True,:tcps://ip-192-168-2-87.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-2-87.ec2.internal&Port=443&User=G...
1,hdb,14,True,:tcps://ip-192-168-4-114.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-4-114.ec2.internal&Port=443&User...
2,hdb,15,True,:tcps://ip-192-168-15-75.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-15-75.ec2.internal&Port=443&User...


In [8]:
# query GW using queryData function on gateway

# query and sample specific table for date range (today -3 days to tomorrow)
gw("res: `time xasc queryData[`trade;`;.z.D-3;.z.D+1]").pd()

# first/last 5 rows from res
display( gw("select [3] from res").pd() )
display( gw("select [-3] from res").pd() )

# select for a specific table from ticker with time range (5 days ago to now)
display( hdb(".query.data[`trade;`IBM;.z.P-5D;.z.P]") )

Unnamed: 0,sym,time,price,size,source
0,SBUX,2024-08-06 09:30:00.000021450,68.86,13,HDB
1,SBUX,2024-08-06 09:30:00.000035619,68.88,35,HDB
2,HPQ,2024-08-06 09:30:00.000052489,39.21,22,HDB


Unnamed: 0,sym,time,price,size,source
0,MSFT,2024-08-09 19:00:49.501276451,29.39,90,RDB
1,HPQ,2024-08-09 19:00:49.501276451,36.56,71,RDB
2,PRU,2024-08-09 19:00:49.501276451,58.76,25,RDB


Unnamed: 0,time,sym,price,size,source
,,,,,
0,2024.08.05D09:30:00.000030592,IBM,43.82,81,HDB
1,2024.08.05D09:30:00.000152320,IBM,43.81,82,HDB
2,2024.08.05D09:30:00.000268184,IBM,43.82,15,HDB
3,2024.08.05D09:30:00.000920274,IBM,43.83,21,HDB
4,2024.08.05D09:30:00.003657754,IBM,43.78,36,HDB
5,2024.08.05D09:30:00.003880730,IBM,43.8,59,HDB
6,2024.08.05D09:30:00.008656896,IBM,43.81,79,HDB
7,2024.08.05D09:30:00.009098791,IBM,43.82,66,HDB
8,2024.08.05D09:30:00.010738124,IBM,43.84,50,HDB


# Query the RTS Cluster
The RTS cluster is subscribing to the tickerplant and maintaining another set of tables. Connect to the RTS and show the contents of its tables.

In [9]:
# inventory of tables in the database and rows in each
print("All Tables and Counts")
display( rts("tables[]!count each value each tables[]") )

tables = rts('tables[]').py()

# show contents of tables
for t in tables:
    # anything to display?
    tt = rts(f"select from {t}").pd()
    r = len(tt.index)
    
    # nothing in table
    if r == 0: 
        continue
    
    # print table contents
    print(f'{t}: {r:,}')
    print(100*'=')
    
    # Contents of table, transponse if small
    if r < 20:
        display(tt.T)
    else:
        display(tt)
    print(100*'-')
        

All Tables and Counts


trade_hlcv: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
high,92.75,28.72,33.81,12.48,20.25,74.46,37.54,42.54,54.03,30.39,36.11,22.28,60.65,66.43,18.57
low,85.15,27.09,31.87,11.94,19.6,68.53,35.84,40.27,49.97,28.4,34.54,20.8,58.07,61.35,17.93
close,88.11,27.98,33.3,12.18,19.98,71.22,36.56,41.34,51.93,29.39,35.39,21.94,58.76,64.43,18.28
volume,1920941.0,792218.0,1158030.0,782109.0,385414.0,2338582.0,784365.0,782344.0,1163705.0,1161933.0,773579.0,1534744.0,783418.0,1553778.0,381991.0


----------------------------------------------------------------------------------------------------
trade_last: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
time,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451,2024-08-09 19:00:49.501276451
price,88.11,27.98,33.3,12.18,19.98,71.22,36.56,41.34,51.93,29.39,35.39,21.94,58.76,64.43,18.28
size,95,14,25,58,72,90,71,50,98,90,34,24,25,54,90


----------------------------------------------------------------------------------------------------
trade_vwap: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
vwap,24162.41,7635.365293,9004.299,3343.499682,5467.029306,19620.12,10050.16415,11393.546314,14262.65,8010.686,9682.857055,5896.709,16278.11164,17582.5,5001.022904
volume,1920941.0,792218.0,1158030.0,782109.0,385414.0,2338582.0,784365.0,782344.0,1163705.0,1161933.0,773579.0,1534744.0,783418.0,1553778.0,381991.0


----------------------------------------------------------------------------------------------------


In [10]:
print( f"Last Run: {datetime.datetime.now()}" )

Last Run: 2024-08-09 19:00:56.788507
