# PyKX Query Components
Query all components of the application.

## Architecture
<img src="images/Deepdive Diagrams-BasicTick V3.drawio.png"  width="80%">


In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import boto3
import json
import datetime

import pykx as kx

from env import *
from managed_kx import *

# Cluster names and database
from basictick_setup import *

In [2]:
# Build the boto3 session (this is what triggers the credential lookup)
session=None

if AWS_ACCESS_KEY_ID is not None:
    # explicit credentials were supplied: pass key, secret and token through
    print("Using variables ...")
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN,
    )
else:
    # no explicit key: let boto3 resolve credentials with its defaults
    print("Using Defaults ...")
    session = boto3.Session()

# FinSpace client created from that session against the configured endpoint
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using Defaults ...


# Connections to Clusters

In [3]:
def _connect(cluster_name):
    """Return a PyKX connection to `cluster_name` in ENV_ID, as KDB_USERNAME."""
    return get_pykx_connection(
        client,
        environmentId=ENV_ID,
        clusterName=cluster_name,
        userName=KDB_USERNAME,
        boto_session=session,
    )


# One connection per cluster of the application, opened in the same order as before
hdb = _connect(HDB_CLUSTER_NAME)
tp = _connect(TP_CLUSTER_NAME)
rdb = _connect(RDB_CLUSTER_NAME)
cep = _connect(CEP_CLUSTER_NAME)
gw = _connect(GW_CLUSTER_NAME)

# Query the HDB
With the HDB connection, query its data.

In [4]:
# Make sure the database is loaded on the HDB (q: .Q.lo)
hdb('.Q.lo[hsym`$.aws.akdbp,"/",.aws.akdb,"/";0b;0b]')

# Every table in the database mapped to its row count
print("All Tables and Counts")
table_counts = hdb("tables[]!count each value each tables[]")
display(table_counts)
print('=' * 40)

# For each table: row counts by date, restricted to the dates in `10#desc date`
table_names = hdb('tables[]').py()

for name in table_names:
    by_date = hdb(f"select {name}s:count i by date from {name} where date in 10#desc date").pd()
    n_dates = len(by_date.index)

    # Only tables that returned at least one date are shown.
    # Note: the number printed after the table name is the count of dates, not rows.
    if n_dates > 0:
        print(f'Table: {name}: {n_dates:,}')
        print('-' * 40)
        display(by_date)
        print('=' * 40)

All Tables and Counts


Table: quote: 8
----------------------------------------


Unnamed: 0_level_0,quotes
date,Unnamed: 1_level_1
2024-07-29,4356637
2024-07-30,4495478
2024-07-31,4401306
2024-08-01,4471510
2024-08-02,4711942
2024-08-05,4619618
2024-08-06,4544274
2024-08-07,4333345


Table: trade: 8
----------------------------------------


Unnamed: 0_level_0,trades
date,Unnamed: 1_level_1
2024-07-29,872530
2024-07-30,899400
2024-07-31,879672
2024-08-01,894169
2024-08-02,941313
2024-08-05,924403
2024-08-06,907938
2024-08-07,867065




# Query the RDB
With the RDB connection, query its data.

In [5]:
# Every table on the RDB mapped to its row count
print("All Tables and Counts")
display(rdb("tables[]!count each value each tables[]"))
print('=' * 40)

# For each non-empty table: total rows, then rows per hour
table_names = rdb('tables[]').py()

for name in table_names:
    row_count = rdb(f'count {name}').py()

    if row_count != 0:
        print(f'Table: {name}: {row_count:,}')
        print('-' * 40)
        # hour is derived on the RDB with `hh$time
        hourly_counts = rdb(f"select {name}s:count i by hour:`hh$time from {name}")
        display(hourly_counts)
        print('=' * 40)

All Tables and Counts


Table: quote: 34,563
----------------------------------------


Unnamed: 0_level_0,quotes
hour,Unnamed: 1_level_1
19i,34563


Table: trade: 7,965
----------------------------------------


Unnamed: 0_level_0,trades
hour,Unnamed: 1_level_1
19i,7965




In [6]:
# Define generateTWAP[syms;st;et] on the RDB process.
# For the requested syms and the window (st;et) it computes per-sym quote
# metrics and trade metrics and combines the two keyed results with uj.
# Passing ` as syms selects every distinct sym found in trade.
rdb("""
    generateTWAP:{[syms;st;et] 
        if[syms~`;syms:exec distinct sym from trade];
        // Calculate statistics from trade and quote tables, join the tables with 
        // appropriate join function in this case a union join 
        quoteMetrics:select avg_spread:avg (ask-bid),twa_spread:(next[time]- time) wavg (ask-bid), avg_size:0.5*avg (asize+bsize),avg_duration:"t"$avg next[time]-time by sym from quote where sym in syms,time within(st;et); 
        tradeMetrics:select std_dev:2*dev price, twap:(next[time]-time) wavg price,max_price:max price, min_price:min price,vwap:size wavg price by sym from trade where sym in syms,time within(st;et); 
        quoteMetrics uj tradeMetrics 
     }
""")

# Call the function for all tickers (` as syms) over 08:00:00.040 - 19:00:00.000
# NOTE(review): the saved output of both calls below has no rows; st/et are
# time-of-day literals — confirm they match the type/range of `time` on the RDB.
display( rdb("generateTWAP[`;08:00:00.040; 19:00:00.000]") )

# Call the function for two tickers (AAPL and IBM) over the same time range
display( rdb("generateTWAP[`AAPL`IBM;08:00:00.040; 19:00:00.000]") )

Unnamed: 0_level_0,avg_spread,twa_spread,avg_size,avg_duration,std_dev,twap,max_price,min_price,vwap
sym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


Unnamed: 0_level_0,avg_spread,twa_spread,avg_size,avg_duration,std_dev,twap,max_price,min_price,vwap
sym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


# Current State of GW

In [7]:
# Ask the GW which processes it is connected to; at least one is expected
proc_pdf = gw("select process, handle, connected, address from .conn.procs").pd()

# Re-initialise the gateway when it lists no processes or any of them is
# disconnected, then read the process list again
if (len(proc_pdf) == 0) or proc_pdf['connected'].eq(False).any():
    print("reinit Gateway")
    gw("reinit[hdb_name; rdb_name]")
    proc_pdf = gw("select process, handle, connected, address from .conn.procs").pd()

# Shorten long addresses for display. The ellipsis is appended only to values
# that were actually cut, so short addresses are shown unmodified.
# Skipped for an empty frame, where the column may not support .str access.
max_address_chars = 110
if len(proc_pdf) > 0:
    address = proc_pdf['address']
    is_too_long = address.str.len() > max_address_chars
    proc_pdf['address'] = address.mask(
        is_too_long, address.str.slice(0, max_address_chars) + "..."
    )

display(proc_pdf)

reinit Gateway


Unnamed: 0,process,handle,connected,address
0,rdb,13,True,:tcps://ip-192-168-13-125.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-13-125.ec2.internal&Port=443&Us...
1,hdb,14,True,:tcps://ip-192-168-0-232.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-0-232.ec2.internal&Port=443&User...
2,hdb,15,True,:tcps://ip-192-168-11-3.ec2.internal:443:GATEWAY_basictickdb:Host=ip-192-168-11-3.ec2.internal&Port=443&User=G...


In [8]:
# Query through the gateway with its queryData function

# Assign to `res` on the gateway: trade rows from three days ago (.z.D-3)
# to tomorrow (.z.D+1), sorted ascending by time.
# NOTE(review): the .pd() result of this call is not used afterwards.
gw("res: `time xasc queryData[`trade;`;.z.D-3;.z.D+1]").pd()

# First 3 rows of res
res_head = gw("select [3] from res").pd()
display(res_head)

# Last 3 rows of res
res_tail = gw("select [-3] from res").pd()
display(res_tail)

# IBM trades from 5 days ago (.z.P-5D) to now (.z.P) via .query.data.
# NOTE(review): this call goes to the hdb connection, not gw — confirm intended.
ibm_trades = hdb(".query.data[`trade;`IBM;.z.P-5D;.z.P]")
display(ibm_trades)

Unnamed: 0,sym,time,price,size,source
0,SBUX,2024-08-05 09:30:00.000021450,68.86,13,HDB
1,SBUX,2024-08-05 09:30:00.000035619,68.88,35,HDB
2,HPQ,2024-08-05 09:30:00.000052489,39.21,22,HDB


Unnamed: 0,sym,time,price,size,source
0,AMD,2024-08-08 19:31:23.871698273,32.52,85,RDB
1,AAPL,2024-08-08 19:31:23.871698273,88.41,73,RDB
2,INTC,2024-08-08 19:31:23.871698273,51.89,90,RDB


Unnamed: 0,time,sym,price,size,source
,,,,,
0,2024.08.05D09:30:00.000638570,IBM,45.43,33,HDB
1,2024.08.05D09:30:00.004999379,IBM,45.4,17,HDB
2,2024.08.05D09:30:00.006221554,IBM,45.41,29,HDB
3,2024.08.05D09:30:00.007944273,IBM,45.41,71,HDB
4,2024.08.05D09:30:00.014829246,IBM,45.4,64,HDB
5,2024.08.05D09:30:00.021750883,IBM,45.41,29,HDB
6,2024.08.05D09:30:00.022740963,IBM,45.41,92,HDB
7,2024.08.05D09:30:00.031757875,IBM,45.42,46,HDB
8,2024.08.05D09:30:00.032541080,IBM,45.4,41,HDB


# Query the CEP Cluster
The CEP cluster is subscribing to the tickerplant and maintaining another set of tables. 

In [9]:
# Every table on the CEP cluster mapped to its row count
print("All Tables and Counts")
display(cep("tables[]!count each value each tables[]"))

# Show the full contents of each non-empty table
for name in cep('tables[]').py():
    contents = cep(f"select from {name}").pd()
    n_rows = len(contents.index)

    # nothing to display for an empty table
    if n_rows == 0:
        continue

    print(f'Table: {name}: {n_rows:,}')
    print('=' * 100)

    # tables under 20 rows are shown transposed, larger ones as-is
    display(contents.T if n_rows < 20 else contents)
    print('-' * 100)
        

All Tables and Counts


Table: trade_hlcv: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
high,89.99,28.08,33.43,12.32,20.25,74.31,37.2,42.49,54.03,29.66,36.1,21.89,59.98,65.65,18.34
low,87.22,27.43,32.31,12.04,19.75,69.87,36.29,40.55,50.95,28.77,35.0,21.42,58.38,63.01,17.99
close,88.41,28.0,32.52,12.24,19.79,70.95,36.64,40.86,51.89,29.37,35.49,21.87,59.55,64.14,18.26
volume,52113.0,21770.0,30675.0,20896.0,9805.0,61961.0,21707.0,20448.0,29441.0,30148.0,20416.0,42265.0,21920.0,41708.0,10277.0


----------------------------------------------------------------------------------------------------
Table: trade_last: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
time,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273,2024-08-08 19:31:23.871698273
price,88.41,28.0,32.52,12.24,19.79,70.95,36.64,40.86,51.89,29.37,35.49,21.87,59.55,64.14,18.26
size,73,76,85,79,46,58,43,17,90,29,29,85,82,87,71


----------------------------------------------------------------------------------------------------
Table: trade_vwap: 15


sym,AAPL,AIG,AMD,DELL,DOW,GOOG,HPQ,IBM,INTC,MSFT,ORCL,PEP,PRU,SBUX,TXN
vwap,619.219486,194.432209,230.465213,85.336974,139.771523,502.54691,257.051377,290.896564,364.579759,204.617537,248.418194,151.558018,414.209033,449.483979,127.568554
volume,52113.0,21770.0,30675.0,20896.0,9805.0,61961.0,21707.0,20448.0,29441.0,30148.0,20416.0,42265.0,21920.0,41708.0,10277.0


----------------------------------------------------------------------------------------------------


In [10]:
# Record when this notebook was last executed (datetime.now() of the kernel)
run_time = datetime.datetime.now()
print(f"Last Run: {run_time}")

Last Run: 2024-08-08 19:31:30.521756
