In [1]:
# Setup
import numpy as np
import pandas as pd
import pyarrow as pa

import pykx as kx
# Configure the q console display size used when printing PyKX objects
# NOTE(review): presumably [rows, columns] — confirm against the PyKX docs
kx.q.system.console_size = [15, 100]

# Section #1: PyKX Basics

## Data Structures in PyKX:


### Atomic Types:
[Reference Card](https://code.kx.com/q/ref/#datatypes) for PyKX types

Atomic types are the most basic, indivisible data types that the library supports.

In [2]:
# Atomic Float type: construct it directly from a Python float

print(kx.FloatAtom(1.0))
# or, equivalently, evaluate a q literal
print(kx.q('1.0f'))

1f
1f


In [3]:
# Atomic Int type: construct it directly from a Python int

print(kx.IntAtom(1))
# or, equivalently, evaluate a q literal
print(kx.q('1i'))

1i
1i


In [4]:
# Atomic Boolean type

# True: from a Python bool, from the integer 1, or from the q literal 1b
print(kx.BooleanAtom(True))
print(kx.BooleanAtom(1))
print(kx.q('1b'))
# False: from a Python bool, from the integer 0, or from the q literal 0b
print(kx.BooleanAtom(False))
print(kx.BooleanAtom(0))
print(kx.q('0b'))

1b
1b
1b
0b
0b
0b


In [5]:
# Atomic Symbol type: construct it directly from a Python string

print(kx.SymbolAtom('abc'))
# or, equivalently, evaluate a q literal (symbols are written with a leading backtick)
print(kx.q('`abc'))

abc
abc


In [6]:
# Atomic Timestamp type (date and time separated by 'D')
print(kx.q('2017.08.17D04:00:00.000000000'))

# Atomic Date type
print(kx.q('2000.01.01d'))

# Atomic Month type
print(kx.q('2000.01m'))

2017.08.17D04:00:00.000000000
2000.01.01
2000.01m


##### Exercise 1
+ Return the atomic time for 11:47am

In [7]:
# Atomic Time type: 11:47am written as a q time literal (hh:mm:ss.mmm)
print(kx.q('11:47:00.000'))

11:47:00.000


### PyKX Collection Data Structure Types

### Vector Types:

In [8]:
# From a Python list
kx.IntVector([1,2,3,4])

pykx.IntVector(pykx.q('1 2 3 4i'))

In [9]:
# From a NumPy array (explicitly typed as 32-bit integers)
kx.IntVector(np.array([1, 2, 3, 4], dtype=np.int32))

pykx.IntVector(pykx.q('1 2 3 4i'))

In [10]:
# From a Pandas Series: kx.toq chooses the PyKX type from the input
# (the output recorded for this cell is a LongVector)
kx.toq(pd.Series([1, 2, 3, 4]))

pykx.LongVector(pykx.q('1 2 3 4'))

### PyKX Lists:

In [11]:
# A List can hold items of different types: here two lists of ints and one list of strings
kx.List([[1,2,3], [4,5,6],['a','b','c']])

pykx.List(pykx.q('
1 2 3
4 5 6
a b c
'))

### PyKX Dictionaries:

In [12]:
# Dictionary values may be Python lists or NumPy arrays
print(kx.Dictionary({'x': [1, 2, 3], 'x1': np.array([1, 2, 3])}))

x | 1 2 3
x1| 1 2 3


### PyKX Tables:

##### Example 1:

In [13]:
# Build the table row by row, then name the columns
print(
    kx.Table(
        [
            [1, 2, 'a'],
            [2, 3, 'b'],
            [3, 4, 'c'],
        ],
        columns=['col1', 'col2', 'col3'],
    )
)

col1 col2 col3
--------------
1    2    a   
2    3    b   
3    4    c   


##### Example 2:

In [14]:
# Build the table column by column from a dictionary of column name -> values
print(
    kx.Table(
        data={
            'col1': [1, 2, 3],
            'col2': [2, 3, 4],
            'col3': ['a', 'b', 'c'],
        }
    )
)

col1 col2 col3
--------------
1    2    a   
2    3    b   
3    4    c   


### PyKX Keyed Tables:

In [15]:
# Create a table from column data, then use set_index to make 'x' the key column
kx.Table(
    data={
        'x': [1, 2, 3],
        'x1': [2, 3, 4],
        'x2': ['a', 'b', 'c'],
    }
).set_index(['x'])

Unnamed: 0_level_0,x1,x2
x,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2,a
2,3,b
3,4,c


##### Exercise 2
+ Create a keyed table using the layout of example 1.

In [16]:
# Same row-wise layout as Example 1, with 'col1' promoted to the key column
print(
    kx.Table(
        [
            [1, 2, 'a'],
            [2, 3, 'b'],
            [3, 4, 'c'],
        ],
        columns=['col1', 'col2', 'col3'],
    ).set_index(['col1'])
)

col1| col2 col3
----| ---------
1   | 2    a   
2   | 3    b   
3   | 4    c   


## Generating PyKX Objects
In order to use the power of q and the functionality provided by PyKX, a user must at some point interact with a PyKX object. At its most basic level, these items are allocated C representations of q/kdb+ objects within a memory space managed by q. Keeping the data in this format allows it to be used directly for query/analytic execution in q without any translation overhead.

There are a number of ways to generate PyKX objects:

- Explicitly converting from a Python object to a PyKX object

- By evaluating q code using kx.q

- By retrieving a named entity from q's memory
 
- Through query of an external q session

### Creating PyKX Objects from Python Objects
This is one of the most common ways that PyKX data is generated.
Example formats include:
- Python
- Numpy
- Pandas
- PyArrow

#### Python Dictionary to PyKX Dictionary

In [17]:
# A plain Python dictionary whose values are two lists and one scalar
pydict = {'a': [1, 2, 3], 'b': ['a', 'b', 'c'], 'c': 2}
# kx.toq converts the Python object into a PyKX object
pykx_dict = kx.toq(pydict)
# Display the converted object
pykx_dict

#### Numpy Array to PyKX Vector

In [18]:
# A NumPy array explicitly typed as 32-bit integers
nparray = np.array([1, 2, 3, 4], dtype = np.int32)
# kx.toq converts the NumPy array into a PyKX vector
pykx_vector = kx.toq(nparray)
# Display the converted object
pykx_vector

pykx.IntVector(pykx.q('1 2 3 4i'))

#### Pandas DataFrame to PyKX Table

In [19]:
# A small Pandas DataFrame with one integer column and one string column
pdframe = pd.DataFrame(data = {'a':[1, 2, 3], 'b': ['a', 'b', 'c']})
# kx.toq converts the DataFrame into a PyKX table
pykx_table = kx.toq(pdframe)
# Display the converted object
pykx_table

Unnamed: 0,a,b
,,
0.0,1.0,a
1.0,2.0,b
2.0,3.0,c


#### Python List to PyKX List

In [20]:
# A plain Python list mixing an int, a float, a str and a bytes value
pyArray = [1, 2.5, "abc", b"defg"]
# kx.toq converts the mixed-type list into a generic PyKX List
pykx_list = kx.toq(pyArray)
# Display the converted object
pykx_list

pykx.List(pykx.q('
1
2.5
`abc
"defg"
'))

### Convert PyKX Objects to Pythonic Types

#### PyKX Dictionary to Python Dictionary

In [21]:
# .py() converts the PyKX object back into native Python objects
pykx_dict.py()

{'a': [1, 2, 3], 'b': ['a', 'b', 'c'], 'c': 2}

#### PyKX Vector to Numpy Array

In [22]:
# .np() converts the PyKX vector back into a NumPy array
pykx_vector.np()

array([1, 2, 3, 4], dtype=int32)

#### PyKX Table to Pandas DataFrame

In [23]:
# .pd() converts the PyKX table back into a Pandas DataFrame
pykx_table.pd()

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


## Data Generation Using PyKX Inbuilt Functions

#### Generating Data with 'random' Function

In [24]:
# Generate a vector of 1 million random floating point values between 0 and 100.0
# (first argument: number of values; second argument: upper limit of the range)
kx.random.random(1000000,100.0)

pykx.FloatVector(pykx.q('39.27524 51.70911 51.59796 40.66642 17.80839 30.17723 78.5033 53.47096 71.11716 41.1597 49.31835 ..'))

In [25]:
# Generate a 1 x 10 nested list (a single row holding 10 values) of values between 0 and 100
# Passing a list as the first argument sets the dimensions, so the result is a List, not a flat vector
kx.random.random([1,10],100)

pykx.List(pykx.q('62 94 27 50 40 32 88 22 98 53'))

In [26]:
# Generate a multi-dimensional list: the first argument lists the size of each dimension,
# and the second argument (2) limits the generated values to 0 and 1
kx.random.random([2,3,4], 2)

pykx.List(pykx.q('
0 0 1 0 1 1 0 1 1 0 1 0
0 1 0 0 0 0 0 0 0 1 1 0
'))

In [27]:
# Use nulls and infinities to generate random data across the full allowable range:
# a null GUID yields random GUIDs, an infinite int yields ints from the whole positive range
print(kx.random.random([5, 1], kx.GUIDAtom.null))
print(kx.random.random([2, 3, 2], kx.IntAtom.inf))

bf46ae83-4272-4d6f-0ed4-dd40c1376e29
8e6cb502-b0cf-9c03-d024-833c0b9ac483
8c2f866d-0f4e-4a96-bf8b-228220fbb9d4
61604999-49e9-f066-48f7-e88e92363957
8b3dfe26-6d4c-620f-f7c0-31852decc639
1566069007 1773121422 2104411811 1441846567 103906494  315107819 
931560883 2025997683  253249654 1026292723  391856598 789142547  


In [28]:
# Set the seed globally, so that subsequent random calls are reproducible
kx.random.seed(10)

# Or, set the seed for an individual function call via the `seed` keyword
kx.random.random(10, 100.0, seed = 10)

pykx.FloatVector(pykx.q('8.91041 83.45194 36.21949 99.9934 38.37986 86.19188 9.183638 25.30883 25.04566 75.17286'))

In [29]:
# Generate a 5-row table:
#   'x' column: random floats between 0 and 100
#   'y' column: random choice of 'a', 'b', or 'c'
PyKX_Table = kx.Table(
    data={
        'x': kx.random.random(5, 100.0),
        'y': kx.random.random(5, ['a', 'b', 'c']),
    }
)
PyKX_Table

Unnamed: 0,x,y
,,
0.0,8.91041,b
1.0,83.45194,a
2.0,36.21949,c
3.0,99.9934,c
4.0,38.37986,c


##### Exercise 3
Create a table with 2 columns; `a` and `b`. 

+ Column `a` should contain 5 random ints between 10 and 20.
+ Column `b` should contain 5 random floats between 0 and 30.

In [30]:
# Column 'a': 5 random picks from the integers 10..20 (inclusive)
# Column 'b': 5 random floats between 0 and 30
kx.Table(
    data={
        'a': kx.random.random(5, list(range(10, 21))),
        'b': kx.random.random(5, 30.0),
    }
)

Unnamed: 0,a,b
,,
0.0,17.0,24.67537
1.0,19.0,27.26629
2.0,10.0,19.3742
3.0,15.0,0.2688803
4.0,13.0,14.76849


### Intro to Indexing and Slicing

#### Retrieve a single item from a PyKX vector with Indexing

In [31]:
# Show the whole vector, then fetch the item at (zero-based) index 3
print(pykx_vector)
pykx_vector[3]

1 2 3 4i


pykx.IntAtom(pykx.q('4i'))

#### Retrieve the last item in the pykx vector

In [32]:
# A negative index counts from the end, so -1 is the last item
pykx_vector[-1]

pykx.IntAtom(pykx.q('4i'))

#### Retrieve a subsection of elements from a list

In [33]:
# Generate a 1 x 10 nested list of values between 0 and 100
# NOTE: this rebinds `pykx_list`, which an earlier cell assigned to a different object
pykx_list = kx.random.random([1,10],100)
# Take the single inner row with [0], then slice it to retrieve the items at indices 3 to 7
pykx_list[0][3:8]

pykx.LongVector(pykx.q('46 91 37 68 55'))

#### Retrieve the last n elements from a list

In [34]:
# Show the inner row, then slice from index -3 to the end to get its last 3 items
print(pykx_list[0])
pykx_list[0][-3:]

20 3 43 46 91 37 68 55 6 16


pykx.LongVector(pykx.q('55 6 16'))

#### Retrieve the content of a column in a table

In [35]:
# Show the whole table, then index by column name to retrieve that column as a vector
print(PyKX_Table)
PyKX_Table['x']

x        y
----------
8.91041  b
83.45194 a
36.21949 c
99.9934  c
38.37986 c


pykx.FloatVector(pykx.q('8.91041 83.45194 36.21949 99.9934 38.37986'))

## Loading Data from a CSV

In [36]:
# Using Pandas: read the CSV into a DataFrame
btc_df = pd.read_csv("data/BTCUSDT.csv")
# Report the (rows, columns) shape, then preview the first rows
print(btc_df.shape)
btc_df.head()

(3565390, 12)


Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
0,2017-08-17 04:00:00,4261.48,4261.48,4261.48,4261.48,1.775183,2017-08-17 04:00:59.999,7564.906851,3,0.075183,320.390851,0
1,2017-08-17 04:01:00,4261.48,4261.48,4261.48,4261.48,0.0,2017-08-17 04:01:59.999,0.0,0,0.0,0.0,0
2,2017-08-17 04:02:00,4280.56,4280.56,4280.56,4280.56,0.261074,2017-08-17 04:02:59.999,1117.542921,2,0.261074,1117.542921,0
3,2017-08-17 04:03:00,4261.48,4261.48,4261.48,4261.48,0.012008,2017-08-17 04:03:59.999,51.171852,3,0.012008,51.171852,0
4,2017-08-17 04:04:00,4261.48,4261.48,4261.48,4261.48,0.140796,2017-08-17 04:04:59.999,599.999338,1,0.140796,599.999338,0


In [37]:
# Using PyKX: read the same CSV file into a PyKX table
tab_BTC = kx.q.read.csv('data/BTCUSDT.csv')
# Preview the first rows
tab_BTC.head()

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
,,,,,,,,,,,,
0.0,2017.08.17D04:00:00.000000000,4261.48e,4261.48e,4261.48e,4261.48e,1.775183,2017.08.17D04:00:59.999000000,7564.907,3h,0.075183,320.3909,0b
1.0,2017.08.17D04:01:00.000000000,4261.48e,4261.48e,4261.48e,4261.48e,0f,2017.08.17D04:01:59.999000000,0f,0h,0f,0f,0b
2.0,2017.08.17D04:02:00.000000000,4280.56e,4280.56e,4280.56e,4280.56e,0.261074,2017.08.17D04:02:59.999000000,1117.543,2h,0.261074,1117.543,0b
3.0,2017.08.17D04:03:00.000000000,4261.48e,4261.48e,4261.48e,4261.48e,0.012008,2017.08.17D04:03:59.999000000,51.17185,3h,0.012008,51.17185,0b
4.0,2017.08.17D04:04:00.000000000,4261.48e,4261.48e,4261.48e,4261.48e,0.140796,2017.08.17D04:04:59.999000000,599.9993,1h,0.140796,599.9993,0b


## Writing Data to a CSV

In [38]:
# Write the PyKX table to a CSV file, using a comma as the delimiter
kx.q.write.csv('data/write_test.csv', PyKX_Table, delimiter=',')

PosixPath('data/write_test.csv')