### Install tendb
Must install latest tendb before running the commands
Install from released package from pypi
```bash
$ pip install -i https://test.pypi.org/simple/ tendb
```
Read local setup.py and install tendb
```bash
pip install /mnt/c/Users/hkver/Documents/dbai/dbaistuff/py/tendb
```
Install from local wheel file
```bash
pip install /mnt/c/Users/hkver/Documents/dbai/dbaistuff/py/tendb/dist/tendb-0.0.1-py3-none-any.whl
```

Import Apache arrow

In [1]:
import pyarrow as pa
from pyarrow import csv

Import Liten

In [2]:
import liten as ten



Import Ray to be used as a cluster

In [3]:
import ray

Start a cluster with single worker.

In [4]:
ray.init(num_cpus=1)

2021-03-28 18:45:58,635	INFO services.py:1171 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


{'node_ip_address': '192.168.128.173',
 'raylet_ip_address': '192.168.128.173',
 'redis_address': '192.168.128.173:6379',
 'object_store_address': '/tmp/ray/session_2021-03-28_18-45-58_028542_852/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-03-28_18-45-58_028542_852/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2021-03-28_18-45-58_028542_852',
 'metrics_export_port': 60767,
 'node_id': 'a015d7ebd96693b8f5b7ca0bb66f58c36dafc1f8'}

In [6]:
ray.cluster_resources()

{'CPU': 1.0,
 'memory': 142.0,
 'object_store_memory': 49.0,
 'node:192.168.128.173': 1.0}

Liten wrappers for Arrow and Ray

In [64]:
import functools
from functools import wraps

DimTable = 0
FactTable = 1

class RCLiten:
    """An actor wrapper for Liten Cache"""
    
    num_table = 0
    table_name = ""
    table_type = FactTable
    
    def __init__(self):
        self.tc = ten.new_cache()
        self.table_name = "_t" + str(self.num_table);
        self.table_type = FactTable
    
    def set_table(self, table_name, table_type):
        self.table_name = table_name
        self.table_type = table_type
    
    def read_csv(self, *args, **kwargs):
        arrow_table = pa.csv.read_csv(*args, **kwargs)
        self.tc.add_table(self.table_name.encode('utf-8'), arrow_table, self.table_type)
        added_table_name = self.table_name
        self.num_table = self.num_table + 1
        self.table_name = "_t" + str(self.num_table)
        self.table_type = FactTable
        return added_table_name

    def info(self):
        return self.tc.info()

    def get_table(self, name):
        return self.tc.get_table(name)

    def make_dtensor_table(self, name):
        return self.tc.make_dtensor_table(name)

    def make_dtensor(self):
        return self.tc.make_dtensor()

    def query6(self):
        return self.tc.query6()
    
    def query5(self):
        return self.tc.query5()


Create a Liten Cache Actor. It is residing on a remote node, and being executed on that node. tc is the Liten Cache actor handle.

In [65]:
RCLiten = ray.remote(RCLiten)
tc = RCLiten.remote()



These are fact and dimension tables of TPCH. Read them remotely.

In [66]:
fact_tables = ['lineitem']
dim_tables = ['customer','orders','supplier','nation','region']
tpch_dir = '/mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/'

In [67]:
def read_tables(tables, table_type):
    arrow_tables = []
    for table_name in tables:
        tpch_table = tpch_dir+table_name+'.tbl'
        print('Reading ', tpch_table)
        tc.set_table.remote(table_name, table_type)
        pytable = tc.read_csv.remote(input_file=tpch_table, parse_options=csv_options)
        # print(' Rows=', pytable.num_rows,' Cols=', pytable.num_columns)
        arrow_tables.append(pytable)
    return arrow_tables

In [68]:
%%time
csv_options = pa.csv.ParseOptions(delimiter='|')
pa_fact_tables = read_tables(fact_tables, 1)
pa_dim_tables = read_tables(dim_tables, 0)

Reading  /mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/lineitem.tbl
Reading  /mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/customer.tbl
Reading  /mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/orders.tbl
Reading  /mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/supplier.tbl
Reading  /mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/nation.tbl
Reading  /mnt/c/Users/hkver/Documents/dbai/tpch-kit/sf1g/region.tbl
CPU times: user 25.7 ms, sys: 0 ns, total: 25.7 ms
Wall time: 14.4 ms
[2m[36m(pid=2440)[0m Added Table= b'lineitem'
[2m[36m(pid=2440)[0m Added Table= b'customer'
[2m[36m(pid=2440)[0m Added Table= b'orders'
[2m[36m(pid=2440)[0m Added Table= b'supplier'
[2m[36m(pid=2440)[0m Added Table= b'nation'
[2m[36m(pid=2440)[0m Added Table= b'region'


In [69]:
tc.info.remote()

ObjectRef(ef6d8f9f3dbed41a35d7a3b90100000001000000)

[2m[36m(pid=2440)[0m Workers=6


Read a table into TCache

In [70]:
%%time
result = tc.make_dtensor.remote()

CPU times: user 1.76 ms, sys: 702 µs, total: 2.47 ms
Wall time: 1.09 ms


Read Arrow table

In [73]:
result = tc.query6.remote()

[2m[36m(pid=2440)[0m  TPCH QUERY 6 
[2m[36m(pid=2440)[0m SELECT 
[2m[36m(pid=2440)[0m   SUM(L_EXTENDEDPRICE * L_DISCOUNT) AS REVENUE 
[2m[36m(pid=2440)[0m FROM 
[2m[36m(pid=2440)[0m   LINEITEM
[2m[36m(pid=2440)[0m WHERE
[2m[36m(pid=2440)[0m   L_SHIPDATE >= DATE '1997-01-01'
[2m[36m(pid=2440)[0m   AND L_SHIPDATE < DATE '1997-01-01' + INTERVAL '1' YEAR
[2m[36m(pid=2440)[0m   AND L_DISCOUNT BETWEEN 0.07 - 0.01 AND 0.07 + 0.01
[2m[36m(pid=2440)[0m   AND L_QUANTITY < 25;
[2m[36m(pid=2440)[0m 
[2m[36m(pid=2440)[0m Revenue= 156594095.60960016
[2m[36m(pid=2440)[0m 


In [72]:
result = tc.query5.remote()

[2m[36m(pid=2440)[0m  TPCH QUERY 6 
[2m[36m(pid=2440)[0m SELECT 
[2m[36m(pid=2440)[0m   SUM(L_EXTENDEDPRICE * L_DISCOUNT) AS REVENUE 
[2m[36m(pid=2440)[0m FROM 
[2m[36m(pid=2440)[0m   LINEITEM
[2m[36m(pid=2440)[0m WHERE
[2m[36m(pid=2440)[0m   L_SHIPDATE >= DATE '1997-01-01'
[2m[36m(pid=2440)[0m   AND L_SHIPDATE < DATE '1997-01-01' + INTERVAL '1' YEAR
[2m[36m(pid=2440)[0m   AND L_DISCOUNT BETWEEN 0.07 - 0.01 AND 0.07 + 0.01
[2m[36m(pid=2440)[0m   AND L_QUANTITY < 25;
[2m[36m(pid=2440)[0m 
[2m[36m(pid=2440)[0m Revenue= 156594095.60960016
[2m[36m(pid=2440)[0m 
[2m[36m(pid=2440)[0m  
[2m[36m(pid=2440)[0m SELECT
[2m[36m(pid=2440)[0m 	N_NAME,
[2m[36m(pid=2440)[0m 	SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS REVENUE
[2m[36m(pid=2440)[0m FROM
[2m[36m(pid=2440)[0m 	CUSTOMER,
[2m[36m(pid=2440)[0m 	ORDERS,
[2m[36m(pid=2440)[0m 	LINEITEM,
[2m[36m(pid=2440)[0m 	SUPPLIER,
[2m[36m(pid=2440)[0m 	NATION,
[2m[36m(pid=2440)[0m 	REGION

This will kill remote Liten Cache.

In [74]:
ray.kill(tc)

Shut down ray now

In [75]:
ray.shutdown()