# Import libraries

In [4]:
import gzip
import requests
import json
import time

# Data Compression

In [10]:
# Defaults reproduce the original hard-coded values; override them via the
# keyword arguments of main() to import a different file or target another host.
DEFAULT_TABLE_CSV = "/Users/jafarabdurrahmaan/Dropbox/00_Uni/Masterthesis/A_Thesis/1_Components/13_Benchmark_Study_Components/2_TSDB/QuestDB/iter2/energy.csv"
DEFAULT_IMPORT_URL = 'http://localhost:9001/imp'


def main(table_csv=DEFAULT_TABLE_CSV, import_url=DEFAULT_IMPORT_URL):
    """Upload a CSV file with an explicit column schema and report the elapsed time.

    The file is sent as a multipart POST to ``import_url`` together with a
    JSON-encoded schema describing the nine energy columns.

    Args:
        table_csv: Path of the CSV file to upload.
        import_url: URL of the HTTP import endpoint
            (presumably a QuestDB ``/imp`` endpoint, judging by the path - confirm).

    Returns:
        None. Prints a success message with the elapsed time, or the decoded
        response body when the server answers with an error status.

    Raises:
        OSError: If ``table_csv`` cannot be opened.
        requests.exceptions.RequestException: If the HTTP request itself fails.
    """
    schema = json.dumps([
        {'name': 'energy_date', 'type': 'DATE', 'pattern': 'yyyy-MM-dd'},
        {'name': 'LCLid', 'type': 'SYMBOL'},
        {'name': 'energy_median', 'type': 'DOUBLE'},
        {'name': 'energy_mean', 'type': 'DOUBLE'},
        {'name': 'energy_max', 'type': 'DOUBLE'},
        {'name': 'energy_count', 'type': 'DOUBLE'},
        {'name': 'energy_std', 'type': 'DOUBLE'},
        {'name': 'energy_sum', 'type': 'DOUBLE'},
        {'name': 'energy_min', 'type': 'DOUBLE'}])

    # perf_counter() is a monotonic high-resolution clock, better suited to
    # interval measurement than the wall clock returned by time.time().
    start_time = time.perf_counter()

    # The context manager closes the CSV handle even when the POST raises.
    with open(table_csv, 'rb') as csv_file:
        response = requests.post(
            import_url,
            params={'fmt': 'csv'},
            files={
                'schema': schema,
                'data': csv_file})
        end_time = time.perf_counter()

    if response.ok:
        print('CSV file uploaded successfully.')
        print('Execution time: {:.2f} seconds'.format(end_time - start_time))
    else:
        print('Error uploading CSV file:')
        print(response.content.decode())

if __name__ == '__main__':
    main()

CSV file uploaded successfully.
Execution time: 0.38 seconds


# Define query function

In [11]:
import sys
import requests

host = 'http://localhost:9001'

def run_query(sql_query):
    """Send a SQL statement to the ``/exec`` endpoint of ``host`` and print the reply.

    The statement is passed as the ``query`` URL parameter of a GET request.
    The response body is decoded as JSON and the resulting object is printed
    to stdout.

    Args:
        sql_query: SQL text to execute.

    Returns:
        None. Request failures and non-JSON responses are reported on stderr
        instead of being raised.
    """
    query_params = {'query': sql_query, 'fmt' : 'json'}
    try:
        response = requests.get(host + '/exec', params=query_params)
        json_response = json.loads(response.text)
        print(json_response)
    except requests.exceptions.RequestException as e:
        print(f'Error: {e}', file=sys.stderr)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass; without this handler
        # a non-JSON body (e.g. an HTML error page) would escape as a traceback
        # instead of being reported like other request failures.
        print(f'Error: response is not valid JSON: {e}', file=sys.stderr)

In [12]:
# Rename the table 'energy.csv' (presumably the name the CSV import gave it,
# taken from the uploaded file name - confirm) to 'energy_symbol', the name
# used by the benchmark queries in this notebook.
rename_sql = "RENAME TABLE 'energy.csv' TO 'energy_symbol'"
run_query(rename_sql)

{'ddl': 'OK'}


# Simple Retrieval

## First Query Workload

In [43]:
# First timed run of the simple-retrieval query.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be coarse
# or jump when the system clock is adjusted.
# NOTE: the interval covers everything run_query does (HTTP round trip, JSON
# decoding and printing the result), not only the server-side execution.
start_time = time.perf_counter()

run_query("SELECT energy_min, energy_max FROM 'energy_symbol' WHERE LCLid = 'MAC000131' AND energy_date BETWEEN '2012-06-01' AND '2013-06-01'")

# Elapsed time in seconds
execution_time = time.perf_counter() - start_time

print(f"Query Execution Time: {execution_time:.2f} seconds")

{'query': "SELECT energy_min, energy_max FROM 'energy_symbol' WHERE LCLid = 'MAC000131' AND energy_date BETWEEN '2012-06-01' AND '2013-06-01'", 'columns': [{'name': 'energy_min', 'type': 'DOUBLE'}, {'name': 'energy_max', 'type': 'DOUBLE'}], 'dataset': [[0.064, 0.312], [0.065, 0.299], [0.064, 0.782999999999], [0.066, 1.1619999], [0.065, 0.742], [0.064, 0.305], [0.063, 1.2359999], [0.063, 1.108], [0.063, 0.369], [0.064, 0.792999999999], [0.066, 0.814], [0.062, 0.727], [0.064, 0.749], [0.063, 1.392], [0.063, 0.704], [0.063, 0.506], [0.064, 1.007], [0.063, 0.874], [0.063, 1.084], [0.064, 0.677], [0.064, 0.862], [0.064, 0.286], [0.066, 0.800999999999], [0.065, 0.687], [0.064, 0.737], [0.065, 0.718], [0.064, 0.313], [0.067, 0.695], [0.064, 0.757], [0.067, 0.839], [0.066, 0.342], [0.061, 0.659], [0.058, 0.904], [0.06, 0.295], [0.059, 0.307], [0.062, 0.302], [0.061, 0.263], [0.068, 0.847], [0.063, 0.841], [0.058, 0.877], [0.059, 0.68], [0.062, 0.78], [0.059, 0.729], [0.06, 0.735], [0.067, 0.28

## Second Query Workload

In [45]:
# Second timed run of the simple-retrieval query.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be coarse
# or jump when the system clock is adjusted.
# NOTE: the interval covers everything run_query does (HTTP round trip, JSON
# decoding and printing the result), not only the server-side execution.
start_time = time.perf_counter()

run_query("SELECT energy_min, energy_max FROM 'energy_symbol' WHERE LCLid = 'MAC000131' AND energy_date BETWEEN '2012-06-01' AND '2013-06-01'")

# Elapsed time in seconds
execution_time = time.perf_counter() - start_time

print(f"Query Execution Time: {execution_time:.2f} seconds")

{'query': "SELECT energy_min, energy_max FROM 'energy_symbol' WHERE LCLid = 'MAC000131' AND energy_date BETWEEN '2012-06-01' AND '2013-06-01'", 'columns': [{'name': 'energy_min', 'type': 'DOUBLE'}, {'name': 'energy_max', 'type': 'DOUBLE'}], 'dataset': [[0.064, 0.312], [0.065, 0.299], [0.064, 0.782999999999], [0.066, 1.1619999], [0.065, 0.742], [0.064, 0.305], [0.063, 1.2359999], [0.063, 1.108], [0.063, 0.369], [0.064, 0.792999999999], [0.066, 0.814], [0.062, 0.727], [0.064, 0.749], [0.063, 1.392], [0.063, 0.704], [0.063, 0.506], [0.064, 1.007], [0.063, 0.874], [0.063, 1.084], [0.064, 0.677], [0.064, 0.862], [0.064, 0.286], [0.066, 0.800999999999], [0.065, 0.687], [0.064, 0.737], [0.065, 0.718], [0.064, 0.313], [0.067, 0.695], [0.064, 0.757], [0.067, 0.839], [0.066, 0.342], [0.061, 0.659], [0.058, 0.904], [0.06, 0.295], [0.059, 0.307], [0.062, 0.302], [0.061, 0.263], [0.068, 0.847], [0.063, 0.841], [0.058, 0.877], [0.059, 0.68], [0.062, 0.78], [0.059, 0.729], [0.06, 0.735], [0.067, 0.28

## Third Query Workload

In [46]:
# Third timed run of the simple-retrieval query.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be coarse
# or jump when the system clock is adjusted.
# NOTE: the interval covers everything run_query does (HTTP round trip, JSON
# decoding and printing the result), not only the server-side execution.
start_time = time.perf_counter()

run_query("SELECT energy_min, energy_max FROM 'energy_symbol' WHERE LCLid = 'MAC000131' AND energy_date BETWEEN '2012-06-01' AND '2013-06-01'")

# Elapsed time in seconds
execution_time = time.perf_counter() - start_time

print(f"Query Execution Time: {execution_time:.2f} seconds")

{'query': "SELECT energy_min, energy_max FROM 'energy_symbol' WHERE LCLid = 'MAC000131' AND energy_date BETWEEN '2012-06-01' AND '2013-06-01'", 'columns': [{'name': 'energy_min', 'type': 'DOUBLE'}, {'name': 'energy_max', 'type': 'DOUBLE'}], 'dataset': [[0.064, 0.312], [0.065, 0.299], [0.064, 0.782999999999], [0.066, 1.1619999], [0.065, 0.742], [0.064, 0.305], [0.063, 1.2359999], [0.063, 1.108], [0.063, 0.369], [0.064, 0.792999999999], [0.066, 0.814], [0.062, 0.727], [0.064, 0.749], [0.063, 1.392], [0.063, 0.704], [0.063, 0.506], [0.064, 1.007], [0.063, 0.874], [0.063, 1.084], [0.064, 0.677], [0.064, 0.862], [0.064, 0.286], [0.066, 0.800999999999], [0.065, 0.687], [0.064, 0.737], [0.065, 0.718], [0.064, 0.313], [0.067, 0.695], [0.064, 0.757], [0.067, 0.839], [0.066, 0.342], [0.061, 0.659], [0.058, 0.904], [0.06, 0.295], [0.059, 0.307], [0.062, 0.302], [0.061, 0.263], [0.068, 0.847], [0.063, 0.841], [0.058, 0.877], [0.059, 0.68], [0.062, 0.78], [0.059, 0.729], [0.06, 0.735], [0.067, 0.28

## Overall Execution Time for Simple Retrieval

In [47]:
# Mean of three hand-entered timings in seconds (presumably copied from the
# three simple-retrieval runs - confirm), converted to milliseconds.
simple_retrieval_avg_ms = ((0.03 + 0.03 + 0.03) / 3) * 1000
print(f"Average Query Execution Time: {simple_retrieval_avg_ms} milliseconds")

Average Query Execution Time: 30.0 milliseconds


# Filter and Aggregation

## First Query Workload

In [55]:
# First timed run of the filter-and-aggregation query.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be coarse
# or jump when the system clock is adjusted.
# NOTE: the interval covers everything run_query does (HTTP round trip, JSON
# decoding and printing the result), not only the server-side execution.
start_time = time.perf_counter()

run_query("SELECT LCLid, AVG(energy_sum) as sum, AVG(energy_median) as median FROM 'energy_symbol' WHERE energy_date BETWEEN '2012-06-01' AND '2013-06-01' GROUP BY LCLid ORDER BY LCLid")

# Elapsed time in seconds
execution_time = time.perf_counter() - start_time

print(f"Query Execution Time: {execution_time:.2f} seconds")

{'query': "SELECT LCLid, AVG(energy_sum) as sum, AVG(energy_median) as median FROM 'energy_symbol' WHERE energy_date BETWEEN '2012-06-01' AND '2013-06-01' GROUP BY LCLid ORDER BY LCLid", 'columns': [{'name': 'LCLid', 'type': 'SYMBOL'}, {'name': 'sum', 'type': 'DOUBLE'}, {'name': 'median', 'type': 'DOUBLE'}], 'dataset': [['MAC000131', 10.087418031967, 0.127491803278], ['MAC000132', 14.368434429234, 0.211061475409], ['MAC000221', 13.435918031967, 0.175685792349], ['MAC000228', 7.397885246721, 0.110952185792], ['MAC000234', 11.512909836065, 0.203515027322], ['MAC000235', 4.175286885519, 0.0796830601092]], 'timestamp': -1, 'count': 6}
Query Execution Time: 0.04 seconds


## Second Query Workload

In [56]:
# Second timed run of the filter-and-aggregation query.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be coarse
# or jump when the system clock is adjusted.
# NOTE: the interval covers everything run_query does (HTTP round trip, JSON
# decoding and printing the result), not only the server-side execution.
start_time = time.perf_counter()

run_query("SELECT LCLid, AVG(energy_sum) as sum, AVG(energy_median) as median FROM 'energy_symbol' WHERE energy_date BETWEEN '2012-06-01' AND '2013-06-01' GROUP BY LCLid ORDER BY LCLid")

# Elapsed time in seconds
execution_time = time.perf_counter() - start_time

print(f"Query Execution Time: {execution_time:.2f} seconds")

{'query': "SELECT LCLid, AVG(energy_sum) as sum, AVG(energy_median) as median FROM 'energy_symbol' WHERE energy_date BETWEEN '2012-06-01' AND '2013-06-01' GROUP BY LCLid ORDER BY LCLid", 'columns': [{'name': 'LCLid', 'type': 'SYMBOL'}, {'name': 'sum', 'type': 'DOUBLE'}, {'name': 'median', 'type': 'DOUBLE'}], 'dataset': [['MAC000131', 10.087418031967, 0.127491803278], ['MAC000132', 14.368434429234, 0.211061475409], ['MAC000221', 13.435918031967, 0.175685792349], ['MAC000228', 7.397885246721, 0.110952185792], ['MAC000234', 11.512909836065, 0.203515027322], ['MAC000235', 4.175286885519, 0.0796830601092]], 'timestamp': -1, 'count': 6}
Query Execution Time: 0.06 seconds


## Third Query Workload

In [57]:
# Third timed run of the filter-and-aggregation query.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be coarse
# or jump when the system clock is adjusted.
# NOTE: the interval covers everything run_query does (HTTP round trip, JSON
# decoding and printing the result), not only the server-side execution.
start_time = time.perf_counter()

run_query("SELECT LCLid, AVG(energy_sum) as sum, AVG(energy_median) as median FROM 'energy_symbol' WHERE energy_date BETWEEN '2012-06-01' AND '2013-06-01' GROUP BY LCLid ORDER BY LCLid")

# Elapsed time in seconds
execution_time = time.perf_counter() - start_time

print(f"Query Execution Time: {execution_time:.2f} seconds")

{'query': "SELECT LCLid, AVG(energy_sum) as sum, AVG(energy_median) as median FROM 'energy_symbol' WHERE energy_date BETWEEN '2012-06-01' AND '2013-06-01' GROUP BY LCLid ORDER BY LCLid", 'columns': [{'name': 'LCLid', 'type': 'SYMBOL'}, {'name': 'sum', 'type': 'DOUBLE'}, {'name': 'median', 'type': 'DOUBLE'}], 'dataset': [['MAC000131', 10.087418031967, 0.127491803278], ['MAC000132', 14.368434429234, 0.211061475409], ['MAC000221', 13.435918031967, 0.175685792349], ['MAC000228', 7.397885246721, 0.110952185792], ['MAC000234', 11.512909836065, 0.203515027322], ['MAC000235', 4.175286885519, 0.0796830601092]], 'timestamp': -1, 'count': 6}
Query Execution Time: 0.07 seconds


## Overall Execution Time for Filter and Aggregation

In [58]:
# Mean of the three hand-entered timings in seconds (0.04, 0.06 and 0.07, as
# printed by the three filter-and-aggregation runs), converted to milliseconds.
filter_aggregation_avg_ms = ((0.04 + 0.06 + 0.07) / 3) * 1000
print(f"Average Query Execution Time: {filter_aggregation_avg_ms} milliseconds")

Average Query Execution Time: 56.66666666666667 milliseconds


# Metadata

In [61]:
# Ask the database for the summed diskSize over the partitions of
# 'energy_symbol', formatted by size_pretty().
disk_size_sql = "SELECT size_pretty(sum(diskSize)) FROM table_partitions('energy_symbol')"
run_query(disk_size_sql)

{'query': "SELECT size_pretty(sum(diskSize)) FROM table_partitions('energy_symbol')", 'columns': [{'name': 'size_pretty', 'type': 'STRING'}], 'dataset': [['360.0 KiB']], 'timestamp': -1, 'count': 1}
