In [None]:
# https://github.com/h2oai/sql-sidekick/releases
#!pip install --force-reinstall sql_sidekick-0.x.x-py3-none-any.whl

In [None]:
%load_ext autoreload
%autoreload 2

In [19]:
from sidekick import prompter

In [20]:
prompter.__version__

'0.1.9'

In [21]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [22]:
from sidekick.prompter import db_setup, ask
from sidekick.schema_generator import generate_schema
from sidekick.utils import setup_dir, list_db_dialects

In [41]:
base_path = "./"
cache_path = f"{base_path}/var/lib/tmp"
setup_dir(base_path)

In [42]:
list_db_dialects()

['bigquery',
 'clickhouse',
 'duckdb',
 'hive',
 'mysql',
 'oracle',
 'postgres',
 'presto',
 'redshift',
 'snowflake',
 'spark',
 'spark2',
 'sqlite',
 'starrocks',
 'tableau',
 'trino',
 'tsql',
 'databricks',
 'drill',
 'teradata']

In [43]:
# env variables
import os

os.environ['DATABRICKS_HOST'] = "<>.cloud.databricks.com" # e.g. <>.cloud.databricks.com
os.environ['DATABRICKS_CLUSTER_ID'] = "0112-211956-zzzzz" # e.g. 0232-316956-tsltsyz5
os.environ['DATABRICKS_TOKEN'] = ""
os.environ['OPENAI_API_KEY'] = ""

os.environ['H2O_BASE_MODEL_URL'] = 'http://38.128.233.247'
os.environ['H2O_BASE_MODEL_API_KEY'] = ""
os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = "https://h2ogpte.genai.h2o.ai"  # e.g. https://<>.h2ogpte.h2o.ai
os.environ['RECOMMENDATION_MODEL_API_KEY'] = ""

# Scenario 1

In [None]:
# 1. New dataset for QnA
# 2. Generate SQL syntax, but no access to Databricks cluster for runtime validation.

HOST_NAME = "localhost"
USER_NAME = "sqlite"
PASSWORD = "abc"
DB_NAME = "querydb"
PORT = "5432"


# Given .csv file, auto-generate schema
# Download dataset --> https://www.kaggle.com/datasets/uom190346a/sleep-health-and-lifestyle-dataset
# Adjust data_path as needed.
data_path = "./examples/demo/sleep_health_and_lifestyle_dataset.csv"
table_name = "sleep_health_eda_demo"

table_schema, table_info_path = generate_schema(data_path=data_path, output_path=f"{cache_path}/{table_name}_table_info.jsonl")

# Set add_sample=False if no need to add rows to the table
response , err = db_setup(
                db_name=DB_NAME,
                hostname=HOST_NAME,
                user_name=USER_NAME,
                password=PASSWORD,
                port=PORT,
                table_info_path=table_info_path,
                table_samples_path=data_path,
                table_name=table_name,
                local_base_path=base_path,
                add_sample=True
            )

In [45]:
table_info_path

'.//var/lib/tmp/sleep_health_eda_demo_table_info.jsonl'

In [46]:
table_schema

[{'Column Name': 'Person_ID', 'Column Type': 'NUMERIC'},
 {'Column Name': 'Gender',
  'Column Type': 'TEXT',
  'Sample Values': ['Male', 'Female']},
 {'Column Name': 'Age', 'Column Type': 'NUMERIC'},
 {'Column Name': 'Occupation',
  'Column Type': 'TEXT',
  'Sample Values': ['Accountant',
   'Salesperson',
   'Engineer',
   'Software Engineer',
   'Nurse',
   'Teacher',
   'Manager',
   'Lawyer',
   'Doctor',
   'Sales Representative']},
 {'Column Name': 'Sleep_Duration', 'Column Type': 'NUMERIC'},
 {'Column Name': 'Quality_of_Sleep', 'Column Type': 'NUMERIC'},
 {'Column Name': 'Physical_Activity_Level', 'Column Type': 'NUMERIC'},
 {'Column Name': 'Stress_Level', 'Column Type': 'NUMERIC'},
 {'Column Name': 'BMI_Category',
  'Column Type': 'TEXT',
  'Sample Values': ['Overweight', 'Normal', 'Obese', 'Normal Weight']},
 {'Column Name': 'Blood_Pressure', 'Column Type': 'TEXT'},
 {'Column Name': 'Heart_Rate', 'Column Type': 'NUMERIC'},
 {'Column Name': 'Daily_Steps', 'Column Type': 'NUMERI

In [47]:
# One could also decide to pass pre-configured table schema via for table creation
# Schema format:

# [{'Column Name': 'Person_ID', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Gender',
#   'Column Type': 'TEXT',
#   'Sample Values': ['Male', 'Female']},
#  {'Column Name': 'Age', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Occupation',
#   'Column Type': 'TEXT',
#   'Sample Values': ['Accountant',
#    'Manager',
#    'Sales Representative',
#    'Salesperson',
#    'Lawyer',
#    'Nurse',
#    'Teacher',
#    'Software Engineer',
#    'Doctor',
#    'Scientist']},
#  {'Column Name': 'Sleep_Duration', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Quality_of_Sleep', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Physical_Activity_Level', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Stress_Level', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'BMI_Category',
#   'Column Type': 'TEXT',
#   'Sample Values': ['Overweight', 'Normal', 'Obese', 'Normal Weight']},
#  {'Column Name': 'Blood_Pressure', 'Column Type': 'TEXT'},
#  {'Column Name': 'Heart_Rate', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Daily_Steps', 'Column Type': 'NUMERIC'},
#  {'Column Name': 'Sleep_Disorder',
#   'Column Type': 'TEXT',
#   'Sample Values': ['None', 'Sleep Apnea', 'Insomnia']}]




# response, err = db_setup(
#                 db_name=DB_NAME,
#                 hostname=HOST_NAME,
#                 user_name=USER_NAME,
#                 password=PASSWORD,
#                 port=PORT,
#                 table_name=table_name,
#                 table_schema = r,
#                 local_base_path=base_path,
#                 add_sample=False
#             )

In [48]:
def query(question: str, table_name, table_info_path: str, sample_qna_path: str, regenerate=False, regenerate_with_options=False):
    """Asks question and returns SQL."""
    base_path = "."
    # self_correction is enabled by default, set to False if not needed.
    res = ask(
        question=question,
        table_info_path=table_info_path,
        sample_queries_path=sample_qna_path,
        table_name=table_name,
        db_dialect="databricks",
        is_command=False,
        model_name="h2ogpt-sql-sqlcoder-34b-alpha",
        is_regenerate=regenerate,
        is_regen_with_options=regenerate_with_options,
        execute_query=False,
        local_base_path=base_path,
    )
    sql_str = None
    if res:
        _tmp = res[0][1].split("```")[1].replace("sql", '')
        sql_str = ' '.join(_tmp.split())
    return sql_str

In [49]:
res = query("What is the average sleep duration for each gender?", table_name=table_name, 
            table_info_path=table_info_path, sample_qna_path=None, regenerate=False)

[32m2024-01-19 09:35:33.851[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m493[0m - [1mTable in use: ['sleep_health_eda_demo'][0m
[32m2024-01-19 09:35:33.852[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m494[0m - [1mSQL dialect for generation: databricks[0m
[32m2024-01-19 09:35:33.854[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m527[0m - [1mSetting context...[0m
[32m2024-01-19 09:35:33.855[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m528[0m - [1mQuestion: What is the average sleep duration for each gender?[0m
[32m2024-01-19 09:35:33.856[0m | [34m[1mDEBUG   [0m | [36msidekick.prompter[0m:[36mask[0m:[36m546[0m - [34m[1mTable info path: .//var/lib/tmp/sleep_health_eda_demo_table_info.jsonl[0m
[32m2024-01-19 09:35:33.857[0m | [1mINFO    [0m | [36msidekick.utils[0m:[36mis_resource_low[0m:[36m353[0m - [1mNumber of GPUs: 1[0m
[32m2024-01-19 09:35:33.857[

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/719 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.1k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: ./models/sentence_transformers/models--BAAI--bge-base-en/snapshots/17d124a4b773c4c9248ca816b0b0901e3c49a243/
Load pretrained SentenceTransformer: ./models/sentence_transformers/models--BAAI--bge-base-en/snapshots/17d124a4b773c4c9248ca816b0b0901e3c49a243/
Load pretrained SentenceTransformer: ./models/sentence_transformers/models--BAAI--bge-base-en/snapshots/17d124a4b773c4c9248ca816b0b0901e3c49a243/


[32m2024-01-19 09:35:42.681[0m | [1mINFO    [0m | [36msidekick.utils[0m:[36m_check_file_info[0m:[36m467[0m - [1mUsing information info from path .//var/lib/tmp/sleep_health_eda_demo_table_info.jsonl[0m
[32m2024-01-19 09:35:42.682[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m596[0m - [1mComputing user request ...[0m
[32m2024-01-19 09:35:42.759[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m153[0m - [34m[1mInput questions: # query: what is the average sleep duration for each gender?[0m


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[32m2024-01-19 09:35:42.913[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m158[0m - [34m[1mProbable context: ["if patterns like 'current time' or 'now' occurs in question", "if patterns like 'total number', or 'list' occurs in question", 'detailed summary', 'summary'][0m


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[32m2024-01-19 09:35:42.948[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: if patterns like 'current time' or 'now' occurs in question: 0.8459207869447033[0m
[32m2024-01-19 09:35:42.950[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: if patterns like 'total number', or 'list' occurs in question: 0.8319947353454415[0m
[32m2024-01-19 09:35:42.952[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: detailed summary: 0.8346069603076574[0m
[32m2024-01-19 09:35:42.953[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: summary: 0.8394152180082535[0m
[32m2024-01-19 09:35:42.954[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m182[0m - [34m[1mSorted co

INFO:httpx:HTTP Request: POST http://38.128.233.247:5000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://38.128.233.247:5000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://38.128.233.247:5000/v1/chat/completions "HTTP/1.1 200 OK"


[32m2024-01-19 09:35:45.369[0m | [34m[1mDEBUG   [0m | [36msidekick.query[0m:[36mgenerate_sql[0m:[36m618[0m - [34m[1mGenerated tokens: 
Gender, AVG(Sleep_Duration) AS Average_Sleep_Duration FROM sleep_health_eda_demo GROUP BY Gender ORDER BY Gender DESC;
[0m
[32m2024-01-19 09:35:45.375[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m601[0m - [1mInput query: What is the average sleep duration for each gender?[0m
[32m2024-01-19 09:35:45.377[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m602[0m - [1mGenerated response:

SELECT `Gender`, AVG(`Sleep_Duration`) AS `Average_Sleep_Duration` FROM `sleep_health_eda_demo` GROUP BY `Gender` ORDER BY `Gender` DESC LIMIT 100[0m
[32m2024-01-19 09:35:45.384[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m628[0m - [1mAlternate responses:

[][0m


Exiting...


In [50]:
res

'SELECT `Gender`, AVG(`Sleep_Duration`) AS `Average_Sleep_Duration` FROM `sleep_health_eda_demo` GROUP BY `Gender` ORDER BY `Gender` DESC LIMIT 100'

# Scenario 2

In [51]:
# 1. Connect to pre-configured Catalog/Schema on Databricks cluster
# 2. Retrieve information
# 3. Generate SQL syntax, execute against configured DB for the final result

In [52]:
# Retrieve information
from sidekick.db_config import DBConfig

DBConfig.dialect = "databricks"
# Using a demo dataset from Databricks Catalog
config_args = {
    "catalog": "samples",
    "schema": "nyctaxi",
    "cluster_id": os.environ.get("DATABRICKS_CLUSTER_ID")
}
table_name = "trips" # sample table related to NYC Taxi dataset
DBConfig.table_name = table_name
column_info, table_info_path = DBConfig.get_column_info(output_path=f"{cache_path}/{table_name}_table_info.jsonl", **config_args)

INFO:databricks.sql.client:Successfully opened session 89def2e0-902d-4230-91f3-0b2f8404e628
Successfully opened session 89def2e0-902d-4230-91f3-0b2f8404e628
Successfully opened session 89def2e0-902d-4230-91f3-0b2f8404e628


In [53]:
input_q = "Compute average trip distance"
result, _, error = ask(
        question=input_q,
        table_info_path=table_info_path,
        sample_queries_path=None,
        table_name=table_name,
        is_command=False,
        model_name="h2ogpt-sql-sqlcoder-34b-alpha",
        db_dialect="databricks",
        execute_db_dialect="databricks",
        is_regenerate=False,
        is_regen_with_options=False,
        execute_query=True,
        local_base_path=base_path,
        debug_mode=False,
        guardrails=True,
        self_correction=True
    )

[32m2024-01-19 09:36:07.870[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m493[0m - [1mTable in use: ['trips'][0m
[32m2024-01-19 09:36:07.871[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m494[0m - [1mSQL dialect for generation: databricks[0m
[32m2024-01-19 09:36:07.873[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m527[0m - [1mSetting context...[0m
[32m2024-01-19 09:36:07.874[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m528[0m - [1mQuestion: Compute average trip distance[0m
[32m2024-01-19 09:36:07.875[0m | [34m[1mDEBUG   [0m | [36msidekick.prompter[0m:[36mask[0m:[36m546[0m - [34m[1mTable info path: .//var/lib/tmp/trips_table_info.jsonl[0m
[32m2024-01-19 09:36:07.876[0m | [1mINFO    [0m | [36msidekick.utils[0m:[36mis_resource_low[0m:[36m353[0m - [1mNumber of GPUs: 1[0m
[32m2024-01-19 09:36:07.877[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: .//models/sentence_transformers/models--BAAI--bge-base-en/snapshots/17d124a4b773c4c9248ca816b0b0901e3c49a243/
Load pretrained SentenceTransformer: .//models/sentence_transformers/models--BAAI--bge-base-en/snapshots/17d124a4b773c4c9248ca816b0b0901e3c49a243/
Load pretrained SentenceTransformer: .//models/sentence_transformers/models--BAAI--bge-base-en/snapshots/17d124a4b773c4c9248ca816b0b0901e3c49a243/
INFO:databricks.sql.client:Successfully opened session 874a4b41-e623-4e85-88fe-9122124ac500
Successfully opened session 874a4b41-e623-4e85-88fe-9122124ac500
Successfully opened session 874a4b41-e623-4e85-88fe-9122124ac500


[32m2024-01-19 09:36:11.779[0m | [1mINFO    [0m | [36msidekick.utils[0m:[36m_check_file_info[0m:[36m467[0m - [1mUsing information info from path .//var/lib/tmp/trips_table_info.jsonl[0m
[32m2024-01-19 09:36:11.781[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m596[0m - [1mComputing user request ...[0m
[32m2024-01-19 09:36:13.043[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m153[0m - [34m[1mInput questions: # query: compute average trip distance[0m


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[32m2024-01-19 09:36:13.210[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m158[0m - [34m[1mProbable context: ["if patterns like 'current time' or 'now' occurs in question", "if patterns like 'total number', or 'list' occurs in question", 'detailed summary', 'summary'][0m


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[32m2024-01-19 09:36:13.244[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: if patterns like 'current time' or 'now' occurs in question: 0.8562923618568069[0m
[32m2024-01-19 09:36:13.247[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: if patterns like 'total number', or 'list' occurs in question: 0.8734165297755163[0m
[32m2024-01-19 09:36:13.248[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: detailed summary: 0.8827507006525596[0m
[32m2024-01-19 09:36:13.250[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m166[0m - [34m[1mSimilarity score for: summary: 0.892255611828783[0m
[32m2024-01-19 09:36:13.251[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36msemantic_search[0m:[36m182[0m - [34m[1mSorted con

INFO:httpx:HTTP Request: POST http://38.128.233.247:5000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://38.128.233.247:5000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://38.128.233.247:5000/v1/chat/completions "HTTP/1.1 200 OK"


[32m2024-01-19 09:36:14.569[0m | [34m[1mDEBUG   [0m | [36msidekick.query[0m:[36mgenerate_sql[0m:[36m618[0m - [34m[1mGenerated tokens:  AVG(trip_distance) AS average_trip_distance FROM trips;[0m
[32m2024-01-19 09:36:14.572[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m601[0m - [1mInput query: Compute average trip distance[0m
[32m2024-01-19 09:36:14.574[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m602[0m - [1mGenerated response:

SELECT AVG(`trip_distance`) AS `average_trip_distance` FROM `trips` LIMIT 100[0m
[32m2024-01-19 09:36:14.579[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m628[0m - [1mAlternate responses:

[][0m
[32m2024-01-19 09:36:14.581[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m641[0m - [1mChecking for vulnerabilities in the provided SQL: SELECT AVG(`trip_distance`) AS `average_trip_distance` FROM `trips` LIMIT 100[0m
[32m2024-01-19 09:36:14.600

Please install the correct version of H2OGPTE with `pip install h2ogpte==1.3.0-dev3`.
You can enable strict version checking by passing strict_version_check=True.
INFO:databricks.sql.client:Successfully opened session 2371a48a-b7f8-4497-b844-68fa41ec06f2
Successfully opened session 2371a48a-b7f8-4497-b844-68fa41ec06f2
Successfully opened session 2371a48a-b7f8-4497-b844-68fa41ec06f2


[32m2024-01-19 09:36:30.379[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36mexecute_query[0m:[36m268[0m - [34m[1mExecuting query:
 SELECT AVG("trip_distance") AS "average_trip_distance" FROM "trips" LIMIT 100[0m
[32m2024-01-19 09:36:30.558[0m | [1mINFO    [0m | [36msidekick.db_config[0m:[36mexecute_query[0m:[36m291[0m - [1mError occurred : (databricks.sql.exc.ServerOperationError) 
[PARSE_SYNTAX_ERROR] Syntax error at or near '"average_trip_distance"'.(line 1, pos 31)

== SQL ==
SELECT AVG("trip_distance") AS "average_trip_distance" FROM "trips" LIMIT 100
-------------------------------^^^

[SQL: SELECT AVG("trip_distance") AS "average_trip_distance" FROM "trips" LIMIT 100]
(Background on this error at: https://sqlalche.me/e/20/4xp6)[0m


INFO:databricks.sql.client:Closing session 2371a48a-b7f8-4497-b844-68fa41ec06f2
Closing session 2371a48a-b7f8-4497-b844-68fa41ec06f2
Closing session 2371a48a-b7f8-4497-b844-68fa41ec06f2


[32m2024-01-19 09:36:30.689[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m666[0m - [1mAttempting to auto-correct the query...[0m
[32m2024-01-19 09:36:30.690[0m | [34m[1mDEBUG   [0m | [36msidekick.prompter[0m:[36mask[0m:[36m669[0m - [34m[1mAttempt: 1[0m


Please install the correct version of H2OGPTE with `pip install h2ogpte==1.3.0-dev3`.
You can enable strict version checking by passing strict_version_check=True.
INFO:databricks.sql.client:Successfully opened session 856311f5-20ca-45a9-b58d-b3ea0a427ecc
Successfully opened session 856311f5-20ca-45a9-b58d-b3ea0a427ecc
Successfully opened session 856311f5-20ca-45a9-b58d-b3ea0a427ecc


[32m2024-01-19 09:36:48.508[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36mexecute_query[0m:[36m268[0m - [34m[1mExecuting query:
 SELECT AVG(`trip_distance`) AS `average_trip_distance` FROM `trips` LIMIT 100[0m


INFO:databricks.sql.client:Closing session 856311f5-20ca-45a9-b58d-b3ea0a427ecc
Closing session 856311f5-20ca-45a9-b58d-b3ea0a427ecc
Closing session 856311f5-20ca-45a9-b58d-b3ea0a427ecc
The query results are:
 [(2.8528291993434256,)]
Exiting...


In [None]:
?ask

In [54]:
result

['**Generated response for question,**\nCompute average trip distance\n',
 '``` sql\nSELECT AVG(`trip_distance`) AS `average_trip_distance`\nFROM `trips`\nLIMIT 100\n```\n\n',
 '\n',
 '**Result:** \n',
 '[(2.8528291993434256,)]',
 '\n']

In [55]:
# One can make further changes and re-execute the query against the configured Database as required.
DBConfig.execute_query("SELECT SUM(`trip_distance`) AS `average_trip_distance`\nFROM `trips`\nLIMIT 1")

INFO:databricks.sql.client:Successfully opened session 84ea4e01-818d-4f1c-a4a2-7dbb94cd646a
Successfully opened session 84ea4e01-818d-4f1c-a4a2-7dbb94cd646a
Successfully opened session 84ea4e01-818d-4f1c-a4a2-7dbb94cd646a


[32m2024-01-19 09:37:58.015[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36mexecute_query[0m:[36m268[0m - [34m[1mExecuting query:
 SELECT SUM(`trip_distance`) AS `average_trip_distance`
FROM `trips`
LIMIT 1[0m


INFO:databricks.sql.client:Closing session 84ea4e01-818d-4f1c-a4a2-7dbb94cd646a
Closing session 84ea4e01-818d-4f1c-a4a2-7dbb94cd646a
Closing session 84ea4e01-818d-4f1c-a4a2-7dbb94cd646a


([(62568.24999999994,)],
 'SELECT SUM(`trip_distance`) AS `average_trip_distance`\nFROM `trips`\nLIMIT 1')