In [1]:
!python3 -m pip install --force-reinstall sql_sidekick-0.1.7-py3-none-any.whl

Processing ./sql_sidekick-0.1.7-py3-none-any.whl
Collecting numpy<2.0.0,>=1.21.2
  Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting pandasql==0.7.3
  Using cached pandasql-0.7.3-py3-none-any.whl
Collecting click<9.0.0,>=8.0.1
  Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting loguru<0.8.0,>=0.7.0
  Using cached loguru-0.7.2-py3-none-any.whl (62 kB)
Collecting toml<0.11.0,>=0.10.2
  Using cached toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting psycopg2-binary<3.0.0,>=2.9.6
  Using cached psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
Collecting bitsandbytes==0.41.0
  Using cached bitsandbytes-0.41.0-py3-none-any.whl (92.6 MB)
Collecting sqlalchemy-utils<0.42.0,>=0.41.1
  Using cached SQLAlchemy_Utils-0.41.1-py3-none-any.whl (92 kB)
Collecting accelerate==0.21.0
  Using cached accelerate-0.21.0-py3-none-any.whl (244 kB)
Collecting sentence-transformers<3.0.0,>=2.2.2
  Using cach

In [2]:
import gc
import json
import logging
import os
from pathlib import Path
from typing import List, Optional

import openai
import toml
import torch

In [3]:
from sidekick.prompter import (data_preview, db_setup, ask,
                               recommend_suggestions)
from sidekick.schema_generator import generate_schema
from sidekick.query import SQLGenerator
from sidekick.utils import (MODEL_CHOICE_MAP_DEFAULT,
                            MODEL_CHOICE_MAP_EVAL_MODE, TASK_CHOICE,
                            get_table_keys, save_query, setup_dir,
                            update_tables)

In [4]:
# Working directory root for the demo, and the scratch location derived from it.
base_path = "./"
cache_path = base_path + "/var/lib/tmp"

# Let sidekick prepare its directory layout under the root.
setup_dir(base_path)

In [5]:
# Register a single table with the database (extending this to multiple tables is WIP).

# Connection settings forwarded unchanged to db_setup.
HOST_NAME = "localhost"
USER_NAME = "sqlite"
PASSWORD = "abc"
DB_NAME = "querydb"
PORT = "5432"

# Source .csv file and the table name it is registered under.
data_path = "./sleep_health_and_lifestyle_dataset.csv"
table_name = "sleep_health_demo"

# Auto-generate the schema from the .csv file; the schema file is requested under cache_path.
r, table_info_path = generate_schema(
    data_path,
    f"{cache_path}/{table_name}_table_info.jsonl",
)

# Create the table from the generated schema and load the .csv rows as sample values.
llm_response, err = db_setup(
    db_name=DB_NAME,
    hostname=HOST_NAME,
    user_name=USER_NAME,
    password=PASSWORD,
    port=PORT,
    table_info_path=table_info_path,
    table_samples_path=data_path,
    table_name=table_name,
    local_base_path=base_path,
)

 Information supplied:
 querydb, localhost, sqlite, abc, 5432


[32m2023-11-29 22:24:24.183[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36m__init__[0m:[36m42[0m - [34m[1mCreating SQLite DB: sqlite:////home/pramit/testing/q/apps/system/sql-sidekick/db/sqlite/querydb.db[0m


Database already exists!
Table name: sleep_health_2


[32m2023-11-29 22:24:24.189[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36m_extract_schema_info[0m:[36m100[0m - [34m[1mSchema path: /home/pramit/testing/q/apps/system/sql-sidekick/var/lib/tmp/sleep_health_2_table_info.jsonl[0m
[32m2023-11-29 22:24:24.191[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36mcreate_table[0m:[36m145[0m - [34m[1mSchema info used for creating table:
 Person_ID NUMERIC,
Gender TEXT COLLATE NOCASE,
Age NUMERIC,
Occupation TEXT COLLATE NOCASE,
Sleep_Duration NUMERIC,
Quality_of_Sleep NUMERIC,
Physical_Activity_Level NUMERIC,
Stress_Level NUMERIC,
BMI_Category TEXT COLLATE NOCASE,
Blood_Pressure TEXT COLLATE NOCASE,
Heart_Rate NUMERIC,
Daily_Steps NUMERIC,
Sleep_Disorder TEXT COLLATE NOCASE[0m


Checked table sleep_health_2 exists in the DB.


[32m2023-11-29 22:24:24.197[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36madd_samples[0m:[36m179[0m - [34m[1mAdding sample values to table: /home/pramit/testing/q/apps/system/sql-sidekick/examples/demo/sleep_health_and_lifestyle_dataset.csv[0m
[32m2023-11-29 22:24:24.202[0m | [34m[1mDEBUG   [0m | [36msidekick.db_config[0m:[36madd_samples[0m:[36m185[0m - [34m[1mInserting chunk: 0[0m
[32m2023-11-29 22:24:24.492[0m | [1mINFO    [0m | [36msidekick.db_config[0m:[36madd_samples[0m:[36m190[0m - [1mData inserted into table: sleep_health_2[0m
[32m2023-11-29 22:24:24.495[0m | [1mINFO    [0m | [36msidekick.db_config[0m:[36madd_samples[0m:[36m195[0m - [1mNumber of rows inserted: 1122[0m


Created a Database querydb. Inserted sample values from /home/pramit/testing/q/apps/system/sql-sidekick/examples/demo/sleep_health_and_lifestyle_dataset.csv into table sleep_health_2, please ask questions!


In [None]:
def query(
    question: str,
    table_info_path: str,
    sample_qna_path: Optional[str],
    model_name: str = "h2ogpt-sql-sqlcoder2",
):
    """Ask a natural-language question and return the result of `ask`.

    Args:
        question: The question to answer, in plain English.
        table_info_path: Path to the table schema file (in this notebook, the
            path returned by `generate_schema`).
        sample_qna_path: Path to sample question/answer pairs, forwarded as
            `sample_queries_path`. Pass None for zero-shot prompting.
        model_name: Model identifier forwarded to `ask`. Defaults to the
            model this notebook was written against.

    Returns:
        Whatever `ask` returns, passed through unchanged. Its exact shape is
        not visible from this notebook -- confirm against the sidekick library.
    """
    # Previously the result of `ask` was dropped, so callers always received None
    # even though the docstring promised a return value.
    return ask(
        question=question,
        table_info_path=table_info_path,
        sample_queries_path=sample_qna_path,
        table_name=None,
        is_command=False,
        model_name=model_name,
        local_base_path="./",
    )

In [13]:
# Zero-shot prompting: no sample question/answer file is supplied.
query(
    question="What is the average sleep duration for each gender?",
    table_info_path=table_info_path,
    sample_qna_path=None,
)

[32m2023-11-29 22:28:56.382[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m421[0m - [1mTable in use: ['sleep_health_2'][0m
[32m2023-11-29 22:28:56.384[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m453[0m - [1mSetting context...[0m
[32m2023-11-29 22:28:56.385[0m | [1mINFO    [0m | [36msidekick.prompter[0m:[36mask[0m:[36m454[0m - [1mQuestion: What is the average sleep duration for each gender?[0m
[32m2023-11-29 22:28:56.387[0m | [34m[1mDEBUG   [0m | [36msidekick.utils[0m:[36mis_resource_low[0m:[36m325[0m - [34m[1mInformation on device: 0[0m
[32m2023-11-29 22:28:56.388[0m | [1mINFO    [0m | [36msidekick.utils[0m:[36mis_resource_low[0m:[36m328[0m - [1mTotal Memory: 23GB[0m
[32m2023-11-29 22:28:56.390[0m | [1mINFO    [0m | [36msidekick.utils[0m:[36mis_resource_low[0m:[36m329[0m - [1mFree GPU memory: 12GB[0m
[32m2023-11-29 22:28:56.412[0m | [1mINFO    [0m | [36msidekick.utils[0m:[3

The query results are:
 [('Female', 7.229729729729707), ('Male', 7.03650793650794)]
Exiting...
