From 696c8954bef8299235fafafa5f69ac68994b5c7f Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Mon, 30 Mar 2026 11:40:39 -0700 Subject: [PATCH 1/4] feat(query): Add async fallback to long running queries --- src/command.rs | 18 ++++++++++-- src/main.rs | 21 ++++++++++++-- src/query.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 110 insertions(+), 7 deletions(-) diff --git a/src/command.rs b/src/command.rs index b6e6a41..b5acabb 100644 --- a/src/command.rs +++ b/src/command.rs @@ -25,10 +25,10 @@ pub enum Commands { command: Option, }, - /// Execute a SQL query + /// Execute a SQL query, or check status of a running query Query { - /// SQL query string - sql: String, + /// SQL query string (omit when using a subcommand) + sql: Option, /// Workspace ID (defaults to first workspace from login) #[arg(long, short = 'w')] @@ -41,6 +41,9 @@ pub enum Commands { /// Output format #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "csv"])] output: String, + + #[command(subcommand)] + command: Option, }, /// Manage workspaces @@ -187,6 +190,15 @@ impl From for clap_complete::Shell { } } +#[derive(Subcommand)] +pub enum QueryCommands { + /// Check the status of a running query and retrieve results + Status { + /// Query run ID + id: String, + }, +} + #[derive(Subcommand)] pub enum AuthCommands { /// Remove authentication for a profile diff --git a/src/main.rs b/src/main.rs index 0756348..89a24ae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,7 +19,7 @@ mod workspace; use anstyle::AnsiColor; use clap::{Parser, builder::Styles}; -use command::{AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, DatasetsCommands, IndexesCommands, JobsCommands, QueriesCommands, ResultsCommands, SkillCommands, TablesCommands, WorkspaceCommands}; +use command::{AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, DatasetsCommands, IndexesCommands, JobsCommands, QueriesCommands, QueryCommands, ResultsCommands, SkillCommands, TablesCommands, WorkspaceCommands}; #[derive(Parser)] #[command(name = "hotdata", version, about = concat!("Hotdata CLI - Command line interface for Hotdata (v", env!("CARGO_PKG_VERSION"), ")"), long_about = None, disable_version_flag = true)] @@ -109,9 +109,24 @@ fn main() { } } } - Commands::Query { sql, workspace_id, connection, output } => { + Commands::Query { sql, workspace_id, connection, output, command } => { let workspace_id = resolve_workspace(workspace_id); - query::execute(&sql, &workspace_id, connection.as_deref(), &output) + match command { + Some(QueryCommands::Status { id }) => { + query::poll(&id, &workspace_id, &output) + } + None => { + match sql { + Some(sql) => query::execute(&sql, &workspace_id, connection.as_deref(), &output), + None => { + use clap::CommandFactory; + let mut cmd = Cli::command(); + cmd.build(); + cmd.find_subcommand_mut("query").unwrap().print_help().unwrap(); + } + } + } + } } Commands::Workspaces { command } => match command { WorkspaceCommands::List { output } => workspace::list(&output), diff --git a/src/query.rs b/src/query.rs index 4cbb386..6f9e7e5 100644 --- a/src/query.rs +++ b/src/query.rs @@ -13,6 +13,20 @@ pub struct QueryResponse { pub warning: Option, } +#[derive(Deserialize)] +struct AsyncResponse { + query_run_id: String, + status: String, +} + +#[derive(Deserialize)] +struct QueryRunResponse { + query_run_id: String, + status: String, + result_id: Option, + error: Option, +} + fn value_to_string(v: &Value) -> String { match v { Value::Null => "NULL".to_string(), @@ -33,12 +47,40 @@ fn value_to_string(v: &Value) -> String { pub fn execute(sql: &str, workspace_id: &str, connection: Option<&str>, format: &str) { let api = ApiClient::new(Some(workspace_id)); - let mut body = serde_json::json!({ "sql": sql }); + let mut body = serde_json::json!({ + "sql": sql, + "async": true, + "async_after_ms": 1000, + }); if let Some(conn) = connection { body["connection_id"] = Value::String(conn.to_string()); } + let spinner = indicatif::ProgressBar::new_spinner(); + spinner.set_style( + indicatif::ProgressStyle::with_template("{spinner:.cyan} {msg}") + .unwrap(), + ); + spinner.set_message("running query..."); + spinner.enable_steady_tick(std::time::Duration::from_millis(80)); + let (status, resp_body) = api.post_raw("/query", &body); + spinner.finish_and_clear(); + + if status.as_u16() == 202 { + let async_resp: AsyncResponse = match serde_json::from_str(&resp_body) { + Ok(r) => r, + Err(e) => { + eprintln!("error parsing async response: {e}"); + std::process::exit(1); + } + }; + use crossterm::style::Stylize; + eprintln!("{}", format!("query still running (status: {})", async_resp.status).yellow()); + eprintln!("query_run_id: {}", async_resp.query_run_id); + eprintln!("{}", format!("Poll with: hotdata query status {}", async_resp.query_run_id).dark_grey()); + return; + } if !status.is_success() { let message = serde_json::from_str::(&resp_body) @@ -61,6 +103,40 @@ pub fn execute(sql: &str, workspace_id: &str, connection: Option<&str>, format: print_result(&result, format); } +/// Poll a query run by ID. If succeeded and has a result_id, fetch and display the result. +pub fn poll(query_run_id: &str, workspace_id: &str, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + + let run: QueryRunResponse = api.get(&format!("/query-runs/{query_run_id}")); + + match run.status.as_str() { + "succeeded" => { + match run.result_id { + Some(ref result_id) => { + let result: QueryResponse = api.get(&format!("/results/{result_id}")); + print_result(&result, format); + } + None => { + use crossterm::style::Stylize; + println!("{}", "Query succeeded but no result available.".yellow()); + } + } + } + "failed" => { + use crossterm::style::Stylize; + let err = run.error.as_deref().unwrap_or("unknown error"); + eprintln!("{}", format!("query failed: {err}").red()); + std::process::exit(1); + } + status => { + use crossterm::style::Stylize; + eprintln!("{}", format!("query status: {status}").yellow()); + eprintln!("query_run_id: {}", run.query_run_id); + eprintln!("{}", format!("Poll again with: hotdata query {}", run.query_run_id).dark_grey()); + } + } +} + pub fn print_result(result: &QueryResponse, format: &str) { if let Some(ref warning) = result.warning { eprintln!("warning: {warning}"); From 1f3b6f5d3e914c6ea1175cd356e92125ce4c0167 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Mon, 30 Mar 2026 11:43:28 -0700 Subject: [PATCH 2/4] fix query_runs/id response parsing --- src/query.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/query.rs b/src/query.rs index 6f9e7e5..35c69dd 100644 --- a/src/query.rs +++ b/src/query.rs @@ -21,9 +21,10 @@ struct AsyncResponse { #[derive(Deserialize)] struct QueryRunResponse { - query_run_id: String, + id: String, status: String, result_id: Option, + #[serde(default)] error: Option, } @@ -131,8 +132,8 @@ pub fn poll(query_run_id: &str, workspace_id: &str, format: &str) { status => { use crossterm::style::Stylize; eprintln!("{}", format!("query status: {status}").yellow()); - eprintln!("query_run_id: {}", run.query_run_id); - eprintln!("{}", format!("Poll again with: hotdata query {}", run.query_run_id).dark_grey()); + eprintln!("query_run_id: {}", run.id); + eprintln!("{}", format!("Poll again with: hotdata query {}", run.id).dark_grey()); } } } From 0e9e71efbbb6dc6523bef512a3a6e86b3d536b00 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Mon, 30 Mar 2026 12:18:16 -0700 Subject: [PATCH 3/4] fix hint text --- src/query.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query.rs b/src/query.rs index 35c69dd..c7cd5d4 100644 --- a/src/query.rs +++ b/src/query.rs @@ -133,7 +133,7 @@ pub fn poll(query_run_id: &str, workspace_id: &str, format: &str) { use crossterm::style::Stylize; eprintln!("{}", format!("query status: {status}").yellow()); eprintln!("query_run_id: {}", run.id); - eprintln!("{}", format!("Poll again with: hotdata query {}", run.id).dark_grey()); + eprintln!("{}", format!("Poll again with: hotdata query status {}", run.id).dark_grey()); } } } From b86193437bf7843df41e568cc635ae79c78d0927 Mon Sep 17 00:00:00 2001 From: Paul Thurlow Date: Mon, 30 Mar 2026 12:30:52 -0700 Subject: [PATCH 4/4] Add to readme and add error code for pending --- README.md | 26 +++++++++++++++++++------- skills/hotdata-cli/SKILL.md | 35 ++++++++++++++++++++++++----------- src/command.rs | 3 ++- src/query.rs | 3 ++- 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 9ce9fd6..505ca76 100644 --- a/README.md +++ b/README.md @@ -138,11 +138,15 @@ hotdata datasets create --url "https://example.com/data.parquet" --label "My Dat ## Query ```sh -hotdata query "" [--workspace-id ] [--connection ] [--format table|json|csv] +hotdata query "" [-w ] [--connection ] [-o table|json|csv] +hotdata query status [-o table|json|csv] ``` -- Default format is `table`, which prints results with row count and execution time. +- Default output is `table`, which prints results with row count and execution time. - Use `--connection` to scope the query to a specific connection. +- Long-running queries automatically fall back to async execution and return a `query_run_id`. +- Use `hotdata query status ` to poll for results. +- Exit codes for `query status`: `0` = succeeded, `1` = failed, `2` = still running (poll again). ## Saved Queries @@ -163,13 +167,21 @@ hotdata queries run [--format table|json|csv] ## Search ```sh -hotdata search "" --table --column [--select ] [--limit ] [--format table|json|csv] +# BM25 full-text search +hotdata search "query text" --table --column [--select ] [--limit ] [-o table|json|csv] + +# Vector search with --model (calls OpenAI to embed the query) +hotdata search "query text" --table --column --model text-embedding-3-small [--limit ] + +# Vector search with piped embedding +echo '[0.1, -0.2, ...]' | hotdata search --table
--column [--limit ] ``` -- Full-text search using BM25 across a table column. -- Requires a BM25 index on the target column (see `indexes create`). -- Results are ordered by relevance score (descending). -- `--select` specifies which columns to return (comma-separated, defaults to all). The `score` column is automatically appended when `--select` is used. +- Without `--model` and with query text: BM25 full-text search. Requires a BM25 index on the target column. +- With `--model`: generates an embedding via OpenAI and performs vector search using `l2_distance`. Requires `OPENAI_API_KEY` env var. +- Without query text and with piped stdin: reads a vector (raw JSON array or OpenAI embedding response) and performs vector search. +- BM25 results are ordered by relevance score (descending). Vector results are ordered by distance (ascending). +- `--select` specifies which columns to return (comma-separated, defaults to all). ## Indexes diff --git a/skills/hotdata-cli/SKILL.md b/skills/hotdata-cli/SKILL.md index 50ac800..095393d 100644 --- a/skills/hotdata-cli/SKILL.md +++ b/skills/hotdata-cli/SKILL.md @@ -163,16 +163,21 @@ Use `hotdata datasets ` to look up the `table_name` before writing q ### Execute SQL Query ``` -hotdata query "" [--workspace-id ] [--connection ] [--format table|json|csv] +hotdata query "" [-w ] [--connection ] [-o table|json|csv] +hotdata query status [-o table|json|csv] ``` -- Default format is `table`, which prints results with row count and execution time. +- Default output is `table`, which prints results with row count and execution time. - Use `--connection` to scope the query to a specific connection. - Use `hotdata tables list` to discover tables and columns — do not query `information_schema` directly. - **Always use PostgreSQL dialect SQL.** +- Long-running queries automatically fall back to async execution and return a `query_run_id`. +- Use `hotdata query status ` to poll for results. +- Exit codes for `query status`: `0` = succeeded, `1` = failed, `2` = still running (poll again). +- **When a query returns a `query_run_id`, use `query status` to poll rather than re-running the query.** ### Get Query Result ``` -hotdata results [--workspace-id ] [--format table|json|csv] +hotdata results [-w ] [-o table|json|csv] ``` - Retrieves a previously executed query result by its result ID. - Query results include a `result-id` in the footer (e.g. `[result-id: rslt...]`). @@ -195,23 +200,31 @@ hotdata queries run [--format table|json|csv] ### Search ``` -hotdata search "" --table --column [--select ] [--limit ] [--format table|json|csv] +# BM25 full-text search +hotdata search "query text" --table --column [--select ] [--limit ] [-o table|json|csv] + +# Vector search with --model (calls OpenAI to embed the query) +hotdata search "query text" --table
--column --model text-embedding-3-small [--limit ] + +# Vector search with piped embedding +echo '[0.1, -0.2, ...]' | hotdata search --table
--column [--limit ] ``` -- Full-text search using BM25 across a table column. -- Requires a BM25 index on the target column (see `indexes create`). -- Results are ordered by relevance score (descending). -- `--select` specifies which columns to return (comma-separated, defaults to all). The `score` column is automatically appended when `--select` is used. +- Without `--model` and with query text: BM25 full-text search. Requires a BM25 index on the target column. +- With `--model`: generates an embedding via OpenAI and performs vector search using `l2_distance`. Requires `OPENAI_API_KEY` env var. Supported models: `text-embedding-3-small`, `text-embedding-3-large`. +- Without query text and with piped stdin: reads a vector (raw JSON array or OpenAI embedding response) and performs vector search. +- BM25 results are ordered by relevance score (descending). Vector results are ordered by distance (ascending). +- `--select` specifies which columns to return (comma-separated, defaults to all). - Default limit is 10. +- **For BM25 search, create a BM25 index on the target column first. For vector search, create a vector index.** ### Indexes ``` -hotdata indexes list --connection-id --schema --table
[--workspace-id ] [--format table|json|yaml] -hotdata indexes create --connection-id --schema --table
--name --columns [--type sorted|bm25|vector] [--metric l2|cosine|dot] [--async] +hotdata indexes list -c --schema --table
[-w ] [-o table|json|yaml] +hotdata indexes create -c --schema --table
--name --columns [--type sorted|bm25|vector] [--metric l2|cosine|dot] [--async] ``` - `list` shows indexes on a table with name, type, columns, status, and creation date. - `create` creates an index. Use `--type bm25` for full-text search, `--type vector` for vector search (requires `--metric`). - `--async` submits index creation as a background job. Use `hotdata jobs ` to check status. -- **Before using `hotdata search`, create a BM25 index on the target column.** ### Jobs ``` diff --git a/src/command.rs b/src/command.rs index b5acabb..3f2fba0 100644 --- a/src/command.rs +++ b/src/command.rs @@ -192,7 +192,8 @@ impl From for clap_complete::Shell { #[derive(Subcommand)] pub enum QueryCommands { - /// Check the status of a running query and retrieve results + /// Check the status of a running query and retrieve results. + /// Exit codes: 0 = succeeded, 1 = failed, 2 = still running (poll again) Status { /// Query run ID id: String, diff --git a/src/query.rs b/src/query.rs index c7cd5d4..9c4f609 100644 --- a/src/query.rs +++ b/src/query.rs @@ -80,7 +80,7 @@ pub fn execute(sql: &str, workspace_id: &str, connection: Option<&str>, format: eprintln!("{}", format!("query still running (status: {})", async_resp.status).yellow()); eprintln!("query_run_id: {}", async_resp.query_run_id); eprintln!("{}", format!("Poll with: hotdata query status {}", async_resp.query_run_id).dark_grey()); - return; + std::process::exit(2); } if !status.is_success() { @@ -134,6 +134,7 @@ pub fn poll(query_run_id: &str, workspace_id: &str, format: &str) { eprintln!("{}", format!("query status: {status}").yellow()); eprintln!("query_run_id: {}", run.id); eprintln!("{}", format!("Poll again with: hotdata query status {}", run.id).dark_grey()); + std::process::exit(2); } } }