From 74e39ce8ba21d2d60749df3da8746a3591b31be8 Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Tue, 19 May 2026 17:54:26 -0700 Subject: [PATCH 1/2] feat(indexes,databases): dot-bracket notation for indexes create; dot notation for databases load - `hotdata indexes create airbnb.listings[col1,col2] --type bm25` replaces verbose --connection-id/--schema/--table/--columns flags - `hotdata databases load airbnb.listings --url ...` replaces `hotdata databases tables load airbnb listings --url ...` - `--name` on indexes create is now optional (auto-derived from table+cols+type) - Add `connections::resolve_connection_id` to look up connection by name or ID - Remove load hint from `databases create` success output Co-Authored-By: Claude Sonnet 4.6 --- src/command.rs | 56 +++++++++++------ src/connections.rs | 20 ++++++ src/databases.rs | 4 -- src/main.rs | 152 +++++++++++++++++++++++++++++++++++++-------- 4 files changed, 183 insertions(+), 49 deletions(-) diff --git a/src/command.rs b/src/command.rs index 341cb4a..b528f45 100644 --- a/src/command.rs +++ b/src/command.rs @@ -320,34 +320,31 @@ pub enum IndexesCommands { output: String, }, - /// Create an index on a table or dataset + /// Create an index on a table or dataset. /// - /// Pass either connection scope (--connection-id + --schema + --table) OR - /// dataset scope (--dataset-id), not both. + /// For connection-scoped indexes, pass the table and columns using bracket notation: + /// `connection.table[col1,col2]` or `connection.schema.table[col1,col2]` + /// (schema defaults to `public` when omitted) + /// + /// For dataset-scoped indexes, use `--dataset-id` with `--columns`. Create { - /// Connection ID (use with --schema and --table) - #[arg(long, short = 'c', conflicts_with = "dataset_id", requires_all = ["schema", "table"])] - connection_id: Option, - - /// Schema name (requires --connection-id) - #[arg(long, requires = "connection_id")] - schema: Option, - - /// Table name (requires --connection-id) - #[arg(long, requires = "connection_id")] - table: Option, + /// Table and columns to index: `connection.table[col1,col2]` + /// or `connection.schema.table[col1,col2]`. Schema defaults to `public`. + #[arg(conflicts_with = "dataset_id")] + target: Option, - /// Dataset ID (alternative scope to --connection-id) - #[arg(long, conflicts_with_all = ["connection_id", "schema", "table"])] + /// Dataset ID (alternative scope to the positional target) + #[arg(long, conflicts_with = "target")] dataset_id: Option, - /// Index name + /// Columns to index (comma-separated). Required with --dataset-id; + /// for connection scope use bracket notation in the target instead. #[arg(long)] - name: String, + columns: Option, - /// Columns to index (comma-separated). Vector indexes accept exactly one column. + /// Index name (derived from table, columns, and type if omitted) #[arg(long)] - columns: String, + name: Option, /// Index type — required (no default; choose deliberately) #[arg(long, value_parser = ["sorted", "bm25", "vector"])] @@ -580,6 +577,25 @@ pub enum DatabasesCommands { name_or_id: String, }, + /// Load a parquet file into a table using dot notation: `database.table` or `database.schema.table` + Load { + /// Table to load into: `database.table` or `database.schema.table`. + /// Schema defaults to `public` when omitted. + target: String, + + /// Path to a local parquet file to upload and load + #[arg(long, conflicts_with_all = ["upload_id", "url"])] + file: Option, + + /// URL of a remote parquet file to download and load + #[arg(long, conflicts_with_all = ["file", "upload_id"])] + url: Option, + + /// Use a previously staged upload ID from `POST /v1/files` instead of uploading + #[arg(long, conflicts_with_all = ["file", "url"])] + upload_id: Option, + }, + /// Manage tables inside a managed database Tables { #[command(subcommand)] diff --git a/src/connections.rs b/src/connections.rs index fcf011e..b486d83 100644 --- a/src/connections.rs +++ b/src/connections.rs @@ -157,6 +157,26 @@ struct ListResponse { connections: Vec, } +/// Resolve a connection name or ID to a connection ID, exiting on failure. +pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { + let body: ListResponse = api.get("/connections"); + match body + .connections + .iter() + .find(|c| c.id == name_or_id || c.name == name_or_id) + { + Some(conn) => conn.id.clone(), + None => { + use crossterm::style::Stylize; + eprintln!( + "{}", + format!("error: no connection named or with id '{name_or_id}'").red() + ); + std::process::exit(1); + } + } +} + pub fn get(workspace_id: &str, connection_id: &str, format: &str) { let api = ApiClient::new(Some(workspace_id)); let is_table = format == "table"; diff --git a/src/databases.rs b/src/databases.rs index e57bc6d..1ee73da 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -405,10 +405,6 @@ pub fn create(workspace_id: &str, name: &str, schema: &str, tables: &[String], f println!("{}", "Database created".green()); println!("name: {}", result.name); println!("id: {}", result.id); - println!( - "load: hotdata databases tables load {} --file ./data.parquet", - result.name - ); } _ => unreachable!(), } diff --git a/src/main.rs b/src/main.rs index aa07b0a..54c49a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -396,6 +396,23 @@ fn main() { Some(DatabasesCommands::Delete { name_or_id }) => { databases::delete(&workspace_id, &name_or_id) } + Some(DatabasesCommands::Load { + target, + file, + url, + upload_id, + }) => { + let (database, schema, table) = parse_db_target(&target); + databases::tables_load( + &workspace_id, + &database, + &table, + Some(schema.as_str()), + file.as_deref(), + url.as_deref(), + upload_id.as_deref(), + ) + } Some(DatabasesCommands::Tables { command }) => match command { DatabaseTablesCommands::List { database, @@ -565,12 +582,10 @@ fn main() { &output, ), IndexesCommands::Create { - connection_id, - schema, - table, + target, dataset_id, - name, columns, + name, r#type, metric, r#async, @@ -579,32 +594,64 @@ fn main() { output_column, description, } => { - let scope = match ( - dataset_id.as_deref(), - connection_id.as_deref(), - schema.as_deref(), - table.as_deref(), - ) { - (Some(did), _, _, _) => indexes::IndexScope::Dataset { dataset_id: did }, - (None, Some(cid), Some(sch), Some(tbl)) => { - indexes::IndexScope::Connection { - connection_id: cid, - schema: sch, - table: tbl, + let api = api::ApiClient::new(Some(&workspace_id)); + let (scope, resolved_columns, auto_name) = + match (target.as_deref(), dataset_id.as_deref()) { + (Some(tgt), None) => { + let (conn_name, schema, table, cols) = + parse_index_target(tgt); + let conn_id = + connections::resolve_connection_id(&api, &conn_name); + let auto = format!( + "{table}_{cols}_{type}", + cols = cols.join("_"), + type = r#type + ); + ( + (conn_id, schema, table), + cols.join(","), + auto, + ) } - } - _ => { - eprintln!( - "error: provide either --dataset-id or all three of --connection-id, --schema, --table" - ); - std::process::exit(1); + (None, Some(did)) => { + let cols = + columns.as_deref().unwrap_or_else(|| { + eprintln!( + "error: --columns is required with --dataset-id" + ); + std::process::exit(1); + }); + let auto = format!("dataset_{type}", type = r#type); + ( + (did.to_string(), String::new(), String::new()), + cols.to_string(), + auto, + ) + } + _ => { + eprintln!( + "error: provide either (e.g. airbnb.listings[col1,col2]) or --dataset-id with --columns" + ); + std::process::exit(1); + } + }; + let index_name = name.unwrap_or(auto_name); + let is_dataset = dataset_id.is_some(); + let (conn_id, schema, table) = scope; + let resolved_scope = if is_dataset { + indexes::IndexScope::Dataset { dataset_id: &conn_id } + } else { + indexes::IndexScope::Connection { + connection_id: &conn_id, + schema: &schema, + table: &table, } }; indexes::create( &workspace_id, - scope, - &name, - &columns, + resolved_scope, + &index_name, + &resolved_columns, &r#type, metric.as_deref(), r#async, @@ -911,6 +958,61 @@ fn main() { } } +/// Parse a database target like `airbnb.listings` or `airbnb.public.listings` +/// into `(database, schema, table)`. Schema defaults to `public`. +fn parse_db_target(target: &str) -> (String, String, String) { + let parts: Vec<&str> = target.splitn(4, '.').collect(); + match parts.as_slice() { + [db, tbl] => (db.to_string(), "public".to_string(), tbl.to_string()), + [db, schema, tbl] => (db.to_string(), schema.to_string(), tbl.to_string()), + _ => { + eprintln!( + "error: target must be 'database.table' or 'database.schema.table'" + ); + std::process::exit(1); + } + } +} + +/// Parse an index target like `airbnb.listings[col1,col2]` or +/// `airbnb.public.listings[col1,col2]` into `(conn_name, schema, table, columns)`. +/// Schema defaults to `public` when only two dot-parts are given. +fn parse_index_target(target: &str) -> (String, String, String, Vec) { + let Some(bracket_pos) = target.find('[') else { + eprintln!( + "error: target must include columns in brackets, e.g. airbnb.listings[col1,col2]" + ); + std::process::exit(1); + }; + let table_part = &target[..bracket_pos]; + let cols_raw = target[bracket_pos + 1..].trim_end_matches(']'); + + let parts: Vec<&str> = table_part.splitn(4, '.').collect(); + let (conn, schema, table) = match parts.as_slice() { + [c, t] => (c.to_string(), "public".to_string(), t.to_string()), + [c, s, t] => (c.to_string(), s.to_string(), t.to_string()), + _ => { + eprintln!( + "error: target must be 'connection.table[cols]' or 'connection.schema.table[cols]'" + ); + std::process::exit(1); + } + }; + + let columns: Vec = cols_raw + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + + if columns.is_empty() { + eprintln!("error: no columns specified in brackets"); + std::process::exit(1); + } + + (conn, schema, table, columns) +} + pub fn get_styles() -> clap::builder::Styles { Styles::styled() .header(AnsiColor::Yellow.on_default()) From 52642e2aa1d0febcb240a99b2dc7355cb9e4461c Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Tue, 19 May 2026 18:01:19 -0700 Subject: [PATCH 2/2] fix: address review feedback on dot-bracket notation - Dataset auto-name now includes columns (dataset_{cols}_{type}) to avoid collision - Validate bracket closure in parse_index_target; reject unclosed/garbled brackets - Add quoting note to indexes create doc comment for shell glob safety - resolve_connection_id tries GET /connections/{id} directly for conn* prefixed IDs - Add unit tests for parse_db_target and parse_index_target (6 tests) Co-Authored-By: Claude Sonnet 4.6 --- src/command.rs | 3 ++ src/connections.rs | 14 +++++++++- src/main.rs | 70 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/command.rs b/src/command.rs index b528f45..a92193a 100644 --- a/src/command.rs +++ b/src/command.rs @@ -330,6 +330,9 @@ pub enum IndexesCommands { Create { /// Table and columns to index: `connection.table[col1,col2]` /// or `connection.schema.table[col1,col2]`. Schema defaults to `public`. + /// + /// Quote the argument to prevent shell glob expansion: + /// `hotdata indexes create 'airbnb.listings[description]' --type bm25` #[arg(conflicts_with = "dataset_id")] target: Option, diff --git a/src/connections.rs b/src/connections.rs index b486d83..135663f 100644 --- a/src/connections.rs +++ b/src/connections.rs @@ -158,7 +158,20 @@ struct ListResponse { } /// Resolve a connection name or ID to a connection ID, exiting on failure. +/// +/// If `name_or_id` looks like a raw connection ID (starts with "conn"), tries +/// `GET /connections/{id}` directly first to avoid listing the full workspace. +/// Falls back to listing and matching by name on a 404 or when given a plain name. pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { + use crossterm::style::Stylize; + + if name_or_id.starts_with("conn") { + let (status, _) = api.get_raw(&format!("/connections/{name_or_id}")); + if status.is_success() { + return name_or_id.to_string(); + } + } + let body: ListResponse = api.get("/connections"); match body .connections @@ -167,7 +180,6 @@ pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { { Some(conn) => conn.id.clone(), None => { - use crossterm::style::Stylize; eprintln!( "{}", format!("error: no connection named or with id '{name_or_id}'").red() diff --git a/src/main.rs b/src/main.rs index 54c49a3..48f4569 100644 --- a/src/main.rs +++ b/src/main.rs @@ -621,7 +621,11 @@ fn main() { ); std::process::exit(1); }); - let auto = format!("dataset_{type}", type = r#type); + let auto = format!( + "dataset_{cols}_{type}", + cols = cols.replace(',', "_"), + type = r#type + ); ( (did.to_string(), String::new(), String::new()), cols.to_string(), @@ -984,8 +988,14 @@ fn parse_index_target(target: &str) -> (String, String, String, Vec) { ); std::process::exit(1); }; + if !target.ends_with(']') { + eprintln!( + "error: target bracket is not closed — use e.g. 'airbnb.listings[col1,col2]'" + ); + std::process::exit(1); + } let table_part = &target[..bracket_pos]; - let cols_raw = target[bracket_pos + 1..].trim_end_matches(']'); + let cols_raw = &target[bracket_pos + 1..target.len() - 1]; let parts: Vec<&str> = table_part.splitn(4, '.').collect(); let (conn, schema, table) = match parts.as_slice() { @@ -1013,6 +1023,62 @@ fn parse_index_target(target: &str) -> (String, String, String, Vec) { (conn, schema, table, columns) } +#[cfg(test)] +mod tests { + use super::*; + + // --- parse_db_target --- + + #[test] + fn db_target_two_parts_defaults_schema_to_public() { + let (db, schema, table) = parse_db_target("airbnb.listings"); + assert_eq!(db, "airbnb"); + assert_eq!(schema, "public"); + assert_eq!(table, "listings"); + } + + #[test] + fn db_target_three_parts_uses_explicit_schema() { + let (db, schema, table) = parse_db_target("airbnb.staging.listings"); + assert_eq!(db, "airbnb"); + assert_eq!(schema, "staging"); + assert_eq!(table, "listings"); + } + + // --- parse_index_target --- + + #[test] + fn index_target_two_parts_defaults_schema_to_public() { + let (conn, schema, table, cols) = parse_index_target("airbnb.listings[description]"); + assert_eq!(conn, "airbnb"); + assert_eq!(schema, "public"); + assert_eq!(table, "listings"); + assert_eq!(cols, vec!["description"]); + } + + #[test] + fn index_target_three_parts_uses_explicit_schema() { + let (conn, schema, table, cols) = + parse_index_target("airbnb.public.listings[name,description]"); + assert_eq!(conn, "airbnb"); + assert_eq!(schema, "public"); + assert_eq!(table, "listings"); + assert_eq!(cols, vec!["name", "description"]); + } + + #[test] + fn index_target_multiple_columns() { + let (_, _, _, cols) = parse_index_target("db.tbl[a,b,c]"); + assert_eq!(cols, vec!["a", "b", "c"]); + } + + #[test] + fn index_target_trims_column_whitespace() { + let (_, _, _, cols) = parse_index_target("db.tbl[a, b]"); + assert_eq!(cols, vec!["a", "b"]); + } +} + pub fn get_styles() -> clap::builder::Styles { Styles::styled() .header(AnsiColor::Yellow.on_default())