-
Notifications
You must be signed in to change notification settings - Fork 0
feat: dot notation for indexes create and databases load #92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -320,34 +320,34 @@ pub enum IndexesCommands { | |
| output: String, | ||
| }, | ||
|
|
||
| /// Create an index on a table or dataset | ||
| /// Create an index on a table or dataset. | ||
| /// | ||
| /// Pass either connection scope (--connection-id + --schema + --table) OR | ||
| /// dataset scope (--dataset-id), not both. | ||
| /// For connection-scoped indexes, pass the table and columns using bracket notation: | ||
| /// `connection.table[col1,col2]` or `connection.schema.table[col1,col2]` | ||
| /// (schema defaults to `public` when omitted) | ||
| /// | ||
| /// For dataset-scoped indexes, use `--dataset-id` with `--columns`. | ||
| Create { | ||
| /// Connection ID (use with --schema and --table) | ||
| #[arg(long, short = 'c', conflicts_with = "dataset_id", requires_all = ["schema", "table"])] | ||
| connection_id: Option<String>, | ||
|
|
||
| /// Schema name (requires --connection-id) | ||
| #[arg(long, requires = "connection_id")] | ||
| schema: Option<String>, | ||
|
|
||
| /// Table name (requires --connection-id) | ||
| #[arg(long, requires = "connection_id")] | ||
| table: Option<String>, | ||
| /// Table and columns to index: `connection.table[col1,col2]` | ||
| /// or `connection.schema.table[col1,col2]`. Schema defaults to `public`. | ||
| /// | ||
| /// Quote the argument to prevent shell glob expansion: | ||
| /// `hotdata indexes create 'airbnb.listings[description]' --type bm25` | ||
| #[arg(conflicts_with = "dataset_id")] | ||
| target: Option<String>, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: README.md is now out of sync with this command — lines 252–261 still document |
||
|
|
||
| /// Dataset ID (alternative scope to --connection-id) | ||
| #[arg(long, conflicts_with_all = ["connection_id", "schema", "table"])] | ||
| /// Dataset ID (alternative scope to the positional target) | ||
| #[arg(long, conflicts_with = "target")] | ||
| dataset_id: Option<String>, | ||
|
|
||
| /// Index name | ||
| /// Columns to index (comma-separated). Required with --dataset-id; | ||
| /// for connection scope use bracket notation in the target instead. | ||
| #[arg(long)] | ||
| name: String, | ||
| columns: Option<String>, | ||
|
|
||
| /// Columns to index (comma-separated). Vector indexes accept exactly one column. | ||
| /// Index name (derived from table, columns, and type if omitted) | ||
| #[arg(long)] | ||
| columns: String, | ||
| name: Option<String>, | ||
|
|
||
| /// Index type — required (no default; choose deliberately) | ||
| #[arg(long, value_parser = ["sorted", "bm25", "vector"])] | ||
|
|
@@ -580,6 +580,25 @@ pub enum DatabasesCommands { | |
| name_or_id: String, | ||
| }, | ||
|
|
||
| /// Load a parquet file into a table using dot notation: `database.table` or `database.schema.table` | ||
| Load { | ||
| /// Table to load into: `database.table` or `database.schema.table`. | ||
| /// Schema defaults to `public` when omitted. | ||
| target: String, | ||
|
|
||
| /// Path to a local parquet file to upload and load | ||
| #[arg(long, conflicts_with_all = ["upload_id", "url"])] | ||
| file: Option<String>, | ||
|
|
||
| /// URL of a remote parquet file to download and load | ||
| #[arg(long, conflicts_with_all = ["file", "upload_id"])] | ||
| url: Option<String>, | ||
|
|
||
| /// Use a previously staged upload ID from `POST /v1/files` instead of uploading | ||
| #[arg(long, conflicts_with_all = ["file", "url"])] | ||
| upload_id: Option<String>, | ||
| }, | ||
|
|
||
| /// Manage tables inside a managed database | ||
| Tables { | ||
| #[command(subcommand)] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -157,6 +157,38 @@ struct ListResponse { | |
| connections: Vec<Connection>, | ||
| } | ||
|
|
||
| /// Resolve a connection name or ID to a connection ID, exiting on failure. | ||
| /// | ||
| /// If `name_or_id` looks like a raw connection ID (starts with "conn"), tries | ||
| /// `GET /connections/{id}` directly first to avoid listing the full workspace. | ||
| /// Falls back to listing and matching by name on a 404 or when given a plain name. | ||
| pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { | ||
| use crossterm::style::Stylize; | ||
|
|
||
| if name_or_id.starts_with("conn") { | ||
| let (status, _) = api.get_raw(&format!("/connections/{name_or_id}")); | ||
| if status.is_success() { | ||
| return name_or_id.to_string(); | ||
| } | ||
| } | ||
|
|
||
| let body: ListResponse = api.get("/connections"); | ||
| match body | ||
| .connections | ||
| .iter() | ||
| .find(|c| c.id == name_or_id || c.name == name_or_id) | ||
| { | ||
| Some(conn) => conn.id.clone(), | ||
| None => { | ||
| eprintln!( | ||
| "{}", | ||
| format!("error: no connection named or with id '{name_or_id}'").red() | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+165
to
+190
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. super nit: this lists every connection in the workspace even when the caller already has a raw ID. For workspaces with many connections (or once |
||
|
|
||
| pub fn get(workspace_id: &str, connection_id: &str, format: &str) { | ||
| let api = ApiClient::new(Some(workspace_id)); | ||
| let is_table = format == "table"; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -396,6 +396,23 @@ fn main() { | |
| Some(DatabasesCommands::Delete { name_or_id }) => { | ||
| databases::delete(&workspace_id, &name_or_id) | ||
| } | ||
| Some(DatabasesCommands::Load { | ||
| target, | ||
| file, | ||
| url, | ||
| upload_id, | ||
| }) => { | ||
| let (database, schema, table) = parse_db_target(&target); | ||
| databases::tables_load( | ||
| &workspace_id, | ||
| &database, | ||
| &table, | ||
| Some(schema.as_str()), | ||
| file.as_deref(), | ||
| url.as_deref(), | ||
| upload_id.as_deref(), | ||
| ) | ||
| } | ||
| Some(DatabasesCommands::Tables { command }) => match command { | ||
| DatabaseTablesCommands::List { | ||
| database, | ||
|
|
@@ -565,12 +582,10 @@ fn main() { | |
| &output, | ||
| ), | ||
| IndexesCommands::Create { | ||
| connection_id, | ||
| schema, | ||
| table, | ||
| target, | ||
| dataset_id, | ||
| name, | ||
| columns, | ||
| name, | ||
| r#type, | ||
| metric, | ||
| r#async, | ||
|
|
@@ -579,32 +594,68 @@ fn main() { | |
| output_column, | ||
| description, | ||
| } => { | ||
| let scope = match ( | ||
| dataset_id.as_deref(), | ||
| connection_id.as_deref(), | ||
| schema.as_deref(), | ||
| table.as_deref(), | ||
| ) { | ||
| (Some(did), _, _, _) => indexes::IndexScope::Dataset { dataset_id: did }, | ||
| (None, Some(cid), Some(sch), Some(tbl)) => { | ||
| indexes::IndexScope::Connection { | ||
| connection_id: cid, | ||
| schema: sch, | ||
| table: tbl, | ||
| let api = api::ApiClient::new(Some(&workspace_id)); | ||
| let (scope, resolved_columns, auto_name) = | ||
| match (target.as_deref(), dataset_id.as_deref()) { | ||
| (Some(tgt), None) => { | ||
| let (conn_name, schema, table, cols) = | ||
| parse_index_target(tgt); | ||
| let conn_id = | ||
| connections::resolve_connection_id(&api, &conn_name); | ||
| let auto = format!( | ||
| "{table}_{cols}_{type}", | ||
| cols = cols.join("_"), | ||
| type = r#type | ||
| ); | ||
| ( | ||
| (conn_id, schema, table), | ||
| cols.join(","), | ||
| auto, | ||
| ) | ||
| } | ||
| } | ||
| _ => { | ||
| eprintln!( | ||
| "error: provide either --dataset-id or all three of --connection-id, --schema, --table" | ||
| ); | ||
| std::process::exit(1); | ||
| (None, Some(did)) => { | ||
| let cols = | ||
| columns.as_deref().unwrap_or_else(|| { | ||
| eprintln!( | ||
| "error: --columns is required with --dataset-id" | ||
| ); | ||
| std::process::exit(1); | ||
| }); | ||
| let auto = format!( | ||
| "dataset_{cols}_{type}", | ||
| cols = cols.replace(',', "_"), | ||
| type = r#type | ||
| ); | ||
| ( | ||
| (did.to_string(), String::new(), String::new()), | ||
| cols.to_string(), | ||
| auto, | ||
| ) | ||
| } | ||
| _ => { | ||
| eprintln!( | ||
| "error: provide either <target> (e.g. airbnb.listings[col1,col2]) or --dataset-id with --columns" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| }; | ||
| let index_name = name.unwrap_or(auto_name); | ||
| let is_dataset = dataset_id.is_some(); | ||
| let (conn_id, schema, table) = scope; | ||
| let resolved_scope = if is_dataset { | ||
| indexes::IndexScope::Dataset { dataset_id: &conn_id } | ||
| } else { | ||
| indexes::IndexScope::Connection { | ||
| connection_id: &conn_id, | ||
| schema: &schema, | ||
| table: &table, | ||
| } | ||
| }; | ||
| indexes::create( | ||
| &workspace_id, | ||
| scope, | ||
| &name, | ||
| &columns, | ||
| resolved_scope, | ||
| &index_name, | ||
| &resolved_columns, | ||
| &r#type, | ||
| metric.as_deref(), | ||
| r#async, | ||
|
|
@@ -911,6 +962,123 @@ fn main() { | |
| } | ||
| } | ||
|
|
||
| /// Parse a database target like `airbnb.listings` or `airbnb.public.listings` | ||
| /// into `(database, schema, table)`. Schema defaults to `public`. | ||
| fn parse_db_target(target: &str) -> (String, String, String) { | ||
| let parts: Vec<&str> = target.splitn(4, '.').collect(); | ||
| match parts.as_slice() { | ||
| [db, tbl] => (db.to_string(), "public".to_string(), tbl.to_string()), | ||
| [db, schema, tbl] => (db.to_string(), schema.to_string(), tbl.to_string()), | ||
| _ => { | ||
| eprintln!( | ||
| "error: target must be 'database.table' or 'database.schema.table'" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Parse an index target like `airbnb.listings[col1,col2]` or | ||
| /// `airbnb.public.listings[col1,col2]` into `(conn_name, schema, table, columns)`. | ||
| /// Schema defaults to `public` when only two dot-parts are given. | ||
| fn parse_index_target(target: &str) -> (String, String, String, Vec<String>) { | ||
| let Some(bracket_pos) = target.find('[') else { | ||
| eprintln!( | ||
| "error: target must include columns in brackets, e.g. airbnb.listings[col1,col2]" | ||
| ); | ||
| std::process::exit(1); | ||
| }; | ||
| if !target.ends_with(']') { | ||
| eprintln!( | ||
| "error: target bracket is not closed — use e.g. 'airbnb.listings[col1,col2]'" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| let table_part = &target[..bracket_pos]; | ||
| let cols_raw = &target[bracket_pos + 1..target.len() - 1]; | ||
|
|
||
| let parts: Vec<&str> = table_part.splitn(4, '.').collect(); | ||
| let (conn, schema, table) = match parts.as_slice() { | ||
| [c, t] => (c.to_string(), "public".to_string(), t.to_string()), | ||
| [c, s, t] => (c.to_string(), s.to_string(), t.to_string()), | ||
| _ => { | ||
| eprintln!( | ||
| "error: target must be 'connection.table[cols]' or 'connection.schema.table[cols]'" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| }; | ||
|
|
||
| let columns: Vec<String> = cols_raw | ||
| .split(',') | ||
| .map(|s| s.trim().to_string()) | ||
| .filter(|s| !s.is_empty()) | ||
| .collect(); | ||
|
|
||
| if columns.is_empty() { | ||
| eprintln!("error: no columns specified in brackets"); | ||
| std::process::exit(1); | ||
| } | ||
|
|
||
| (conn, schema, table, columns) | ||
|
Comment on lines
+967
to
+1023
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: these two parsers ( |
||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
||
| // --- parse_db_target --- | ||
|
|
||
| #[test] | ||
| fn db_target_two_parts_defaults_schema_to_public() { | ||
| let (db, schema, table) = parse_db_target("airbnb.listings"); | ||
| assert_eq!(db, "airbnb"); | ||
| assert_eq!(schema, "public"); | ||
| assert_eq!(table, "listings"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn db_target_three_parts_uses_explicit_schema() { | ||
| let (db, schema, table) = parse_db_target("airbnb.staging.listings"); | ||
| assert_eq!(db, "airbnb"); | ||
| assert_eq!(schema, "staging"); | ||
| assert_eq!(table, "listings"); | ||
| } | ||
|
|
||
| // --- parse_index_target --- | ||
|
|
||
| #[test] | ||
| fn index_target_two_parts_defaults_schema_to_public() { | ||
| let (conn, schema, table, cols) = parse_index_target("airbnb.listings[description]"); | ||
| assert_eq!(conn, "airbnb"); | ||
| assert_eq!(schema, "public"); | ||
| assert_eq!(table, "listings"); | ||
| assert_eq!(cols, vec!["description"]); | ||
| } | ||
|
|
||
| #[test] | ||
| fn index_target_three_parts_uses_explicit_schema() { | ||
| let (conn, schema, table, cols) = | ||
| parse_index_target("airbnb.public.listings[name,description]"); | ||
| assert_eq!(conn, "airbnb"); | ||
| assert_eq!(schema, "public"); | ||
| assert_eq!(table, "listings"); | ||
| assert_eq!(cols, vec!["name", "description"]); | ||
| } | ||
|
|
||
| #[test] | ||
| fn index_target_multiple_columns() { | ||
| let (_, _, _, cols) = parse_index_target("db.tbl[a,b,c]"); | ||
| assert_eq!(cols, vec!["a", "b", "c"]); | ||
| } | ||
|
|
||
| #[test] | ||
| fn index_target_trims_column_whitespace() { | ||
| let (_, _, _, cols) = parse_index_target("db.tbl[a, b]"); | ||
| assert_eq!(cols, vec!["a", "b"]); | ||
| } | ||
| } | ||
|
|
||
| pub fn get_styles() -> clap::builder::Styles { | ||
| Styles::styled() | ||
| .header(AnsiColor::Yellow.on_default()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
super nit:
[and]are glob metacharacters in zsh (and in bash withextglob/failglob), sohotdata indexes create airbnb.listings[description] --type bm25will fail withno matches foundfor many users out of the box. Worth a line in the doc comment telling users to quote the target ('airbnb.listings[description]'). Same note would help onDatabasesCommands::Loadeven though it has no brackets, since the README example uses brackets in nearby commands. (not blocking)