Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 39 additions & 20 deletions src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,34 +320,34 @@ pub enum IndexesCommands {
output: String,
},

/// Create an index on a table or dataset
/// Create an index on a table or dataset.
///
/// Pass either connection scope (--connection-id + --schema + --table) OR
/// dataset scope (--dataset-id), not both.
/// For connection-scoped indexes, pass the table and columns using bracket notation:
/// `connection.table[col1,col2]` or `connection.schema.table[col1,col2]`
/// (schema defaults to `public` when omitted)
///
/// For dataset-scoped indexes, use `--dataset-id` with `--columns`.
Create {
/// Connection ID (use with --schema and --table)
#[arg(long, short = 'c', conflicts_with = "dataset_id", requires_all = ["schema", "table"])]
connection_id: Option<String>,

/// Schema name (requires --connection-id)
#[arg(long, requires = "connection_id")]
schema: Option<String>,

/// Table name (requires --connection-id)
#[arg(long, requires = "connection_id")]
table: Option<String>,
/// Table and columns to index: `connection.table[col1,col2]`
/// or `connection.schema.table[col1,col2]`. Schema defaults to `public`.
///
/// Quote the argument to prevent shell glob expansion:
/// `hotdata indexes create 'airbnb.listings[description]' --type bm25`
#[arg(conflicts_with = "dataset_id")]
target: Option<String>,
Comment on lines +331 to +337
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super nit: [ and ] are glob metacharacters in zsh (and in bash with extglob/failglob), so hotdata indexes create airbnb.listings[description] --type bm25 will fail with no matches found for many users out of the box. Worth a line in the doc comment telling users to quote the target ('airbnb.listings[description]'). Same note would help on DatabasesCommands::Load even though it has no brackets, since the README example uses brackets in nearby commands. (not blocking)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: README.md is now out of sync with this command — lines 252–261 still document hotdata indexes create --connection-id <id> --schema <schema> --table <table> --name <name> --columns <cols> ..., but this PR removes those flags from Create. A user copying that example will hit a clap error. Worth updating the README block to show the new connection.table[cols] form (and the dataset-scope path that now requires --columns), plus a one-line mention of the hotdata databases load shorthand alongside tables load. (not blocking)


/// Dataset ID (alternative scope to --connection-id)
#[arg(long, conflicts_with_all = ["connection_id", "schema", "table"])]
/// Dataset ID (alternative scope to the positional target)
#[arg(long, conflicts_with = "target")]
dataset_id: Option<String>,

/// Index name
/// Columns to index (comma-separated). Required with --dataset-id;
/// for connection scope use bracket notation in the target instead.
#[arg(long)]
name: String,
columns: Option<String>,

/// Columns to index (comma-separated). Vector indexes accept exactly one column.
/// Index name (derived from table, columns, and type if omitted)
#[arg(long)]
columns: String,
name: Option<String>,

/// Index type — required (no default; choose deliberately)
#[arg(long, value_parser = ["sorted", "bm25", "vector"])]
Expand Down Expand Up @@ -580,6 +580,25 @@ pub enum DatabasesCommands {
name_or_id: String,
},

/// Load a parquet file into a table using dot notation: `database.table` or `database.schema.table`
Load {
/// Table to load into: `database.table` or `database.schema.table`.
/// Schema defaults to `public` when omitted.
target: String,

/// Path to a local parquet file to upload and load
#[arg(long, conflicts_with_all = ["upload_id", "url"])]
file: Option<String>,

/// URL of a remote parquet file to download and load
#[arg(long, conflicts_with_all = ["file", "upload_id"])]
url: Option<String>,

/// Use a previously staged upload ID from `POST /v1/files` instead of uploading
#[arg(long, conflicts_with_all = ["file", "url"])]
upload_id: Option<String>,
},

/// Manage tables inside a managed database
Tables {
#[command(subcommand)]
Expand Down
32 changes: 32 additions & 0 deletions src/connections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,38 @@ struct ListResponse {
connections: Vec<Connection>,
}

/// Resolve a connection name or ID to a connection ID, exiting on failure.
///
/// If `name_or_id` looks like a raw connection ID (starts with "conn"), tries
/// `GET /connections/{id}` directly first to avoid listing the full workspace.
/// Falls back to listing and matching by name on a 404 or when given a plain name.
pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String {
use crossterm::style::Stylize;

if name_or_id.starts_with("conn") {
let (status, _) = api.get_raw(&format!("/connections/{name_or_id}"));
if status.is_success() {
return name_or_id.to_string();
}
}

let body: ListResponse = api.get("/connections");
match body
.connections
.iter()
.find(|c| c.id == name_or_id || c.name == name_or_id)
{
Some(conn) => conn.id.clone(),
None => {
eprintln!(
"{}",
format!("error: no connection named or with id '{name_or_id}'").red()
);
std::process::exit(1);
}
}
}
Comment on lines +165 to +190
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super nit: this lists every connection in the workspace even when the caller already has a raw ID. For workspaces with many connections (or once /connections paginates) this adds a needless round trip and could miss a valid ID past the first page. A cheap optimization: if name_or_id matches a known ID shape (e.g. starts with a known prefix or looks like a UUID), try GET /connections/{id} first and fall back to listing on 404. Fine to defer — the existing connections::list has the same limitation. (not blocking)


pub fn get(workspace_id: &str, connection_id: &str, format: &str) {
let api = ApiClient::new(Some(workspace_id));
let is_table = format == "table";
Expand Down
4 changes: 0 additions & 4 deletions src/databases.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,6 @@ pub fn create(workspace_id: &str, name: &str, schema: &str, tables: &[String], f
println!("{}", "Database created".green());
println!("name: {}", result.name);
println!("id: {}", result.id);
println!(
"load: hotdata databases tables load {} <table> --file ./data.parquet",
result.name
);
}
_ => unreachable!(),
}
Expand Down
218 changes: 193 additions & 25 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,23 @@ fn main() {
Some(DatabasesCommands::Delete { name_or_id }) => {
databases::delete(&workspace_id, &name_or_id)
}
Some(DatabasesCommands::Load {
target,
file,
url,
upload_id,
}) => {
let (database, schema, table) = parse_db_target(&target);
databases::tables_load(
&workspace_id,
&database,
&table,
Some(schema.as_str()),
file.as_deref(),
url.as_deref(),
upload_id.as_deref(),
)
}
Some(DatabasesCommands::Tables { command }) => match command {
DatabaseTablesCommands::List {
database,
Expand Down Expand Up @@ -565,12 +582,10 @@ fn main() {
&output,
),
IndexesCommands::Create {
connection_id,
schema,
table,
target,
dataset_id,
name,
columns,
name,
r#type,
metric,
r#async,
Expand All @@ -579,32 +594,68 @@ fn main() {
output_column,
description,
} => {
let scope = match (
dataset_id.as_deref(),
connection_id.as_deref(),
schema.as_deref(),
table.as_deref(),
) {
(Some(did), _, _, _) => indexes::IndexScope::Dataset { dataset_id: did },
(None, Some(cid), Some(sch), Some(tbl)) => {
indexes::IndexScope::Connection {
connection_id: cid,
schema: sch,
table: tbl,
let api = api::ApiClient::new(Some(&workspace_id));
let (scope, resolved_columns, auto_name) =
match (target.as_deref(), dataset_id.as_deref()) {
(Some(tgt), None) => {
let (conn_name, schema, table, cols) =
parse_index_target(tgt);
let conn_id =
connections::resolve_connection_id(&api, &conn_name);
let auto = format!(
"{table}_{cols}_{type}",
cols = cols.join("_"),
type = r#type
);
(
(conn_id, schema, table),
cols.join(","),
auto,
)
}
}
_ => {
eprintln!(
"error: provide either --dataset-id or all three of --connection-id, --schema, --table"
);
std::process::exit(1);
(None, Some(did)) => {
let cols =
columns.as_deref().unwrap_or_else(|| {
eprintln!(
"error: --columns is required with --dataset-id"
);
std::process::exit(1);
});
let auto = format!(
"dataset_{cols}_{type}",
cols = cols.replace(',', "_"),
type = r#type
);
(
(did.to_string(), String::new(), String::new()),
cols.to_string(),
auto,
)
}
_ => {
eprintln!(
"error: provide either <target> (e.g. airbnb.listings[col1,col2]) or --dataset-id with --columns"
);
std::process::exit(1);
}
};
let index_name = name.unwrap_or(auto_name);
let is_dataset = dataset_id.is_some();
let (conn_id, schema, table) = scope;
let resolved_scope = if is_dataset {
indexes::IndexScope::Dataset { dataset_id: &conn_id }
} else {
indexes::IndexScope::Connection {
connection_id: &conn_id,
schema: &schema,
table: &table,
}
};
indexes::create(
&workspace_id,
scope,
&name,
&columns,
resolved_scope,
&index_name,
&resolved_columns,
&r#type,
metric.as_deref(),
r#async,
Expand Down Expand Up @@ -911,6 +962,123 @@ fn main() {
}
}

/// Parse a database target like `airbnb.listings` or `airbnb.public.listings`
/// into `(database, schema, table)`. Schema defaults to `public`.
fn parse_db_target(target: &str) -> (String, String, String) {
let parts: Vec<&str> = target.splitn(4, '.').collect();
match parts.as_slice() {
[db, tbl] => (db.to_string(), "public".to_string(), tbl.to_string()),
[db, schema, tbl] => (db.to_string(), schema.to_string(), tbl.to_string()),
_ => {
eprintln!(
"error: target must be 'database.table' or 'database.schema.table'"
);
std::process::exit(1);
}
}
}

/// Parse an index target like `airbnb.listings[col1,col2]` or
/// `airbnb.public.listings[col1,col2]` into `(conn_name, schema, table, columns)`.
/// Schema defaults to `public` when only two dot-parts are given.
fn parse_index_target(target: &str) -> (String, String, String, Vec<String>) {
let Some(bracket_pos) = target.find('[') else {
eprintln!(
"error: target must include columns in brackets, e.g. airbnb.listings[col1,col2]"
);
std::process::exit(1);
};
if !target.ends_with(']') {
eprintln!(
"error: target bracket is not closed — use e.g. 'airbnb.listings[col1,col2]'"
);
std::process::exit(1);
}
let table_part = &target[..bracket_pos];
let cols_raw = &target[bracket_pos + 1..target.len() - 1];

let parts: Vec<&str> = table_part.splitn(4, '.').collect();
let (conn, schema, table) = match parts.as_slice() {
[c, t] => (c.to_string(), "public".to_string(), t.to_string()),
[c, s, t] => (c.to_string(), s.to_string(), t.to_string()),
_ => {
eprintln!(
"error: target must be 'connection.table[cols]' or 'connection.schema.table[cols]'"
);
std::process::exit(1);
}
};

let columns: Vec<String> = cols_raw
.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect();

if columns.is_empty() {
eprintln!("error: no columns specified in brackets");
std::process::exit(1);
}

(conn, schema, table, columns)
Comment on lines +967 to +1023
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: these two parsers (parse_db_target, parse_index_target) are pure, have several edge cases (2- vs 3-segment, default-public, bracket placement, empty cols, malformed dot counts), and are easy to regress. A small #[cfg(test)] mod tests covering happy paths + each error branch would lock the behavior down — especially since the existing databases_cli.rs tests don't exercise the new shorthand. (not blocking)

}

#[cfg(test)]
mod tests {
use super::*;

// --- parse_db_target ---

#[test]
fn db_target_two_parts_defaults_schema_to_public() {
let (db, schema, table) = parse_db_target("airbnb.listings");
assert_eq!(db, "airbnb");
assert_eq!(schema, "public");
assert_eq!(table, "listings");
}

#[test]
fn db_target_three_parts_uses_explicit_schema() {
let (db, schema, table) = parse_db_target("airbnb.staging.listings");
assert_eq!(db, "airbnb");
assert_eq!(schema, "staging");
assert_eq!(table, "listings");
}

// --- parse_index_target ---

#[test]
fn index_target_two_parts_defaults_schema_to_public() {
let (conn, schema, table, cols) = parse_index_target("airbnb.listings[description]");
assert_eq!(conn, "airbnb");
assert_eq!(schema, "public");
assert_eq!(table, "listings");
assert_eq!(cols, vec!["description"]);
}

#[test]
fn index_target_three_parts_uses_explicit_schema() {
let (conn, schema, table, cols) =
parse_index_target("airbnb.public.listings[name,description]");
assert_eq!(conn, "airbnb");
assert_eq!(schema, "public");
assert_eq!(table, "listings");
assert_eq!(cols, vec!["name", "description"]);
}

#[test]
fn index_target_multiple_columns() {
let (_, _, _, cols) = parse_index_target("db.tbl[a,b,c]");
assert_eq!(cols, vec!["a", "b", "c"]);
}

#[test]
fn index_target_trims_column_whitespace() {
let (_, _, _, cols) = parse_index_target("db.tbl[a, b]");
assert_eq!(cols, vec!["a", "b"]);
}
}

pub fn get_styles() -> clap::builder::Styles {
Styles::styled()
.header(AnsiColor::Yellow.on_default())
Expand Down
Loading