From 84bd1d249bde35ad48611b1b45662c7d549d08f6 Mon Sep 17 00:00:00 2001 From: luca-ctx <216224554+luca-ctx@users.noreply.github.com> Date: Wed, 1 Jul 2026 18:07:55 -0500 Subject: [PATCH] Add native personal agent history providers --- crates/ctx-cli/src/main.rs | 110 +- crates/ctx-cli/tests/cli.rs | 720 +++++- crates/ctx-history-capture/src/lib.rs | 2066 ++++++++++++++++- .../src/provider_sources.rs | 325 ++- crates/ctx-history-core/src/lib.rs | 4 + crates/ctx-history-core/src/provider.rs | 17 +- crates/ctx-history-store/src/lib.rs | 184 +- docs/cli-reference.md | 31 +- docs/first-10-minutes.md | 30 +- docs/limitations.md | 11 +- docs/provider-support-matrix.json | 218 ++ docs/provider-support.md | 10 + docs/providers.md | 22 +- docs/search.md | 13 +- 14 files changed, 3679 insertions(+), 82 deletions(-) diff --git a/crates/ctx-cli/src/main.rs b/crates/ctx-cli/src/main.rs index 10091d1b..62b3de0d 100644 --- a/crates/ctx-cli/src/main.rs +++ b/crates/ctx-cli/src/main.rs @@ -27,17 +27,20 @@ use analytics::{AnalyticsEvent, AnalyticsProperties}; use config::{AppConfig, CONFIG_FILE}; use ctx_history_capture::{ catalog_codex_session_tree, discover_provider_sources, discover_provider_sources_for_provider, - import_antigravity_cli_history, import_claude_projects_jsonl_tree, import_codex_history_jsonl, - import_codex_session_jsonl, import_codex_session_jsonl_tail, import_codex_session_paths, - import_codex_session_tree, import_copilot_cli_session_events, import_cursor_native_history, - import_factory_ai_droid_sessions, import_gemini_cli_history, import_opencode_sqlite, + import_antigravity_cli_history, import_astrbot_sqlite, import_claude_projects_jsonl_tree, + import_codex_history_jsonl, import_codex_session_jsonl, import_codex_session_jsonl_tail, + import_codex_session_paths, import_codex_session_tree, import_copilot_cli_session_events, + import_cursor_native_history, import_factory_ai_droid_sessions, import_gemini_cli_history, + import_hermes_sqlite, 
import_nanoclaw_project, import_openclaw_history, import_opencode_sqlite, import_pi_session_jsonl, provider_source_for_path, provider_source_spec, stable_capture_uuid, - AntigravityCliImportOptions, CatalogSummary, ClaudeProjectsImportOptions, CodexEventImportMode, - CodexHistoryImportOptions, CodexSessionCatalogOptions, CodexSessionImportOptions, - CodexSessionImportProgress, CodexSessionImportProgressCallback, CodexToolOutputMode, - CopilotCliImportOptions, CursorNativeImportOptions, FactoryAiDroidImportOptions, - GeminiCliImportOptions, OpenCodeSqliteImportOptions, PiSessionImportOptions, - ProviderImportSummary, ProviderImportSupport, ProviderSource, ProviderSourceStatus, + AntigravityCliImportOptions, AstrBotSqliteImportOptions, CatalogSummary, + ClaudeProjectsImportOptions, CodexEventImportMode, CodexHistoryImportOptions, + CodexSessionCatalogOptions, CodexSessionImportOptions, CodexSessionImportProgress, + CodexSessionImportProgressCallback, CodexToolOutputMode, CopilotCliImportOptions, + CursorNativeImportOptions, FactoryAiDroidImportOptions, GeminiCliImportOptions, + HermesSqliteImportOptions, NanoClawImportOptions, OpenClawImportOptions, + OpenCodeSqliteImportOptions, PiSessionImportOptions, ProviderImportSummary, + ProviderImportSupport, ProviderSource, ProviderSourceStatus, }; use ctx_history_core::{ database_path, default_data_root, utc_now, CaptureProvider, ContextCitation, @@ -516,6 +519,13 @@ enum ProviderArg { alias = "factory_ai_droid" )] FactoryAiDroid, + #[value(name = "openclaw", alias = "open-claw", alias = "open_claw")] + OpenClaw, + Hermes, + #[value(name = "nanoclaw", alias = "nano-claw", alias = "nano_claw")] + NanoClaw, + #[value(name = "astrbot", alias = "astr-bot", alias = "astr_bot")] + AstrBot, } #[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] @@ -538,6 +548,10 @@ impl ProviderArg { Self::Cursor => CaptureProvider::Cursor, Self::CopilotCli => CaptureProvider::CopilotCli, Self::FactoryAiDroid => CaptureProvider::FactoryAiDroid, 
+ Self::OpenClaw => CaptureProvider::OpenClaw, + Self::Hermes => CaptureProvider::Hermes, + Self::NanoClaw => CaptureProvider::NanoClaw, + Self::AstrBot => CaptureProvider::AstrBot, } } @@ -552,6 +566,10 @@ impl ProviderArg { Self::Cursor => "cursor", Self::CopilotCli => "copilot-cli", Self::FactoryAiDroid => "factory-ai-droid", + Self::OpenClaw => "openclaw", + Self::Hermes => "hermes", + Self::NanoClaw => "nanoclaw", + Self::AstrBot => "astrbot", } } } @@ -1639,7 +1657,7 @@ fn run_sources(args: JsonArgs, analytics_properties: &mut AnalyticsProperties) - .iter() .filter(|source| { source.exists - && matches!(source.import_support, ProviderImportSupport::Native) + && source.import_support.is_importable() && source.status == ProviderSourceStatus::Available }) .count(); @@ -4008,7 +4026,7 @@ fn search_refresh_sources(provider: Option) -> Vec { .drain(..) .filter(|source| { source.exists - && matches!(source.import_support, ProviderImportSupport::Native) + && source.import_support.is_auto_importable() && source.status == ProviderSourceStatus::Available && source.source_format != "codex_history_jsonl" }) @@ -4179,7 +4197,7 @@ fn import_requests(args: &ImportArgs) -> Result> { .into_iter() .filter(|source| { source.exists - && matches!(source.import_support, ProviderImportSupport::Native) + && source.import_support.is_auto_importable() && source.status == ProviderSourceStatus::Available }) .collect()); @@ -4195,7 +4213,12 @@ fn import_requests(args: &ImportArgs) -> Result> { .collect::>(); if sources.is_empty() { let spec = provider_source_spec(provider); - if let Some(reason) = spec.and_then(|spec| spec.unsupported_reason) { + if spec + .is_some_and(|spec| matches!(spec.import_support, ProviderImportSupport::Unsupported)) + { + let reason = spec + .and_then(|spec| spec.unsupported_reason) + .unwrap_or("no native local-history parser is implemented"); return Err(anyhow!( "{} native import is unsupported: {reason}", provider.as_str() @@ -4215,6 +4238,7 @@ fn 
import_requests(args: &ImportArgs) -> Result> { fn validate_source_import_supported(source: &SourceInfo) -> Result<()> { match source.import_support { ProviderImportSupport::Native => Ok(()), + ProviderImportSupport::Preview => Ok(()), ProviderImportSupport::Unsupported => { let reason = source .unsupported_reason @@ -4376,6 +4400,50 @@ fn import_one_source_inner( }, ) .map_err(anyhow::Error::from), + CaptureProvider::OpenClaw => import_openclaw_history( + &source.path, + store, + OpenClawImportOptions { + source_path: Some(source.path.clone()), + history_record_id: Some(record_id), + allow_partial_failures: true, + ..OpenClawImportOptions::default() + }, + ) + .map_err(anyhow::Error::from), + CaptureProvider::Hermes => import_hermes_sqlite( + &source.path, + store, + HermesSqliteImportOptions { + source_path: Some(source.path.clone()), + history_record_id: Some(record_id), + allow_partial_failures: true, + ..HermesSqliteImportOptions::default() + }, + ) + .map_err(anyhow::Error::from), + CaptureProvider::NanoClaw => import_nanoclaw_project( + &source.path, + store, + NanoClawImportOptions { + source_path: Some(source.path.clone()), + history_record_id: Some(record_id), + allow_partial_failures: true, + ..NanoClawImportOptions::default() + }, + ) + .map_err(anyhow::Error::from), + CaptureProvider::AstrBot => import_astrbot_sqlite( + &source.path, + store, + AstrBotSqliteImportOptions { + source_path: Some(source.path.clone()), + history_record_id: Some(record_id), + allow_partial_failures: true, + ..AstrBotSqliteImportOptions::default() + }, + ) + .map_err(anyhow::Error::from), CaptureProvider::Gemini => import_gemini_cli_history( &source.path, store, @@ -4532,7 +4600,14 @@ fn import_manifested_source( } fn source_uses_import_file_manifest(source: &SourceInfo) -> bool { - source.source_format != "codex_session_jsonl_tree" + !matches!( + source.source_format, + "codex_session_jsonl_tree" + | "openclaw_session_jsonl_tree" + | "hermes_state_sqlite" + | 
"nanoclaw_project" + | "astrbot_data_v4_sqlite" + ) } fn merge_provider_import_summary( @@ -5005,9 +5080,9 @@ fn sources_json(sources: &[SourceInfo]) -> Vec { "source_format": source.source_format, "status": source.status.as_str(), "import_support": import_support_json(source.import_support), - "native_import": matches!(source.import_support, ProviderImportSupport::Native), + "native_import": source.import_support.is_auto_importable(), "importable": source.status == ProviderSourceStatus::Available - && matches!(source.import_support, ProviderImportSupport::Native), + && source.import_support.is_importable(), "raw_retention": raw_retention_json(source.raw_retention), "unsupported_reason": source.unsupported_reason, }) @@ -5018,6 +5093,7 @@ fn sources_json(sources: &[SourceInfo]) -> Vec { fn import_support_json(support: ProviderImportSupport) -> &'static str { match support { ProviderImportSupport::Native => "native", + ProviderImportSupport::Preview => "preview", ProviderImportSupport::Unsupported => "unsupported", } } diff --git a/crates/ctx-cli/tests/cli.rs b/crates/ctx-cli/tests/cli.rs index 43d850c3..1d2c61af 100644 --- a/crates/ctx-cli/tests/cli.rs +++ b/crates/ctx-cli/tests/cli.rs @@ -808,6 +808,81 @@ fn import_all_skips_empty_gemini_source() { .all(|source| source["provider"] != "gemini")); } +#[test] +fn sources_lists_personal_agent_provider_defaults() { + let temp = tempdir(); + install_default_openclaw_fixture(&temp, "openclaw-sources-oracle"); + install_default_hermes_fixture(&temp, "hermes-sources-oracle"); + install_default_astrbot_fixture(&temp, "astrbot-sources-oracle"); + + let sources = json_output(ctx(&temp).args(["sources", "--json"])); + for (provider, source_format, import_support, native_import) in [ + ("openclaw", "openclaw_session_jsonl_tree", "native", true), + ("hermes", "hermes_state_sqlite", "native", true), + ("astrbot", "astrbot_data_v4_sqlite", "preview", false), + ] { + let source = sources["sources"] + .as_array() + .unwrap() + 
.iter() + .find(|source| { + source["provider"] == provider && source["source_format"] == source_format + }) + .unwrap_or_else(|| panic!("missing {provider} source in {sources:#}")); + assert_eq!(source["status"], "available"); + assert_eq!(source["import_support"], import_support); + assert_eq!(source["native_import"], native_import); + assert_eq!(source["importable"], true); + assert!(source["unsupported_reason"].is_null()); + } +} + +#[test] +fn preview_native_sources_are_listed_but_not_auto_imported() { + let temp = tempdir(); + let query = "nanoclaw-preview-auto-refresh-oracle"; + let project = PathBuf::from(write_native_nanoclaw_fixture(&temp, query)); + + let mut sources_command = ctx(&temp); + sources_command.current_dir(&project); + let sources = json_output(sources_command.args(["sources", "--json"])); + let nanoclaw = sources["sources"] + .as_array() + .unwrap() + .iter() + .find(|source| source["provider"] == "nanoclaw") + .unwrap(); + assert_eq!(nanoclaw["status"], "available"); + assert_eq!(nanoclaw["import_support"], "preview"); + assert_eq!(nanoclaw["native_import"], false); + assert_eq!(nanoclaw["importable"], true); + assert!(nanoclaw["unsupported_reason"].is_null()); + + let mut search_command = ctx(&temp); + search_command.current_dir(&project); + let search = + json_output(search_command.args(["search", query, "--provider", "nanoclaw", "--json"])); + assert_eq!(search["freshness"]["mode"], "auto"); + assert_eq!(search["freshness"]["status"], "no_sources"); + assert_eq!(search["freshness"]["source_count"], 0); + assert!(search["results"].as_array().unwrap().is_empty()); + + let imported = json_output(ctx(&temp).args([ + "import", + "--provider", + "nanoclaw", + "--path", + project.to_str().unwrap(), + "--json", + ])); + assert_eq!(imported["totals"]["failed"], 0); + assert_eq!(imported["totals"]["imported_sources"], 1); + + let search_after_import = + json_output(ctx(&temp).args(["search", query, "--provider", "nanoclaw", "--json"])); + 
assert_search_provider_oracle(&search_after_import, "nanoclaw", query, 1, "message"); +} + #[test] fn import_all_reports_source_failure_without_losing_successes() { let temp = tempdir(); @@ -885,6 +960,10 @@ fn provider_help_matches_implemented_importers() { "pi", "claude", "opencode", + "openclaw", + "hermes", + "nanoclaw", + "astrbot", "antigravity", "gemini", "cursor", @@ -898,7 +977,13 @@ fn provider_help_matches_implemented_importers() { #[test] fn provider_json_names_are_accepted_as_cli_filter_aliases() { let temp = tempdir(); - for provider in ["copilot_cli", "factory_ai_droid"] { + for (provider, expected) in [ + ("copilot_cli", "copilot_cli"), + ("factory_ai_droid", "factory_ai_droid"), + ("open_claw", "openclaw"), + ("nano_claw", "nanoclaw"), + ("astr_bot", "astrbot"), + ] { let search = json_output(ctx(&temp).args([ "search", "anything", @@ -908,7 +993,7 @@ fn provider_json_names_are_accepted_as_cli_filter_aliases() { "off", "--json", ])); - assert_eq!(search["filters"]["provider"], provider); + assert_eq!(search["filters"]["provider"], expected); } } @@ -1016,7 +1101,7 @@ fn public_subcommand_help_is_golden_enough_for_session_retrieval() { vec![ "Usage: ctx import", "--provider ", - "[possible values: codex, pi, claude, opencode, antigravity, gemini, cursor, copilot-cli, factory-ai-droid]", + "[possible values: codex, pi, claude, opencode, antigravity, gemini, cursor, copilot-cli, factory-ai-droid, openclaw, hermes, nanoclaw, astrbot]", "--path ", "--resume", "--json", @@ -3239,6 +3324,8 @@ fn search_refresh_auto_imports_discovered_top_provider_sources() { ), ("pi", "pi", install_default_pi_fixture), ("cursor", "cursor", install_default_cursor_fixture), + ("openclaw", "openclaw", install_default_openclaw_fixture), + ("hermes", "hermes", install_default_hermes_fixture), ] { let temp = tempdir(); let query = format!("{stored_provider}-default-refresh-oracle"); @@ -3581,6 +3668,30 @@ fn native_provider_cli_flow_imports_new_supported_provider_paths() { 
"factory_ai_droid_sessions_jsonl", write_native_factory_droid_fixture, ), + ( + "openclaw", + "openclaw", + "openclaw_session_jsonl_tree", + write_native_openclaw_fixture, + ), + ( + "hermes", + "hermes", + "hermes_state_sqlite", + write_native_hermes_fixture, + ), + ( + "nanoclaw", + "nanoclaw", + "nanoclaw_project", + write_native_nanoclaw_fixture, + ), + ( + "astrbot", + "astrbot", + "astrbot_data_v4_sqlite", + write_native_astrbot_fixture, + ), ] { let temp = tempdir(); let query = format!("{stored_provider}-native-cli-oracle"); @@ -3604,6 +3715,24 @@ fn native_provider_cli_flow_imports_new_supported_provider_paths() { let search = json_output(ctx(&temp).args(["search", &query, "--provider", cli_provider, "--json"])); assert_search_provider_oracle(&search, stored_provider, &query, 1, "message"); + let result = &search["results"].as_array().unwrap()[0]; + let ctx_event_id = result["ctx_event_id"].as_str().unwrap(); + let ctx_session_id = result["ctx_session_id"].as_str().unwrap(); + + let show_event = + json_output(ctx(&temp).args(["show", "event", ctx_event_id, "--format", "json"])); + assert_eq!(show_event["event"]["provider"], stored_provider); + assert!(show_event["event"]["source"]["source_format"].is_string()); + assert!(show_event["event"]["source"]["path"].is_string()); + assert!(show_event["event"]["cursor"].is_string()); + + let locate_event = + json_output(ctx(&temp).args(["locate", "event", ctx_event_id, "--json"])); + assert_eq!(locate_event["provider"], stored_provider); + assert_eq!(locate_event["ctx_session_id"], ctx_session_id); + assert!(locate_event["source"]["source_format"].is_string()); + assert!(locate_event["source"]["path"].is_string()); + assert!(locate_event["cursor"].is_string()); let status = json_output(ctx(&temp).args(["status", "--json"])); assert!(status["indexed_items"].as_u64().unwrap() >= 2); @@ -3611,6 +3740,95 @@ fn native_provider_cli_flow_imports_new_supported_provider_paths() { let doctor = 
json_output(ctx(&temp).args(["doctor", "--json"])); assert_eq!(doctor["ok"], true); + + let second = json_output(ctx(&temp).args([ + "import", + "--provider", + cli_provider, + "--path", + &path, + "--json", + ])); + assert_eq!(second["totals"]["failed"], 0); + assert_eq!(second["totals"]["imported_events"], 0); + } +} + +#[test] +fn personal_agent_provider_imports_are_idempotent_and_incremental() { + for (cli_provider, stored_provider, fixture, append_event) in [ + ( + "openclaw", + "openclaw", + write_native_openclaw_fixture as fn(&TempDir, &str) -> String, + append_native_openclaw_event as fn(&str, &str), + ), + ( + "hermes", + "hermes", + write_native_hermes_fixture, + append_native_hermes_event, + ), + ( + "nanoclaw", + "nanoclaw", + write_native_nanoclaw_fixture, + append_native_nanoclaw_event, + ), + ( + "astrbot", + "astrbot", + write_native_astrbot_fixture, + append_native_astrbot_event, + ), + ] { + let temp = tempdir(); + let initial_query = format!("{stored_provider}-incremental-initial-oracle"); + let incremental_query = format!("{stored_provider}-incremental-next-oracle"); + let path = fixture(&temp, &initial_query); + + let first = json_output(ctx(&temp).args([ + "import", + "--provider", + cli_provider, + "--path", + &path, + "--json", + ])); + assert_eq!(first["totals"]["failed"], 0); + assert!(first["totals"]["imported_events"].as_u64().unwrap() >= 1); + + let second = json_output(ctx(&temp).args([ + "import", + "--provider", + cli_provider, + "--path", + &path, + "--json", + ])); + assert_eq!(second["totals"]["failed"], 0); + assert_eq!(second["totals"]["imported_events"], 0); + + append_event(&path, &incremental_query); + let third = json_output(ctx(&temp).args([ + "import", + "--provider", + cli_provider, + "--path", + &path, + "--json", + ])); + assert_eq!(third["totals"]["failed"], 0); + assert!(third["totals"]["imported_events"].as_u64().unwrap() >= 1); + + let search = json_output(ctx(&temp).args([ + "search", + &incremental_query, + 
"--provider", + cli_provider, + "--json", + ])); + assert_search_provider_oracle(&search, stored_provider, &incremental_query, 1, "message"); } } @@ -3649,6 +3867,25 @@ fn install_default_cursor_fixture(temp: &TempDir, query: &str) { copy_dir_all(&source, &temp.path().join(".cursor").join("projects")); } +fn install_default_openclaw_fixture(temp: &TempDir, query: &str) { + let source = PathBuf::from(write_native_openclaw_fixture(temp, query)); + copy_dir_all(&source, &temp.path().join(".openclaw")); +} + +fn install_default_hermes_fixture(temp: &TempDir, query: &str) { + let source = PathBuf::from(write_native_hermes_fixture(temp, query)); + let target = temp.path().join(".hermes"); + fs::create_dir_all(&target).unwrap(); + fs::copy(source, target.join("state.db")).unwrap(); +} + +fn install_default_astrbot_fixture(temp: &TempDir, query: &str) { + let source = PathBuf::from(write_native_astrbot_fixture(temp, query)); + let target = temp.path().join(".astrbot/data"); + fs::create_dir_all(&target).unwrap(); + fs::copy(source, target.join("data_v4.db")).unwrap(); +} + fn write_native_claude_fixture(temp: &TempDir, query: &str) -> String { let root = temp.path().join("native-claude/projects/-workspace"); fs::create_dir_all(&root).unwrap(); @@ -3872,6 +4109,479 @@ fn write_native_factory_droid_fixture(temp: &TempDir, query: &str) -> String { .to_owned() } +fn write_native_openclaw_fixture(temp: &TempDir, query: &str) -> String { + let root = temp.path().join("native-openclaw"); + let sessions = root.join("agents/personal-agent/sessions"); + fs::create_dir_all(&sessions).unwrap(); + fs::write( + sessions.join("sessions.json"), + serde_json::to_string(&json!({ + "openclaw-cli-native": { + "sessionId": "openclaw-cli-native", + "sessionFile": sessions.join("openclaw-cli-native.jsonl"), + "sessionStartedAt": "2026-06-24T12:00:00Z", + "modelProvider": "openai", + "model": "gpt-5-mini", + "lastChannel": "telegram" + } + })) + .unwrap(), + ) + .unwrap(); + fs::write( + 
sessions.join("openclaw-cli-native.jsonl"), + format!( + "{}\n{}\n{}\n", + json!({ + "type": "session", + "version": 1, + "id": "openclaw-cli-native", + "timestamp": "2026-06-24T12:00:00Z", + "cwd": "/workspace" + }), + json!({ + "type": "message", + "id": "openclaw-cli-native-user", + "timestamp": "2026-06-24T12:00:01Z", + "message": {"role": "user", "content": query} + }), + json!({ + "type": "message", + "id": "openclaw-cli-native-assistant", + "parentId": "openclaw-cli-native-user", + "timestamp": "2026-06-24T12:00:02Z", + "message": {"role": "assistant", "content": "native import ok"} + }) + ), + ) + .unwrap(); + root.to_str().unwrap().to_owned() +} + +fn write_native_hermes_fixture(temp: &TempDir, query: &str) -> String { + let path = temp.path().join("native-hermes-state.db"); + let conn = Connection::open(&path).unwrap(); + conn.execute_batch( + "create table sessions ( + id text primary key, + source text not null, + model text, + model_config text, + parent_session_id text, + started_at real not null, + ended_at real, + message_count integer default 0, + tool_call_count integer default 0, + input_tokens integer default 0, + output_tokens integer default 0, + cwd text, + title text, + archived integer default 0 + ); + create table messages ( + id integer primary key autoincrement, + session_id text not null, + role text not null, + content text, + tool_calls text, + tool_call_id text, + tool_name text, + timestamp real not null, + active integer not null default 1, + compacted integer not null default 0 + );", + ) + .unwrap(); + conn.execute( + "insert into sessions ( + id, source, model, model_config, started_at, message_count, cwd, title + ) values (?1, 'acp', 'gpt-5-mini', ?2, 1782259200.0, 2, '/workspace', 'native hermes')", + [ + "hermes-cli-native", + r#"{"cwd":"/workspace","provider":"openai"}"#, + ], + ) + .unwrap(); + conn.execute( + "insert into messages (session_id, role, content, timestamp) values (?1, 'user', ?2, 1782259201.0)", + 
["hermes-cli-native", query], + ) + .unwrap(); + conn.execute( + "insert into messages (session_id, role, content, timestamp) values (?1, 'assistant', 'native import ok', 1782259202.0)", + ["hermes-cli-native"], + ) + .unwrap(); + path.to_str().unwrap().to_owned() +} + +fn write_native_nanoclaw_fixture(temp: &TempDir, query: &str) -> String { + let root = temp.path().join("native-nanoclaw"); + let data = root.join("data"); + let session_dir = data.join("v2-sessions/ag-1/session-1"); + fs::create_dir_all(&session_dir).unwrap(); + let central = Connection::open(data.join("v2.db")).unwrap(); + central + .execute_batch( + "create table agent_groups ( + id text primary key, + name text, + folder text, + agent_provider text + ); + create table messaging_groups ( + id text primary key, + channel_type text, + platform_id text, + instance text, + name text + ); + create table sessions ( + id text primary key, + agent_group_id text not null, + messaging_group_id text, + thread_id text, + agent_provider text, + status text, + container_status text, + last_active integer, + created_at integer + );", + ) + .unwrap(); + central + .execute( + "insert into agent_groups values ('ag-1', 'Personal', '/workspace', 'codex')", + [], + ) + .unwrap(); + central + .execute( + "insert into messaging_groups values ('mg-1', 'telegram', 'chat-1', 'default', 'DM')", + [], + ) + .unwrap(); + central + .execute( + "insert into sessions values ( + 'session-1', 'ag-1', 'mg-1', 'thread-1', 'codex', 'active', + 'running', 1782259202000, 1782259200000 + )", + [], + ) + .unwrap(); + let inbound = Connection::open(session_dir.join("inbound.db")).unwrap(); + inbound + .execute_batch( + "create table messages_in ( + id text primary key, + seq integer, + kind text, + timestamp integer, + status text, + trigger text, + platform_id text, + channel_type text, + thread_id text, + content text, + source_session_id text, + on_wake integer + );", + ) + .unwrap(); + inbound + .execute( + "insert into messages_in 
values ( + 'in-1', 1, 'chat', 1782259201000, 'done', 'message', + 'chat-1', 'telegram', 'thread-1', ?1, null, 0 + )", + [json!({"text": query}).to_string()], + ) + .unwrap(); + let outbound = Connection::open(session_dir.join("outbound.db")).unwrap(); + outbound + .execute_batch( + "create table messages_out ( + id text primary key, + seq integer, + in_reply_to text, + timestamp integer, + kind text, + platform_id text, + channel_type text, + thread_id text, + content text + );", + ) + .unwrap(); + outbound + .execute( + "insert into messages_out values ( + 'out-1', 2, 'in-1', 1782259202000, 'chat', + 'chat-1', 'telegram', 'thread-1', ?1 + )", + [json!({"text": "native import ok"}).to_string()], + ) + .unwrap(); + root.to_str().unwrap().to_owned() +} + +fn write_native_astrbot_fixture(temp: &TempDir, query: &str) -> String { + let data = temp.path().join("native-astrbot/data"); + fs::create_dir_all(&data).unwrap(); + let path = data.join("data_v4.db"); + let conn = Connection::open(&path).unwrap(); + conn.execute_batch( + "create table conversations ( + id integer primary key, + inner_conversation_id text, + conversation_id text, + platform_id text, + user_id text, + content text not null, + title text, + persona_id text, + token_usage text, + created_at integer, + updated_at integer + ); + create table preferences ( + scope text, + key text, + value text + ); + create table platform_message_history ( + id integer primary key, + platform_id text, + user_id text, + sender_id text, + sender_name text, + content text, + llm_checkpoint_id text, + created_at integer + );", + ) + .unwrap(); + conn.execute( + "insert into conversations values ( + 1, 'umo-1', 'conv-1', 'webchat', 'user-1', ?1, 'native astrbot', + 'default', ?2, 1782259200000, 1782259202000 + )", + [ + json!([ + {"role": "user", "content": query}, + {"type": "_checkpoint", "id": "checkpoint-1"}, + {"role": "assistant", "content": "native import ok"} + ]) + .to_string(), + json!({"prompt": 1, "completion": 
1}).to_string(), + ], + ) + .unwrap(); + conn.execute( + "insert into preferences values ('umo', 'sel_conv_id', 'conv-1')", + [], + ) + .unwrap(); + conn.execute( + "insert into platform_message_history values ( + 1, 'webchat', 'user-1', 'user-1', 'User', ?1, 'checkpoint-1', 1782259201000 + )", + [json!({"text": query}).to_string()], + ) + .unwrap(); + path.to_str().unwrap().to_owned() +} + +fn append_native_openclaw_event(path: &str, query: &str) { + let transcript = + Path::new(path).join("agents/personal-agent/sessions/openclaw-cli-native.jsonl"); + let mut file = fs::OpenOptions::new() + .append(true) + .open(transcript) + .unwrap(); + writeln!( + file, + "{}", + json!({ + "type": "message", + "id": "openclaw-cli-native-incremental", + "parentId": "openclaw-cli-native-assistant", + "timestamp": "2026-06-24T12:00:03Z", + "message": {"role": "user", "content": query} + }) + ) + .unwrap(); +} + +fn append_native_hermes_event(path: &str, query: &str) { + let conn = Connection::open(path).unwrap(); + conn.execute( + "insert into messages (session_id, role, content, timestamp) values (?1, 'user', ?2, 1782259203.0)", + ["hermes-cli-native", query], + ) + .unwrap(); +} + +fn append_native_nanoclaw_event(path: &str, query: &str) { + let conn = Connection::open( + Path::new(path) + .join("data/v2-sessions/ag-1/session-1") + .join("inbound.db"), + ) + .unwrap(); + conn.execute( + "insert into messages_in values ( + 'in-2', 1, 'chat', 1782259203000, 'done', 'message', + 'chat-1', 'telegram', 'thread-1', ?1, null, 0 + )", + [json!({"text": query}).to_string()], + ) + .unwrap(); +} + +fn append_native_astrbot_event(path: &str, query: &str) { + let conn = Connection::open(path).unwrap(); + let content: String = conn + .query_row( + "select content from conversations where id = 1", + [], + |row| row.get(0), + ) + .unwrap(); + let mut content: Value = serde_json::from_str(&content).unwrap(); + content + .as_array_mut() + .unwrap() + .push(json!({"role": "assistant", "content": 
query})); + conn.execute( + "update conversations set content = ?1, updated_at = 1782259203000 where id = 1", + [content.to_string()], + ) + .unwrap(); +} + +#[test] +fn openclaw_import_accepts_explicit_session_jsonl_file() { + let temp = tempdir(); + let query = "openclaw-explicit-file-oracle"; + let path = temp.path().join("openclaw-single-session.jsonl"); + fs::write( + &path, + format!( + "{}\n{}\n", + json!({ + "type": "session", + "id": "openclaw-single-session", + "timestamp": "2026-06-24T12:00:00Z" + }), + json!({ + "type": "message", + "id": "openclaw-single-user", + "timestamp": "2026-06-24T12:00:01Z", + "message": {"role": "user", "content": query} + }) + ), + ) + .unwrap(); + + let imported = json_output(ctx(&temp).args([ + "import", + "--provider", + "openclaw", + "--path", + path.to_str().unwrap(), + "--json", + ])); + assert_eq!(imported["totals"]["failed"], 0); + assert_eq!(imported["totals"]["imported_sources"], 1); + + let search = + json_output(ctx(&temp).args(["search", query, "--provider", "openclaw", "--json"])); + assert_search_provider_oracle(&search, "openclaw", query, 1, "message"); +} + +#[test] +fn nanoclaw_import_tolerates_partial_auxiliary_tables() { + let temp = tempdir(); + let query = "nanoclaw-partial-auxiliary-schema-oracle"; + let path = write_native_nanoclaw_fixture(&temp, query); + let conn = Connection::open(Path::new(&path).join("data/v2.db")).unwrap(); + conn.execute_batch( + "drop table agent_groups; + create table agent_groups (id text primary key); + insert into agent_groups values ('ag-1'); + drop table messaging_groups; + create table messaging_groups (id text primary key); + insert into messaging_groups values ('mg-1');", + ) + .unwrap(); + + let imported = json_output(ctx(&temp).args([ + "import", + "--provider", + "nanoclaw", + "--path", + &path, + "--json", + ])); + assert_eq!(imported["totals"]["failed"], 0); + assert_eq!(imported["totals"]["imported_sources"], 1); + + let search = + 
json_output(ctx(&temp).args(["search", query, "--provider", "nanoclaw", "--json"])); + assert_search_provider_oracle(&search, "nanoclaw", query, 1, "message"); +} + +#[test] +fn personal_agent_sqlite_imports_report_corrupt_databases() { + for (provider, path) in [ + ("hermes", "corrupt-hermes-state.db"), + ("astrbot", "corrupt-astrbot-data_v4.db"), + ] { + let temp = tempdir(); + let db_path = temp.path().join(path); + fs::write(&db_path, b"not sqlite").unwrap(); + let output = ctx(&temp) + .args([ + "import", + "--provider", + provider, + "--path", + db_path.to_str().unwrap(), + "--json", + ]) + .assert() + .failure() + .get_output() + .stderr + .clone(); + let stderr = String::from_utf8(output).unwrap(); + assert!(stderr.contains("not a database"), "{stderr}"); + } + + let temp = tempdir(); + let root = temp.path().join("corrupt-nanoclaw"); + fs::create_dir_all(root.join("data/v2-sessions")).unwrap(); + fs::write(root.join("data/v2.db"), b"not sqlite").unwrap(); + let output = ctx(&temp) + .args([ + "import", + "--provider", + "nanoclaw", + "--path", + root.to_str().unwrap(), + "--json", + ]) + .assert() + .failure() + .get_output() + .stderr + .clone(); + let stderr = String::from_utf8(output).unwrap(); + assert!(stderr.contains("not a database"), "{stderr}"); +} + #[test] fn native_provider_cli_requires_existing_history_or_explicit_path() { for (cli_provider, expected_blocker) in [ @@ -3885,6 +4595,10 @@ fn native_provider_cli_requires_existing_history_or_explicit_path() { "factory-ai-droid", "no native factory_ai_droid history found", ), + ("openclaw", "no native openclaw history found"), + ("hermes", "no native hermes history found"), + ("nanoclaw", "no native nanoclaw history found"), + ("astrbot", "no native astrbot history found"), ] { let temp = tempdir(); ctx(&temp) diff --git a/crates/ctx-history-capture/src/lib.rs b/crates/ctx-history-capture/src/lib.rs index 7e435bfd..e656bfda 100644 --- a/crates/ctx-history-capture/src/lib.rs +++ 
b/crates/ctx-history-capture/src/lib.rs @@ -22,7 +22,7 @@ use ctx_history_core::{ SyncState, Visibility, PROVIDER_CAPTURE_ENVELOPE_SCHEMA_VERSION, }; use ctx_history_store::{CatalogSession, Store, StoreError}; -use rusqlite::{Connection, OpenFlags}; +use rusqlite::{Connection, OpenFlags, OptionalExtension}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use thiserror::Error; @@ -429,6 +429,90 @@ impl Default for OpenCodeSqliteImportOptions { } } +#[derive(Debug, Clone)] +pub struct OpenClawImportOptions { + pub machine_id: String, + pub source_path: Option, + pub imported_at: DateTime, + pub history_record_id: Option, + pub allow_partial_failures: bool, +} + +impl Default for OpenClawImportOptions { + fn default() -> Self { + Self { + machine_id: default_machine_id(), + source_path: None, + imported_at: utc_now(), + history_record_id: None, + allow_partial_failures: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct HermesSqliteImportOptions { + pub machine_id: String, + pub source_path: Option, + pub imported_at: DateTime, + pub history_record_id: Option, + pub allow_partial_failures: bool, +} + +impl Default for HermesSqliteImportOptions { + fn default() -> Self { + Self { + machine_id: default_machine_id(), + source_path: None, + imported_at: utc_now(), + history_record_id: None, + allow_partial_failures: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct NanoClawImportOptions { + pub machine_id: String, + pub source_path: Option, + pub imported_at: DateTime, + pub history_record_id: Option, + pub allow_partial_failures: bool, +} + +impl Default for NanoClawImportOptions { + fn default() -> Self { + Self { + machine_id: default_machine_id(), + source_path: None, + imported_at: utc_now(), + history_record_id: None, + allow_partial_failures: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct AstrBotSqliteImportOptions { + pub machine_id: String, + pub source_path: Option, + pub imported_at: DateTime, + pub history_record_id: 
Option, + pub allow_partial_failures: bool, +} + +impl Default for AstrBotSqliteImportOptions { + fn default() -> Self { + Self { + machine_id: default_machine_id(), + source_path: None, + imported_at: utc_now(), + history_record_id: None, + allow_partial_failures: false, + } + } +} + #[derive(Debug, Clone)] pub struct AntigravityCliImportOptions { pub machine_id: String, @@ -736,6 +820,18 @@ pub struct ClaudeProjectsJsonlAdapter; #[derive(Debug, Clone, Copy, Default)] pub struct OpenCodeSqliteAdapter; +#[derive(Debug, Clone, Copy, Default)] +pub struct OpenClawJsonlAdapter; + +#[derive(Debug, Clone, Copy, Default)] +pub struct HermesSqliteAdapter; + +#[derive(Debug, Clone, Copy, Default)] +pub struct NanoClawProjectAdapter; + +#[derive(Debug, Clone, Copy, Default)] +pub struct AstrBotSqliteAdapter; + #[derive(Debug, Clone, Copy, Default)] pub struct AntigravityCliJsonlAdapter; @@ -1364,6 +1460,78 @@ impl ProviderCaptureAdapter for OpenCodeSqliteAdapter { } } +impl ProviderCaptureAdapter for OpenClawJsonlAdapter { + fn provider(&self) -> CaptureProvider { + CaptureProvider::OpenClaw + } + + fn source_format(&self) -> &str { + OPENCLAW_SOURCE_FORMAT + } + + fn normalize_path( + &self, + path: &Path, + context: &ProviderAdapterContext, + ) -> Result { + normalize_openclaw_history(path, context) + } +} + +impl ProviderCaptureAdapter for HermesSqliteAdapter { + fn provider(&self) -> CaptureProvider { + CaptureProvider::Hermes + } + + fn source_format(&self) -> &str { + HERMES_SQLITE_SOURCE_FORMAT + } + + fn normalize_path( + &self, + path: &Path, + context: &ProviderAdapterContext, + ) -> Result { + normalize_hermes_sqlite(path, context) + } +} + +impl ProviderCaptureAdapter for NanoClawProjectAdapter { + fn provider(&self) -> CaptureProvider { + CaptureProvider::NanoClaw + } + + fn source_format(&self) -> &str { + NANOCLAW_SOURCE_FORMAT + } + + fn normalize_path( + &self, + path: &Path, + context: &ProviderAdapterContext, + ) -> Result { + 
normalize_nanoclaw_project(path, context) + } +} + +impl ProviderCaptureAdapter for AstrBotSqliteAdapter { + fn provider(&self) -> CaptureProvider { + CaptureProvider::AstrBot + } + + fn source_format(&self) -> &str { + ASTRBOT_SQLITE_SOURCE_FORMAT + } + + fn normalize_path( + &self, + path: &Path, + context: &ProviderAdapterContext, + ) -> Result { + normalize_astrbot_sqlite(path, context) + } +} + impl ProviderCaptureAdapter for AntigravityCliJsonlAdapter { fn provider(&self) -> CaptureProvider { CaptureProvider::Antigravity @@ -3076,6 +3244,127 @@ pub fn import_opencode_sqlite( ) } +pub fn import_openclaw_history( + path: impl AsRef, + store: &mut Store, + options: OpenClawImportOptions, +) -> Result { + import_native_jsonl_tree( + store, + NativeJsonlTreeImport { + path: path.as_ref(), + machine_id: options.machine_id, + source_path: options.source_path, + imported_at: options.imported_at, + history_record_id: options.history_record_id, + allow_partial_failures: options.allow_partial_failures, + }, + OpenClawJsonlAdapter, + ) +} + +pub fn import_hermes_sqlite( + path: impl AsRef, + store: &mut Store, + options: HermesSqliteImportOptions, +) -> Result { + let path = path.as_ref(); + let source_path = options + .source_path + .clone() + .unwrap_or_else(|| path.to_path_buf()); + let normalization = HermesSqliteAdapter.normalize_path( + path, + &ProviderAdapterContext { + machine_id: options.machine_id, + source_path: Some(source_path), + imported_at: options.imported_at, + tool_output_mode: CodexToolOutputMode::Full, + event_mode: CodexEventImportMode::Rich, + include_notices: true, + }, + )?; + import_normalized_provider_captures( + store, + normalization, + NormalizedProviderImportOptions { + history_record_id: options.history_record_id, + allow_partial_failures: options.allow_partial_failures, + persist_cursors: true, + wrap_transaction: true, + fast_event_inserts: true, + }, + ) +} + +pub fn import_nanoclaw_project( + path: impl AsRef, + store: &mut Store, + 
options: NanoClawImportOptions, +) -> Result { + let path = path.as_ref(); + let source_path = options + .source_path + .clone() + .unwrap_or_else(|| path.to_path_buf()); + let normalization = NanoClawProjectAdapter.normalize_path( + path, + &ProviderAdapterContext { + machine_id: options.machine_id, + source_path: Some(source_path), + imported_at: options.imported_at, + tool_output_mode: CodexToolOutputMode::Full, + event_mode: CodexEventImportMode::Rich, + include_notices: true, + }, + )?; + import_normalized_provider_captures( + store, + normalization, + NormalizedProviderImportOptions { + history_record_id: options.history_record_id, + allow_partial_failures: options.allow_partial_failures, + persist_cursors: true, + wrap_transaction: true, + fast_event_inserts: true, + }, + ) +} + +pub fn import_astrbot_sqlite( + path: impl AsRef, + store: &mut Store, + options: AstrBotSqliteImportOptions, +) -> Result { + let path = path.as_ref(); + let source_path = options + .source_path + .clone() + .unwrap_or_else(|| path.to_path_buf()); + let normalization = AstrBotSqliteAdapter.normalize_path( + path, + &ProviderAdapterContext { + machine_id: options.machine_id, + source_path: Some(source_path), + imported_at: options.imported_at, + tool_output_mode: CodexToolOutputMode::Full, + event_mode: CodexEventImportMode::Rich, + include_notices: true, + }, + )?; + import_normalized_provider_captures( + store, + normalization, + NormalizedProviderImportOptions { + history_record_id: options.history_record_id, + allow_partial_failures: options.allow_partial_failures, + persist_cursors: true, + wrap_transaction: true, + fast_event_inserts: true, + }, + ) +} + pub fn import_antigravity_cli_history( path: impl AsRef, store: &mut Store, @@ -3228,6 +3517,10 @@ pub fn import_normalized_provider_captures( const CODEX_SESSION_SOURCE_FORMAT: &str = "codex_session_jsonl"; const CLAUDE_PROJECTS_SOURCE_FORMAT: &str = "claude_projects_jsonl_tree"; const OPENCODE_SQLITE_SOURCE_FORMAT: &str = 
"opencode_sqlite"; +const OPENCLAW_SOURCE_FORMAT: &str = "openclaw_session_jsonl_tree"; +const HERMES_SQLITE_SOURCE_FORMAT: &str = "hermes_state_sqlite"; +const NANOCLAW_SOURCE_FORMAT: &str = "nanoclaw_project"; +const ASTRBOT_SQLITE_SOURCE_FORMAT: &str = "astrbot_data_v4_sqlite"; const ANTIGRAVITY_CLI_SOURCE_FORMAT: &str = "antigravity_cli_transcript_jsonl_tree"; const GEMINI_CLI_SOURCE_FORMAT: &str = "gemini_cli_chat_recording_jsonl"; const CURSOR_AGENT_TRANSCRIPT_SOURCE_FORMAT: &str = "cursor_agent_transcript_jsonl"; @@ -4994,6 +5287,1777 @@ struct OpenCodeMessageRow { data: String, } +struct NativeSessionDraft { + provider: CaptureProvider, + source_format: &'static str, + provider_session_id: String, + parent_provider_session_id: Option, + root_provider_session_id: Option, + external_agent_id: Option, + agent_type: AgentType, + role_hint: Option, + is_primary: bool, + started_at: DateTime, + ended_at: Option>, + cwd: Option, + fidelity: Fidelity, + raw_source_path: String, + trust: ProviderSourceTrust, + source_metadata: Value, + session_metadata: Value, +} + +fn native_provider_capture( + draft: NativeSessionDraft, + context: &ProviderAdapterContext, + event: Option, +) -> ProviderCaptureEnvelope { + ProviderCaptureEnvelope { + schema_version: PROVIDER_CAPTURE_ENVELOPE_SCHEMA_VERSION, + provider: draft.provider, + source: ProviderSourceEnvelope { + source_format: draft.source_format.to_owned(), + machine_id: context.machine_id.clone(), + observed_at: context.imported_at, + raw_source_path: Some(draft.raw_source_path), + raw_retention: ProviderRawRetention::PathReference, + redaction_boundary: ProviderRedactionBoundary::BeforeExport, + trust: draft.trust, + fidelity: draft.fidelity, + cursor: event.as_ref().and_then(|event| { + event.cursor.as_ref().map(|cursor| ProviderCursorRange { + before: None, + after: Some(ProviderCursorCheckpoint { + stream: provider_cursor_stream(draft.provider, draft.source_format), + cursor: cursor.clone(), + observed_at: 
event.occurred_at, + }), + }) + }), + idempotency_key: Some(format!( + "provider-source:{}:{}:{}", + draft.provider.as_str(), + draft.source_format, + draft.provider_session_id + )), + metadata: draft.source_metadata, + }, + session: ProviderSessionEnvelope { + provider_session_id: draft.provider_session_id.clone(), + parent_provider_session_id: draft.parent_provider_session_id, + root_provider_session_id: draft.root_provider_session_id, + external_agent_id: draft.external_agent_id, + agent_type: draft.agent_type, + role_hint: draft.role_hint, + is_primary: draft.is_primary, + status: SessionStatus::Imported, + started_at: draft.started_at, + ended_at: draft.ended_at, + cwd: draft.cwd, + fidelity: draft.fidelity, + idempotency_key: Some(format!( + "provider-session:{}:{}", + draft.provider.as_str(), + draft.provider_session_id + )), + artifacts: Vec::new(), + metadata: draft.session_metadata, + }, + event, + } +} + +fn open_provider_sqlite_readonly(path: &Path) -> Result { + ensure_regular_provider_transcript_file(path)?; + let conn = Connection::open_with_flags( + path, + OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX, + )?; + conn.busy_timeout(std::time::Duration::from_secs(5))?; + conn.pragma_update(None, "query_only", true)?; + Ok(conn) +} + +fn provider_timestamp_seconds(value: Option, fallback: DateTime) -> DateTime { + let Some(value) = value else { + return fallback; + }; + if !value.is_finite() { + return fallback; + } + let millis = if value.abs() > 1_000_000_000_000.0 { + value.round() as i64 + } else { + (value * 1000.0).round() as i64 + }; + DateTime::::from_timestamp_millis(millis).unwrap_or(fallback) +} + +fn provider_timestamp_millis(value: Option, fallback: DateTime) -> DateTime { + value + .and_then(DateTime::::from_timestamp_millis) + .unwrap_or(fallback) +} + +fn provider_timestamp_value(value: Option<&Value>, fallback: DateTime) -> DateTime { + match value { + Some(Value::String(raw)) => parse_rfc3339_utc(raw) + .or_else(|| 
{ + raw.parse::() + .ok() + .map(|ts| provider_timestamp_seconds(Some(ts), fallback)) + }) + .unwrap_or(fallback), + Some(Value::Number(number)) => number + .as_f64() + .map(|ts| provider_timestamp_seconds(Some(ts), fallback)) + .unwrap_or(fallback), + _ => fallback, + } +} + +fn text_id_index(seed: &str, offset: u64) -> u64 { + offset.saturating_add(fnv1a64(seed.as_bytes()) & 0x0fff_ffff) +} + +fn provider_json_text(raw: &str) -> Value { + serde_json::from_str::(raw).unwrap_or_else(|_| Value::String(raw.to_owned())) +} + +fn hermes_decode_content(raw: Option<&str>) -> Value { + let Some(raw) = raw else { + return Value::Null; + }; + if let Some(json) = raw.strip_prefix("\0json:") { + return provider_json_text(json); + } + Value::String(raw.to_owned()) +} + +fn native_event( + provider: CaptureProvider, + source_format: &'static str, + provider_session_id: &str, + provider_event_index: u64, + provider_event_hash: Option, + cursor: String, + event_type: EventType, + role: Option, + occurred_at: DateTime, + text: String, + body: Value, + metadata: Value, +) -> ProviderEventEnvelope { + let (text, truncated) = provider_safe_preview(&text, PROVIDER_MAX_TEXT_CHARS); + ProviderEventEnvelope { + provider_event_index, + provider_event_hash, + cursor: Some(cursor), + event_type, + role, + occurred_at, + fidelity: Fidelity::Imported, + redaction_state: RedactionState::SafePreview, + idempotency_key: Some(format!( + "provider-event:{}:{}:{}", + provider.as_str(), + provider_session_id, + provider_event_index + )), + artifacts: Vec::new(), + payload: json!({ + "text": text, + "truncated": truncated, + "source_format": source_format, + "body": provider_capped_json(&body, PROVIDER_MAX_PREVIEW_CHARS), + }), + metadata, + } +} + +fn openclaw_agent_id(path: &Path) -> Option { + let components = path + .components() + .map(|component| component.as_os_str().to_string_lossy().to_string()) + .collect::>(); + components.windows(2).find_map(|window| { + (window[0] == "agents" && 
!window[1].is_empty()).then(|| window[1].clone()) + }) +} + +fn provider_path_has_component(path: &Path, expected: &str) -> bool { + path.components() + .any(|component| component.as_os_str() == expected) +} + +fn openclaw_session_indexes(root: &Path) -> BTreeMap { + let mut indexes = BTreeMap::new(); + let mut paths = Vec::new(); + collect_named_paths(root, "sessions.json", &mut paths); + for path in paths { + let Ok(text) = fs::read_to_string(&path) else { + continue; + }; + let Ok(value) = serde_json::from_str::(&text) else { + continue; + }; + let agent_id = openclaw_agent_id(&path); + for (key, value) in openclaw_session_index_entries(value) { + if let Some(session_id) = value + .get("sessionId") + .or_else(|| value.get("id")) + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()) + { + if let Some(agent_id) = &agent_id { + indexes + .entry(format!("{agent_id}/{session_id}")) + .or_insert(value.clone()); + } + indexes + .entry(session_id.to_owned()) + .or_insert(value.clone()); + } + if let Some(agent_id) = &agent_id { + indexes + .entry(format!("{agent_id}/{key}")) + .or_insert(value.clone()); + } + indexes.entry(key).or_insert(value); + } + } + indexes +} + +fn openclaw_session_index_entries(value: Value) -> Vec<(String, Value)> { + match value { + Value::Array(items) => items + .into_iter() + .enumerate() + .map(|(index, value)| { + let key = value + .get("sessionId") + .or_else(|| value.get("id")) + .and_then(Value::as_str) + .map(str::to_owned) + .unwrap_or_else(|| index.to_string()); + (key, value) + }) + .collect(), + Value::Object(mut map) => { + if let Some(Value::Array(items)) = map.remove("sessions") { + return openclaw_session_index_entries(Value::Array(items)); + } + map.into_iter().collect() + } + _ => Vec::new(), + } +} + +fn collect_named_paths(root: &Path, name: &str, paths: &mut Vec) { + let Ok(metadata) = fs::symlink_metadata(root) else { + return; + }; + if metadata.file_type().is_symlink() { + return; + } + if 
metadata.file_type().is_file() { + if root.file_name().and_then(|file_name| file_name.to_str()) == Some(name) { + paths.push(root.to_path_buf()); + } + return; + } + if !metadata.file_type().is_dir() { + return; + } + let Ok(entries) = fs::read_dir(root) else { + return; + }; + for entry in entries.flatten() { + collect_named_paths(&entry.path(), name, paths); + } +} + +fn normalize_openclaw_history( + path: &Path, + context: &ProviderAdapterContext, +) -> Result { + let mut paths = Vec::new(); + collect_jsonl_paths(path, &mut paths)?; + if !path.is_file() { + paths.retain(|candidate| provider_path_has_component(candidate, "sessions")); + } + paths.sort(); + if paths.is_empty() { + return Err(CaptureError::InvalidProviderTranscriptPath { + path: path.to_path_buf(), + reason: "no OpenClaw session JSONL transcripts found", + }); + } + let indexes = openclaw_session_indexes(path); + let mut merged = ProviderNormalizationResult::default(); + for transcript_path in paths { + let mut result = normalize_openclaw_jsonl_file(&transcript_path, context, &indexes)?; + merged.summary.merge(result.summary); + merged.captures.append(&mut result.captures); + merged.files_touched.append(&mut result.files_touched); + } + Ok(merged) +} + +fn normalize_openclaw_jsonl_file( + path: &Path, + context: &ProviderAdapterContext, + indexes: &BTreeMap, +) -> Result { + ensure_regular_provider_transcript_file(path)?; + let file = File::open(path)?; + let mut reader = BufReader::new(file); + let mut result = ProviderNormalizationResult::default(); + let fallback_id = path + .file_stem() + .and_then(|name| name.to_str()) + .unwrap_or("openclaw-session") + .to_owned(); + let agent_id = openclaw_agent_id(path); + let mut provider_session_id = agent_id + .as_ref() + .map(|agent| format!("{agent}/{fallback_id}")) + .unwrap_or_else(|| fallback_id.clone()); + let mut started_at = context.imported_at; + let mut cwd = None; + let mut header_raw = Value::Null; + let mut header_seen = false; + let mut 
line_number = 0usize; + let mut line = Vec::new(); + loop { + line.clear(); + let read = reader.read_until(b'\n', &mut line)?; + if read == 0 { + break; + } + line_number += 1; + if line.iter().all(u8::is_ascii_whitespace) { + continue; + } + let value: Value = match serde_json::from_slice(&line) { + Ok(value) => value, + Err(err) => { + result.summary.failed += 1; + result.summary.failures.push(ProviderImportFailure { + line: line_number, + error: err.to_string(), + }); + continue; + } + }; + let row_type = value + .get("type") + .and_then(Value::as_str) + .unwrap_or("message"); + if row_type == "session" { + if let Some(id) = value.get("id").and_then(Value::as_str) { + provider_session_id = agent_id + .as_ref() + .map(|agent| format!("{agent}/{id}")) + .unwrap_or_else(|| id.to_owned()); + } + started_at = provider_timestamp_value(value.get("timestamp"), context.imported_at); + cwd = value.get("cwd").and_then(Value::as_str).map(str::to_owned); + header_raw = value.clone(); + header_seen = true; + result.captures.push(( + line_number, + openclaw_capture( + &provider_session_id, + agent_id.as_deref(), + started_at, + None, + cwd.clone(), + path, + context, + indexes, + header_raw.clone(), + None, + ), + )); + continue; + } + + let occurred_at = provider_timestamp_value(value.get("timestamp"), started_at); + let event_index = (line_number - 1) as u64; + let event = openclaw_event( + &provider_session_id, + event_index, + line_number, + &value, + occurred_at, + ); + if !header_seen { + header_seen = true; + result.captures.push(( + line_number, + openclaw_capture( + &provider_session_id, + agent_id.as_deref(), + started_at, + None, + cwd.clone(), + path, + context, + indexes, + header_raw.clone(), + None, + ), + )); + } + result.captures.push(( + line_number, + openclaw_capture( + &provider_session_id, + agent_id.as_deref(), + started_at, + None, + cwd.clone(), + path, + context, + indexes, + header_raw.clone(), + Some(event), + ), + )); + } + Ok(result) +} + +fn 
openclaw_capture( + provider_session_id: &str, + agent_id: Option<&str>, + started_at: DateTime, + ended_at: Option>, + cwd: Option, + path: &Path, + context: &ProviderAdapterContext, + indexes: &BTreeMap, + header_raw: Value, + event: Option, +) -> ProviderCaptureEnvelope { + let local_id = provider_session_id + .rsplit_once('/') + .map(|(_, id)| id) + .unwrap_or(provider_session_id); + let index = indexes + .get(provider_session_id) + .or_else(|| indexes.get(local_id)) + .cloned() + .unwrap_or(Value::Null); + native_provider_capture( + NativeSessionDraft { + provider: CaptureProvider::OpenClaw, + source_format: OPENCLAW_SOURCE_FORMAT, + provider_session_id: provider_session_id.to_owned(), + parent_provider_session_id: index + .get("parentSessionId") + .or_else(|| index.get("parent_session_id")) + .and_then(Value::as_str) + .map(str::to_owned), + root_provider_session_id: None, + external_agent_id: agent_id.map(str::to_owned), + agent_type: AgentType::Primary, + role_hint: Some("personal-agent".to_owned()), + is_primary: true, + started_at, + ended_at, + cwd, + fidelity: Fidelity::Partial, + raw_source_path: path.display().to_string(), + trust: ProviderSourceTrust::ProviderNative, + source_metadata: json!({ + "adapter": OPENCLAW_SOURCE_FORMAT, + "index": provider_capped_json(&index, PROVIDER_MAX_PREVIEW_CHARS), + "header": provider_capped_json(&header_raw, PROVIDER_MAX_PREVIEW_CHARS), + "support_level": "beta", + }), + session_metadata: json!({ + "source_format": OPENCLAW_SOURCE_FORMAT, + "agent_id": agent_id, + "session_index": provider_capped_json(&index, PROVIDER_MAX_PREVIEW_CHARS), + "fidelity_gap": "OpenClaw session JSONL is current native storage, but upstream keeps a storage-neutral accessor for future schema changes", + }), + }, + context, + event, + ) +} + +fn openclaw_event( + provider_session_id: &str, + event_index: u64, + line_number: usize, + row: &Value, + occurred_at: DateTime, +) -> ProviderEventEnvelope { + let row_type = 
row.get("type").and_then(Value::as_str).unwrap_or("message"); + let message = row.get("message").unwrap_or(row); + let role = message + .get("role") + .or_else(|| row.get("role")) + .and_then(Value::as_str) + .map(|role| provider_role(Some(role))); + let event_type = match row_type { + "message" => match role { + Some(EventRole::Tool) => EventType::ToolOutput, + _ => EventType::Message, + }, + "leaf" | "compaction" | "custom" => EventType::Notice, + _ => EventType::Notice, + }; + let text = message + .get("content") + .or_else(|| message.get("text")) + .or_else(|| message.get("output")) + .and_then(provider_value_text) + .unwrap_or_else(|| format!("OpenClaw {row_type}")); + native_event( + CaptureProvider::OpenClaw, + OPENCLAW_SOURCE_FORMAT, + provider_session_id, + event_index, + row.get("id").and_then(Value::as_str).map(str::to_owned), + format!("line:{line_number}"), + event_type, + role, + occurred_at, + text, + row.clone(), + json!({ + "source": "openclaw_jsonl", + "source_format": OPENCLAW_SOURCE_FORMAT, + "row_type": row_type, + "message_id": row.get("id").and_then(Value::as_str), + "parent_id": row.get("parentId").or_else(|| row.get("parent_id")).cloned(), + }), + ) +} + +#[derive(Debug, Clone)] +struct HermesSessionRow { + id: String, + source: String, + parent_session_id: Option, + model: Option, + model_config: Option, + started_at: f64, + ended_at: Option, + end_reason: Option, + message_count: i64, + tool_call_count: i64, + input_tokens: i64, + output_tokens: i64, + cache_read_tokens: i64, + cache_write_tokens: i64, + reasoning_tokens: i64, + cwd: Option, + git_branch: Option, + git_repo_root: Option, + billing_provider: Option, + billing_base_url: Option, + billing_mode: Option, + estimated_cost_usd: Option, + actual_cost_usd: Option, + title: Option, + archived: i64, +} + +#[derive(Debug, Clone)] +struct HermesMessageRow { + id: i64, + session_id: String, + role: String, + content: Option, + tool_call_id: Option, + tool_calls: Option, + tool_name: 
Option, + timestamp: f64, + token_count: Option, + finish_reason: Option, + reasoning: Option, + reasoning_content: Option, + reasoning_details: Option, + codex_reasoning_items: Option, + codex_message_items: Option, + platform_message_id: Option, + observed: i64, + active: i64, + compacted: i64, +} + +fn normalize_hermes_sqlite( + path: &Path, + context: &ProviderAdapterContext, +) -> Result { + let conn = open_provider_sqlite_readonly(path)?; + let user_version: i64 = conn.pragma_query_value(None, "user_version", |row| row.get(0))?; + let schema_fingerprint = opencode_schema_fingerprint(&conn)?; + let sessions = hermes_sessions(&conn)?; + let messages = hermes_messages(&conn)?; + let sessions_by_id = sessions + .into_iter() + .map(|session| (session.id.clone(), session)) + .collect::>(); + let mut result = ProviderNormalizationResult::default(); + + for row in messages { + let Some(session) = sessions_by_id.get(&row.session_id) else { + result.summary.failed += 1; + result.summary.failures.push(ProviderImportFailure { + line: row.id.max(0) as usize, + error: format!( + "Hermes message {} references missing session {}", + row.id, row.session_id + ), + }); + continue; + }; + let provider_session_id = session.id.clone(); + let occurred_at = provider_timestamp_seconds(Some(row.timestamp), context.imported_at); + let started_at = provider_timestamp_seconds(Some(session.started_at), occurred_at); + let ended_at = session + .ended_at + .map(|timestamp| provider_timestamp_seconds(Some(timestamp), context.imported_at)); + let content = hermes_decode_content(row.content.as_deref()); + let text = provider_value_text(&content).unwrap_or_else(|| { + row.tool_name + .as_ref() + .map(|name| format!("tool: {name}")) + .unwrap_or_else(|| format!("Hermes {}", row.role)) + }); + let event_type = hermes_event_type(&row); + let role = Some(provider_role(Some(&row.role))); + let event = native_event( + CaptureProvider::Hermes, + HERMES_SQLITE_SOURCE_FORMAT, + &provider_session_id, + 
row.id.max(0) as u64, + Some(format!("message:{}", row.id)), + format!("messages:id:{}", row.id), + event_type, + role, + occurred_at, + text, + json!({ + "message_id": row.id, + "role": row.role, + "content": content, + "tool_call_id": row.tool_call_id, + "tool_calls": row.tool_calls.as_deref().map(provider_json_text), + "tool_name": row.tool_name, + "reasoning": row.reasoning, + "reasoning_content": row.reasoning_content, + "reasoning_details": row.reasoning_details.as_deref().map(provider_json_text), + "codex_reasoning_items": row.codex_reasoning_items.as_deref().map(provider_json_text), + "codex_message_items": row.codex_message_items.as_deref().map(provider_json_text), + }), + json!({ + "source": "hermes_state_db", + "source_format": HERMES_SQLITE_SOURCE_FORMAT, + "message_id": row.id, + "platform_message_id": row.platform_message_id, + "token_count": row.token_count, + "finish_reason": row.finish_reason, + "observed": row.observed != 0, + "active": row.active != 0, + "compacted": row.compacted != 0, + }), + ); + result.captures.push(( + row.id.max(0) as usize, + native_provider_capture( + NativeSessionDraft { + provider: CaptureProvider::Hermes, + source_format: HERMES_SQLITE_SOURCE_FORMAT, + provider_session_id: provider_session_id.clone(), + parent_provider_session_id: session.parent_session_id.clone(), + root_provider_session_id: None, + external_agent_id: Some(session.source.clone()), + agent_type: if session.parent_session_id.is_some() { + AgentType::Subagent + } else { + AgentType::Primary + }, + role_hint: Some(session.source.clone()), + is_primary: session.parent_session_id.is_none(), + started_at, + ended_at, + cwd: session.cwd.clone(), + fidelity: Fidelity::Imported, + raw_source_path: path.display().to_string(), + trust: ProviderSourceTrust::ProviderNative, + source_metadata: json!({ + "adapter": HERMES_SQLITE_SOURCE_FORMAT, + "sqlite_user_version": user_version, + "schema_fingerprint": schema_fingerprint, + "upstream_schema_version_at_research": 
17, + }), + session_metadata: json!({ + "source_format": HERMES_SQLITE_SOURCE_FORMAT, + "source": session.source, + "title": session.title, + "model": session.model, + "model_config": session.model_config.as_deref().map(provider_json_text), + "end_reason": session.end_reason, + "message_count": session.message_count, + "tool_call_count": session.tool_call_count, + "tokens": { + "input": session.input_tokens, + "output": session.output_tokens, + "cache_read": session.cache_read_tokens, + "cache_write": session.cache_write_tokens, + "reasoning": session.reasoning_tokens, + }, + "git": { + "branch": session.git_branch, + "repo_root": session.git_repo_root, + }, + "billing": { + "provider": session.billing_provider, + "base_url": session.billing_base_url, + "mode": session.billing_mode, + "estimated_cost_usd": session.estimated_cost_usd, + "actual_cost_usd": session.actual_cost_usd, + }, + "archived": session.archived != 0, + }), + }, + context, + Some(event), + ), + )); + } + + Ok(result) +} + +fn hermes_event_type(row: &HermesMessageRow) -> EventType { + if row.role == "tool" { + EventType::ToolOutput + } else if row + .tool_calls + .as_deref() + .is_some_and(|value| !value.trim().is_empty()) + || row + .tool_name + .as_deref() + .is_some_and(|value| !value.trim().is_empty()) + { + EventType::ToolCall + } else { + EventType::Message + } +} + +fn hermes_sessions(conn: &Connection) -> Result> { + if !sqlite_table_exists(conn, "sessions")? 
{ + return Err(CaptureError::InvalidPayload( + "Hermes state.db is missing required sessions table".into(), + )); + } + let columns = sqlite_table_columns(conn, "sessions")?; + ensure_sqlite_table_columns( + &columns, + "Hermes sessions table", + &["id", "source", "started_at"], + )?; + let parent_session_id = optional_column_expr(&columns, "parent_session_id", "NULL"); + let model = optional_column_expr(&columns, "model", "NULL"); + let model_config = optional_column_expr(&columns, "model_config", "NULL"); + let ended_at = optional_column_expr(&columns, "ended_at", "NULL"); + let end_reason = optional_column_expr(&columns, "end_reason", "NULL"); + let message_count = optional_column_expr(&columns, "message_count", "0"); + let tool_call_count = optional_column_expr(&columns, "tool_call_count", "0"); + let input_tokens = optional_column_expr(&columns, "input_tokens", "0"); + let output_tokens = optional_column_expr(&columns, "output_tokens", "0"); + let cache_read_tokens = optional_column_expr(&columns, "cache_read_tokens", "0"); + let cache_write_tokens = optional_column_expr(&columns, "cache_write_tokens", "0"); + let reasoning_tokens = optional_column_expr(&columns, "reasoning_tokens", "0"); + let cwd = optional_column_expr(&columns, "cwd", "NULL"); + let git_branch = optional_column_expr(&columns, "git_branch", "NULL"); + let git_repo_root = optional_column_expr(&columns, "git_repo_root", "NULL"); + let billing_provider = optional_column_expr(&columns, "billing_provider", "NULL"); + let billing_base_url = optional_column_expr(&columns, "billing_base_url", "NULL"); + let billing_mode = optional_column_expr(&columns, "billing_mode", "NULL"); + let estimated_cost_usd = optional_column_expr(&columns, "estimated_cost_usd", "NULL"); + let actual_cost_usd = optional_column_expr(&columns, "actual_cost_usd", "NULL"); + let title = optional_column_expr(&columns, "title", "NULL"); + let archived = optional_column_expr(&columns, "archived", "0"); + let sql = format!( + 
"select id, source, {parent_session_id}, {model}, {model_config}, started_at, \ + {ended_at}, {end_reason}, {message_count}, {tool_call_count}, {input_tokens}, \ + {output_tokens}, {cache_read_tokens}, {cache_write_tokens}, {reasoning_tokens}, \ + {cwd}, {git_branch}, {git_repo_root}, {billing_provider}, {billing_base_url}, \ + {billing_mode}, {estimated_cost_usd}, {actual_cost_usd}, {title}, {archived} \ + from sessions order by started_at, id" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(HermesSessionRow { + id: row.get(0)?, + source: row.get(1)?, + parent_session_id: row.get(2)?, + model: row.get(3)?, + model_config: row.get(4)?, + started_at: row.get(5)?, + ended_at: row.get(6)?, + end_reason: row.get(7)?, + message_count: row.get(8)?, + tool_call_count: row.get(9)?, + input_tokens: row.get(10)?, + output_tokens: row.get(11)?, + cache_read_tokens: row.get(12)?, + cache_write_tokens: row.get(13)?, + reasoning_tokens: row.get(14)?, + cwd: row.get(15)?, + git_branch: row.get(16)?, + git_repo_root: row.get(17)?, + billing_provider: row.get(18)?, + billing_base_url: row.get(19)?, + billing_mode: row.get(20)?, + estimated_cost_usd: row.get(21)?, + actual_cost_usd: row.get(22)?, + title: row.get(23)?, + archived: row.get(24)?, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +fn hermes_messages(conn: &Connection) -> Result> { + if !sqlite_table_exists(conn, "messages")? 
{ + return Err(CaptureError::InvalidPayload( + "Hermes state.db is missing required messages table".into(), + )); + } + let columns = sqlite_table_columns(conn, "messages")?; + ensure_sqlite_table_columns( + &columns, + "Hermes messages table", + &["id", "session_id", "role", "timestamp"], + )?; + let content = optional_column_expr(&columns, "content", "NULL"); + let tool_call_id = optional_column_expr(&columns, "tool_call_id", "NULL"); + let tool_calls = optional_column_expr(&columns, "tool_calls", "NULL"); + let tool_name = optional_column_expr(&columns, "tool_name", "NULL"); + let token_count = optional_column_expr(&columns, "token_count", "NULL"); + let finish_reason = optional_column_expr(&columns, "finish_reason", "NULL"); + let reasoning = optional_column_expr(&columns, "reasoning", "NULL"); + let reasoning_content = optional_column_expr(&columns, "reasoning_content", "NULL"); + let reasoning_details = optional_column_expr(&columns, "reasoning_details", "NULL"); + let codex_reasoning_items = optional_column_expr(&columns, "codex_reasoning_items", "NULL"); + let codex_message_items = optional_column_expr(&columns, "codex_message_items", "NULL"); + let platform_message_id = optional_column_expr(&columns, "platform_message_id", "NULL"); + let observed = optional_column_expr(&columns, "observed", "0"); + let active = optional_column_expr(&columns, "active", "1"); + let compacted = optional_column_expr(&columns, "compacted", "0"); + let visibility = if columns.contains("active") || columns.contains("compacted") { + format!("where ({active} = 1 or {compacted} = 1)") + } else { + String::new() + }; + let sql = format!( + "select id, session_id, role, {content}, {tool_call_id}, {tool_calls}, {tool_name}, \ + timestamp, {token_count}, {finish_reason}, {reasoning}, {reasoning_content}, \ + {reasoning_details}, {codex_reasoning_items}, {codex_message_items}, \ + {platform_message_id}, {observed}, {active}, {compacted} \ + from messages {visibility} order by session_id, 
id" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(HermesMessageRow { + id: row.get(0)?, + session_id: row.get(1)?, + role: row.get(2)?, + content: row.get(3)?, + tool_call_id: row.get(4)?, + tool_calls: row.get(5)?, + tool_name: row.get(6)?, + timestamp: row.get(7)?, + token_count: row.get(8)?, + finish_reason: row.get(9)?, + reasoning: row.get(10)?, + reasoning_content: row.get(11)?, + reasoning_details: row.get(12)?, + codex_reasoning_items: row.get(13)?, + codex_message_items: row.get(14)?, + platform_message_id: row.get(15)?, + observed: row.get(16)?, + active: row.get(17)?, + compacted: row.get(18)?, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +#[derive(Debug, Clone)] +struct NanoClawSessionRow { + id: String, + agent_group_id: String, + messaging_group_id: Option, + thread_id: Option, + agent_provider: Option, + status: Option, + container_status: Option, + last_active: Option, + created_at: Option, + agent_group_name: Option, + agent_group_folder: Option, + messaging_channel_type: Option, + messaging_platform_id: Option, + messaging_instance: Option, + messaging_name: Option, +} + +#[derive(Debug, Clone)] +struct NanoClawMessageRow { + source: &'static str, + id: String, + seq: Option, + kind: Option, + timestamp: Option, + status: Option, + in_reply_to: Option, + platform_id: Option, + channel_type: Option, + thread_id: Option, + content: Option, + trigger: Option, + source_session_id: Option, + on_wake: Option, +} + +fn normalize_nanoclaw_project( + path: &Path, + context: &ProviderAdapterContext, +) -> Result { + let project_root = nanoclaw_project_root(path)?; + let central_path = project_root.join("data").join("v2.db"); + let conn = open_provider_sqlite_readonly(¢ral_path)?; + let user_version: i64 = conn.pragma_query_value(None, "user_version", |row| row.get(0))?; + let schema_fingerprint = opencode_schema_fingerprint(&conn)?; + let sessions = nanoclaw_sessions(&conn)?; + let mut 
result = ProviderNormalizationResult::default(); + for session in sessions { + let session_dir = project_root + .join("data") + .join("v2-sessions") + .join(&session.agent_group_id) + .join(&session.id); + let mut messages = Vec::new(); + let inbound_path = session_dir.join("inbound.db"); + if inbound_path.is_file() { + messages.extend(nanoclaw_inbound_messages(&inbound_path)?); + } + let outbound_path = session_dir.join("outbound.db"); + if outbound_path.is_file() { + messages.extend(nanoclaw_outbound_messages(&outbound_path)?); + } + messages.sort_by_key(|message| { + ( + message.timestamp.unwrap_or_default(), + message.seq.unwrap_or_default(), + message.source, + message.id.clone(), + ) + }); + for message in messages { + let provider_session_id = format!("{}/{}", session.agent_group_id, session.id); + let occurred_at = provider_timestamp_millis(message.timestamp, context.imported_at); + let started_at = provider_timestamp_millis(session.created_at, occurred_at); + let content = message + .content + .as_deref() + .map(provider_json_text) + .unwrap_or(Value::Null); + let text = provider_value_text(&content).unwrap_or_else(|| { + format!( + "NanoClaw {}", + message.kind.as_deref().unwrap_or(message.source) + ) + }); + let event_index = nanoclaw_event_index(&message); + let role = if message.source == "inbound" { + Some(EventRole::User) + } else { + Some(EventRole::Assistant) + }; + let event = native_event( + CaptureProvider::NanoClaw, + NANOCLAW_SOURCE_FORMAT, + &provider_session_id, + event_index, + Some(format!("{}:{}", message.source, message.id)), + format!( + "{}:{}:{}", + message.source, + session.id, + message.seq.unwrap_or_default() + ), + EventType::Message, + role, + occurred_at, + text, + json!({ + "message_id": message.id, + "seq": message.seq, + "kind": message.kind, + "content": content, + "status": message.status, + "in_reply_to": message.in_reply_to, + "platform_id": message.platform_id, + "channel_type": message.channel_type, + "thread_id": 
message.thread_id, + "trigger": message.trigger, + "source_session_id": message.source_session_id, + "on_wake": message.on_wake, + }), + json!({ + "source": format!("nanoclaw_{}", message.source), + "source_format": NANOCLAW_SOURCE_FORMAT, + "message_id": message.id, + "seq": message.seq, + }), + ); + result.captures.push(( + event_index.min(usize::MAX as u64) as usize, + native_provider_capture( + NativeSessionDraft { + provider: CaptureProvider::NanoClaw, + source_format: NANOCLAW_SOURCE_FORMAT, + provider_session_id: provider_session_id.clone(), + parent_provider_session_id: None, + root_provider_session_id: None, + external_agent_id: session.agent_provider.clone(), + agent_type: AgentType::Primary, + role_hint: Some("container-session".to_owned()), + is_primary: true, + started_at, + ended_at: session.last_active.map(|timestamp| { + provider_timestamp_millis(Some(timestamp), context.imported_at) + }), + cwd: session.agent_group_folder.clone(), + fidelity: Fidelity::Partial, + raw_source_path: project_root.display().to_string(), + trust: ProviderSourceTrust::ProviderNative, + source_metadata: json!({ + "adapter": NANOCLAW_SOURCE_FORMAT, + "central_db": central_path.display().to_string(), + "sqlite_user_version": user_version, + "schema_fingerprint": schema_fingerprint, + "support_level": "preview", + }), + session_metadata: json!({ + "source_format": NANOCLAW_SOURCE_FORMAT, + "session_id": session.id, + "agent_group_id": session.agent_group_id, + "agent_group_name": session.agent_group_name, + "agent_provider": session.agent_provider, + "status": session.status, + "container_status": session.container_status, + "messaging_group_id": session.messaging_group_id, + "messaging": { + "channel_type": session.messaging_channel_type, + "platform_id": session.messaging_platform_id, + "instance": session.messaging_instance, + "name": session.messaging_name, + "thread_id": session.thread_id, + }, + }), + }, + context, + Some(event), + ), + )); + } + } + Ok(result) +} + +fn 
nanoclaw_project_root(path: &Path) -> Result { + if path.is_dir() && path.join("data").join("v2.db").is_file() { + return Ok(path.to_path_buf()); + } + if path.file_name().and_then(|name| name.to_str()) == Some("v2.db") { + if let Some(data_dir) = path.parent() { + if let Some(root) = data_dir.parent() { + return Ok(root.to_path_buf()); + } + } + } + Err(CaptureError::InvalidProviderTranscriptPath { + path: path.to_path_buf(), + reason: "NanoClaw import path must be a project root or data/v2.db", + }) +} + +fn nanoclaw_event_index(message: &NanoClawMessageRow) -> u64 { + if let Some(seq) = message.seq { + let source_bucket = if message.source == "outbound" { + 500_000 + } else { + 0 + }; + let row_bucket = fnv1a64(format!("{}:{}", message.source, message.id).as_bytes()) % 500_000; + return (seq.max(0) as u64) + .saturating_mul(1_000_000) + .saturating_add(source_bucket) + .saturating_add(row_bucket); + } + text_id_index(&format!("{}:{}", message.source, message.id), 2_000_000_000) +} + +fn nanoclaw_sessions(conn: &Connection) -> Result> { + if !sqlite_table_exists(conn, "sessions")? { + return Err(CaptureError::InvalidPayload( + "NanoClaw data/v2.db is missing required sessions table".into(), + )); + } + let columns = sqlite_table_columns(conn, "sessions")?; + ensure_sqlite_table_columns( + &columns, + "NanoClaw sessions table", + &["id", "agent_group_id"], + )?; + let messaging_group_id = optional_column_expr(&columns, "messaging_group_id", "NULL"); + let thread_id = optional_column_expr(&columns, "thread_id", "NULL"); + let agent_provider = optional_column_expr(&columns, "agent_provider", "NULL"); + let status = optional_column_expr(&columns, "status", "NULL"); + let container_status = optional_column_expr(&columns, "container_status", "NULL"); + let last_active = optional_column_expr(&columns, "last_active", "NULL"); + let created_at = optional_column_expr(&columns, "created_at", "NULL"); + let agent_group_columns = if sqlite_table_exists(conn, "agent_groups")? 
{ + sqlite_table_columns(conn, "agent_groups")? + } else { + BTreeSet::new() + }; + let agent_group_name = + if agent_group_columns.contains("id") && agent_group_columns.contains("name") { + "(select name from agent_groups where agent_groups.id = sessions.agent_group_id)" + } else { + "NULL" + }; + let agent_group_folder = + if agent_group_columns.contains("id") && agent_group_columns.contains("folder") { + "(select folder from agent_groups where agent_groups.id = sessions.agent_group_id)" + } else { + "NULL" + }; + let (messaging_channel_type, messaging_platform_id, messaging_instance, messaging_name) = + if columns.contains("messaging_group_id") && sqlite_table_exists(conn, "messaging_groups")? + { + let messaging_columns = sqlite_table_columns(conn, "messaging_groups")?; + ( + if messaging_columns.contains("id") && messaging_columns.contains("channel_type") { + "(select channel_type from messaging_groups where messaging_groups.id = sessions.messaging_group_id)" + } else { + "NULL" + }, + if messaging_columns.contains("id") && messaging_columns.contains("platform_id") { + "(select platform_id from messaging_groups where messaging_groups.id = sessions.messaging_group_id)" + } else { + "NULL" + }, + if messaging_columns.contains("id") && messaging_columns.contains("instance") { + "(select instance from messaging_groups where messaging_groups.id = sessions.messaging_group_id)" + } else { + "NULL" + }, + if messaging_columns.contains("id") && messaging_columns.contains("name") { + "(select name from messaging_groups where messaging_groups.id = sessions.messaging_group_id)" + } else { + "NULL" + }, + ) + } else { + ("NULL", "NULL", "NULL", "NULL") + }; + let sql = format!( + "select id, agent_group_id, {messaging_group_id}, {thread_id}, {agent_provider}, \ + {status}, {container_status}, {last_active}, {created_at}, {agent_group_name}, \ + {agent_group_folder}, {messaging_channel_type}, {messaging_platform_id}, \ + {messaging_instance}, {messaging_name} from sessions 
order by created_at, id" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(NanoClawSessionRow { + id: row.get(0)?, + agent_group_id: row.get(1)?, + messaging_group_id: row.get(2)?, + thread_id: row.get(3)?, + agent_provider: row.get(4)?, + status: row.get(5)?, + container_status: row.get(6)?, + last_active: row.get(7)?, + created_at: row.get(8)?, + agent_group_name: row.get(9)?, + agent_group_folder: row.get(10)?, + messaging_channel_type: row.get(11)?, + messaging_platform_id: row.get(12)?, + messaging_instance: row.get(13)?, + messaging_name: row.get(14)?, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +fn nanoclaw_inbound_messages(path: &Path) -> Result> { + let conn = open_provider_sqlite_readonly(path)?; + if !sqlite_table_exists(&conn, "messages_in")? { + return Ok(Vec::new()); + } + let columns = sqlite_table_columns(&conn, "messages_in")?; + ensure_sqlite_table_columns(&columns, "NanoClaw inbound messages table", &["id"])?; + let seq = optional_column_expr(&columns, "seq", "NULL"); + let kind = optional_column_expr(&columns, "kind", "NULL"); + let timestamp = optional_column_expr(&columns, "timestamp", "NULL"); + let status = optional_column_expr(&columns, "status", "NULL"); + let trigger = optional_column_expr(&columns, "trigger", "NULL"); + let platform_id = optional_column_expr(&columns, "platform_id", "NULL"); + let channel_type = optional_column_expr(&columns, "channel_type", "NULL"); + let thread_id = optional_column_expr(&columns, "thread_id", "NULL"); + let content = optional_column_expr(&columns, "content", "NULL"); + let source_session_id = optional_column_expr(&columns, "source_session_id", "NULL"); + let on_wake = optional_column_expr(&columns, "on_wake", "NULL"); + let sql = format!( + "select id, {seq}, {kind}, {timestamp}, {status}, {trigger}, {platform_id}, \ + {channel_type}, {thread_id}, {content}, {source_session_id}, {on_wake} \ + from messages_in order by {seq}, id" + ); + 
let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(NanoClawMessageRow { + source: "inbound", + id: row.get(0)?, + seq: row.get(1)?, + kind: row.get(2)?, + timestamp: row.get(3)?, + status: row.get(4)?, + trigger: row.get(5)?, + platform_id: row.get(6)?, + channel_type: row.get(7)?, + thread_id: row.get(8)?, + content: row.get(9)?, + source_session_id: row.get(10)?, + on_wake: row.get(11)?, + in_reply_to: None, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +fn nanoclaw_outbound_messages(path: &Path) -> Result> { + let conn = open_provider_sqlite_readonly(path)?; + if !sqlite_table_exists(&conn, "messages_out")? { + return Ok(Vec::new()); + } + let columns = sqlite_table_columns(&conn, "messages_out")?; + ensure_sqlite_table_columns(&columns, "NanoClaw outbound messages table", &["id"])?; + let seq = optional_column_expr(&columns, "seq", "NULL"); + let kind = optional_column_expr(&columns, "kind", "NULL"); + let timestamp = optional_column_expr(&columns, "timestamp", "NULL"); + let in_reply_to = optional_column_expr(&columns, "in_reply_to", "NULL"); + let platform_id = optional_column_expr(&columns, "platform_id", "NULL"); + let channel_type = optional_column_expr(&columns, "channel_type", "NULL"); + let thread_id = optional_column_expr(&columns, "thread_id", "NULL"); + let content = optional_column_expr(&columns, "content", "NULL"); + let sql = format!( + "select id, {seq}, {kind}, {timestamp}, {in_reply_to}, {platform_id}, \ + {channel_type}, {thread_id}, {content} from messages_out order by {seq}, id" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(NanoClawMessageRow { + source: "outbound", + id: row.get(0)?, + seq: row.get(1)?, + kind: row.get(2)?, + timestamp: row.get(3)?, + in_reply_to: row.get(4)?, + platform_id: row.get(5)?, + channel_type: row.get(6)?, + thread_id: row.get(7)?, + content: row.get(8)?, + status: None, + trigger: None, + source_session_id: None, + 
on_wake: None, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +#[derive(Debug, Clone)] +struct AstrBotConversationRow { + row_id: i64, + inner_conversation_id: Option, + conversation_id: String, + platform_id: Option, + user_id: Option, + content: String, + title: Option, + persona_id: Option, + token_usage: Option, + created_at: Option, + updated_at: Option, +} + +#[derive(Debug, Clone)] +struct AstrBotPlatformMessageRow { + id: i64, + platform_id: Option, + user_id: Option, + sender_id: Option, + sender_name: Option, + content: Option, + llm_checkpoint_id: Option, + created_at: Option, +} + +fn normalize_astrbot_sqlite( + path: &Path, + context: &ProviderAdapterContext, +) -> Result { + let conn = open_provider_sqlite_readonly(path)?; + let user_version: i64 = conn.pragma_query_value(None, "user_version", |row| row.get(0))?; + let schema_fingerprint = opencode_schema_fingerprint(&conn)?; + let conversations = astrbot_conversations(&conn)?; + let platform_messages = astrbot_platform_messages(&conn)?; + let selected_conversation = astrbot_selected_conversation(&conn).ok().flatten(); + let mut result = ProviderNormalizationResult::default(); + let mut checkpoint_sessions = BTreeMap::::new(); + + for conversation in &conversations { + let provider_session_id = astrbot_provider_session_id(conversation); + let started_at = provider_timestamp_millis(conversation.created_at, context.imported_at); + let ended_at = conversation + .updated_at + .map(|timestamp| provider_timestamp_millis(Some(timestamp), context.imported_at)); + let content = provider_json_text(&conversation.content); + if let Value::Array(items) = &content { + for (index, item) in items.iter().enumerate() { + if let Some(checkpoint) = astrbot_checkpoint_id(item) { + checkpoint_sessions.insert(checkpoint, provider_session_id.clone()); + continue; + } + let role = astrbot_role(item); + let text = astrbot_item_text(item) + .unwrap_or_else(|| "AstrBot conversation item".to_owned()); + 
let event = native_event( + CaptureProvider::AstrBot, + ASTRBOT_SQLITE_SOURCE_FORMAT, + &provider_session_id, + index as u64, + astrbot_item_id(item).map(|id| format!("conversation:{id}")), + format!("conversation:{}:item:{index}", conversation.conversation_id), + EventType::Message, + role, + started_at, + text, + item.clone(), + json!({ + "source": "astrbot_conversations", + "source_format": ASTRBOT_SQLITE_SOURCE_FORMAT, + "conversation_id": conversation.conversation_id, + "inner_conversation_id": conversation.inner_conversation_id, + "item_index": index, + }), + ); + result.captures.push(( + index + 1, + astrbot_capture( + conversation, + &provider_session_id, + started_at, + ended_at, + path, + context, + user_version, + &schema_fingerprint, + selected_conversation.as_deref(), + Some(event), + ), + )); + } + } else { + let text = + provider_value_text(&content).unwrap_or_else(|| "AstrBot conversation".to_owned()); + let event = native_event( + CaptureProvider::AstrBot, + ASTRBOT_SQLITE_SOURCE_FORMAT, + &provider_session_id, + 0, + Some(format!("conversation-row:{}", conversation.row_id)), + format!("conversation:{}:content", conversation.conversation_id), + EventType::Message, + None, + started_at, + text, + content.clone(), + json!({ + "source": "astrbot_conversations", + "source_format": ASTRBOT_SQLITE_SOURCE_FORMAT, + "conversation_id": conversation.conversation_id, + }), + ); + result.captures.push(( + conversation.row_id.max(0) as usize, + astrbot_capture( + conversation, + &provider_session_id, + started_at, + ended_at, + path, + context, + user_version, + &schema_fingerprint, + selected_conversation.as_deref(), + Some(event), + ), + )); + } + } + + let conversations_by_id = conversations + .iter() + .map(|conversation| (astrbot_provider_session_id(conversation), conversation)) + .collect::>(); + for message in platform_messages { + let provider_session_id = message + .llm_checkpoint_id + .as_ref() + .and_then(|checkpoint| 
checkpoint_sessions.get(checkpoint)) + .cloned() + .unwrap_or_else(|| { + format!( + "platform/{}/{}", + message.platform_id.as_deref().unwrap_or("unknown"), + message.user_id.as_deref().unwrap_or("unknown") + ) + }); + let conversation = conversations_by_id.get(&provider_session_id).copied(); + let started_at = conversation + .and_then(|conversation| conversation.created_at) + .map(|timestamp| provider_timestamp_millis(Some(timestamp), context.imported_at)) + .unwrap_or_else(|| provider_timestamp_millis(message.created_at, context.imported_at)); + let content = message + .content + .as_deref() + .map(provider_json_text) + .unwrap_or(Value::Null); + let text = + provider_value_text(&content).unwrap_or_else(|| "AstrBot platform message".to_owned()); + let role = if message.sender_id.as_deref() == message.user_id.as_deref() { + Some(EventRole::User) + } else { + Some(EventRole::Assistant) + }; + let event_index = 1_000_000u64.saturating_add(message.id.max(0) as u64); + let event = native_event( + CaptureProvider::AstrBot, + ASTRBOT_SQLITE_SOURCE_FORMAT, + &provider_session_id, + event_index, + Some(format!("platform-message:{}", message.id)), + format!("platform_message_history:id:{}", message.id), + EventType::Message, + role, + provider_timestamp_millis(message.created_at, started_at), + text, + json!({ + "message_id": message.id, + "platform_id": message.platform_id, + "user_id": message.user_id, + "sender_id": message.sender_id, + "sender_name": message.sender_name, + "content": content, + "llm_checkpoint_id": message.llm_checkpoint_id, + }), + json!({ + "source": "astrbot_platform_message_history", + "source_format": ASTRBOT_SQLITE_SOURCE_FORMAT, + "message_id": message.id, + }), + ); + if let Some(conversation) = conversation { + result.captures.push(( + event_index.min(usize::MAX as u64) as usize, + astrbot_capture( + conversation, + &provider_session_id, + started_at, + conversation.updated_at.map(|timestamp| { + provider_timestamp_millis(Some(timestamp), 
context.imported_at) + }), + path, + context, + user_version, + &schema_fingerprint, + selected_conversation.as_deref(), + Some(event), + ), + )); + } else { + result.captures.push(( + event_index.min(usize::MAX as u64) as usize, + native_provider_capture( + NativeSessionDraft { + provider: CaptureProvider::AstrBot, + source_format: ASTRBOT_SQLITE_SOURCE_FORMAT, + provider_session_id: provider_session_id.clone(), + parent_provider_session_id: None, + root_provider_session_id: None, + external_agent_id: message.platform_id.clone(), + agent_type: AgentType::Primary, + role_hint: Some("platform-history".to_owned()), + is_primary: true, + started_at, + ended_at: None, + cwd: None, + fidelity: Fidelity::Partial, + raw_source_path: path.display().to_string(), + trust: ProviderSourceTrust::ProviderNative, + source_metadata: json!({ + "adapter": ASTRBOT_SQLITE_SOURCE_FORMAT, + "sqlite_user_version": user_version, + "schema_fingerprint": schema_fingerprint, + "support_level": "preview", + }), + session_metadata: json!({ + "source_format": ASTRBOT_SQLITE_SOURCE_FORMAT, + "platform_id": message.platform_id, + "user_id": message.user_id, + "fidelity_gap": "platform history row was not linked to a conversations checkpoint", + }), + }, + context, + Some(event), + ), + )); + } + } + + Ok(result) +} + +fn astrbot_provider_session_id(conversation: &AstrBotConversationRow) -> String { + conversation + .inner_conversation_id + .as_ref() + .or(Some(&conversation.conversation_id)) + .cloned() + .unwrap_or_else(|| format!("conversation-row-{}", conversation.row_id)) +} + +fn astrbot_capture( + conversation: &AstrBotConversationRow, + provider_session_id: &str, + started_at: DateTime, + ended_at: Option>, + path: &Path, + context: &ProviderAdapterContext, + user_version: i64, + schema_fingerprint: &str, + selected_conversation: Option<&str>, + event: Option, +) -> ProviderCaptureEnvelope { + native_provider_capture( + NativeSessionDraft { + provider: CaptureProvider::AstrBot, + 
source_format: ASTRBOT_SQLITE_SOURCE_FORMAT, + provider_session_id: provider_session_id.to_owned(), + parent_provider_session_id: None, + root_provider_session_id: None, + external_agent_id: conversation.platform_id.clone(), + agent_type: AgentType::Primary, + role_hint: Some("llm-context".to_owned()), + is_primary: true, + started_at, + ended_at, + cwd: None, + fidelity: Fidelity::Partial, + raw_source_path: path.display().to_string(), + trust: ProviderSourceTrust::ProviderNative, + source_metadata: json!({ + "adapter": ASTRBOT_SQLITE_SOURCE_FORMAT, + "sqlite_user_version": user_version, + "schema_fingerprint": schema_fingerprint, + "support_level": "preview", + }), + session_metadata: json!({ + "source_format": ASTRBOT_SQLITE_SOURCE_FORMAT, + "conversation_id": conversation.conversation_id, + "inner_conversation_id": conversation.inner_conversation_id, + "platform_id": conversation.platform_id, + "user_id": conversation.user_id, + "title": conversation.title, + "persona_id": conversation.persona_id, + "token_usage": conversation.token_usage.as_deref().map(provider_json_text), + "selected_conversation": selected_conversation, + "fidelity_gap": "AstrBot preview imports local LLM context plus available platform history; it may not be a complete raw IM transcript", + }), + }, + context, + event, + ) +} + +fn astrbot_item_id(item: &Value) -> Option<&str> { + item.get("id") + .or_else(|| item.get("message_id")) + .or_else(|| item.get("checkpoint_id")) + .and_then(Value::as_str) +} + +fn astrbot_checkpoint_id(item: &Value) -> Option { + let item_type = item + .get("type") + .or_else(|| item.get("role")) + .and_then(Value::as_str)?; + if item_type != "_checkpoint" && item_type != "checkpoint" { + return None; + } + astrbot_item_id(item).map(str::to_owned) +} + +fn astrbot_role(item: &Value) -> Option { + item.get("role") + .or_else(|| item.get("type")) + .and_then(Value::as_str) + .map(|role| provider_role(Some(role))) +} + +fn astrbot_item_text(item: &Value) -> Option { 
+ item.get("content") + .or_else(|| item.get("text")) + .or_else(|| item.get("message")) + .and_then(provider_value_text) +} + +fn astrbot_conversations(conn: &Connection) -> Result> { + if !sqlite_table_exists(conn, "conversations")? { + return Err(CaptureError::InvalidPayload( + "AstrBot data_v4.db is missing required conversations table".into(), + )); + } + let columns = sqlite_table_columns(conn, "conversations")?; + ensure_sqlite_table_columns(&columns, "AstrBot conversations table", &["content"])?; + let row_id = if columns.contains("id") { + "id" + } else { + "rowid" + }; + let inner_conversation_id = optional_column_expr(&columns, "inner_conversation_id", "NULL"); + let conversation_id = optional_column_expr( + &columns, + "conversation_id", + optional_column_expr(&columns, "inner_conversation_id", "CAST(rowid AS TEXT)"), + ); + let platform_id = optional_column_expr(&columns, "platform_id", "NULL"); + let user_id = optional_column_expr(&columns, "user_id", "NULL"); + let title = optional_column_expr(&columns, "title", "NULL"); + let persona_id = optional_column_expr(&columns, "persona_id", "NULL"); + let token_usage = optional_column_expr(&columns, "token_usage", "NULL"); + let created_at = optional_column_expr(&columns, "created_at", "NULL"); + let updated_at = optional_column_expr(&columns, "updated_at", "NULL"); + let sql = format!( + "select {row_id}, {inner_conversation_id}, {conversation_id}, {platform_id}, \ + {user_id}, content, {title}, {persona_id}, {token_usage}, {created_at}, \ + {updated_at} from conversations order by {created_at}, {row_id}" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(AstrBotConversationRow { + row_id: row.get(0)?, + inner_conversation_id: row.get(1)?, + conversation_id: row.get::<_, String>(2)?, + platform_id: row.get(3)?, + user_id: row.get(4)?, + content: row.get(5)?, + title: row.get(6)?, + persona_id: row.get(7)?, + token_usage: row.get(8)?, + created_at: row.get(9)?, + 
updated_at: row.get(10)?, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +fn astrbot_platform_messages(conn: &Connection) -> Result> { + if !sqlite_table_exists(conn, "platform_message_history")? { + return Ok(Vec::new()); + } + let columns = sqlite_table_columns(conn, "platform_message_history")?; + let id = if columns.contains("id") { + "id" + } else { + "rowid" + }; + let platform_id = optional_column_expr(&columns, "platform_id", "NULL"); + let user_id = optional_column_expr(&columns, "user_id", "NULL"); + let sender_id = optional_column_expr(&columns, "sender_id", "NULL"); + let sender_name = optional_column_expr(&columns, "sender_name", "NULL"); + let content = optional_column_expr(&columns, "content", "NULL"); + let llm_checkpoint_id = optional_column_expr(&columns, "llm_checkpoint_id", "NULL"); + let created_at = optional_column_expr(&columns, "created_at", "NULL"); + let sql = format!( + "select {id}, {platform_id}, {user_id}, {sender_id}, {sender_name}, \ + {content}, {llm_checkpoint_id}, {created_at} from platform_message_history \ + order by {created_at}, {id}" + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(AstrBotPlatformMessageRow { + id: row.get(0)?, + platform_id: row.get(1)?, + user_id: row.get(2)?, + sender_id: row.get(3)?, + sender_name: row.get(4)?, + content: row.get(5)?, + llm_checkpoint_id: row.get(6)?, + created_at: row.get(7)?, + }) + })?; + rows.collect::, _>>() + .map_err(CaptureError::from) +} + +fn astrbot_selected_conversation(conn: &Connection) -> Result> { + if !sqlite_table_exists(conn, "preferences")? 
{ + return Ok(None); + } + let columns = sqlite_table_columns(conn, "preferences")?; + if !columns.contains("key") || !columns.contains("value") { + return Ok(None); + } + let scope_filter = if columns.contains("scope") { + "AND scope = 'umo'" + } else { + "" + }; + let sql = + format!("select value from preferences where key = 'sel_conv_id' {scope_filter} limit 1"); + let value = conn + .query_row(&sql, [], |row| row.get::<_, Option>(0)) + .optional()? + .flatten(); + Ok(value) +} + fn normalize_opencode_sqlite( path: &Path, context: &ProviderAdapterContext, diff --git a/crates/ctx-history-capture/src/provider_sources.rs b/crates/ctx-history-capture/src/provider_sources.rs index de5e75df..2c3ca174 100644 --- a/crates/ctx-history-capture/src/provider_sources.rs +++ b/crates/ctx-history-capture/src/provider_sources.rs @@ -1,4 +1,8 @@ -use std::path::{Path, PathBuf}; +use std::{ + collections::HashSet, + env, + path::{Path, PathBuf}, +}; use ctx_history_core::{CaptureProvider, ProviderRawRetention, ProviderRedactionBoundary}; @@ -11,9 +15,20 @@ pub enum ProviderSourceKind { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ProviderImportSupport { Native, + Preview, Unsupported, } +impl ProviderImportSupport { + pub fn is_importable(self) -> bool { + matches!(self, Self::Native | Self::Preview) + } + + pub fn is_auto_importable(self) -> bool { + matches!(self, Self::Native) + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ProviderCatalogSupport { Native, @@ -136,6 +151,38 @@ const FACTORY_DROID_DEFAULTS: &[ProviderDefaultLocation] = &[ProviderDefaultLoca source_kind: ProviderSourceKind::NativeHistory, }]; +const OPENCLAW_DEFAULTS: &[ProviderDefaultLocation] = &[ + ProviderDefaultLocation { + path_components: &[".openclaw"], + source_format: "openclaw_session_jsonl_tree", + source_kind: ProviderSourceKind::NativeHistory, + }, + ProviderDefaultLocation { + path_components: &[".clawdbot"], + source_format: "openclaw_session_jsonl_tree", + source_kind: 
ProviderSourceKind::NativeHistory, + }, + ProviderDefaultLocation { + path_components: &[".moltbot"], + source_format: "openclaw_session_jsonl_tree", + source_kind: ProviderSourceKind::NativeHistory, + }, +]; + +const HERMES_DEFAULTS: &[ProviderDefaultLocation] = &[ProviderDefaultLocation { + path_components: &[".hermes", "state.db"], + source_format: "hermes_state_sqlite", + source_kind: ProviderSourceKind::NativeHistory, +}]; + +const NANOCLAW_DEFAULTS: &[ProviderDefaultLocation] = &[]; + +const ASTRBOT_DEFAULTS: &[ProviderDefaultLocation] = &[ProviderDefaultLocation { + path_components: &[".astrbot", "data", "data_v4.db"], + source_format: "astrbot_data_v4_sqlite", + source_kind: ProviderSourceKind::NativeHistory, +}]; + const PROVIDER_SPECS: &[ProviderSourceSpec] = &[ ProviderSourceSpec { provider: CaptureProvider::Codex, @@ -227,6 +274,46 @@ const PROVIDER_SPECS: &[ProviderSourceSpec] = &[ redaction_boundary: ProviderRedactionBoundary::BeforeExport, unsupported_reason: None, }, + ProviderSourceSpec { + provider: CaptureProvider::OpenClaw, + display_name: "OpenClaw", + default_locations: OPENCLAW_DEFAULTS, + import_support: ProviderImportSupport::Native, + catalog_support: ProviderCatalogSupport::None, + raw_retention: ProviderRawRetention::PathReference, + redaction_boundary: ProviderRedactionBoundary::BeforeExport, + unsupported_reason: None, + }, + ProviderSourceSpec { + provider: CaptureProvider::Hermes, + display_name: "Hermes Agent", + default_locations: HERMES_DEFAULTS, + import_support: ProviderImportSupport::Native, + catalog_support: ProviderCatalogSupport::None, + raw_retention: ProviderRawRetention::PathReference, + redaction_boundary: ProviderRedactionBoundary::BeforeExport, + unsupported_reason: None, + }, + ProviderSourceSpec { + provider: CaptureProvider::NanoClaw, + display_name: "NanoClaw", + default_locations: NANOCLAW_DEFAULTS, + import_support: ProviderImportSupport::Preview, + catalog_support: ProviderCatalogSupport::None, + raw_retention: 
ProviderRawRetention::PathReference, + redaction_boundary: ProviderRedactionBoundary::BeforeExport, + unsupported_reason: None, + }, + ProviderSourceSpec { + provider: CaptureProvider::AstrBot, + display_name: "AstrBot", + default_locations: ASTRBOT_DEFAULTS, + import_support: ProviderImportSupport::Preview, + catalog_support: ProviderCatalogSupport::None, + raw_retention: ProviderRawRetention::PathReference, + redaction_boundary: ProviderRedactionBoundary::BeforeExport, + unsupported_reason: None, + }, ]; pub fn provider_source_specs() -> &'static [ProviderSourceSpec] { @@ -238,39 +325,142 @@ pub fn provider_source_spec(provider: CaptureProvider) -> Option<&'static Provid } pub fn discover_provider_sources(home: &Path) -> Vec { - PROVIDER_SPECS - .iter() - .flat_map(|spec| { - spec.default_locations.iter().map(|location| { - let path = location - .path_components - .iter() - .fold(home.to_path_buf(), |path, component| path.join(component)); - provider_source_from_location(spec, location, path) - }) - }) - .collect() + dedupe_sources( + PROVIDER_SPECS + .iter() + .flat_map(|spec| discover_provider_sources_for_spec(home, spec)) + .collect(), + ) } pub fn discover_provider_sources_for_provider( home: &Path, provider: CaptureProvider, ) -> Vec { - PROVIDER_SPECS + dedupe_sources( + PROVIDER_SPECS + .iter() + .filter(|spec| spec.provider == provider) + .flat_map(|spec| discover_provider_sources_for_spec(home, spec)) + .collect(), + ) +} + +fn discover_provider_sources_for_spec( + home: &Path, + spec: &ProviderSourceSpec, +) -> Vec { + let mut sources = spec + .default_locations .iter() - .filter(|spec| spec.provider == provider) - .flat_map(|spec| { - spec.default_locations.iter().map(|location| { - let path = location - .path_components - .iter() - .fold(home.to_path_buf(), |path, component| path.join(component)); - provider_source_from_location(spec, location, path) - }) + .map(|location| { + let path = location + .path_components + .iter() + 
.fold(home.to_path_buf(), |path, component| path.join(component)); + provider_source_from_location(spec, location, path) }) + .collect::>(); + + match spec.provider { + CaptureProvider::OpenClaw => { + if let Some(path) = env_path("OPENCLAW_STATE_DIR") { + sources.push(provider_source_from_parts( + spec, + path, + "openclaw_session_jsonl_tree", + ProviderSourceKind::NativeHistory, + )); + } + } + CaptureProvider::Hermes => { + if let Some(path) = env_path("HERMES_HOME") { + sources.push(provider_source_from_parts( + spec, + path.join("state.db"), + "hermes_state_sqlite", + ProviderSourceKind::NativeHistory, + )); + } + } + CaptureProvider::NanoClaw => { + for root in current_dir_ancestors_with(|candidate| { + candidate.join("data").join("v2.db").is_file() + && candidate.join("data").join("v2-sessions").is_dir() + }) { + sources.push(provider_source_from_parts( + spec, + root, + "nanoclaw_project", + ProviderSourceKind::NativeHistory, + )); + } + } + CaptureProvider::AstrBot => { + if let Some(path) = env_path("ASTRBOT_ROOT") { + sources.push(provider_source_from_parts( + spec, + path.join("data").join("data_v4.db"), + "astrbot_data_v4_sqlite", + ProviderSourceKind::NativeHistory, + )); + } + for root in current_dir_ancestors_with(|candidate| { + candidate.join("data").join("data_v4.db").is_file() + }) { + sources.push(provider_source_from_parts( + spec, + root.join("data").join("data_v4.db"), + "astrbot_data_v4_sqlite", + ProviderSourceKind::NativeHistory, + )); + } + } + _ => {} + } + + sources +} + +fn env_path(name: &str) -> Option { + env::var_os(name) + .filter(|value| !value.is_empty()) + .map(PathBuf::from) +} + +fn current_dir_ancestors_with(matches: impl Fn(&Path) -> bool) -> Vec { + let Ok(current_dir) = env::current_dir() else { + return Vec::new(); + }; + current_dir + .ancestors() + .filter(|candidate| matches(candidate)) + .map(Path::to_path_buf) + .collect() +} + +fn dedupe_sources(sources: Vec) -> Vec { + let mut seen = HashSet::new(); + sources + 
.into_iter() + .filter(|source| seen.insert((source.provider, source.path.clone(), source.source_format))) .collect() } +fn provider_source_from_parts( + spec: &ProviderSourceSpec, + path: PathBuf, + source_format: &'static str, + source_kind: ProviderSourceKind, +) -> ProviderSource { + let location = ProviderDefaultLocation { + path_components: &[], + source_format, + source_kind, + }; + provider_source_from_location(spec, &location, path) +} + pub fn provider_source_for_path(provider: CaptureProvider, path: PathBuf) -> ProviderSource { let unknown_spec = ProviderSourceSpec { provider, @@ -307,10 +497,20 @@ pub fn provider_source_for_path(provider: CaptureProvider, path: PathBuf) -> Pro CaptureProvider::Cursor => "cursor_agent_transcript_jsonl_tree", CaptureProvider::CopilotCli => "copilot_cli_session_events_jsonl", CaptureProvider::FactoryAiDroid => "factory_ai_droid_sessions_jsonl", + CaptureProvider::OpenClaw => "openclaw_session_jsonl_tree", + CaptureProvider::Hermes => "hermes_state_sqlite", + CaptureProvider::NanoClaw => { + if path.file_name().and_then(|name| name.to_str()) == Some("v2.db") { + "nanoclaw_project" + } else { + "nanoclaw_project" + } + } + CaptureProvider::AstrBot => "astrbot_data_v4_sqlite", _ => "unsupported", }; let explicit_import_support = spec.import_support; - let source_kind = if matches!(explicit_import_support, ProviderImportSupport::Native) { + let source_kind = if explicit_import_support.is_importable() { ProviderSourceKind::NativeHistory } else { ProviderSourceKind::DetectionOnly @@ -397,6 +597,14 @@ fn empty_source_reason(provider: CaptureProvider) -> Option<&'static str> { CaptureProvider::FactoryAiDroid => { Some("path exists but no Factory AI Droid session JSONL files were found") } + CaptureProvider::OpenClaw => { + Some("path exists but no OpenClaw agent session JSONL files were found") + } + CaptureProvider::Hermes => Some("path exists but no Hermes state.db file was found"), + CaptureProvider::NanoClaw => { + Some("path 
exists but no NanoClaw data/v2.db and data/v2-sessions store was found") + } + CaptureProvider::AstrBot => Some("path exists but no AstrBot data/data_v4.db was found"), _ => None, } } @@ -424,6 +632,9 @@ fn unknown_source_reason(provider: CaptureProvider) -> Option<&'static str> { CaptureProvider::FactoryAiDroid => { Some("path exists but the Factory AI Droid transcript probe hit its scan budget") } + CaptureProvider::OpenClaw => { + Some("path exists but the OpenClaw transcript probe hit its scan budget") + } _ => None, } } @@ -441,6 +652,10 @@ fn default_location_import_probe( CaptureProvider::Pi => BoundedProbe::from_bool(path.is_file()), CaptureProvider::OpenCode => BoundedProbe::from_bool(path.is_file()), CaptureProvider::Claude => has_jsonl_file_under_matching(path, 10_000, |_| true), + CaptureProvider::OpenClaw => has_openclaw_session_jsonl(path, 10_000), + CaptureProvider::Hermes => BoundedProbe::from_bool(path.is_file()), + CaptureProvider::NanoClaw => has_nanoclaw_project(path), + CaptureProvider::AstrBot => BoundedProbe::from_bool(path.is_file()), CaptureProvider::Antigravity => has_jsonl_file_under_matching(path, 10_000, |candidate| { matches!( candidate.file_name().and_then(|name| name.to_str()), @@ -467,6 +682,29 @@ fn has_gemini_chat_jsonl(root: &Path, max_entries: usize) -> BoundedProbe { has_jsonl_file_under_matching(&tmp, max_entries, |path| path_has_component(path, "chats")) } +fn has_openclaw_session_jsonl(root: &Path, max_entries: usize) -> BoundedProbe { + if root.is_file() { + return BoundedProbe::from_bool( + root.extension().and_then(|ext| ext.to_str()) == Some("jsonl"), + ); + } + let agents = root.join("agents"); + if agents.is_dir() { + return has_jsonl_file_under_matching(&agents, max_entries, |path| { + path_has_component(path, "sessions") + }); + } + has_jsonl_file_under_matching(root, max_entries, |path| { + path_has_component(path, "sessions") + }) +} + +fn has_nanoclaw_project(root: &Path) -> BoundedProbe { + 
BoundedProbe::from_bool( + root.join("data").join("v2.db").is_file() && root.join("data").join("v2-sessions").is_dir(), + ) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum BoundedProbe { Found, @@ -656,6 +894,45 @@ mod tests { CaptureProvider::CopilotCli, ProviderSourceStatus::Available, ); + + let openclaw = temp.path().join(".openclaw/agents/personal/sessions"); + std::fs::create_dir_all(&openclaw).unwrap(); + assert_source_status( + temp.path(), + CaptureProvider::OpenClaw, + ProviderSourceStatus::Empty, + ); + std::fs::write(openclaw.join("session.jsonl"), "{}\n").unwrap(); + assert_source_status( + temp.path(), + CaptureProvider::OpenClaw, + ProviderSourceStatus::Available, + ); + + let hermes = temp.path().join(".hermes"); + std::fs::create_dir_all(&hermes).unwrap(); + std::fs::write(hermes.join("state.db"), b"sqlite fixture marker").unwrap(); + let hermes_source = discover_provider_sources(temp.path()) + .into_iter() + .find(|source| source.provider == CaptureProvider::Hermes) + .unwrap(); + assert_eq!(hermes_source.status, ProviderSourceStatus::Available); + assert_eq!(hermes_source.import_support, ProviderImportSupport::Native); + + let astrbot = temp.path().join(".astrbot/data"); + std::fs::create_dir_all(&astrbot).unwrap(); + std::fs::write(astrbot.join("data_v4.db"), b"sqlite fixture marker").unwrap(); + let astrbot_source = discover_provider_sources(temp.path()) + .into_iter() + .find(|source| source.provider == CaptureProvider::AstrBot) + .unwrap(); + assert_eq!(astrbot_source.status, ProviderSourceStatus::Available); + assert_eq!( + astrbot_source.import_support, + ProviderImportSupport::Preview + ); + assert!(astrbot_source.import_support.is_importable()); + assert!(!astrbot_source.import_support.is_auto_importable()); } #[test] diff --git a/crates/ctx-history-core/src/lib.rs b/crates/ctx-history-core/src/lib.rs index 8e58d271..cac38f6a 100644 --- a/crates/ctx-history-core/src/lib.rs +++ b/crates/ctx-history-core/src/lib.rs @@ -176,6 +176,10 
@@ text_enum! { Cursor => "cursor", CopilotCli => "copilot_cli", FactoryAiDroid => "factory_ai_droid", + OpenClaw => "openclaw", + Hermes => "hermes", + NanoClaw => "nanoclaw", + AstrBot => "astrbot", Shell => "shell", Git => "git", Jj => "jj", diff --git a/crates/ctx-history-core/src/provider.rs b/crates/ctx-history-core/src/provider.rs index c849292e..5010d3b6 100644 --- a/crates/ctx-history-core/src/provider.rs +++ b/crates/ctx-history-core/src/provider.rs @@ -52,6 +52,13 @@ pub enum ProviderId { FactoryAiDroid, FactoryDroid, DroidFactoryAi, + #[serde(rename = "openclaw", alias = "open_claw")] + OpenClaw, + Hermes, + #[serde(rename = "nanoclaw", alias = "nano_claw")] + NanoClaw, + #[serde(rename = "astrbot", alias = "astr_bot")] + AstrBot, Goose, #[serde(rename = "openhands")] OpenHands, @@ -69,7 +76,7 @@ pub enum ProviderId { } impl ProviderId { - pub const ALL: [Self; 27] = [ + pub const ALL: [Self; 31] = [ Self::Codex, Self::ClaudeCode, Self::ClaudeCliCrp, @@ -84,6 +91,10 @@ impl ProviderId { Self::FactoryAiDroid, Self::FactoryDroid, Self::DroidFactoryAi, + Self::OpenClaw, + Self::Hermes, + Self::NanoClaw, + Self::AstrBot, Self::Goose, Self::OpenHands, Self::Cagent, @@ -393,13 +404,17 @@ mod tests { .collect::>(); let expected = [ ProviderId::AntigravityCli, + ProviderId::AstrBot, ProviderId::ClaudeCode, ProviderId::Codex, ProviderId::Cursor, ProviderId::CopilotCli, ProviderId::FactoryAiDroid, ProviderId::GeminiCli, + ProviderId::Hermes, + ProviderId::NanoClaw, ProviderId::OpenCode, + ProviderId::OpenClaw, ProviderId::Pi, ] .into_iter() diff --git a/crates/ctx-history-store/src/lib.rs b/crates/ctx-history-store/src/lib.rs index b832fadf..035983ff 100644 --- a/crates/ctx-history-store/src/lib.rs +++ b/crates/ctx-history-store/src/lib.rs @@ -93,7 +93,7 @@ pub enum StoreError { pub type Result = std::result::Result; -const SCHEMA_VERSION: i64 = 14; +const SCHEMA_VERSION: i64 = 15; const BUSY_TIMEOUT: Duration = Duration::from_millis(30_000); const OBJECTS_DIR: 
&str = "objects"; const SPOOL_DIR: &str = "spool"; @@ -471,7 +471,7 @@ const CREATE_TABLES_SQL: &str = r#" CREATE TABLE IF NOT EXISTS capture_sources ( id TEXT PRIMARY KEY NOT NULL, kind TEXT NOT NULL CHECK (kind IN ('provider_import', 'provider_hook', 'direct_cli', 'manual')), - provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'shell', 'git', 'jj', 'gh', 'unknown')), + provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'openclaw', 'hermes', 'nanoclaw', 'astrbot', 'shell', 'git', 'jj', 'gh', 'unknown')), machine_id TEXT NOT NULL, process_id INTEGER, cwd TEXT, @@ -488,7 +488,7 @@ CREATE TABLE IF NOT EXISTS capture_sources ( CREATE TABLE IF NOT EXISTS catalog_sessions ( source_path TEXT PRIMARY KEY NOT NULL, - provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'shell', 'git', 'jj', 'gh', 'unknown')), + provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'openclaw', 'hermes', 'nanoclaw', 'astrbot', 'shell', 'git', 'jj', 'gh', 'unknown')), source_format TEXT NOT NULL, source_root TEXT NOT NULL, external_session_id TEXT, @@ -517,7 +517,7 @@ CREATE TABLE IF NOT EXISTS catalog_sessions ( ); CREATE TABLE IF NOT EXISTS source_import_files ( - provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'shell', 'git', 'jj', 'gh', 'unknown')), + provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'openclaw', 'hermes', 'nanoclaw', 'astrbot', 'shell', 'git', 'jj', 'gh', 'unknown')), source_format TEXT 
NOT NULL, source_root TEXT NOT NULL, source_path TEXT NOT NULL, @@ -1385,6 +1385,9 @@ impl Store { if user_version < 14 { migrate_to_v14(&self.conn)?; } + if user_version < 15 { + migrate_to_v15(&self.conn)?; + } create_fts_tables_if_supported(&self.conn)?; Ok(()) } @@ -4875,6 +4878,43 @@ fn migrate_to_v14(conn: &Connection) -> Result<()> { } } +fn migrate_to_v15(conn: &Connection) -> Result<()> { + let foreign_keys_enabled: i64 = conn.query_row("PRAGMA foreign_keys", [], |row| row.get(0))?; + conn.execute_batch("PRAGMA foreign_keys = OFF; BEGIN IMMEDIATE;")?; + let migration = (|| -> Result<()> { + conn.execute_batch(CREATE_TABLES_SQL)?; + if stable_sql_views_exist(conn)? { + drop_stable_sql_views(conn)?; + } + rebuild_capture_sources_provider_check(conn)?; + rebuild_catalog_sessions_provider_check(conn)?; + rebuild_source_import_files_provider_check(conn)?; + conn.execute_batch(INDEXES_SQL)?; + create_stable_sql_views(conn)?; + conn.execute_batch("PRAGMA user_version = 15;")?; + Ok(()) + })(); + + match migration { + Ok(()) => { + conn.execute_batch("COMMIT;")?; + if foreign_keys_enabled != 0 { + conn.execute_batch("PRAGMA foreign_keys = ON;")?; + } + Ok(()) + } + Err(err) => { + if let Err(rollback_err) = conn.execute_batch("ROLLBACK;") { + return Err(StoreError::Sql(rollback_err)); + } + if foreign_keys_enabled != 0 { + conn.execute_batch("PRAGMA foreign_keys = ON;")?; + } + Err(err) + } + } +} + fn create_stable_sql_views(conn: &Connection) -> Result<()> { conn.execute_batch(STABLE_SQL_VIEWS_SQL)?; Ok(()) @@ -5041,7 +5081,7 @@ fn rebuild_capture_sources_provider_check(conn: &Connection) -> Result<()> { CREATE TABLE capture_sources_new ( id TEXT PRIMARY KEY NOT NULL, kind TEXT NOT NULL CHECK (kind IN ('provider_import', 'provider_hook', 'direct_cli', 'manual')), - provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'shell', 'git', 'jj', 'gh', 'unknown')), + 
provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'openclaw', 'hermes', 'nanoclaw', 'astrbot', 'shell', 'git', 'jj', 'gh', 'unknown')), machine_id TEXT NOT NULL, process_id INTEGER, cwd TEXT, @@ -5089,7 +5129,7 @@ fn rebuild_catalog_sessions_provider_check(conn: &Connection) -> Result<()> { DROP TABLE IF EXISTS catalog_sessions_new; CREATE TABLE catalog_sessions_new ( source_path TEXT PRIMARY KEY NOT NULL, - provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'shell', 'git', 'jj', 'gh', 'unknown')), + provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'openclaw', 'hermes', 'nanoclaw', 'astrbot', 'shell', 'git', 'jj', 'gh', 'unknown')), source_format TEXT NOT NULL, source_root TEXT NOT NULL, external_session_id TEXT, @@ -5130,6 +5170,50 @@ fn rebuild_catalog_sessions_provider_check(conn: &Connection) -> Result<()> { Ok(()) } +fn rebuild_source_import_files_provider_check(conn: &Connection) -> Result<()> { + if !table_exists(conn, "source_import_files")? 
{ + conn.execute_batch(CREATE_TABLES_SQL)?; + return Ok(()); + } + + let recreate_views = stable_sql_views_exist(conn)?; + if recreate_views { + drop_stable_sql_views(conn)?; + } + conn.execute_batch( + r#" + DROP TABLE IF EXISTS source_import_files_new; + CREATE TABLE source_import_files_new ( + provider TEXT NOT NULL CHECK (provider IN ('codex', 'claude', 'pi', 'opencode', 'antigravity', 'gemini', 'cursor', 'copilot_cli', 'factory_ai_droid', 'openclaw', 'hermes', 'nanoclaw', 'astrbot', 'shell', 'git', 'jj', 'gh', 'unknown')), + source_format TEXT NOT NULL, + source_root TEXT NOT NULL, + source_path TEXT NOT NULL, + file_size_bytes INTEGER NOT NULL, + file_modified_at_ms INTEGER NOT NULL, + observed_at_ms INTEGER NOT NULL, + is_stale INTEGER NOT NULL DEFAULT 0, + indexed_at_ms INTEGER, + indexed_file_size_bytes INTEGER, + indexed_file_modified_at_ms INTEGER, + indexed_status TEXT NOT NULL DEFAULT 'pending' CHECK (indexed_status IN ('pending', 'indexed', 'failed')), + indexed_error TEXT, + metadata_json TEXT NOT NULL DEFAULT '{}', + PRIMARY KEY (provider, source_root, source_path) + ); + INSERT INTO source_import_files_new + (provider, source_format, source_root, source_path, file_size_bytes, file_modified_at_ms, observed_at_ms, is_stale, indexed_at_ms, indexed_file_size_bytes, indexed_file_modified_at_ms, indexed_status, indexed_error, metadata_json) + SELECT provider, source_format, source_root, source_path, file_size_bytes, file_modified_at_ms, observed_at_ms, is_stale, indexed_at_ms, indexed_file_size_bytes, indexed_file_modified_at_ms, indexed_status, indexed_error, metadata_json + FROM source_import_files; + DROP TABLE source_import_files; + ALTER TABLE source_import_files_new RENAME TO source_import_files; + "#, + )?; + if recreate_views { + create_stable_sql_views(conn)?; + } + Ok(()) +} + fn create_fts_tables_if_supported(conn: &Connection) -> Result<()> { match conn.execute_batch(FTS_TABLES_SQL) { Ok(()) => Ok(()), @@ -8591,4 +8675,92 @@ mod catalog_tests 
{ assert_eq!(source_count, 2); assert_eq!(catalog_count, 2); } + + #[test] + fn schema_v15_rebuilds_provider_checks_with_referenced_sources_and_indexes() { + let temp = tempdir(); + let path = temp.path().join("work.sqlite"); + let source_id = new_id(); + let session_id; + let event_id; + { + let store = Store::open(&path).unwrap(); + let source = CaptureSource { + id: source_id, + descriptor: CaptureSourceDescriptor { + kind: ctx_history_core::CaptureSourceKind::ProviderImport, + provider: CaptureProvider::Codex, + machine_id: "test-machine".to_owned(), + process_id: None, + cwd: Some("/repo".to_owned()), + raw_source_path: Some("/home/user/.codex/sessions/session.jsonl".to_owned()), + external_session_id: Some("codex-session-1".to_owned()), + }, + started_at: fixed_time(), + ended_at: None, + sync: sync_metadata(), + }; + store.upsert_capture_source(&source).unwrap(); + + let mut session = imported_session("codex-session-1"); + session.capture_source_id = Some(source_id); + session_id = session.id; + store.upsert_session(&session).unwrap(); + + let event = Event { + id: new_id(), + seq: 0, + history_record_id: None, + session_id: Some(session_id), + run_id: None, + event_type: EventType::Message, + role: Some(EventRole::User), + occurred_at: fixed_time(), + capture_source_id: Some(source_id), + payload: serde_json::json!({"text": "migration source reference"}), + payload_blob_id: None, + dedupe_key: None, + redaction_state: RedactionState::SafePreview, + sync: sync_metadata(), + }; + event_id = event.id; + store.upsert_event(&event).unwrap(); + store + .conn + .execute_batch("PRAGMA user_version = 14;") + .unwrap(); + } + + let store = Store::open(&path).unwrap(); + let version: i64 = store + .conn + .query_row("PRAGMA user_version", [], |row| row.get(0)) + .unwrap(); + assert_eq!(version, SCHEMA_VERSION); + let source_refs: i64 = store + .conn + .query_row( + "SELECT COUNT(*) FROM sessions s JOIN events e ON e.session_id = s.id \ + WHERE s.id = ?1 AND e.id = ?2 
AND s.capture_source_id = ?3 AND e.capture_source_id = ?3", + params![session_id.to_string(), event_id.to_string(), source_id.to_string()], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(source_refs, 1); + for index in [ + "idx_capture_sources_external_session_id", + "idx_catalog_sessions_provider_source_root_import", + "idx_source_import_files_provider_source_root_import", + ] { + let exists: i64 = store + .conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type = 'index' AND name = ?1", + [index], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(exists, 1, "missing rebuilt index {index}"); + } + } } diff --git a/docs/cli-reference.md b/docs/cli-reference.md index b29b9a57..b0085922 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -28,8 +28,8 @@ ctx doctor --json - `setup` creates the data root, opens or creates `work.sqlite`, writes `config.toml` when needed, discovers known provider history locations, - catalogs Codex sessions, imports all discovered importable sources, optimizes - the local search index, and prints next steps. + catalogs Codex sessions, imports all discovered auto-importable sources, + optimizes the local search index, and prints next steps. - `setup --catalog-only` stops after discovery/cataloging. It is useful for fast inventory or troubleshooting, but it does not make history searchable. - `status` reports the ctx root, database path, config path, indexed item @@ -57,8 +57,10 @@ machine. Current rows include: - Codex session trees at `~/.codex/sessions`; - Codex prompt history at `~/.codex/history.jsonl`; - Pi session JSONL at `~/.pi/sessions.jsonl`; -- native rows for supported Antigravity, Claude, OpenCode, Gemini, Cursor, - Copilot CLI, and Factory AI Droid local history locations. 
+- native rows for supported Antigravity, Claude, OpenCode, OpenClaw, Hermes, + Gemini, Cursor, Copilot CLI, and Factory AI Droid local history locations; +- preview rows for NanoClaw project roots and AstrBot SQLite history when those + paths are discoverable. Each JSON row includes `provider`, `path`, `exists`, `source_format`, `status`, `import_support`, `native_import`, `importable`, `raw_retention`, and any @@ -75,6 +77,10 @@ ctx import --provider pi ctx import --provider antigravity ctx import --provider claude ctx import --provider opencode +ctx import --provider openclaw +ctx import --provider hermes +ctx import --provider nanoclaw --path /path/to/nanoclaw-project +ctx import --provider astrbot --path /path/to/data/data_v4.db ctx import --provider gemini ctx import --provider cursor ctx import --provider copilot-cli @@ -95,11 +101,17 @@ citations, and import totals to SQLite. Import selection rules: -- with no arguments or with `--all`, import all discovered sources that exist; +- with no arguments or with `--all`, import all discovered auto-importable + sources that exist; - with `--provider`, import discovered sources for that provider; - with `--path`, import exactly that path; - with `--path` and no provider, parse the path as Codex format. +Preview providers such as NanoClaw and AstrBot are not included in `--all` or +pre-search refresh. Import them explicitly with `--provider` when discovery +finds the desired source, or add `--path` to target a specific source, then +search the existing index. + The current `--resume` flag is an idempotent-rescan mode marker. JSON reports `resume: true` and `resume_mode: "idempotent_rescan"`, but provider-native cursor resume is not a universal contract yet. @@ -166,7 +178,7 @@ results without a foreground catch-up scan; use `--refresh strict` or `ctx import --all` when you need a full catch-up before querying. 
Use `--refresh off` to search the existing index without refreshing, or `--refresh strict` to fail when the pre-search refresh cannot run or import -successfully. Search-only sources without native import support are searched +successfully. Preview native sources such as NanoClaw and AstrBot are searched from the existing index until they are explicitly imported through a supported path. The query argument is optional so file or metadata filters can drive a search. Default results are session-diverse: ctx @@ -201,7 +213,7 @@ optimized for agent reading; use `--verbose` for expanded text diagnostics. Filters: -- `--provider codex|pi|claude|opencode|antigravity|gemini|cursor|copilot-cli|factory-ai-droid`; +- `--provider codex|pi|claude|opencode|openclaw|hermes|nanoclaw|astrbot|antigravity|gemini|cursor|copilot-cli|factory-ai-droid`; - `--workspace `, substring match over stored workspace, cwd, source path, or repository-name text; - `--since d`, for example `2026-06-01T00:00:00Z` or `30d`; @@ -219,8 +231,9 @@ Filters: - `--include-current-session`. CLI provider filters use kebab-case names. JSON output and stable SQL views use -provider IDs in ctx output; multiword IDs may be snake_case, such as `copilot_cli` or -`factory_ai_droid`. +provider IDs in ctx output; multiword IDs may be snake_case, such as +`copilot_cli` or `factory_ai_droid`, while compact IDs such as `openclaw`, +`nanoclaw`, and `astrbot` stay compact. `search` reads discovered native provider files for pre-search refresh plus SQLite, and may write newly discovered native provider history into the local diff --git a/docs/first-10-minutes.md b/docs/first-10-minutes.md index 0eddca2d..daf17ea1 100644 --- a/docs/first-10-minutes.md +++ b/docs/first-10-minutes.md @@ -26,8 +26,9 @@ ctx status --json ``` `ctx setup` creates local storage, discovers supported provider history, -catalogs Codex sessions, imports discovered sources, and optimizes the local -search index. The default root is `~/.ctx`. 
Use a temporary root for trials: +catalogs Codex sessions, imports discovered auto-importable sources, and +optimizes the local search index. The default root is `~/.ctx`. Use a temporary +root for trials: ```bash ctx --data-root /tmp/ctx-first-10 setup @@ -41,11 +42,13 @@ ctx sources --json ``` Expect rows for supported local import providers such as Codex, Pi, -Antigravity, Claude, OpenCode, Gemini, Cursor, Copilot CLI, and Factory AI -Droid. A row with `exists: false` means ctx knows the default path but did not -find local history there. A JSON row with `status: "empty"` means the path -exists but no provider-specific transcript files were found. A row with -`status: "unknown"` means the bounded transcript probe hit its scan budget. +Antigravity, Claude, OpenCode, OpenClaw, Hermes, Gemini, Cursor, Copilot CLI, +and Factory AI Droid. NanoClaw and AstrBot can appear as preview rows when ctx +can discover their local project or SQLite paths. A row with `exists: false` +means ctx knows the default path but did not find local history there. A JSON +row with `status: "empty"` means the path exists but no provider-specific +transcript files were found. A row with `status: "unknown"` means the bounded +transcript probe hit its scan budget. ## 4. Re-Run Or Target Imports @@ -53,15 +56,24 @@ exists but no provider-specific transcript files were found. A row with ctx import --all ``` -Setup already imports discovered sources. Use `ctx import` when you want to -repair, re-run, resume, or pass an explicit path: +Setup already imports discovered auto-importable sources. 
Use `ctx import` when +you want to repair, re-run, resume, or pass an explicit path: ```bash ctx import --provider codex --path ~/.codex/sessions ctx import --provider pi --path ~/.pi/sessions.jsonl ctx import --provider cursor --path ~/.cursor/projects +ctx import --provider hermes --path ~/.hermes/state.db +ctx import --provider nanoclaw --path /path/to/nanoclaw-project +ctx import --provider astrbot --path /path/to/data/data_v4.db ``` +Preview providers such as NanoClaw and AstrBot are explicit-import only. Use +`ctx import --provider nanoclaw` or `ctx import --provider astrbot` when +discovery finds the desired source, or add `--path` to target a specific source. +They are not included in `ctx import --all` or the default pre-search refresh +until their storage contracts are promoted. + After upgrading from an older ctx version, the first refresh or import can re-read previously indexed provider transcripts once so the local index includes current touched-file metadata and unredacted local transcript text. diff --git a/docs/limitations.md b/docs/limitations.md index 7a637dcb..e7726f19 100644 --- a/docs/limitations.md +++ b/docs/limitations.md @@ -9,9 +9,14 @@ shipped. - Codex local import is supported for documented local JSONL sources. - Pi local import is supported only when a matching local `sessions.jsonl` file exists. -- Antigravity, Claude, OpenCode, Gemini, Cursor, Copilot CLI, and Factory AI - Droid local import is supported only when their documented local history - paths exist and match the supported native formats in the provider matrix. +- Antigravity, Claude, OpenCode, OpenClaw, Hermes, Gemini, Cursor, Copilot CLI, + and Factory AI Droid local import is supported only when their documented + local history paths exist and match the supported native formats in the + provider matrix. +- NanoClaw and AstrBot local imports are preview/manual-path only.
They are + not included in `ctx import --all` or pre-search refresh, and AstrBot imports + local LLM context plus available platform history rather than guaranteeing a + complete raw IM transcript. - Unknown provider formats should not be parsed optimistically. ## Import Semantics diff --git a/docs/provider-support-matrix.json b/docs/provider-support-matrix.json index 1e2c928c..a11a5ff8 100644 --- a/docs/provider-support-matrix.json +++ b/docs/provider-support-matrix.json @@ -215,6 +215,224 @@ "crates/ctx-history-capture/src/lib.rs" ] }, + { + "id": "openclaw", + "display_name": "OpenClaw", + "priority": "p1", + "status": "local_import_when_supported", + "capture_provider": "openclaw", + "implemented_paths": [ + { + "kind": "native_import", + "source_format": "openclaw_session_jsonl_tree", + "fidelity": "partial", + "proof": [ + "ctx sources", + "ctx import --provider openclaw" + ], + "notes": [ + "Reads OpenClaw session JSONL transcripts under OPENCLAW_STATE_DIR, ~/.openclaw, and verified legacy ~/.clawdbot or ~/.moltbot homes.", + "This is beta because upstream exposes newer session helpers and warns plugins not to depend on legacy sessions.json shape." + ] + } + ], + "history_locations": [ + "OPENCLAW_STATE_DIR/agents/*/sessions/*.jsonl", + "~/.openclaw/agents/*/sessions/*.jsonl", + "~/.clawdbot/agents/*/sessions/*.jsonl", + "~/.moltbot/agents/*/sessions/*.jsonl" + ], + "imports_existing_history": true, + "captures_new_runs_passively": false, + "child_sessions_supported": false, + "fidelity": { + "user_prompts": true, + "assistant_messages": true, + "tool_calls": false, + "tool_output": true, + "command_output": false, + "files_touched": false, + "artifacts": false, + "model_identity": true, + "costs": false, + "token_usage": false, + "parent_child_session_edges": false + }, + "redaction_notes": [ + "Imports are local/private and preserve source paths for citations." 
+ ], + "blockers": [ + "Full GA needs confirmation that the local transcript contract remains stable across newer OpenClaw session APIs." + ], + "public_docs": "docs/providers.md", + "fixture_paths": [], + "tests": [ + "crates/ctx-cli/tests/cli.rs", + "crates/ctx-history-capture/src/lib.rs" + ] + }, + { + "id": "hermes", + "display_name": "Hermes Agent", + "priority": "p1", + "status": "local_import_when_supported", + "capture_provider": "hermes", + "implemented_paths": [ + { + "kind": "native_import", + "source_format": "hermes_state_sqlite", + "fidelity": "imported", + "proof": [ + "ctx sources", + "ctx import --provider hermes" + ], + "notes": [ + "Reads Hermes Agent SQLite history from HERMES_HOME/state.db or ~/.hermes/state.db using a read-only SQLite connection.", + "Preserves sessions/messages rows, parent session IDs, model/config metadata, tool call metadata, token fields, and billing fields when present." + ] + } + ], + "history_locations": [ + "HERMES_HOME/state.db", + "~/.hermes/state.db" + ], + "imports_existing_history": true, + "captures_new_runs_passively": false, + "child_sessions_supported": true, + "fidelity": { + "user_prompts": true, + "assistant_messages": true, + "tool_calls": true, + "tool_output": true, + "command_output": false, + "files_touched": false, + "artifacts": false, + "model_identity": true, + "costs": true, + "token_usage": true, + "parent_child_session_edges": true + }, + "redaction_notes": [ + "Reads the provider SQLite database read-only; imported text remains in the local ctx index." 
+ ], + "blockers": [], + "public_docs": "docs/providers.md", + "fixture_paths": [], + "tests": [ + "crates/ctx-cli/tests/cli.rs", + "crates/ctx-history-capture/src/lib.rs" + ] + }, + { + "id": "nanoclaw", + "display_name": "NanoClaw", + "priority": "p1", + "status": "local_import_when_supported", + "capture_provider": "nanoclaw", + "implemented_paths": [ + { + "kind": "native_import", + "source_format": "nanoclaw_project", + "fidelity": "partial", + "proof": [ + "ctx sources", + "ctx import --provider nanoclaw --path <project-root>" + ], + "notes": [ + "Preview importer for a NanoClaw project root containing data/v2.db and data/v2-sessions/*/*/{inbound.db,outbound.db}.", + "Project roots are discovered from the current working directory and ancestors, but preview sources are not imported by ctx import --all or pre-search refresh." + ] + } + ], + "history_locations": [ + "<project-root>/data/v2.db", + "<project-root>/data/v2-sessions/*/*/inbound.db", + "<project-root>/data/v2-sessions/*/*/outbound.db" + ], + "imports_existing_history": true, + "captures_new_runs_passively": false, + "child_sessions_supported": false, + "fidelity": { + "user_prompts": true, + "assistant_messages": true, + "tool_calls": false, + "tool_output": false, + "command_output": false, + "files_touched": false, + "artifacts": false, + "model_identity": false, + "costs": false, + "token_usage": false, + "parent_child_session_edges": false + }, + "redaction_notes": [ + "Manual preview imports may include IM/chat channel identifiers stored in NanoClaw project databases." + ], + "blockers": [ + "Automatic refresh is intentionally disabled until more real-world NanoClaw project layout drift is validated." 
+ ], + "public_docs": "docs/providers.md", + "fixture_paths": [], + "tests": [ + "crates/ctx-cli/tests/cli.rs", + "crates/ctx-history-capture/src/lib.rs" + ] + }, + { + "id": "astrbot", + "display_name": "AstrBot", + "priority": "p1", + "status": "local_import_when_supported", + "capture_provider": "astrbot", + "implemented_paths": [ + { + "kind": "native_import", + "source_format": "astrbot_data_v4_sqlite", + "fidelity": "partial", + "proof": [ + "ctx sources", + "ctx import --provider astrbot --path <path>" + ], + "notes": [ + "Preview importer for AstrBot data/data_v4.db conversation context plus available platform_message_history rows.", + "This is not a full all-channel IM transcript guarantee; connector/plugin histories may live outside the supported tables." + ] + } + ], + "history_locations": [ + "ASTRBOT_ROOT/data/data_v4.db", + "~/.astrbot/data/data_v4.db", + "<project>/data/data_v4.db" + ], + "imports_existing_history": true, + "captures_new_runs_passively": false, + "child_sessions_supported": false, + "fidelity": { + "user_prompts": true, + "assistant_messages": true, + "tool_calls": false, + "tool_output": false, + "command_output": false, + "files_touched": false, + "artifacts": false, + "model_identity": false, + "costs": false, + "token_usage": true, + "parent_child_session_edges": false + }, + "redaction_notes": [ + "Manual preview imports may include IM platform IDs, user IDs, sender names, and local chat text." + ], + "blockers": [ + "Full GA needs stronger upstream guarantees for complete per-platform raw message retention."
+ ], + "public_docs": "docs/providers.md", + "fixture_paths": [], + "tests": [ + "crates/ctx-cli/tests/cli.rs", + "crates/ctx-history-capture/src/lib.rs" + ] + }, { "id": "antigravity_cli", "display_name": "Antigravity", diff --git a/docs/provider-support.md b/docs/provider-support.md index ecce897a..5f9ba1b3 100644 --- a/docs/provider-support.md +++ b/docs/provider-support.md @@ -26,12 +26,22 @@ is: | Pi | `local_import_when_supported` | `~/.pi/sessions.jsonl` or an explicit Pi JSONL path. | Static local-history fixture smoke. | | Claude | `local_import_when_supported` | `~/.claude/projects` or an explicit Claude projects JSONL tree. | Static local-history fixture smoke. | | OpenCode | `local_import_when_supported` | `~/.local/share/opencode/opencode.db` or an explicit OpenCode SQLite DB. | Static local-history fixture smoke. | +| OpenClaw | `local_import_when_supported` | `OPENCLAW_STATE_DIR`, `~/.openclaw`, legacy `~/.clawdbot`/`~/.moltbot`, or an explicit OpenClaw state tree. | Static local-history fixture smoke; beta storage-contract notes in the matrix. | +| Hermes Agent | `local_import_when_supported` | `HERMES_HOME/state.db`, `~/.hermes/state.db`, or an explicit Hermes SQLite DB. | Static local-history fixture smoke. | +| NanoClaw | `local_import_when_supported` | Preview/manual import from a NanoClaw project root or `data/v2.db`; cwd/ancestor discovery only. | Static local-history fixture smoke; excluded from `ctx import --all` and pre-search refresh until promoted. | +| AstrBot | `local_import_when_supported` | Preview/manual import from `ASTRBOT_ROOT/data/data_v4.db`, `~/.astrbot/data/data_v4.db`, cwd/ancestor project DBs, or an explicit DB path. | Static local-history fixture smoke; imports LLM context plus available platform history, not guaranteed complete IM transcripts. 
| | Antigravity | `local_import_when_supported` | Antigravity `transcript_full.jsonl` or `transcript.jsonl` files under `~/.gemini/antigravity-cli/brain`, or an explicit Antigravity transcript JSONL tree. | Static local-history fixture smoke. | | Gemini | `local_import_when_supported` | Gemini chat JSONL files under `~/.gemini/tmp/**/chats`, or an explicit Gemini CLI history tree. | Static local-history fixture smoke. | | Cursor | `local_import_when_supported` | Cursor agent transcript JSONL files under `~/.cursor/projects/**/agent-transcripts`, or an explicit Cursor agent transcript path. | Static local-history fixture smoke. | | Copilot CLI | `local_import_when_supported` | Copilot CLI `events.jsonl` files under `~/.copilot/session-state`, or an explicit Copilot CLI session-state tree. | Static local-history fixture smoke. | | Factory AI Droid | `local_import_when_supported` | `~/.factory/sessions` or an explicit Factory AI Droid sessions tree. | Static local-history fixture smoke. | +`ctx sources --json` uses `import_support: "preview"` and `native_import: +false` for preview sources such as NanoClaw and AstrBot. Those paths can be +imported explicitly with `ctx import --provider ...` when discovery finds them, +or with `ctx import --provider ... --path ...` for a specific path. They are not +swept up by `ctx import --all` or the default pre-search refresh. + Fidelity fields in the machine-readable matrix describe the default public CLI import behavior and normalized ctx storage fields. 
Supported adapters record normalized `files_touched` metadata when provider transcripts expose file paths diff --git a/docs/providers.md b/docs/providers.md index e5a8ecd5..7f554bb5 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -14,6 +14,15 @@ The current CLI imports local history for: supported JSONL format; - Claude Code project JSONL transcripts under `~/.claude/projects`; - OpenCode SQLite history under `~/.local/share/opencode/opencode.db`; +- OpenClaw session JSONL trees under `OPENCLAW_STATE_DIR`, `~/.openclaw`, + legacy `~/.clawdbot`, or legacy `~/.moltbot`; +- Hermes Agent SQLite history under `HERMES_HOME/state.db` or + `~/.hermes/state.db`; +- NanoClaw project history from a project root with `data/v2.db` and + `data/v2-sessions` when imported explicitly; +- AstrBot local SQLite history from `ASTRBOT_ROOT/data/data_v4.db`, + `~/.astrbot/data/data_v4.db`, or a project `data/data_v4.db` when imported + explicitly; - Antigravity transcript JSONL mirrors under `~/.gemini/antigravity-cli/brain/*/.system_generated/logs/transcript_full.jsonl` or `transcript.jsonl`; @@ -31,13 +40,20 @@ ctx sources ctx sources --json ``` -CLI provider flags use names such as `copilot-cli` and `factory-ai-droid`. +CLI provider flags use names such as `openclaw`, `hermes`, `nanoclaw`, +`astrbot`, `copilot-cli`, and `factory-ai-droid`. Structured JSON and stable SQL views use provider IDs in ctx output; multiword IDs may be -snake_case, such as `copilot_cli` or `factory_ai_droid`. +snake_case, such as `copilot_cli` or `factory_ai_droid`, while compact native +IDs such as `openclaw`, `nanoclaw`, and `astrbot` stay compact. `ctx sources --json` reports each known provider source with `import_support` and `importable` fields. A native source is marked available/importable only -when provider-specific transcript files exist. Sources with +when provider-specific transcript files exist. 
Sources with `import_support: +"preview"` are explicit-import preview paths: use `ctx import --provider +nanoclaw` or `ctx import --provider astrbot` when discovery finds the desired +source, or add `--path` to target a specific source before searching it. They +are intentionally excluded from `ctx import --all` and pre-search refresh until +promoted. Sources with `status: "unknown"` hit the bounded transcript probe budget before proving history exists, and sources with `import_support: "unsupported"` are detections or blockers, not importable native history. diff --git a/docs/search.md b/docs/search.md index 95666834..aecef853 100644 --- a/docs/search.md +++ b/docs/search.md @@ -54,7 +54,7 @@ that support it. Search filters narrow both human output and JSON: -- `--provider codex|pi|claude|opencode|antigravity|gemini|cursor|copilot-cli|factory-ai-droid`; +- `--provider codex|pi|claude|opencode|openclaw|hermes|nanoclaw|astrbot|antigravity|gemini|cursor|copilot-cli|factory-ai-droid`; - `--workspace `, substring match over stored workspace, cwd, source path, or repository-name text; - `--since d`; @@ -73,8 +73,9 @@ Search filters narrow both human output and JSON: - `--include-current-session`. CLI provider filters use the kebab-case names above. JSON output and stable SQL -views use provider IDs in ctx output; multiword provider IDs may be snake_case, such as -`copilot_cli` or `factory_ai_droid`. +views use provider IDs in ctx output; multiword provider IDs may be snake_case, +such as `copilot_cli` or `factory_ai_droid`, while compact IDs such as +`openclaw`, `nanoclaw`, and `astrbot` stay compact. `--since` accepts RFC 3339 timestamps such as `2026-06-01T00:00:00Z` or a day window such as `30d`. @@ -107,9 +108,9 @@ refresh fails. On large discovered sources or already-cataloged indexes, `auto` serves current results without a foreground catch-up scan; use `--refresh strict` or `ctx import --all` when you need a full catch-up before querying. 
`off` skips the pre-search refresh. `strict` fails the search if the -refresh cannot run or import successfully. Search-only sources without native -import support are searched from the existing index until they are explicitly -imported through a supported path. +refresh cannot run or import successfully. Preview sources such as NanoClaw and +AstrBot are skipped by the pre-search refresh, so search only sees their existing +index entries until they are explicitly imported through a supported path. Use `--refresh off` for a strictly read-only search over the existing ctx index. This avoids provider imports and avoids updating the ctx SQLite store.