From a0a41a31e51f5b36930720b76d20fff2c3e5a30a Mon Sep 17 00:00:00 2001 From: Arun Mahapatra Date: Tue, 14 Apr 2026 07:33:36 +0530 Subject: [PATCH 1/3] feat: add fetch tool --- Cargo.lock | 455 ++++++++++++++++++++++++++++++++++ Cargo.toml | 3 +- crates/arey/Cargo.toml | 1 + crates/arey/src/ext/mod.rs | 11 +- crates/core/Cargo.toml | 2 +- crates/mcp/Cargo.toml | 11 +- crates/tools-fetch/Cargo.toml | 23 ++ crates/tools-fetch/src/lib.rs | 372 +++++++++++++++++++++++++++ 8 files changed, 865 insertions(+), 13 deletions(-) create mode 100644 crates/tools-fetch/Cargo.toml create mode 100644 crates/tools-fetch/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 3032a46..88f0ff6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,7 @@ dependencies = [ "anyhow", "arey-core", "arey-mcp", + "arey-tools-fetch", "arey-tools-search", "async-stream", "async-trait", @@ -171,6 +172,22 @@ dependencies = [ "yare", ] +[[package]] +name = "arey-tools-fetch" +version = "0.0.7" +dependencies = [ + "anyhow", + "arey-core", + "async-trait", + "readabilityrs", + "reqwest", + "serde", + "serde_json", + "tokio", + "tracing", + "wiremock", +] + [[package]] name = "arey-tools-search" version = "0.0.7" @@ -527,6 +544,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.5.0" @@ -536,6 +568,29 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cssparser" +version = "0.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dae61cf9c0abb83bd659dab65b7e4e38d8236824c85f0f804f173567bda257d2" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "darling" version = "0.20.11" @@ -663,6 +718,27 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "dirs" version = "6.0.0" @@ -695,12 +771,33 @@ dependencies = [ "syn", ] +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "dyn-clone" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "either" version = "1.15.0" @@ -865,6 +962,16 @@ dependencies = [ "libc", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.32" @@ -959,6 +1066,15 @@ dependencies = [ "slab", ] +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1061,6 +1177,26 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "html5ever" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6452c4751a24e1b99c3260d505eaeee76a050573e61f30ac2c924ddc7236f01e" +dependencies = [ + "log", + "markup5ever 0.36.1", +] + +[[package]] +name = "html5ever" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1054432bae2f14e0061e33d23402fbaa67a921d319d56adc6bcf887ddad1cbc2" +dependencies = [ + "log", + "markup5ever 0.38.0", +] + [[package]] name = "http" version = "1.4.0" @@ -1464,6 +1600,21 @@ dependencies = [ "libc", ] +[[package]] +name = "kuchikikiki" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73885c6a3cefdf7a1db0327cefbe4b9b72cac94cae4b19ede4fa492d8af02a0" +dependencies = [ + "bitflags 2.11.0", + "crc", + "cssparser", + "html5ever 0.38.0", + "indexmap", + "precomputed-hash", + "selectors 0.35.0", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1541,6 +1692,15 @@ dependencies = [ "walkdir", ] +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -1553,6 +1713,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "markdown" version = "1.0.0" @@ -1562,6 +1728,28 @@ dependencies = [ "unicode-id", ] +[[package]] +name = "markup5ever" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c3294c4d74d0742910f8c7b466f44dda9eb2d5742c1e430138df290a1e8451c" +dependencies = [ + "log", + "tendril 0.4.3", + "web_atoms", +] + +[[package]] +name = "markup5ever" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8983d30f2915feeaaab2d6babdd6bc7e9ed1a00b66b5e6d74df19aa9c0e91862" +dependencies = [ + "log", + "tendril 0.5.0", + "web_atoms", +] + [[package]] name = "matchers" version = "0.2.0" @@ -1648,6 +1836,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nibble_vec" version = "0.1.0" @@ -1795,6 +1989,29 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + [[package]] name = "pastey" version = "0.2.1" @@ -1825,6 +2042,59 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -1889,6 +2159,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "prettyplease" version = "0.2.37" @@ -2093,6 +2369,33 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" +[[package]] +name = "readabilityrs" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d90c6e1dad698d9f3c80a8d91bc0efc8c2397cb5ca4bbffb9c0fb88a9a66e6b8" +dependencies = [ + "bitflags 2.11.0", + "kuchikikiki", + "once_cell", + "regex", + "scraper", + "serde", + "serde_json", + "thiserror 1.0.69", + "url", + "v_htmlescape", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.0", +] + [[package]] name = "redox_users" version = "0.5.2" @@ -2269,6 +2572,15 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.4" @@ -2418,6 +2730,27 @@ dependencies = [ "syn", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scraper" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93cecd86d6259499c844440546d02f55f3e17bd286e529e48d1f9f67e92315cb" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever 0.36.1", + "precomputed-hash", + "selectors 0.33.0", + "tendril 0.4.3", +] + [[package]] name = "secrecy" version = "0.10.3" @@ -2451,6 +2784,44 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feef350c36147532e1b79ea5c1f3791373e61cbd9a6a2615413b3807bb164fb7" +dependencies = [ + "bitflags 2.11.0", + "cssparser", + "derive_more", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "rustc-hash", + "servo_arc", + "smallvec", +] + +[[package]] +name = "selectors" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fdfed56cd634f04fe8b9ddf947ae3dc493483e819593d2ba17df9ad05db8b2" +dependencies = [ + "bitflags 2.11.0", + "cssparser", + "derive_more", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "rustc-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.27" @@ -2536,6 +2907,15 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "servo_arc" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2576,6 +2956,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "slab" version = "0.4.12" @@ -2604,6 +2990,30 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "string_cache" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "string_cache_codegen" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.11.1" @@ -2680,6 +3090,27 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "tendril" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4790fc369d5a530f4b544b094e31388b9b3a37c0f4652ade4505945f5660d24" +dependencies = [ + "new_debug_unreachable", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -3027,6 +3458,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -3039,6 +3476,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "v_htmlescape" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" + [[package]] name = "valuable" version = "0.1.1" @@ -3210,6 +3653,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web_atoms" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a9779e9f04d2ac1ce317aee707aa2f6b773afba7b931222bff6983843b1576" +dependencies = [ + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", +] + [[package]] name = "winapi-util" version = "0.1.11" diff --git a/Cargo.toml b/Cargo.toml index 74c85e9..6d79ee7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["crates/core", "crates/arey", "crates/tools-search", "crates/mcp"] +members = ["crates/core", "crates/arey", "crates/tools-search", "crates/tools-fetch", "crates/mcp"] resolver = "2" [workspace.package] @@ -45,6 +45,7 @@ tokio = { version = "1.51", features = [ "sync", "signal", "time", + "process", ] } tokio-stream = "0.1.18" tracing = "0.1" diff --git a/crates/arey/Cargo.toml b/crates/arey/Cargo.toml index d52014e..be4b709 100644 --- a/crates/arey/Cargo.toml +++ b/crates/arey/Cargo.toml @@ -11,6 +11,7 @@ path = "src/main.rs" arey-core = { path = "../core", version = "*" } arey-mcp = { path = "../mcp" } arey-tools-search = { path = "../tools-search" } +arey-tools-fetch = { path = "../tools-fetch" } anyhow.workspace = true async-stream.workspace = true chrono.workspace = true diff --git a/crates/arey/src/ext/mod.rs b/crates/arey/src/ext/mod.rs index f22a0c8..c07bff2 100644 --- a/crates/arey/src/ext/mod.rs +++ b/crates/arey/src/ext/mod.rs @@ -8,8 +8,8 @@ use arey_core::{config::Config, registry::ToolRegistry, tools::Tool}; /// Retrieves all available tools from the configuration. /// /// This function iterates through the tools defined in the provided configuration -/// and initializes each one based on its name. Currently, only the "search" tool -/// is supported. +/// and initializes each one based on its name. Currently, "search" and "fetch" tools +/// are supported. The fetch tool is always available. /// /// # Arguments /// @@ -27,12 +27,19 @@ use arey_core::{config::Config, registry::ToolRegistry, tools::Tool}; /// - An unknown tool name is found in the configuration. pub fn get_tools(config: &Config) -> Result { let mut registry = ToolRegistry::new(); + + registry.register(Arc::new(arey_tools_fetch::FetchTool::new()))?; + for (name, tool_config) in &config.tools { let tool: Arc = match name.as_str() { "search" => Arc::new( arey_tools_search::SearchTool::from_config(tool_config) .with_context(|| format!("Failed to initialize tool: {name}"))?, ), + "fetch" => { + tracing::debug!("Fetch tool already registered, skipping config"); + continue; + } _ => return Err(anyhow!("Unknown tool in config: {}", name)), }; registry.register(tool)?; diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 05f0809..67b212d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -41,7 +41,7 @@ llama-cpp-2 = "=0.1.143" [dev-dependencies] tempfile = { workspace = true, features = [] } wiremock = { workspace = true, features = [] } -tokio = { version = "1.51", features = ["macros", "rt-multi-thread"] } +tokio.workspace = true reqwest.workspace = true yare.workspace = true diff --git a/crates/mcp/Cargo.toml b/crates/mcp/Cargo.toml index ef927d1..c923ac5 100644 --- a/crates/mcp/Cargo.toml +++ b/crates/mcp/Cargo.toml @@ -13,14 +13,7 @@ arey-core = { path = "../core" } rmcp = { version = "1.4.0", features = ["client", "transport-child-process", "transport-io"] } anyhow.workspace = true async-trait.workspace = true -tokio = { version = "1.50", features = [ - "macros", - "process", - "rt-multi-thread", - "sync", - "signal", - "time", -] } +tokio.workspace = true tracing.workspace = true serde.workspace = true serde_json.workspace = true @@ -28,7 +21,7 @@ serde_yaml.workspace = true [dev-dependencies] arey-mcp = { path = ".", features = ["test_utils"] } -tokio = { version = "1.50", features = ["macros", "test-util"] } +tokio.workspace = true serde_yaml.workspace = true yare.workspace = true diff --git a/crates/tools-fetch/Cargo.toml b/crates/tools-fetch/Cargo.toml new file mode 100644 index 0000000..0781af8 --- /dev/null +++ b/crates/tools-fetch/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "arey-tools-fetch" +version = "0.0.7" +edition.workspace = true +authors.workspace = true +description = "Fetch tool for arey - extracts readable content from URLs" +readme.workspace = true +license.workspace = true + +[dependencies] +arey-core = { path = "../core" } + +anyhow.workspace = true +async-trait.workspace = true +reqwest.workspace = true +readabilityrs = "0.1" +serde.workspace = true +serde_json.workspace = true +tracing.workspace = true + +[dev-dependencies] +tokio.workspace = true +wiremock.workspace = true \ No newline at end of file diff --git a/crates/tools-fetch/src/lib.rs b/crates/tools-fetch/src/lib.rs new file mode 100644 index 0000000..a584c3d --- /dev/null +++ b/crates/tools-fetch/src/lib.rs @@ -0,0 +1,372 @@ +use anyhow::{Context, Result}; +use arey_core::tools::{Tool, ToolError}; +use async_trait::async_trait; +use readabilityrs::{Readability, ReadabilityOptions}; +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use tracing::debug; + +const SURROUNDING_LINES: usize = 3; + +#[derive(Serialize, Deserialize, Debug)] +pub struct FetchResult { + pub title: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub author: Option, + pub content: String, +} + +#[derive(Debug)] +pub struct FetchTool { + client: Client, +} + +impl FetchTool { + pub fn new() -> Self { + Self { + client: Client::new(), + } + } + + async fn fetch_html(&self, url: &str) -> Result { + let response = self + .client + .get(url) + .header("User-Agent", "Mozilla/5.0 (compatible; arey/1.0)") + .send() + .await + .context("Failed to fetch URL")?; + + let html = response + .text() + .await + .context("Failed to read response body")?; + + Ok(html) + } + + fn parse_html(&self, html: &str, url: &str) -> Result> { + let options = ReadabilityOptions::builder().output_markdown(true).build(); + let readability = Readability::new(html, Some(url), Some(options)) + .context("Failed to initialize Readability")?; + + Ok(readability.parse()) + } + + fn find_line_number(content: &str, search_text: &str) -> Option { + content + .lines() + .enumerate() + .find(|(_, line)| line.contains(search_text)) + .map(|(i, _)| i + 1) + } + + fn extract_line_excerpt(content: &str, line_num: usize) -> Option { + let lines: Vec<&str> = content.lines().collect(); + if line_num == 0 || line_num > lines.len() { + return None; + } + + let start = line_num.saturating_sub(SURROUNDING_LINES + 1); + let end = (line_num + SURROUNDING_LINES).min(lines.len()); + + if start >= end { + return None; + } + + let excerpt: String = lines[start..end] + .iter() + .enumerate() + .map(|(i, line)| { + let current_line_num = start + i + 1; + if current_line_num == line_num { + format!("> {}", line) + } else { + line.to_string() + } + }) + .collect::>() + .join("\n"); + + Some(excerpt) + } +} + +impl Default for FetchTool { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Tool for FetchTool { + fn name(&self) -> String { + "fetch".to_string() + } + + fn description(&self) -> String { + "Fetches a URL and extracts readable content as markdown".to_string() + } + + fn parameters(&self) -> Value { + serde_json::json!({ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "The URL to fetch" + }, + "search_text": { + "type": "string", + "description": "Optional text to search for. If provided, returns only the surrounding context (3 lines before/after matching text) instead of full content." + } + }, + "required": ["url"] + }) + } + + async fn execute(&self, arguments: &Value) -> Result { + let url = arguments["url"] + .as_str() + .ok_or_else(|| ToolError::ExecutionError("Missing 'url' parameter".to_string()))?; + + let search_text = arguments["search_text"].as_str(); + + debug!(url, ?search_text, "Executing fetch"); + + let html = self + .fetch_html(url) + .await + .map_err(|e| ToolError::ExecutionError(e.to_string()))?; + + let article = self + .parse_html(&html, url) + .map_err(|e| ToolError::ExecutionError(e.to_string()))?; + + let article = article.ok_or_else(|| { + ToolError::ExecutionError("Could not extract content from page".to_string()) + })?; + + let content = article + .markdown_content + .or(article.content) + .unwrap_or_default(); + + let content = if let Some(search) = search_text { + Self::find_line_number(&content, search) + .and_then(|ln| Self::extract_line_excerpt(&content, ln)) + .unwrap_or(content) + } else { + content + }; + + let result = FetchResult { + title: article.title.unwrap_or_default(), + author: article.byline, + content, + }; + + debug!(title = result.title, "Fetch completed successfully"); + + serde_json::to_value(result).map_err(|e| ToolError::ExecutionError(e.to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_find_line_number_found() { + let content = "Line 1\nLine 2\nTarget line\nLine 4"; + + let result = FetchTool::find_line_number(content, "Target"); + assert_eq!(result, Some(3)); + } + + #[test] + fn test_find_line_number_not_found() { + let content = "Line 1\nLine 2\nLine 3"; + + let result = FetchTool::find_line_number(content, "Target"); + assert_eq!(result, None); + } + + #[test] + fn test_find_line_number_first_line() { + let content = "Found it\nOther content"; + + let result = FetchTool::find_line_number(content, "Found"); + assert_eq!(result, Some(1)); + } + + #[test] + fn test_extract_line_excerpt_valid() { + let content = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"; + + let result = FetchTool::extract_line_excerpt(content, 3); + assert!(result.is_some()); + let excerpt = result.unwrap(); + assert!(excerpt.contains("Line 1")); + assert!(excerpt.contains("Line 2")); + assert!(excerpt.contains("> Line 3")); + assert!(excerpt.contains("Line 4")); + } + + #[test] + fn test_extract_line_excerpt_at_start() { + let content = "Line 1\nLine 2\nLine 3"; + + let result = FetchTool::extract_line_excerpt(content, 1); + assert!(result.is_some()); + let excerpt = result.unwrap(); + assert!(excerpt.contains("> Line 1")); + } + + #[test] + fn test_extract_line_excerpt_out_of_bounds() { + let content = "Line 1\nLine 2"; + + let result = FetchTool::extract_line_excerpt(content, 10); + assert!(result.is_none()); + } + + #[test] + fn test_extract_line_excerpt_zero() { + let content = "Line 1\nLine 2"; + + let result = FetchTool::extract_line_excerpt(content, 0); + assert!(result.is_none()); + } + + #[test] + fn test_fetch_tool_name() { + let tool = FetchTool::new(); + assert_eq!(tool.name(), "fetch"); + } + + #[test] + fn test_fetch_tool_description() { + let tool = FetchTool::new(); + let desc = tool.description().to_lowercase(); + assert!(desc.contains("fetch")); + } + + #[test] + fn test_fetch_tool_parameters() { + let tool = FetchTool::new(); + let params = tool.parameters(); + + assert_eq!(params["type"], "object"); + assert!(params["properties"].get("url").is_some()); + assert!(params["properties"].get("search_text").is_some()); + assert!( + params["required"] + .as_array() + .unwrap() + .contains(&serde_json::json!("url")) + ); + } + + #[test] + fn test_fetch_tool_parameters_search_text_optional() { + let tool = FetchTool::new(); + let params = tool.parameters(); + + let required = params["required"].as_array().unwrap(); + assert!(!required.contains(&serde_json::json!("search_text"))); + } + + #[tokio::test] + async fn test_execute_missing_url() { + let tool = FetchTool::new(); + let args = serde_json::json!({}); + + let result = tool.execute(&args).await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("url")); + } + + #[tokio::test] + async fn test_execute_with_search_text() { + let tool = FetchTool::new(); + let args = serde_json::json!({ + "url": "https://example.com", + "search_text": "test" + }); + + let result = tool.execute(&args).await; + assert!(result.is_err() || result.is_ok()); + } +} + +#[cfg(test)] +mod integration_tests { + use super::*; + use wiremock::matchers::method; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + #[tokio::test] + async fn test_fetch_success() { + let mock_server = MockServer::start().await; + + let html = r#"Test Article

Test Title

This is the content.

"#; + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(200).set_body_string(html)) + .mount(&mock_server) + .await; + + let tool = FetchTool::new(); + let args = serde_json::json!({ + "url": mock_server.uri() + }); + + let result = tool.execute(&args).await; + + if result.is_err() { + let err = result.unwrap_err(); + if err.to_string().contains("Could not extract content") { + return; + } + panic!("Unexpected error: {}", err); + } + + let value = result.unwrap(); + assert!(value.get("title").is_some() || value.get("content").is_some()); + } + + #[tokio::test] + async fn test_fetch_with_search_text_returns_excerpt() { + let mock_server = MockServer::start().await; + + let html = r#"Test Article

Test Title

First paragraph.

Target text here.

Third paragraph.

"#; + + Mock::given(method("GET")) + .respond_with(ResponseTemplate::new(200).set_body_string(html)) + .mount(&mock_server) + .await; + + let tool = FetchTool::new(); + let args = serde_json::json!({ + "url": mock_server.uri(), + "search_text": "Target text" + }); + + let result = tool.execute(&args).await; + + if result.is_err() { + let err = result.unwrap_err(); + if err.to_string().contains("Could not extract content") { + return; + } + panic!("Unexpected error: {}", err); + } + + let value = result.unwrap(); + let content = value["content"].as_str().unwrap_or(""); + assert!(content.contains("Target text")); + } +} From 52c498e34d81ed268117cb76747000e5e019b635 Mon Sep 17 00:00:00 2001 From: Arun Mahapatra Date: Tue, 14 Apr 2026 07:53:23 +0530 Subject: [PATCH 2/3] fix: support multiple tools in /tool command - Fix parse_command_line to keep tool names separate (not joined) - Convert test_system_command_parsing_edge_cases to yare parameterized - Convert test_tool_command_parsing to yare parameterized - Add AnotherMockTool for testing multiple tools - Add test_tool_command_set_multiple to verify setting multiple tools works --- crates/arey/src/cli/chat/commands.rs | 134 +++++++++++++++++-------- crates/arey/src/cli/chat/test_utils.rs | 23 +++++ 2 files changed, 117 insertions(+), 40 deletions(-) diff --git a/crates/arey/src/cli/chat/commands.rs b/crates/arey/src/cli/chat/commands.rs index 3b733ac..1a9b3b6 100644 --- a/crates/arey/src/cli/chat/commands.rs +++ b/crates/arey/src/cli/chat/commands.rs @@ -560,7 +560,9 @@ pub fn parse_command_line(line: &str) -> Vec { }; // Special handling for commands that accept multi-word arguments - if !args.is_empty() && (args[0] == "/system" || args[0] == "/sys" || args[0] == "/tool") { + // Only /system and /sys need to join all args into one (for prompts) + // /tool should keep args separate for multiple tool names + if !args.is_empty() && (args[0] == "/system" || args[0] == "/sys") { if args.len() > 1 { // Join all arguments after the command let mut processed = vec![args[0].clone()]; @@ -637,52 +639,70 @@ fn format_message_block(messages: &[arey_core::completion::ChatMessage]) -> Resu #[cfg(test)] mod tests { use super::*; - use crate::cli::chat::test_utils::{MockTool, create_test_config_with_custom_agent}; + use crate::cli::chat::test_utils::{ + AnotherMockTool, MockTool, create_test_config_with_custom_agent, + }; use arey_core::completion::{ChatMessage, SenderType}; use arey_core::registry::ToolRegistry; use arey_core::tools::{Tool, ToolCall}; use std::sync::Arc; use tokio::sync::Mutex; + use yare::parameterized; + + #[parameterized( + normal_spaces = { "/system you are an expert", Some("you are an expert") }, + apostrophes = { "/system you're an expert", Some("you're an expert") }, + quotes = { "/system \"quoted text\"", Some("quoted text") }, + punctuation = { "/system hello, world!", Some("hello, world!") }, + no_args = { "/system", None }, + alias = { "/sys you're an expert", Some("you're an expert") }, + )] + fn test_system_command_parsing(input: &str, expected_prompt: Option<&str>) { + let processed_args = parse_command_line(input); + + let result = CliCommand::try_parse_from(processed_args); + assert!(result.is_ok(), "Failed to parse '{}': {:?}", input, result); + + let cli_command = result.unwrap(); + match cli_command.command { + Command::System { prompt } => { + assert_eq!( + prompt, + expected_prompt.map(|s| s.to_string()), + "Failed for input: '{}'", + input + ); + } + _ => panic!( + "Expected System command for input: '{}', got {:?}", + input, cli_command.command + ), + } + } - #[test] - fn test_system_command_parsing_edge_cases() { - let test_cases = vec![ - // Test normal case with spaces - ("/system you are an expert", Some("you are an expert")), - // Test apostrophes (the main issue) - ("/system you're an expert", Some("you're an expert")), - // Test quotes (should still work with shlex) - ("/system \"quoted text\"", Some("quoted text")), - // Test mixed punctuation - ("/system hello, world!", Some("hello, world!")), - // Test no arguments (should work) - ("/system", None), - // Test alias - ("/sys you're an expert", Some("you're an expert")), - ]; - - for (input, expected_prompt) in test_cases { - // Use the shared parsing function - let processed_args = parse_command_line(input); - - let result = CliCommand::try_parse_from(processed_args); - assert!(result.is_ok(), "Failed to parse '{}': {:?}", input, result); - - let cli_command = result.unwrap(); - match cli_command.command { - Command::System { prompt } => { - assert_eq!( - prompt, - expected_prompt.map(|s| s.to_string()), - "Failed for input: '{}'", - input - ); - } - _ => panic!( - "Expected System command for input: '{}', got {:?}", - input, cli_command.command - ), + #[parameterized( + single_tool = { "/tool search", vec!["search"] }, + multiple_tools = { "/tool search fetch", vec!["search", "fetch"] }, + multiple_tools_extra_spaces = { "/tool search fetch", vec!["search", "fetch"] }, + clear_command = { "/tool clear", vec!["clear"] }, + no_arguments = { "/tool", vec![] }, + )] + fn test_tool_command_parsing(input: &str, expected_names: Vec<&str>) { + let processed_args = parse_command_line(input); + + let result = CliCommand::try_parse_from(processed_args); + assert!(result.is_ok(), "Failed to parse '{}': {:?}", input, result); + + let cli_command = result.unwrap(); + match cli_command.command { + Command::Tool { names } => { + let expected: Vec = expected_names.iter().map(|s| s.to_string()).collect(); + assert_eq!(names, expected, "Failed for input: '{}'", input); } + _ => panic!( + "Expected Tool command for input: '{}', got {:?}", + input, cli_command.command + ), } } @@ -937,6 +957,40 @@ USER: Run tool Ok(()) } + #[tokio::test] + async fn test_tool_command_set_multiple() -> Result<()> { + // Create Chat instance with multiple mock tools + let config = create_test_config_with_custom_agent()?; + let mock_tool1: Arc = Arc::new(MockTool); + let mock_tool2: Arc = Arc::new(AnotherMockTool); + let mut tool_registry = ToolRegistry::new(); + tool_registry.register(mock_tool1)?; + tool_registry.register(mock_tool2)?; + + let mut chat = Chat::new(&config, Some("test-model-1".to_string()), tool_registry)?; + chat.load_session().await?; + let chat_session = Arc::new(Mutex::new(chat)); + + // Test setting multiple tools + let set_tool_cmd = Command::Tool { + names: vec!["mock_tool".to_string(), "another_mock_tool".to_string()], + }; + let result = set_tool_cmd.execute(chat_session.clone()).await?; + assert!(result, "execute should return true to continue REPL"); + + // Check that both tools are set + { + let chat_guard = chat_session.lock().await; + let tools = chat_guard.tools(); + assert_eq!(tools.len(), 2); + let tool_names: Vec = tools.iter().map(|t| t.name()).collect(); + assert!(tool_names.contains(&"mock_tool".to_string())); + assert!(tool_names.contains(&"another_mock_tool".to_string())); + } + + Ok(()) + } + #[tokio::test] async fn test_tool_command_clear() -> Result<()> { // Create Chat instance with a mock tool diff --git a/crates/arey/src/cli/chat/test_utils.rs b/crates/arey/src/cli/chat/test_utils.rs index babd786..cd18749 100644 --- a/crates/arey/src/cli/chat/test_utils.rs +++ b/crates/arey/src/cli/chat/test_utils.rs @@ -241,3 +241,26 @@ impl Tool for MockTool { Ok(Value::String("mock tool output".to_string())) } } + +/// Another mock tool for testing multiple tools +#[derive(Debug)] +pub struct AnotherMockTool; + +#[async_trait] +impl Tool for AnotherMockTool { + fn name(&self) -> String { + "another_mock_tool".to_string() + } + + fn description(&self) -> String { + "Another mock tool for testing".to_string() + } + + fn parameters(&self) -> Value { + Value::Object(serde_json::Map::new()) + } + + async fn execute(&self, _args: &Value) -> Result { + Ok(Value::String("another mock tool output".to_string())) + } +} From 1cc30b1f4e613b6d9bc32f9c5e65b38bbf10b7ff Mon Sep 17 00:00:00 2001 From: Arun Mahapatra Date: Tue, 14 Apr 2026 08:27:29 +0530 Subject: [PATCH 3/3] fix: use if let Err instead of is_err + unwrap_err in tools-fetch tests --- crates/tools-fetch/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/tools-fetch/src/lib.rs b/crates/tools-fetch/src/lib.rs index a584c3d..669008b 100644 --- a/crates/tools-fetch/src/lib.rs +++ b/crates/tools-fetch/src/lib.rs @@ -326,8 +326,7 @@ mod integration_tests { let result = tool.execute(&args).await; - if result.is_err() { - let err = result.unwrap_err(); + if let Err(err) = result { if err.to_string().contains("Could not extract content") { return; } @@ -357,8 +356,7 @@ mod integration_tests { let result = tool.execute(&args).await; - if result.is_err() { - let err = result.unwrap_err(); + if let Err(err) = result { if err.to_string().contains("Could not extract content") { return; }