diff --git a/Cargo.lock b/Cargo.lock index d9d4085..b75fda8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -353,6 +353,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "digest" version = "0.10.7" @@ -877,6 +886,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linkme" version = "0.2.10" @@ -919,6 +934,31 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "lopdf" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e775e4ee264e8a87d50a9efef7b67b4aa988cf94e75630859875fc347e6c872b" +dependencies = [ + "chrono", + "encoding_rs", + "flate2", + "itoa", + "linked-hash-map", + "log", + "md5", + "nom", + "rayon", + "time", + "weezl", +] + +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.7.2" @@ -943,18 +983,20 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mini-rag" -version = "0.1.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da87e3d727c7ad93d32c4deb529b9622f475d2ab9a7460c9ba1398a0d696c9d9" +checksum = "71facc5e6b5af46f4df9cf6ad32828e2d871420ddc94d73be7c41abbfb3fdcda" dependencies = [ "anyhow", "async-trait", "bitcode", "colored", "glob", + "lopdf", "rayon", "serde", "sha256", + "stringreader", ] [[package]] @@ -1044,6 +1086,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-traits" version = "0.2.19" @@ -1215,6 +1263,12 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1650,6 +1704,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "stringreader" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913e7b03d63752f6cdd2df77da36749d82669904798fe8944b9ec3d23f159905" + [[package]] name = "strsim" version = "0.11.1" @@ -1743,6 +1803,37 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -2089,6 +2180,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index ed331e8..761663d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ memory-stats = "1.1.0" sha256 = "1.5.0" bitcode = { version = "0.6.0", features = ["serde"] } intertrait = "0.2.2" -mini-rag = "0.1.0" +mini-rag = "0.2.1" [features] default = ["ollama", "groq", "openai", "fireworks"] diff --git a/src/agent/state/mod.rs b/src/agent/state/mod.rs index 11d11c9..724cce7 100644 --- a/src/agent/state/mod.rs +++ b/src/agent/state/mod.rs @@ -27,7 +27,7 @@ pub struct State { // list of executed actions history: History, // optional rag engine - rag: Option>, + rag: Option, // set to true when task is complete complete: bool, // runtime metrics @@ -82,16 +82,18 @@ impl State { } // add RAG namespace - let rag: Option> = - if let Some(config) = task.get_rag_config() { - let v_store = mini_rag::factory("naive", embedder, config).await?; + let rag: Option = if let Some(config) = task.get_rag_config() { + let mut v_store = mini_rag::VectorStore::new(embedder, config)?; - namespaces.push(namespaces::NAMESPACES.get("rag").unwrap()()); + // import new documents if needed + v_store.import_new_documents().await?; - Some(v_store) - } else { - None - }; + namespaces.push(namespaces::NAMESPACES.get("rag").unwrap()()); + + Some(v_store) + } else { + None + }; // add task defined actions namespaces.append(&mut task.get_functions()); @@ -148,7 +150,7 @@ impl State { &mut self, query: &str, top_k: usize, - ) -> Result> { + ) -> Result> { if let Some(rag) = &self.rag { rag.retrieve(query, top_k).await } else {