Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash -e

maturin develop
mypy

cargo test
pytest

cargo fmt
ruff format

echo "All checks passed"
29 changes: 17 additions & 12 deletions docs/docs/about/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,36 +26,36 @@ Following the steps below to get cocoindex build on latest codebase locally - if
- 🦀 [Install Rust](https://rust-lang.org/tools/install)

If you don't have Rust installed, run
```bash
```sh
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
Already have Rust? Make sure it's up to date
```bash
```sh
rustup update
```

- (Recommended) Setup Python virtual environment:
```bash
- Setup Python virtual environment:
```sh
python3 -m venv .venv
```
Activate the virtual environment, before any installing / building / running:

```bash
```sh
. .venv/bin/activate
```

- Install maturin:
```bash
pip install maturin
- Install required tools:
```sh
pip install maturin mypy ruff
```

- Build the library. Run at the root of cocoindex directory:
```bash
```sh
maturin develop
```

- (Optional) Before running a specific example, set extra environment variables, for exposing extra traces, allowing dev UI, etc.
```bash
- Before running a specific example, set extra environment variables, for exposing extra traces, allowing dev UI, etc.
```sh
. ./.env.lib_debug
```

Expand All @@ -67,7 +67,12 @@ To submit your code:
1. Fork the [CocoIndex repository](https://github.com/cocoindex-io/cocoindex)
2. [Create a new branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop) on your fork
3. Make your changes
4. [Open a Pull Request (PR)](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) when your work is ready for review
4. Make sure all tests and linting pass by running
```sh
./check.sh
```

5. [Open a Pull Request (PR)](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) when your work is ready for review

In your PR description, please include:
- Description of the changes
Expand Down
19 changes: 8 additions & 11 deletions src/builder/flow_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,17 +258,14 @@ impl FlowBuilder {
#[new]
pub fn new(name: &str) -> PyResult<Self> {
let lib_context = get_lib_context().into_py_result()?;
let existing_flow_ss = lib_context
.persistence_ctx
.as_ref()
.and_then(|ctx| {
ctx.all_setup_states
.read()
.unwrap()
.flows
.get(name)
.cloned()
});
let existing_flow_ss = lib_context.persistence_ctx.as_ref().and_then(|ctx| {
ctx.all_setup_states
.read()
.unwrap()
.flows
.get(name)
.cloned()
});
let root_op_scope = OpScope::new(
spec::ROOT_SCOPE_NAME.to_string(),
None,
Expand Down
2 changes: 0 additions & 2 deletions src/execution/db_tracking_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,6 @@ impl ResourceSetupStatus for TrackingTableSetupStatus {
(None, None) => SetupChangeType::NoChange,
}
}


}

impl TrackingTableSetupStatus {
Expand Down
2 changes: 1 addition & 1 deletion src/execution/dumper.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Result;
use futures::future::try_join_all;
use futures::StreamExt;
use futures::future::try_join_all;
use indexmap::IndexMap;
use itertools::Itertools;
use serde::ser::SerializeSeq;
Expand Down
20 changes: 10 additions & 10 deletions src/execution/live_updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,7 @@ impl SharedAckFn {
let ack_fn = {
let mut v = v.lock().unwrap();
v.count -= 1;
if v.count > 0 {
None
} else {
v.ack_fn.take()
}
if v.count > 0 { None } else { v.ack_fn.take() }
};
if let Some(ack_fn) = ack_fn {
ack_fn().await?;
Expand Down Expand Up @@ -108,9 +104,7 @@ async fn update_source(
} else {
trace!(
"{}.{}: {}",
flow_ctx.flow.flow_instance.name,
import_op.name,
delta
flow_ctx.flow.flow_instance.name, import_op.name, delta
);
}
};
Expand Down Expand Up @@ -252,10 +246,16 @@ impl FlowLiveUpdater {
while let Some(result) = self.tasks.join_next().await {
match result {
Err(e) if !e.is_cancelled() => {
error!("A background task in FlowLiveUpdater failed to join: {:?}", e);
error!(
"A background task in FlowLiveUpdater failed to join: {:?}",
e
);
}
Ok(Err(e)) => {
error!("Error reported by a source update task during live update: {:?}", e);
error!(
"Error reported by a source update task during live update: {:?}",
e
);
}
_ => {}
}
Expand Down
2 changes: 1 addition & 1 deletion src/execution/memoization.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use anyhow::{bail, Result};
use anyhow::{Result, bail};
use serde::{Deserialize, Serialize};
use std::{
borrow::Cow,
Expand Down
2 changes: 1 addition & 1 deletion src/execution/source_indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::prelude::*;

use futures::future::Ready;
use sqlx::PgPool;
use std::collections::{hash_map, HashMap};
use std::collections::{HashMap, hash_map};
use tokio::{sync::Semaphore, task::JoinSet};

use super::{
Expand Down
6 changes: 4 additions & 2 deletions src/lib_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ impl LibContext {
.ok_or_else(|| anyhow!("Database is required for this operation. Please set COCOINDEX_DATABASE_URL environment variable and call cocoindex.init() with database settings."))
}

pub fn require_all_setup_states(&self) -> Result<&RwLock<setup::AllSetupState<setup::ExistingMode>>> {
pub fn require_all_setup_states(
&self,
) -> Result<&RwLock<setup::AllSetupState<setup::ExistingMode>>> {
self.persistence_ctx
.as_ref()
.map(|ctx| &ctx.all_setup_states)
Expand Down Expand Up @@ -149,7 +151,7 @@ pub fn create_lib_context(settings: settings::Settings) -> Result<LibContext> {
// No database configured
None
};

Ok(LibContext {
db_pools,
persistence_ctx,
Expand Down
12 changes: 8 additions & 4 deletions src/llm/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::llm::{
LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, LlmSpec, OutputFormat,
ToJsonSchemaOptions,
};
use anyhow::{bail, Context, Result};
use anyhow::{Context, Result, bail};
use async_trait::async_trait;
use json5;
use serde_json::Value;
Expand Down Expand Up @@ -115,16 +115,20 @@ impl LlmGenerationClient for Client {
Ok(value) => {
println!("[Anthropic] Used permissive JSON5 parser for output");
serde_json::to_string(&value)?
},
Err(e2) => return Err(anyhow::anyhow!(format!("No structured tool output or text found in response, and permissive JSON5 parsing also failed: {e}; {e2}")))
}
Err(e2) => {
return Err(anyhow::anyhow!(format!(
"No structured tool output or text found in response, and permissive JSON5 parsing also failed: {e}; {e2}"
)));
}
}
}
}
}
_ => {
return Err(anyhow::anyhow!(
"No structured tool output or text found in response"
))
));
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion src/llm/gemini.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::llm::{
LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, LlmSpec, OutputFormat,
ToJsonSchemaOptions,
};
use anyhow::{bail, Context, Result};
use anyhow::{Context, Result, bail};
use async_trait::async_trait;
use serde_json::Value;
use urlencoding::encode;
Expand Down
2 changes: 1 addition & 1 deletion src/llm/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ use crate::api_bail;
use super::LlmGenerationClient;
use anyhow::Result;
use async_openai::{
Client as OpenAIClient,
config::OpenAIConfig,
types::{
ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
ChatCompletionRequestSystemMessageContent, ChatCompletionRequestUserMessage,
ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest, ResponseFormat,
ResponseFormatJsonSchema,
},
Client as OpenAIClient,
};
use async_trait::async_trait;

Expand Down
2 changes: 1 addition & 1 deletion src/ops/functions/extract_by_llm.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::prelude::*;

use crate::llm::{
new_llm_generation_client, LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat,
LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat, new_llm_generation_client,
};
use crate::ops::sdk::*;
use base::json_schema::build_json_schema;
Expand Down
2 changes: 1 addition & 1 deletion src/ops/sources/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pub mod amazon_s3;
pub mod google_drive;
pub mod local_file;
pub mod amazon_s3;
2 changes: 0 additions & 2 deletions src/ops/targets/kuzu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,6 @@ impl setup::ResourceSetupStatus for GraphElementDataSetupStatus {
fn change_type(&self) -> SetupChangeType {
self.actions.change_type(false)
}


}

fn append_drop_table(
Expand Down
2 changes: 0 additions & 2 deletions src/ops/targets/neo4j.rs
Original file line number Diff line number Diff line change
Expand Up @@ -853,8 +853,6 @@ impl ResourceSetupStatus for GraphElementDataSetupStatus {
fn change_type(&self) -> SetupChangeType {
self.change_type
}


}

async fn clear_graph_element_data(
Expand Down
2 changes: 0 additions & 2 deletions src/ops/targets/postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,6 @@ impl setup::ResourceSetupStatus for SetupStatus {
|| !self.actions.indexes_to_delete.is_empty();
self.actions.table_action.change_type(has_other_update)
}


}

impl SetupStatus {
Expand Down
2 changes: 0 additions & 2 deletions src/ops/targets/qdrant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ impl setup::ResourceSetupStatus for SetupStatus {
(true, true) => setup::SetupChangeType::Update,
}
}


}

impl SetupStatus {
Expand Down
10 changes: 6 additions & 4 deletions src/py/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,14 @@ mod tests {
.expect("Failed to convert Rust value to Python object");

println!("Python object: {:?}", py_object);
let roundtripped_value =
value_from_py_object(value_type, &py_object)
.expect("Failed to convert Python object back to Rust value");
let roundtripped_value = value_from_py_object(value_type, &py_object)
.expect("Failed to convert Python object back to Rust value");

println!("Roundtripped value: {:?}", roundtripped_value);
assert_eq!(original_value, &roundtripped_value, "Value mismatch after roundtrip");
assert_eq!(
original_value, &roundtripped_value,
"Value mismatch after roundtrip"
);
});
}

Expand Down
18 changes: 15 additions & 3 deletions src/py/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,11 @@ impl SetupStatus {
fn sync_setup(py: Python<'_>) -> PyResult<SetupStatus> {
let lib_context = get_lib_context().into_py_result()?;
let flows = lib_context.flows.lock().unwrap();
let all_setup_states = lib_context.require_all_setup_states().into_py_result()?.read().unwrap();
let all_setup_states = lib_context
.require_all_setup_states()
.into_py_result()?
.read()
.unwrap();
py.allow_threads(|| {
get_runtime()
.block_on(async {
Expand All @@ -408,7 +412,11 @@ fn sync_setup(py: Python<'_>) -> PyResult<SetupStatus> {
#[pyfunction]
fn drop_setup(py: Python<'_>, flow_names: Vec<String>) -> PyResult<SetupStatus> {
let lib_context = get_lib_context().into_py_result()?;
let all_setup_states = lib_context.require_all_setup_states().into_py_result()?.read().unwrap();
let all_setup_states = lib_context
.require_all_setup_states()
.into_py_result()?
.read()
.unwrap();
py.allow_threads(|| {
get_runtime()
.block_on(async {
Expand All @@ -422,7 +430,11 @@ fn drop_setup(py: Python<'_>, flow_names: Vec<String>) -> PyResult<SetupStatus>
#[pyfunction]
fn flow_names_with_setup() -> PyResult<Vec<String>> {
let lib_context = get_lib_context().into_py_result()?;
let all_setup_states = lib_context.require_all_setup_states().into_py_result()?.read().unwrap();
let all_setup_states = lib_context
.require_all_setup_states()
.into_py_result()?
.read()
.unwrap();
let flow_names = all_setup_states.flows.keys().cloned().collect();
Ok(flow_names)
}
Expand Down
Loading
Loading