From 935eee788b0b61f274ee437d646e77d3b9968958 Mon Sep 17 00:00:00 2001 From: LJ Date: Tue, 17 Jun 2025 23:21:03 -0700 Subject: [PATCH] ops: add a script to automatically check things before PR creation --- check.sh | 12 ++++++++++++ docs/docs/about/contributing.md | 29 +++++++++++++++++------------ src/builder/flow_builder.rs | 19 ++++++++----------- src/execution/db_tracking_setup.rs | 2 -- src/execution/dumper.rs | 2 +- src/execution/live_updater.rs | 20 ++++++++++---------- src/execution/memoization.rs | 2 +- src/execution/source_indexer.rs | 2 +- src/lib_context.rs | 6 ++++-- src/llm/anthropic.rs | 12 ++++++++---- src/llm/gemini.rs | 2 +- src/llm/openai.rs | 2 +- src/ops/functions/extract_by_llm.rs | 2 +- src/ops/sources/mod.rs | 2 +- src/ops/targets/kuzu.rs | 2 -- src/ops/targets/neo4j.rs | 2 -- src/ops/targets/postgres.rs | 2 -- src/ops/targets/qdrant.rs | 2 -- src/py/convert.rs | 10 ++++++---- src/py/mod.rs | 18 +++++++++++++++--- src/settings.rs | 10 +++++----- src/setup/db_metadata.rs | 2 -- src/utils/fingerprint.rs | 2 +- 23 files changed, 93 insertions(+), 71 deletions(-) create mode 100755 check.sh diff --git a/check.sh b/check.sh new file mode 100755 index 00000000..6ef77c54 --- /dev/null +++ b/check.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e + +maturin develop +mypy + +cargo test +pytest + +cargo fmt +ruff format + +echo "All checks passed" \ No newline at end of file diff --git a/docs/docs/about/contributing.md b/docs/docs/about/contributing.md index aaa17d6d..e9e3bae5 100644 --- a/docs/docs/about/contributing.md +++ b/docs/docs/about/contributing.md @@ -26,36 +26,36 @@ Following the steps below to get cocoindex build on latest codebase locally - if - 🦀 [Install Rust](https://rust-lang.org/tools/install) If you don't have Rust installed, run - ```bash + ```sh curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh ``` Already have Rust? Make sure it's up to date - ```bash + ```sh rustup update ``` -- (Recommended) Setup Python virtual environment: - ```bash +- Setup Python virtual environment: + ```sh python3 -m venv .venv ``` Activate the virtual environment, before any installing / building / running: - ```bash + ```sh . .venv/bin/activate ``` -- Install maturin: - ```bash - pip install maturin +- Install required tools: + ```sh + pip install maturin mypy ruff ``` - Build the library. Run at the root of cocoindex directory: - ```bash + ```sh maturin develop ``` -- (Optional) Before running a specific example, set extra environment variables, for exposing extra traces, allowing dev UI, etc. - ```bash +- Before running a specific example, set extra environment variables, for exposing extra traces, allowing dev UI, etc. + ```sh . ./.env.lib_debug ``` @@ -67,7 +67,12 @@ To submit your code: 1. Fork the [CocoIndex repository](https://github.com/cocoindex-io/cocoindex) 2. [Create a new branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop) on your fork 3. Make your changes -4. [Open a Pull Request (PR)](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) when your work is ready for review +4. Make sure all tests and linting pass by running + ```sh + ./check.sh + ``` + +5. [Open a Pull Request (PR)](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) when your work is ready for review In your PR description, please include: - Description of the changes diff --git a/src/builder/flow_builder.rs b/src/builder/flow_builder.rs index 14c90f04..38b443e4 100644 --- a/src/builder/flow_builder.rs +++ b/src/builder/flow_builder.rs @@ -258,17 +258,14 @@ impl FlowBuilder { #[new] pub fn new(name: &str) -> PyResult { let lib_context = get_lib_context().into_py_result()?; - let existing_flow_ss = lib_context - .persistence_ctx - .as_ref() - .and_then(|ctx| { - ctx.all_setup_states - .read() - .unwrap() - .flows - .get(name) - .cloned() - }); + let existing_flow_ss = lib_context.persistence_ctx.as_ref().and_then(|ctx| { + ctx.all_setup_states + .read() + .unwrap() + .flows + .get(name) + .cloned() + }); let root_op_scope = OpScope::new( spec::ROOT_SCOPE_NAME.to_string(), None, diff --git a/src/execution/db_tracking_setup.rs b/src/execution/db_tracking_setup.rs index 82336199..9ef358aa 100644 --- a/src/execution/db_tracking_setup.rs +++ b/src/execution/db_tracking_setup.rs @@ -154,8 +154,6 @@ impl ResourceSetupStatus for TrackingTableSetupStatus { (None, None) => SetupChangeType::NoChange, } } - - } impl TrackingTableSetupStatus { diff --git a/src/execution/dumper.rs b/src/execution/dumper.rs index 0120d5eb..f7e1a710 100644 --- a/src/execution/dumper.rs +++ b/src/execution/dumper.rs @@ -1,6 +1,6 @@ use anyhow::Result; -use futures::future::try_join_all; use futures::StreamExt; +use futures::future::try_join_all; use indexmap::IndexMap; use itertools::Itertools; use serde::ser::SerializeSeq; diff --git a/src/execution/live_updater.rs b/src/execution/live_updater.rs index 5b66dd12..8c6ad67f 100644 --- a/src/execution/live_updater.rs +++ b/src/execution/live_updater.rs @@ -51,11 +51,7 @@ impl SharedAckFn { let ack_fn = { let mut v = v.lock().unwrap(); v.count -= 1; - if v.count > 0 { - None - } else { - v.ack_fn.take() - } + if v.count > 0 { None } else { v.ack_fn.take() } }; if let Some(ack_fn) = ack_fn { ack_fn().await?; @@ -108,9 +104,7 @@ async fn update_source( } else { trace!( "{}.{}: {}", - flow_ctx.flow.flow_instance.name, - import_op.name, - delta + flow_ctx.flow.flow_instance.name, import_op.name, delta ); } }; @@ -252,10 +246,16 @@ impl FlowLiveUpdater { while let Some(result) = self.tasks.join_next().await { match result { Err(e) if !e.is_cancelled() => { - error!("A background task in FlowLiveUpdater failed to join: {:?}", e); + error!( + "A background task in FlowLiveUpdater failed to join: {:?}", + e + ); } Ok(Err(e)) => { - error!("Error reported by a source update task during live update: {:?}", e); + error!( + "Error reported by a source update task during live update: {:?}", + e + ); } _ => {} } diff --git a/src/execution/memoization.rs b/src/execution/memoization.rs index 9d2444ce..b28c7ac7 100644 --- a/src/execution/memoization.rs +++ b/src/execution/memoization.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use serde::{Deserialize, Serialize}; use std::{ borrow::Cow, diff --git a/src/execution/source_indexer.rs b/src/execution/source_indexer.rs index 2d8f0ac7..37a01241 100644 --- a/src/execution/source_indexer.rs +++ b/src/execution/source_indexer.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use futures::future::Ready; use sqlx::PgPool; -use std::collections::{hash_map, HashMap}; +use std::collections::{HashMap, hash_map}; use tokio::{sync::Semaphore, task::JoinSet}; use super::{ diff --git a/src/lib_context.rs b/src/lib_context.rs index 05084934..c302f9be 100644 --- a/src/lib_context.rs +++ b/src/lib_context.rs @@ -110,7 +110,9 @@ impl LibContext { .ok_or_else(|| anyhow!("Database is required for this operation. Please set COCOINDEX_DATABASE_URL environment variable and call cocoindex.init() with database settings.")) } - pub fn require_all_setup_states(&self) -> Result<&RwLock>> { + pub fn require_all_setup_states( + &self, + ) -> Result<&RwLock>> { self.persistence_ctx .as_ref() .map(|ctx| &ctx.all_setup_states) @@ -149,7 +151,7 @@ pub fn create_lib_context(settings: settings::Settings) -> Result { // No database configured None }; - + Ok(LibContext { db_pools, persistence_ctx, diff --git a/src/llm/anthropic.rs b/src/llm/anthropic.rs index 1001f908..57d968aa 100644 --- a/src/llm/anthropic.rs +++ b/src/llm/anthropic.rs @@ -2,7 +2,7 @@ use crate::llm::{ LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, LlmSpec, OutputFormat, ToJsonSchemaOptions, }; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result, bail}; use async_trait::async_trait; use json5; use serde_json::Value; @@ -115,8 +115,12 @@ impl LlmGenerationClient for Client { Ok(value) => { println!("[Anthropic] Used permissive JSON5 parser for output"); serde_json::to_string(&value)? - }, - Err(e2) => return Err(anyhow::anyhow!(format!("No structured tool output or text found in response, and permissive JSON5 parsing also failed: {e}; {e2}"))) + } + Err(e2) => { + return Err(anyhow::anyhow!(format!( + "No structured tool output or text found in response, and permissive JSON5 parsing also failed: {e}; {e2}" + ))); + } } } } @@ -124,7 +128,7 @@ impl LlmGenerationClient for Client { _ => { return Err(anyhow::anyhow!( "No structured tool output or text found in response" - )) + )); } } }; diff --git a/src/llm/gemini.rs b/src/llm/gemini.rs index 11c34ebb..f48f839d 100644 --- a/src/llm/gemini.rs +++ b/src/llm/gemini.rs @@ -3,7 +3,7 @@ use crate::llm::{ LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, LlmSpec, OutputFormat, ToJsonSchemaOptions, }; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result, bail}; use async_trait::async_trait; use serde_json::Value; use urlencoding::encode; diff --git a/src/llm/openai.rs b/src/llm/openai.rs index 5675fc86..3dcc2d48 100644 --- a/src/llm/openai.rs +++ b/src/llm/openai.rs @@ -3,6 +3,7 @@ use crate::api_bail; use super::LlmGenerationClient; use anyhow::Result; use async_openai::{ + Client as OpenAIClient, config::OpenAIConfig, types::{ ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage, @@ -10,7 +11,6 @@ use async_openai::{ ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest, ResponseFormat, ResponseFormatJsonSchema, }, - Client as OpenAIClient, }; use async_trait::async_trait; diff --git a/src/ops/functions/extract_by_llm.rs b/src/ops/functions/extract_by_llm.rs index 060956c7..904bb234 100644 --- a/src/ops/functions/extract_by_llm.rs +++ b/src/ops/functions/extract_by_llm.rs @@ -1,7 +1,7 @@ use crate::prelude::*; use crate::llm::{ - new_llm_generation_client, LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat, + LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat, new_llm_generation_client, }; use crate::ops::sdk::*; use base::json_schema::build_json_schema; diff --git a/src/ops/sources/mod.rs b/src/ops/sources/mod.rs index 0e1341c7..557f44f7 100644 --- a/src/ops/sources/mod.rs +++ b/src/ops/sources/mod.rs @@ -1,3 +1,3 @@ +pub mod amazon_s3; pub mod google_drive; pub mod local_file; -pub mod amazon_s3; diff --git a/src/ops/targets/kuzu.rs b/src/ops/targets/kuzu.rs index fbe9ff3b..069f980e 100644 --- a/src/ops/targets/kuzu.rs +++ b/src/ops/targets/kuzu.rs @@ -194,8 +194,6 @@ impl setup::ResourceSetupStatus for GraphElementDataSetupStatus { fn change_type(&self) -> SetupChangeType { self.actions.change_type(false) } - - } fn append_drop_table( diff --git a/src/ops/targets/neo4j.rs b/src/ops/targets/neo4j.rs index a80f053e..90295ee2 100644 --- a/src/ops/targets/neo4j.rs +++ b/src/ops/targets/neo4j.rs @@ -853,8 +853,6 @@ impl ResourceSetupStatus for GraphElementDataSetupStatus { fn change_type(&self) -> SetupChangeType { self.change_type } - - } async fn clear_graph_element_data( diff --git a/src/ops/targets/postgres.rs b/src/ops/targets/postgres.rs index 6c19e0e1..747b60c0 100644 --- a/src/ops/targets/postgres.rs +++ b/src/ops/targets/postgres.rs @@ -525,8 +525,6 @@ impl setup::ResourceSetupStatus for SetupStatus { || !self.actions.indexes_to_delete.is_empty(); self.actions.table_action.change_type(has_other_update) } - - } impl SetupStatus { diff --git a/src/ops/targets/qdrant.rs b/src/ops/targets/qdrant.rs index 066c9c59..62e1a545 100644 --- a/src/ops/targets/qdrant.rs +++ b/src/ops/targets/qdrant.rs @@ -141,8 +141,6 @@ impl setup::ResourceSetupStatus for SetupStatus { (true, true) => setup::SetupChangeType::Update, } } - - } impl SetupStatus { diff --git a/src/py/convert.rs b/src/py/convert.rs index 4e94aec0..43afae95 100644 --- a/src/py/convert.rs +++ b/src/py/convert.rs @@ -335,12 +335,14 @@ mod tests { .expect("Failed to convert Rust value to Python object"); println!("Python object: {:?}", py_object); - let roundtripped_value = - value_from_py_object(value_type, &py_object) - .expect("Failed to convert Python object back to Rust value"); + let roundtripped_value = value_from_py_object(value_type, &py_object) + .expect("Failed to convert Python object back to Rust value"); println!("Roundtripped value: {:?}", roundtripped_value); - assert_eq!(original_value, &roundtripped_value, "Value mismatch after roundtrip"); + assert_eq!( + original_value, &roundtripped_value, + "Value mismatch after roundtrip" + ); }); } diff --git a/src/py/mod.rs b/src/py/mod.rs index 876d2da3..cf4eedc8 100644 --- a/src/py/mod.rs +++ b/src/py/mod.rs @@ -394,7 +394,11 @@ impl SetupStatus { fn sync_setup(py: Python<'_>) -> PyResult { let lib_context = get_lib_context().into_py_result()?; let flows = lib_context.flows.lock().unwrap(); - let all_setup_states = lib_context.require_all_setup_states().into_py_result()?.read().unwrap(); + let all_setup_states = lib_context + .require_all_setup_states() + .into_py_result()? + .read() + .unwrap(); py.allow_threads(|| { get_runtime() .block_on(async { @@ -408,7 +412,11 @@ fn sync_setup(py: Python<'_>) -> PyResult { #[pyfunction] fn drop_setup(py: Python<'_>, flow_names: Vec) -> PyResult { let lib_context = get_lib_context().into_py_result()?; - let all_setup_states = lib_context.require_all_setup_states().into_py_result()?.read().unwrap(); + let all_setup_states = lib_context + .require_all_setup_states() + .into_py_result()? + .read() + .unwrap(); py.allow_threads(|| { get_runtime() .block_on(async { @@ -422,7 +430,11 @@ fn drop_setup(py: Python<'_>, flow_names: Vec) -> PyResult #[pyfunction] fn flow_names_with_setup() -> PyResult> { let lib_context = get_lib_context().into_py_result()?; - let all_setup_states = lib_context.require_all_setup_states().into_py_result()?.read().unwrap(); + let all_setup_states = lib_context + .require_all_setup_states() + .into_py_result()? + .read() + .unwrap(); let flow_names = all_setup_states.flows.keys().cloned().collect(); Ok(flow_names) } diff --git a/src/settings.rs b/src/settings.rs index 79c3695c..c6368407 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -33,7 +33,7 @@ mod tests { }"#; let settings: Settings = serde_json::from_str(json).unwrap(); - + assert!(settings.database.is_some()); let db = settings.database.unwrap(); assert_eq!(db.url, "postgresql://localhost:5432/test"); @@ -49,7 +49,7 @@ mod tests { }"#; let settings: Settings = serde_json::from_str(json).unwrap(); - + assert!(settings.database.is_none()); assert_eq!(settings.app_namespace, "test_app"); } @@ -59,7 +59,7 @@ mod tests { let json = r#"{}"#; let settings: Settings = serde_json::from_str(json).unwrap(); - + assert!(settings.database.is_none()); assert_eq!(settings.app_namespace, ""); } @@ -73,7 +73,7 @@ mod tests { }"#; let settings: Settings = serde_json::from_str(json).unwrap(); - + assert!(settings.database.is_some()); let db = settings.database.unwrap(); assert_eq!(db.url, "postgresql://localhost:5432/test"); @@ -91,7 +91,7 @@ mod tests { }"#; let db_spec: DatabaseConnectionSpec = serde_json::from_str(json).unwrap(); - + assert_eq!(db_spec.url, "postgresql://localhost:5432/test"); assert_eq!(db_spec.user, Some("testuser".to_string())); assert_eq!(db_spec.password, Some("testpass".to_string())); diff --git a/src/setup/db_metadata.rs b/src/setup/db_metadata.rs index 063c54ca..17c1692e 100644 --- a/src/setup/db_metadata.rs +++ b/src/setup/db_metadata.rs @@ -348,8 +348,6 @@ impl ResourceSetupStatus for MetadataTableSetup { SetupChangeType::NoChange } } - - } impl MetadataTableSetup { diff --git a/src/utils/fingerprint.rs b/src/utils/fingerprint.rs index c6542872..19d844df 100644 --- a/src/utils/fingerprint.rs +++ b/src/utils/fingerprint.rs @@ -2,11 +2,11 @@ use anyhow::bail; use base64::prelude::*; use blake2::digest::typenum; use blake2::{Blake2b, Digest}; +use serde::Deserialize; use serde::ser::{ Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer, }; -use serde::Deserialize; #[derive(Debug)] pub struct FingerprinterError {