Skip to content

Commit

Permalink
[ENH] Blockfile provider (chroma-core#1819)
Browse files Browse the repository at this point in the history
## Description of changes

This PR intermingles one substantial change with two smaller ones,
apologies.

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
- Adds sizing methods for Keys and Values and a Clone method that is
aware of the idiosyncrasies of Arrow
 - New functionality
- Adds a blockfile provider, which presents an open/create API for
managing blockfiles. All callers of blockfile:new() were migrated to
this way of managing blockfiles.

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `cargo test`

## Documentation Changes
None required.
  • Loading branch information
HammadB authored and atroyn committed Apr 3, 2024
1 parent 8bd0462 commit fbfae23
Show file tree
Hide file tree
Showing 8 changed files with 694 additions and 336 deletions.
376 changes: 193 additions & 183 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rust/worker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ tantivy = "0.21.1"

[build-dependencies]
tonic-build = "0.10"
cc = "1.0"
cc = "1.0"
6 changes: 4 additions & 2 deletions rust/worker/src/blockstore/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod positional_posting_list_value;
mod types;

pub use positional_posting_list_value::*;
pub use types::*;
pub(crate) mod provider;

pub(crate) use positional_posting_list_value::*;
pub(crate) use types::*;
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl PositionalPostingListBuilder {
self.positions.insert(doc_id, positions);
Ok(())
}

pub(crate) fn contains_doc_id(&self, doc_id: i32) -> bool {
self.doc_ids.contains(&doc_id)
}
Expand Down
98 changes: 98 additions & 0 deletions rust/worker/src/blockstore/provider.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use super::types::Blockfile;
use super::types::{HashMapBlockfile, KeyType, ValueType};
use crate::errors::ChromaError;
use parking_lot::RwLock;
use std::collections::HashMap;
use std::sync::Arc;
use thiserror::Error;

// =================== Interfaces ===================

/// A trait for opening and creating blockfiles
/// # Methods
/// - new: Create a new instance of the blockfile provider. A blockfile provider returns a Box<dyn Blockfile> of a given type.
/// Currently, we support HashMap and Arrow-backed blockfiles.
/// - open: Open a blockfile at the given path, returning a Box<dyn Blockfile> and error if it does not exist
/// - create: Create a new blockfile at the given path, returning a Box<dyn Blockfile> and error if it already exists
/// # Example
/// ```ignore (TODO: This example is not runnable from outside the crate it seems. Fix this. Ignore for now.)
/// use crate::blockstore::provider::HashMapBlockfileProvider;
/// use crate::blockstore::types::{KeyType, ValueType};
/// let mut provider = HashMapBlockfileProvider::new();
/// let blockfile = provider.create("test", KeyType::String, ValueType::Int32Array);
/// ```
pub(crate) trait BlockfileProvider {
fn new() -> Self;
fn open(&self, path: &str) -> Result<Box<dyn Blockfile>, Box<OpenError>>;
fn create(
&mut self,
path: &str,
key_type: KeyType,
value_type: ValueType,
) -> Result<Box<dyn Blockfile>, Box<CreateError>>;
}

/// A BlockFileProvider that creates HashMapBlockfiles (in-memory blockfiles used for testing).
/// It bookkeeps the blockfiles locally.
/// # Note
/// This is not intended for production use.
pub(crate) struct HashMapBlockfileProvider {
files: Arc<RwLock<HashMap<String, Box<dyn Blockfile>>>>,
}

impl BlockfileProvider for HashMapBlockfileProvider {
fn new() -> Self {
Self {
files: Arc::new(RwLock::new(HashMap::new())),
}
}

fn open(&self, path: &str) -> Result<Box<dyn Blockfile>, Box<OpenError>> {
match self.files.read().get(path) {
Some(file) => Ok(file.clone()),
None => Err(Box::new(OpenError::NotFound)),
}
}

fn create(
&mut self,
path: &str,
key_type: KeyType,
value_type: ValueType,
) -> Result<Box<dyn Blockfile>, Box<CreateError>> {
let mut files = self.files.write();
match files.get(path) {
Some(_) => Err(Box::new(CreateError::AlreadyExists)),
None => {
let blockfile = Box::new(HashMapBlockfile::new());
files.insert(path.to_string(), blockfile);
Ok(files.get(path).unwrap().clone())
}
}
}
}

// =================== Errors ===================
#[derive(Error, Debug)]
pub(crate) enum OpenError {
#[error("Blockfile not found")]
NotFound,
}

impl ChromaError for OpenError {
fn code(&self) -> crate::errors::ErrorCodes {
crate::errors::ErrorCodes::NotFound
}
}

#[derive(Error, Debug)]
pub(crate) enum CreateError {
#[error("Blockfile already exists")]
AlreadyExists,
}

impl ChromaError for CreateError {
fn code(&self) -> crate::errors::ErrorCodes {
crate::errors::ErrorCodes::AlreadyExists
}
}
Loading

0 comments on commit fbfae23

Please sign in to comment.