Skip to content

Commit

Permalink
Schema now:
Browse files Browse the repository at this point in the history
- is internally wrapped by an Arc, so cloning it aggressively is OK.
- is stored as a field in Index, redundant with the copy held in metas.
- is read-only; it needs to be built via a SchemaBuilder.
  • Loading branch information
fulmicoton committed Aug 23, 2016
1 parent 054405a commit e6200e8
Show file tree
Hide file tree
Showing 13 changed files with 302 additions and 204 deletions.
46 changes: 28 additions & 18 deletions src/core/index.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use Result;
use Error;
use std::path::{PathBuf, Path};
use schema::Schema;
use DocId;
Expand Down Expand Up @@ -44,13 +45,15 @@ impl fmt::Debug for Index {
/// Handle on an index: its metadata, the directory it is stored in, and its schema.
pub struct Index {
// Index metadata behind `Arc<RwLock<..>>`; `Clone` only clones the `Arc`,
// so clones of an `Index` share the same metas.
metas: Arc<RwLock<IndexMeta>>,
// Boxed `Directory`; duplicated through `box_clone()` when the index is cloned.
directory: Box<Directory>,
// Schema value handed out (cloned) by `schema()`. `IndexMeta` also carries a
// `schema` field, so this is a second copy kept directly on the index.
schema: Schema,
}

// `Clone` for `Index`: each field is duplicated through its own cloning call
// (`clone()` on the `Arc` holding the metas, `box_clone()` on the boxed
// directory, `clone()` on the schema).
impl Clone for Index {
fn clone(&self,) -> Index {
Index {
metas: self.metas.clone(),
// NOTE(review): the two `directory:` lines below look like the removed/added
// pair of a diff hunk (the second only adds the trailing comma so that
// `schema` can follow). Only one of them can exist in the compiled file — confirm.
directory: self.directory.box_clone()
directory: self.directory.box_clone(),
schema: self.schema.clone(),
}
}
}
Expand All @@ -59,6 +62,18 @@ lazy_static! {
static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
}


fn load_metas(directory: &Directory) -> Result<IndexMeta> {
let meta_file = try!(directory.open_read(&META_FILEPATH));
let meta_content = String::from_utf8_lossy(meta_file.as_slice());
let loaded_meta = try!(
json::decode(&meta_content)
.map_err(|e| Error::CorruptedFile(META_FILEPATH.clone(), Box::new(e)))
);
Ok(loaded_meta)
}


impl Index {

pub fn create_in_ram(schema: Schema) -> Index {
Expand All @@ -78,10 +93,14 @@ impl Index {

/// Opens the index stored in the directory at `directory_path`.
///
/// Errors from `MmapDirectory::open` and from loading the metas are
/// propagated through `try!`.
// NOTE(review): this body interleaves two versions of the function, apparently
// the removed and added sides of a diff hunk — confirm which one applies:
//  - older: wrap the directory in a `Box`, build the index through
//    `Index::from_directory` with an empty schema, then call `index.load_metas()`;
//  - newer: call the free function `load_metas`, clone the schema out of the
//    loaded metas, and build the `Index` value directly.
pub fn open(directory_path: &Path) -> Result<Index> {
let directory = try!(MmapDirectory::open(directory_path));
// Older variant (up to and including `Ok(index)`).
let directory_ptr = Box::new(directory);
let mut index = Index::from_directory(directory_ptr, Schema::new());
try!(index.load_metas()); //< TODO does the directory already exists?
Ok(index)
// Newer variant: the metas are loaded before the `Index` is built, so the
// `schema` field can be filled from them.
let metas = try!(load_metas(&directory)); //< TODO does the directory already exists?
let schema = metas.schema.clone();
let locked_metas = Arc::new(RwLock::new(metas));
Ok(Index {
directory: Box::new(directory),
metas: locked_metas,
schema: schema,
})
}

pub fn docstamp(&self,) -> Result<u64> {
Expand Down Expand Up @@ -110,17 +129,16 @@ impl Index {

/// Builds an `Index` over `directory`, with metas created from `schema` by
/// `IndexMeta::with_schema` and wrapped in `Arc<RwLock<..>>`.
pub fn from_directory(directory: Box<Directory>, schema: Schema) -> Index {
Index {
// NOTE(review): the two `metas:` lines below look like the removed/added pair
// of a diff hunk; only one can exist in the compiled file — confirm. The
// second passes a clone, which leaves `schema` available to be moved into
// the `schema` field further down.
metas: Arc::new(RwLock::new(IndexMeta::with_schema(schema))),
metas: Arc::new(RwLock::new(IndexMeta::with_schema(schema.clone()))),
directory: directory,
schema: schema,
}
}

/// Returns a clone of the index schema.
pub fn schema(&self,) -> Schema {
// NOTE(review): the two expressions below look like the removed/added pair of
// a diff hunk; only one can be the function's tail expression — confirm. The
// first takes the read lock on `metas` and clones `metas.schema`; the second
// clones the `schema` field of the index and takes no lock.
self.metas.read().unwrap().schema.clone()
self.schema.clone()
}



/// Marks the segment as published.
// TODO find a rusty way to hide that, while keeping
// it visible for IndexWriters.
Expand Down Expand Up @@ -179,14 +197,6 @@ impl Index {
/// Returns the segment of this index associated with a `SegmentId`
/// obtained from `SegmentId::new()`.
pub fn new_segment(&self,) -> Segment {
    let fresh_id = SegmentId::new();
    self.segment(fresh_id)
}

pub fn load_metas(&mut self,) -> Result<()> {
let meta_file = try!(self.directory.open_read(&META_FILEPATH));
let meta_content = String::from_utf8_lossy(meta_file.as_slice());
let loaded_meta: IndexMeta = json::decode(&meta_content).unwrap();
self.metas.write().unwrap().clone_from(&loaded_meta);
Ok(())
}

pub fn save_metas(&mut self,) -> Result<()> {
let mut w = Vec::new();
Expand Down
83 changes: 42 additions & 41 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,25 @@ mod tests {
use std::path::Path;
use directory::{Directory, WritePtr, RAMDirectory};
use schema::Document;
use schema::Schema;
use schema::{Schema, SchemaBuilder};
use schema::FAST;
use test::Bencher;
use test;
use rand::Rng;
use rand::SeedableRng;
use rand::XorShiftRng;

lazy_static! {
    // Schema used by the tests that reference `SCHEMA`: a single u32 field
    // named "field", registered with the `FAST` option.
    static ref SCHEMA: Schema = {
        let mut builder = SchemaBuilder::new();
        builder.add_u32_field("field", FAST);
        builder.build()
    };
    // Handle of the "field" entry, looked up in `SCHEMA`.
    static ref FIELD: Field = SCHEMA.get_field("field").unwrap();
}

#[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
Expand All @@ -55,15 +66,13 @@ mod tests {
fn test_intfastfield_small() {
let path = Path::new("test");
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
add_single_field_doc(&mut fast_field_writers, field, 13u32);
add_single_field_doc(&mut fast_field_writers, field, 14u32);
add_single_field_doc(&mut fast_field_writers, field, 2u32);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
add_single_field_doc(&mut fast_field_writers, *FIELD, 13u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 14u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 2u32);
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
Expand All @@ -73,7 +82,7 @@ mod tests {
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 13u32);
assert_eq!(fast_field_reader.get(1), 14u32);
assert_eq!(fast_field_reader.get(2), 2u32);
Expand All @@ -84,21 +93,19 @@ mod tests {
fn test_intfastfield_large() {
let path = Path::new("test");
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
add_single_field_doc(&mut fast_field_writers, field, 4u32);
add_single_field_doc(&mut fast_field_writers, field, 14_082_001u32);
add_single_field_doc(&mut fast_field_writers, field, 3_052u32);
add_single_field_doc(&mut fast_field_writers, field, 9002u32);
add_single_field_doc(&mut fast_field_writers, field, 15_001u32);
add_single_field_doc(&mut fast_field_writers, field, 777u32);
add_single_field_doc(&mut fast_field_writers, field, 1_002u32);
add_single_field_doc(&mut fast_field_writers, field, 1_501u32);
add_single_field_doc(&mut fast_field_writers, field, 215u32);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
add_single_field_doc(&mut fast_field_writers, *FIELD, 4u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 777u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 215u32);
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
Expand All @@ -108,7 +115,7 @@ mod tests {
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
assert_eq!(fast_field_reader.get(0), 4u32);
assert_eq!(fast_field_reader.get(1), 14_082_001u32);
assert_eq!(fast_field_reader.get(2), 3_052u32);
Expand All @@ -125,14 +132,14 @@ mod tests {
fn test_intfastfield_null_amplitude() {
let path = Path::new("test");
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);


{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for _ in 0..10_000 {
add_single_field_doc(&mut fast_field_writers, field, 100_000u32);
add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u32);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
Expand All @@ -143,7 +150,7 @@ mod tests {
}
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
for doc in 0..10_000 {
assert_eq!(fast_field_reader.get(doc), 100_000u32);
}
Expand All @@ -164,22 +171,20 @@ mod tests {
let permutation = generate_permutation();
let n = permutation.len();
let mut directory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
let mut a = 0u32;
for _ in 0..n {
assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]);
Expand Down Expand Up @@ -219,22 +224,20 @@ mod tests {
let path = Path::new("test");
let permutation = generate_permutation();
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
b.iter(|| {
let n = test::black_box(7000u32);
let mut a = 0u32;
Expand All @@ -251,22 +254,20 @@ mod tests {
let path = Path::new("test");
let permutation = generate_permutation();
let mut directory: RAMDirectory = RAMDirectory::create();
let mut schema = Schema::new();
let field = schema.add_u32_field("field", FAST);
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
let source = directory.open_read(&path).unwrap();
{
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
let fast_field_reader = fast_field_readers.get_field(field).unwrap();
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
b.iter(|| {
let n = test::black_box(1000u32);
let mut a = 0u32;
Expand Down
8 changes: 4 additions & 4 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,12 @@ mod tests {

#[test]
fn test_index_merger() {
let mut schema = schema::Schema::new();
let mut schema_builder = schema::SchemaBuilder::new();
let text_fieldtype = schema::TextOptions::new().set_indexing_options(TextIndexingOptions::TokenizedWithFreq).set_stored();
let text_field = schema.add_text_field("text", text_fieldtype);
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let score_fieldtype = schema::U32Options::new().set_fast();
let score_field = schema.add_u32_field("score", score_fieldtype);
let index = Index::create_in_ram(schema);
let score_field = schema_builder.add_u32_field("score", score_fieldtype);
let index = Index::create_in_ram(schema_builder.build());

{
{
Expand Down
6 changes: 3 additions & 3 deletions src/indexer/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,9 @@ mod tests {

#[test]
fn test_commit_and_rollback() {
let mut schema = schema::Schema::new();
let text_field = schema.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema);
let mut schema_builder = schema::SchemaBuilder::new();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());


let num_docs_containing = |s: &str| {
Expand Down

0 comments on commit e6200e8

Please sign in to comment.