issue/50 Switched to BooleanQueryScorer
fulmicoton committed Oct 31, 2016
1 parent 7421e0a commit 0f36c04
Showing 9 changed files with 104 additions and 99 deletions.
19 changes: 14 additions & 5 deletions src/query/boolean_query/boolean_scorer.rs
@@ -82,13 +82,20 @@ pub struct BooleanScorer<TScorer: Scorer> {

impl<TScorer: Scorer> BooleanScorer<TScorer> {

fn new(postings: Vec<TScorer>, filter: OccurFilter) -> BooleanScorer<TScorer> {
pub fn new(postings: Vec<TScorer>, filter: OccurFilter) -> BooleanScorer<TScorer> {
let num_postings = postings.len();
let query_coords: Vec<Score> = (0..num_postings + 1)
.map(|i| (i as Score) / (num_postings as Score))
.collect();
let score_combiner = ScoreCombiner::from(query_coords);
let heap_items: Vec<HeapItem> = postings
let mut non_empty_postings: Vec<TScorer> = Vec::new();
for mut posting in postings {
let non_empty = posting.advance();
if non_empty {
non_empty_postings.push(posting);
}
}
let heap_items: Vec<HeapItem> = non_empty_postings
.iter()
.map(|posting| posting.doc())
.enumerate()
@@ -100,7 +107,7 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
})
.collect();
BooleanScorer {
postings: postings,
postings: non_empty_postings,
queue: BinaryHeap::from(heap_items),
doc: 0u32,
score_combiner: score_combiner,
@@ -135,13 +142,15 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {

impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
fn advance(&mut self,) -> bool {
println!("advance");
loop {
self.score_combiner.clear();
let mut ord_bitset = 0u64;
match self.queue.peek() {
Some(heap_item) => {
let ord = heap_item.ord as usize;
self.doc = heap_item.doc;
println!("dopc {}", self.doc);
let score = self.postings[ord].score();
self.score_combiner.update(score);
ord_bitset |= 1 << ord;
@@ -170,14 +179,14 @@ impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
}

fn doc(&self,) -> DocId {
panic!("a");
self.doc
}
}

impl<TScorer: Scorer> Scorer for BooleanScorer<TScorer> {

fn score(&self,) -> f32 {
panic!("");
self.score_combiner.score()
}
}
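
The key change in this file is that BooleanScorer::new now advances every sub-scorer once and keeps only those that actually produced a document, so the merge heap is never seeded with an exhausted posting list. The standalone sketch below illustrates that pattern; the DocSet trait and VecDocSet type are simplified stand-ins, not tantivy's actual types.

// Minimal, self-contained sketch of the "drop empty postings up front" pattern.
// `DocSet` here is a simplified stand-in, not tantivy's actual trait.

trait DocSet {
    /// Moves to the next document; returns false once the set is exhausted.
    fn advance(&mut self) -> bool;
    fn doc(&self) -> u32;
}

struct VecDocSet {
    docs: Vec<u32>,
    cursor: usize,
}

impl DocSet for VecDocSet {
    fn advance(&mut self) -> bool {
        if self.cursor < self.docs.len() {
            self.cursor += 1;
            true
        } else {
            false
        }
    }
    fn doc(&self) -> u32 {
        self.docs[self.cursor - 1]
    }
}

/// Advance every posting once and keep only the ones that produced a document,
/// mirroring what the new `BooleanScorer::new` does before filling its heap.
fn keep_non_empty<T: DocSet>(postings: Vec<T>) -> Vec<T> {
    let mut non_empty = Vec::new();
    for mut posting in postings {
        if posting.advance() {
            non_empty.push(posting);
        }
    }
    non_empty
}

fn main() {
    let postings = vec![
        VecDocSet { docs: vec![1, 3, 7], cursor: 0 },
        VecDocSet { docs: vec![], cursor: 0 },
    ];
    let alive = keep_non_empty(postings);
    // The empty posting list was dropped; the survivor is positioned on its first doc.
    assert_eq!(alive.len(), 1);
    assert_eq!(alive[0].doc(), 1);
}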

6 changes: 3 additions & 3 deletions src/query/boolean_query/boolean_weight.rs
@@ -6,15 +6,15 @@ use Result;

pub struct BooleanWeight {
weights: Vec<Box<Weight>>,
filter: OccurFilter,
occur_filter: OccurFilter,
}

impl BooleanWeight {
pub fn new(weights: Vec<Box<Weight>>,
filter: OccurFilter) -> BooleanWeight {
occur_filter: OccurFilter) -> BooleanWeight {
BooleanWeight {
weights: weights,
filter: filter,
occur_filter: occur_filter,
}
}
}
1 change: 1 addition & 0 deletions src/query/boolean_query/mod.rs
@@ -5,3 +5,4 @@ mod boolean_weight;

pub use self::boolean_query::BooleanQuery;
pub use self::boolean_clause::BooleanClause;
pub use self::boolean_scorer::BooleanScorer;
116 changes: 44 additions & 72 deletions src/query/multi_term_query.rs
@@ -1,52 +1,43 @@
use Result;
use super::Weight;
use std::any::Any;
use Error;
use schema::Term;
use query::Query;
use core::searcher::Searcher;
use core::SegmentReader;
use query::TfIdf;
use query::Scorer;
use query::occur::Occur;
use postings::SegmentPostingsOption;
use query::DAATMultiTermScorer;

use query::occur_filter::OccurFilter;
use query::term_query::{TermQuery, TermWeight, TermScorer};
use query::boolean_query::BooleanScorer;


struct MultiTermWeight {
query: MultiTermQuery,
similitude: TfIdf,
weights: Vec<TermWeight>,
occur_filter: OccurFilter,
}


impl Weight for MultiTermWeight {

fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {


let mut postings_and_fieldnorms = Vec::with_capacity(self.query.num_terms());
{
for &(occur, ref term) in &self.query.occur_terms {
if let Some(postings) = reader.read_postings(term, SegmentPostingsOption::Freq) {
let field = term.field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
postings_and_fieldnorms.push((occur, postings, fieldnorm_reader));
}
let mut term_scorers: Vec<TermScorer<'a>> = Vec::new();
for term_weight in &self.weights {
let term_scorer_option = try!(term_weight.specialized_scorer(reader));
if let Some(term_scorer) = term_scorer_option {
term_scorers.push(term_scorer);
}
}
if postings_and_fieldnorms.len() > 64 {
// TODO putting the SHOULD at the end of the list should push the limit.
return Err(Error::InvalidArgument(String::from("Limit of 64 terms was exceeded.")));
}
Ok(box DAATMultiTermScorer::new(postings_and_fieldnorms, self.similitude.clone()))
Ok(box BooleanScorer::new(term_scorers, self.occur_filter.clone()))
}
}

/// Query involving one or more terms.

#[derive(Eq, Clone, PartialEq, Debug)]
pub struct MultiTermQuery {
occur_terms: Vec<(Occur, Term)>,
pub struct MultiTermQuery {
// TODO need a better Debug
occur_terms: Vec<(Occur, Term)>
}

impl MultiTermQuery {
@@ -55,73 +46,54 @@ impl MultiTermQuery {
pub fn num_terms(&self,) -> usize {
self.occur_terms.len()
}

}



impl Query for MultiTermQuery {

fn as_any(&self) -> &Any {
self
}

/// Builds the similitude object
fn similitude(&self, searcher: &Searcher) -> TfIdf {
let num_terms = self.num_terms();
let num_docs = searcher.num_docs() as f32;
let idfs: Vec<f32> = self.occur_terms
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
let term_queries: Vec<TermQuery> = self.occur_terms
.iter()
.map(|&(_, ref term)| searcher.doc_freq(term))
.map(|doc_freq| {
if doc_freq == 0 {
1.
}
else {
1. + ( num_docs / (doc_freq as f32) ).ln()
}
})
.map(|&(_, ref term)| TermQuery::from(term.clone()))
.collect();
let query_coords = (0..num_terms + 1)
.map(|i| (i as f32) / (num_terms as f32))
.collect();
// TODO have the actual terms in these names
let term_names = self.occur_terms
let occurs: Vec<Occur> = self.occur_terms
.iter()
.map(|&(_, ref term)| format!("{:?}", &term))
.map(|&(occur, _) | occur.clone())
.collect();
let occur_filter = OccurFilter::new(&occurs);
let weights = term_queries.iter()
.map(|term_query| term_query.specialized_weight(searcher))
.collect();
let mut tfidf = TfIdf::new(query_coords, idfs);
tfidf.set_term_names(term_names);
tfidf
Ok(
Box::new(MultiTermWeight {
weights: weights,
occur_filter: occur_filter,
})
)
}
}


impl From<Vec<(Occur, Term)>> for MultiTermQuery {
fn from(occur_terms: Vec<(Occur, Term)>) -> MultiTermQuery {
MultiTermQuery {
occur_terms: occur_terms,
occur_terms: occur_terms
}
}
}

impl From<Vec<Term>> for MultiTermQuery {
fn from(terms: Vec<Term>) -> MultiTermQuery {
let should_terms = terms
let should_terms: Vec<(Occur, Term)> = terms
.into_iter()
.map(|term| (Occur::Should, term))
.collect();
MultiTermQuery {
occur_terms: should_terms,
}
}
}

impl Query for MultiTermQuery {

fn as_any(&self) -> &Any {
self
MultiTermQuery::from(should_terms)
}

fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
let similitude = self.similitude(searcher);
Ok(
Box::new(MultiTermWeight {
query: self.clone(),
similitude: similitude
})
)
}

}

}
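
Taken together, the two From conversions above replace the old similitude-based setup: a plain list of terms is mapped to Should clauses and then funneled through the (Occur, Term) constructor. The sketch below reassembles the two impls as they read after this change, using simplified stand-in Term and Occur types so it compiles on its own; it is an illustration of the pattern, not tantivy's source.

// Standalone sketch of the MultiTermQuery constructors after this diff.
// `Occur`, `Term`, and `MultiTermQuery` are simplified stand-ins.

#[derive(Clone, Copy, Debug, PartialEq)]
enum Occur {
    Must,
    Should,
    MustNot,
}

#[derive(Clone, Debug, PartialEq)]
struct Term(String);

struct MultiTermQuery {
    occur_terms: Vec<(Occur, Term)>,
}

impl From<Vec<(Occur, Term)>> for MultiTermQuery {
    fn from(occur_terms: Vec<(Occur, Term)>) -> MultiTermQuery {
        MultiTermQuery { occur_terms }
    }
}

impl From<Vec<Term>> for MultiTermQuery {
    fn from(terms: Vec<Term>) -> MultiTermQuery {
        // A bare term list defaults every clause to Should, then reuses the
        // (Occur, Term) constructor, exactly as in the diff above.
        let should_terms: Vec<(Occur, Term)> = terms
            .into_iter()
            .map(|term| (Occur::Should, term))
            .collect();
        MultiTermQuery::from(should_terms)
    }
}

fn main() {
    // Plain term list: every clause defaults to Should.
    let any_of = MultiTermQuery::from(vec![Term("rust".into()), Term("search".into())]);
    assert!(any_of.occur_terms.iter().all(|&(occur, _)| occur == Occur::Should));

    // Explicit occurrences are passed through unchanged.
    let strict = MultiTermQuery::from(vec![
        (Occur::Must, Term("query".into())),
        (Occur::MustNot, Term("deprecated".into())),
    ]);
    assert_eq!(strict.occur_terms[0].0, Occur::Must);
}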
1 change: 1 addition & 0 deletions src/query/occur_filter.rs
@@ -1,5 +1,6 @@
use query::Occur;

#[derive(Clone)]
pub struct OccurFilter {
and_mask: u64,
result: u64,
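
OccurFilter gains #[derive(Clone)] here because MultiTermWeight now stores a copy and passes another clone into BooleanScorer::new. The and_mask and result fields visible in this hunk suggest a pair of 64-bit masks over clause ordinals, consistent with the 64-term limit mentioned in multi_term_query.rs. The sketch below is an illustrative reimplementation under that assumption, not tantivy's actual OccurFilter code.

// Standalone sketch of a bitmask-based occur filter over at most 64 clauses.
// Clause i sets bit i of `ord_bitset` when its scorer matched the current doc.

#[derive(Clone, Copy)]
enum Occur {
    Must,     // clause must match
    Should,   // clause may match
    MustNot,  // clause must not match
}

#[derive(Clone)]
struct OccurFilter {
    and_mask: u64, // bits of clauses whose presence is constrained (Must or MustNot)
    result: u64,   // expected value of those bits (1 for Must, 0 for MustNot)
}

impl OccurFilter {
    fn new(occurs: &[Occur]) -> OccurFilter {
        let mut and_mask = 0u64;
        let mut result = 0u64;
        for (ord, &occur) in occurs.iter().enumerate() {
            let bit = 1u64 << ord;
            match occur {
                Occur::Must => {
                    and_mask |= bit;
                    result |= bit;
                }
                Occur::MustNot => {
                    and_mask |= bit;
                }
                Occur::Should => {}
            }
        }
        OccurFilter { and_mask, result }
    }

    /// Returns true if the set of matching clauses satisfies all Must / MustNot constraints.
    fn accept(&self, ord_bitset: u64) -> bool {
        (ord_bitset & self.and_mask) == self.result
    }
}

fn main() {
    // Clause 0: Must, clause 1: Should, clause 2: MustNot.
    let filter = OccurFilter::new(&[Occur::Must, Occur::Should, Occur::MustNot]);
    assert!(filter.accept(0b001));  // only the Must clause matched
    assert!(filter.accept(0b011));  // Must and Should matched
    assert!(!filter.accept(0b010)); // Must clause missing
    assert!(!filter.accept(0b101)); // MustNot clause matched
}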
1 change: 0 additions & 1 deletion src/query/query.rs
@@ -82,5 +82,4 @@ pub trait Query: fmt::Debug {
}
Ok(timer_tree)
}

}
4 changes: 3 additions & 1 deletion src/query/term_query/mod.rs
@@ -2,4 +2,6 @@ mod term_query;
mod term_weight;
mod term_scorer;

pub use self::term_query::TermQuery;
pub use self::term_query::TermQuery;
pub use self::term_weight::TermWeight;
pub use self::term_scorer::TermScorer;
17 changes: 12 additions & 5 deletions src/query/term_query/term_query.rs
@@ -11,6 +11,16 @@ pub struct TermQuery {
term: Term,
}

impl TermQuery {
pub fn specialized_weight(&self, searcher: &Searcher) -> TermWeight {
let doc_freq = searcher.doc_freq(&self.term);
TermWeight {
doc_freq: doc_freq,
term: self.term.clone()
}
}
}

impl From<Term> for TermQuery {
fn from(term: Term) -> TermQuery {
TermQuery {
@@ -25,10 +35,7 @@ impl Query for TermQuery {
}

fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
let doc_freq = searcher.doc_freq(&self.term);
Ok(box TermWeight {
doc_freq: doc_freq,
term: self.term.clone()
})
Ok(box self.specialized_weight(searcher))
}

}
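
The new specialized_weight method gives callers such as MultiTermQuery a concrete TermWeight, while Query::weight simply boxes the same value. A minimal standalone sketch of that "specialized constructor plus thin trait-object wrapper" shape follows; the types are simplified stand-ins, and doc_freq is passed as a plain argument here instead of being looked up through a Searcher.

// Sketch of the pattern TermQuery::weight now follows: a concrete constructor
// for internal callers, and a trait-object wrapper for the generic entry point.

trait Weight {
    fn name(&self) -> String;
}

struct TermWeight {
    doc_freq: u32,
    term: String,
}

impl Weight for TermWeight {
    fn name(&self) -> String {
        format!("TermWeight({}, doc_freq={})", self.term, self.doc_freq)
    }
}

struct TermQuery {
    term: String,
}

impl TermQuery {
    /// Concrete return type: a multi-term query can use TermWeight directly.
    fn specialized_weight(&self, doc_freq: u32) -> TermWeight {
        TermWeight { doc_freq, term: self.term.clone() }
    }

    /// Trait-object return type: the generic path just boxes the specialized value.
    fn weight(&self, doc_freq: u32) -> Box<dyn Weight> {
        Box::new(self.specialized_weight(doc_freq))
    }
}

fn main() {
    let query = TermQuery { term: "scorer".to_string() };
    let concrete: TermWeight = query.specialized_weight(42);
    let boxed: Box<dyn Weight> = query.weight(42);
    assert_eq!(concrete.name(), boxed.name());
}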
38 changes: 26 additions & 12 deletions src/query/term_query/term_weight.rs
@@ -1,5 +1,4 @@
use Term;
use Score;
use query::Weight;
use core::SegmentReader;
use query::Scorer;
@@ -17,19 +16,34 @@ pub struct TermWeight {
impl Weight for TermWeight {

fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
let specialized_scorer_option = try!(self.specialized_scorer(reader));
match specialized_scorer_option {
Some(term_scorer) => {
Ok(box term_scorer)
}
None => {
Ok(box EmptyScorer)
}
}
}

}

impl TermWeight {

pub fn specialized_scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Option<TermScorer<'a>>> {
let field = self.term.field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
if let Some(segment_postings) = reader.read_postings(&self.term, SegmentPostingsOption::Freq) {
let scorer: TermScorer = TermScorer {
idf: 1f32 / (self.doc_freq as f32),
fieldnorm_reader: fieldnorm_reader,
segment_postings: segment_postings,
};
Ok(box scorer)
}
else {
Ok(box EmptyScorer)
}
Ok(
reader.read_postings(&self.term, SegmentPostingsOption::Freq)
.map(|segment_postings|
TermScorer {
idf: 1f32 / (self.doc_freq as f32),
fieldnorm_reader: fieldnorm_reader,
segment_postings: segment_postings,
}
)
)
}

}
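
TermWeight now splits scoring into two layers: specialized_scorer returns Option<TermScorer>, with None when the term has no postings in the segment, and the Weight::scorer trait method wraps that Option, falling back to an EmptyScorer. The standalone sketch below shows the wrapping pattern with simplified stand-in types; the idf formula 1 / doc_freq mirrors the one visible in this hunk, but everything else is illustrative.

// Standalone sketch of the Option-to-fallback wrapping used by TermWeight::scorer.

trait Scorer {
    fn score(&self) -> f32;
}

struct TermScorer {
    idf: f32,
}

impl Scorer for TermScorer {
    fn score(&self) -> f32 {
        self.idf
    }
}

/// Scorer over an empty document set; always usable, never matches anything.
struct EmptyScorer;

impl Scorer for EmptyScorer {
    fn score(&self) -> f32 {
        0.0
    }
}

fn specialized_scorer(doc_freq: u32) -> Option<TermScorer> {
    if doc_freq == 0 {
        None // term absent from the segment: no postings to read
    } else {
        Some(TermScorer { idf: 1.0 / doc_freq as f32 })
    }
}

fn scorer(doc_freq: u32) -> Box<dyn Scorer> {
    // Some(concrete scorer) is boxed as-is; None degrades to an EmptyScorer.
    match specialized_scorer(doc_freq) {
        Some(term_scorer) => Box::new(term_scorer),
        None => Box::new(EmptyScorer),
    }
}

fn main() {
    assert_eq!(scorer(4).score(), 0.25);
    assert_eq!(scorer(0).score(), 0.0);
}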
