From b8b4b8eff29ada478c0ae029c0d6673741674a70 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Thu, 10 Sep 2015 18:37:38 -0700 Subject: [PATCH 01/13] Tune and Test SeriesMatcher Go binding. --- go/build.go | 4 ++++ go/lucy/search.go | 12 ++++++++++++ go/lucy/search_test.go | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/go/build.go b/go/build.go index 7596f7b7c..10ec5cf90 100644 --- a/go/build.go +++ b/go/build.go @@ -180,6 +180,10 @@ func specClasses(parcel *cfc.Parcel) { orScorerBinding.SetSuppressCtor(true) orScorerBinding.Register() + seriesMatcherBinding := cfc.NewGoClass(parcel, "Lucy::Search::SeriesMatcher") + seriesMatcherBinding.SetSuppressCtor(true) + seriesMatcherBinding.Register() + bitVecBinding := cfc.NewGoClass(parcel, "Lucy::Object::BitVector") bitVecBinding.SpecMethod("To_Array", "ToArray() []bool") bitVecBinding.Register() diff --git a/go/lucy/search.go b/go/lucy/search.go index 2785d5178..18ac6bc13 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -26,6 +26,7 @@ package lucy #include "Lucy/Search/ORQuery.h" #include "Lucy/Search/ANDMatcher.h" #include "Lucy/Search/ORMatcher.h" +#include "Lucy/Search/SeriesMatcher.h" #include "Lucy/Document/HitDoc.h" #include "LucyX/Search/MockMatcher.h" #include "Clownfish/Blob.h" @@ -226,6 +227,17 @@ func NewORScorer(children []Matcher, sim Similarity) ORScorer { return WRAPORScorer(unsafe.Pointer(cfObj)) } +func NewSeriesMatcher(matchers []Matcher, offsets []int32) SeriesMatcher { + vec := clownfish.NewVector(len(matchers)) + for _, child := range matchers { + vec.Push(child) + } + i32arr := NewI32Array(offsets) + cfObj := C.lucy_SeriesMatcher_new(((*C.cfish_Vector)(clownfish.Unwrap(vec, "matchers"))), + ((*C.lucy_I32Array)(clownfish.Unwrap(i32arr, "offsets")))) + return WRAPSeriesMatcher(unsafe.Pointer(cfObj)) +} + func newMockMatcher(docIDs []int32, scores []float32) MockMatcher { docIDsconv := NewI32Array(docIDs) docIDsCF := (*C.lucy_I32Array)(unsafe.Pointer(docIDsconv.TOPTR())) diff 
--git a/go/lucy/search_test.go b/go/lucy/search_test.go index 492ab1f13..83fdc719c 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -319,3 +319,11 @@ func TestNOTMatcherBasics(t *testing.T) { matcher := NewANDMatcher([]Matcher{a, notB}, nil) checkMatcher(t, matcher, false) } + +func TestSeriesMatcherBasics(t *testing.T) { + a := newMockMatcher([]int32{42}, nil) + b := newMockMatcher([]int32{1, 4}, nil) + c := newMockMatcher([]int32{20}, nil) + matcher := NewSeriesMatcher([]Matcher{a, b, c}, []int32{0, 42, 80}) + checkMatcher(t, matcher, false) +} From d0388c8c705dba826ab828c35d0690498da30dd1 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Thu, 10 Sep 2015 19:37:45 -0700 Subject: [PATCH 02/13] Tune and test TopDocs Go binding. Use slice of MatchDocs rather than Vector. --- go/build.go | 6 ++++++ go/lucy/search.go | 33 +++++++++++++++++++++++++++++++++ go/lucy/search_test.go | 27 +++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/go/build.go b/go/build.go index 10ec5cf90..f18252e0b 100644 --- a/go/build.go +++ b/go/build.go @@ -191,6 +191,12 @@ func specClasses(parcel *cfc.Parcel) { mockMatcherBinding := cfc.NewGoClass(parcel, "LucyX::Search::MockMatcher") mockMatcherBinding.SetSuppressCtor(true) mockMatcherBinding.Register() + + topDocsBinding := cfc.NewGoClass(parcel, "Lucy::Search::TopDocs") + topDocsBinding.SetSuppressCtor(true) + topDocsBinding.SpecMethod("Set_Match_Docs", "SetMatchDocs([]MatchDoc)") + topDocsBinding.SpecMethod("Get_Match_Docs", "GetMatchDocs() []MatchDoc") + topDocsBinding.Register() } func build() { diff --git a/go/lucy/search.go b/go/lucy/search.go index 18ac6bc13..486507264 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -27,11 +27,13 @@ package lucy #include "Lucy/Search/ANDMatcher.h" #include "Lucy/Search/ORMatcher.h" #include "Lucy/Search/SeriesMatcher.h" +#include "Lucy/Search/TopDocs.h" #include "Lucy/Document/HitDoc.h" #include "LucyX/Search/MockMatcher.h" #include 
"Clownfish/Blob.h" #include "Clownfish/Hash.h" #include "Clownfish/HashIterator.h" +#include "Clownfish/Vector.h" static inline void float32_set(float *floats, size_t i, float value) { @@ -175,6 +177,37 @@ func (obj *HitsIMP) Error() error { return obj.err } +func NewTopDocs(matchDocs []MatchDoc, totalHits uint32) TopDocs { + vec := clownfish.NewVector(len(matchDocs)) + for _, matchDoc := range matchDocs { + vec.Push(matchDoc) + } + cfObj := C.lucy_TopDocs_new(((*C.cfish_Vector)(clownfish.Unwrap(vec, "matchDocs"))), + C.uint32_t(totalHits)) + return WRAPTopDocs(unsafe.Pointer(cfObj)) +} + +func (td *TopDocsIMP) SetMatchDocs(matchDocs []MatchDoc) { + self := (*C.lucy_TopDocs)(clownfish.Unwrap(td, "td")) + vec := clownfish.NewVector(len(matchDocs)) + for _, matchDoc := range matchDocs { + vec.Push(matchDoc) + } + C.LUCY_TopDocs_Set_Match_Docs(self, (*C.cfish_Vector)(clownfish.Unwrap(vec, "matchDocs"))) +} + +func (td *TopDocsIMP) GetMatchDocs() []MatchDoc { + self := (*C.lucy_TopDocs)(clownfish.Unwrap(td, "td")) + vec := C.LUCY_TopDocs_Get_Match_Docs(self) + length := int(C.CFISH_Vec_Get_Size(vec)) + slice := make([]MatchDoc, length) + for i := 0; i < length; i++ { + elem := C.cfish_incref(unsafe.Pointer(C.CFISH_Vec_Fetch(vec, C.size_t(i)))) + slice[i] = WRAPMatchDoc(unsafe.Pointer(elem)) + } + return slice +} + func NewANDQuery(children []Query) ANDQuery { vec := clownfish.NewVector(len(children)) for _, child := range children { diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 83fdc719c..4225799dd 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -327,3 +327,30 @@ func TestSeriesMatcherBasics(t *testing.T) { matcher := NewSeriesMatcher([]Matcher{a, b, c}, []int32{0, 42, 80}) checkMatcher(t, matcher, false) } + +func TestTopDocsBasics(t *testing.T) { + matchDocs := []MatchDoc{ + NewMatchDoc(42, 2.0, nil), + NewMatchDoc(100, 3.0, nil), + } + td := NewTopDocs(matchDocs, 50) + td.SetTotalHits(20) + if totalHits := td.GetTotalHits(); 
totalHits != 20 { + t.Errorf("Expected 20 total hits, got %d", totalHits) + } + td.SetMatchDocs(matchDocs) + fetched := td.GetMatchDocs() + if docID := fetched[0].GetDocID(); docID != 42 { + t.Errorf("Set/Get MatchDocs expected 42, got %d", docID) + } + + folder := NewRAMFolder("") + outstream := folder.OpenOut("foo") + td.Serialize(outstream) + outstream.Close() + inStream := folder.OpenIn("foo") + dupe := clownfish.GetClass(td).MakeObj().(TopDocs).Deserialize(inStream) + if dupe.GetTotalHits() != td.GetTotalHits() { + t.Errorf("Failed round-trip serializetion of TopDocs") + } +} From 295662d3570b0aa47ddebe69c12322e72d7cd107 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Thu, 10 Sep 2015 20:29:09 -0700 Subject: [PATCH 03/13] Custom Go bindings for SortRule, SortSpec. Custom constructors. Have SortSpec handle `rules` as a slice of SortRules rather than a Vector. --- go/build.go | 5 +++++ go/lucy/search.go | 39 +++++++++++++++++++++++++++++++++++++++ go/lucy/search_test.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/go/build.go b/go/build.go index f18252e0b..11615aa7a 100644 --- a/go/build.go +++ b/go/build.go @@ -197,6 +197,11 @@ func specClasses(parcel *cfc.Parcel) { topDocsBinding.SpecMethod("Set_Match_Docs", "SetMatchDocs([]MatchDoc)") topDocsBinding.SpecMethod("Get_Match_Docs", "GetMatchDocs() []MatchDoc") topDocsBinding.Register() + + sortSpecBinding := cfc.NewGoClass(parcel, "Lucy::Search::SortSpec") + sortSpecBinding.SetSuppressCtor(true) + sortSpecBinding.SpecMethod("Get_Rules", "GetRules() []SortRule") + sortSpecBinding.Register() } func build() { diff --git a/go/lucy/search.go b/go/lucy/search.go index 486507264..27232656f 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -27,6 +27,8 @@ package lucy #include "Lucy/Search/ANDMatcher.h" #include "Lucy/Search/ORMatcher.h" #include "Lucy/Search/SeriesMatcher.h" +#include "Lucy/Search/SortRule.h" +#include "Lucy/Search/SortSpec.h" #include 
"Lucy/Search/TopDocs.h" #include "Lucy/Document/HitDoc.h" #include "LucyX/Search/MockMatcher.h" @@ -177,6 +179,43 @@ func (obj *HitsIMP) Error() error { return obj.err } +func NewFieldSortRule(field string, reverse bool) SortRule { + fieldC := clownfish.GoToClownfish(field, unsafe.Pointer(C.CFISH_STRING), false) + cfObj := C.lucy_SortRule_new(C.lucy_SortRule_FIELD, (*C.cfish_String)(fieldC), C.bool(reverse)) + return WRAPSortRule(unsafe.Pointer(cfObj)) +} + +func NewDocIDSortRule(reverse bool) SortRule { + cfObj := C.lucy_SortRule_new(C.lucy_SortRule_DOC_ID, nil, C.bool(reverse)) + return WRAPSortRule(unsafe.Pointer(cfObj)) +} + +func NewScoreSortRule(reverse bool) SortRule { + cfObj := C.lucy_SortRule_new(C.lucy_SortRule_SCORE, nil, C.bool(reverse)) + return WRAPSortRule(unsafe.Pointer(cfObj)) +} + +func NewSortSpec(rules []SortRule) SortSpec { + vec := clownfish.NewVector(len(rules)) + for _, rule := range rules { + vec.Push(rule) + } + cfObj := C.lucy_SortSpec_new((*C.cfish_Vector)(clownfish.Unwrap(vec, "rules"))) + return WRAPSortSpec(unsafe.Pointer(cfObj)) +} + +func (spec *SortSpecIMP) GetRules() []SortRule { + self := (*C.lucy_SortSpec)(clownfish.Unwrap(spec, "spec")) + vec := C.LUCY_SortSpec_Get_Rules(self) + length := int(C.CFISH_Vec_Get_Size(vec)) + slice := make([]SortRule, length) + for i := 0; i < length; i++ { + elem := C.cfish_incref(unsafe.Pointer(C.CFISH_Vec_Fetch(vec, C.size_t(i)))) + slice[i] = WRAPSortRule(unsafe.Pointer(elem)) + } + return slice +} + func NewTopDocs(matchDocs []MatchDoc, totalHits uint32) TopDocs { vec := clownfish.NewVector(len(matchDocs)) for _, matchDoc := range matchDocs { diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 4225799dd..6e40670e1 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -354,3 +354,45 @@ func TestTopDocsBasics(t *testing.T) { t.Errorf("Failed round-trip serializetion of TopDocs") } } + +type simpleTestDoc struct { + Content string +} + +func TestSortSpecBasics(t 
*testing.T) { + folder := NewRAMFolder("") + schema := NewSchema() + fieldType := NewFullTextType(NewStandardTokenizer()) + fieldType.SetSortable(true) + schema.SpecField("content", fieldType) + args := &OpenIndexerArgs{Index: folder, Schema: schema, Create: true} + indexer, err := OpenIndexer(args) + if err != nil { + panic(err) + } + for _, fieldVal := range []string{"a b", "a a"} { + indexer.AddDoc(&simpleTestDoc{fieldVal}) + } + indexer.Commit() + + rules := []SortRule{ + NewFieldSortRule("content", false), + } + sortSpec := NewSortSpec(rules) + searcher, _ := OpenIndexSearcher(folder) + hits, _ := searcher.Hits("a", 0, 1, sortSpec) + var doc simpleTestDoc + hits.Next(&doc) + if doc.Content != "a a" { + t.Error("Sort by field value") + } + + outstream := folder.OpenOut("foo") + sortSpec.Serialize(outstream) + outstream.Close() + inStream := folder.OpenIn("foo") + dupe := clownfish.GetClass(sortSpec).MakeObj().(SortSpec).Deserialize(inStream) + if len(dupe.GetRules()) != len(rules) { + t.Errorf("Failed round-trip serializetion of SortSpec") + } +} From 8c1e020e239e03446196bbd810a7f5c0c9f424c4 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Thu, 10 Sep 2015 20:52:41 -0700 Subject: [PATCH 04/13] Test HitQueue Go bindings. 
--- go/lucy/search_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 6e40670e1..1f34df9f4 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -396,3 +396,31 @@ func TestSortSpecBasics(t *testing.T) { t.Errorf("Failed round-trip serializetion of SortSpec") } } + +func TestHitQueueBasics(t *testing.T) { + hitQ := NewHitQueue(nil, nil, 1) + fortyTwo := NewMatchDoc(42, 1.0, nil) + fortyThree := NewMatchDoc(43, 1.0, nil) + if !hitQ.LessThan(fortyThree, fortyTwo) { + t.Error("LessThan") + } + if !hitQ.Insert(fortyTwo) { + t.Error("Insert") + } + if hitQ.GetSize() != 1 { + t.Error("GetSize") + } + if bumped := hitQ.Jostle(fortyThree); bumped.(MatchDoc).GetDocID() != 43 { + t.Error("Jostle") + } + if peeked := hitQ.Peek(); peeked.(MatchDoc).GetDocID() != 42 { + t.Error("Peek") + } + if popped := hitQ.Pop(); popped.(MatchDoc).GetDocID() != 42 { + t.Error("Pop") + } + hitQ.Insert(fortyTwo) + if got := hitQ.PopAll(); got[0].(MatchDoc).GetDocID() != 42 { + t.Error("PopAll") + } +} From d1f3764ad447867618554af0c7e453716bfb47ee Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Thu, 10 Sep 2015 21:01:07 -0700 Subject: [PATCH 05/13] Test Span Go bindings. 
--- go/lucy/search_test.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 1f34df9f4..b082c51b1 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -424,3 +424,29 @@ func TestHitQueueBasics(t *testing.T) { t.Error("PopAll") } } + +func TestSpanBasics(t *testing.T) { + a := NewSpan(42, 1, 0.0) + b := NewSpan(42, 2, 0.0) + if !a.Equals(a) { + t.Error("Equals self") + } + if a.Equals(b) { + t.Error("Equals should return false for non-equal spans") + } + if got := a.CompareTo(b); got >= 0 { + t.Errorf("CompareTo returned %d", got) + } + a.SetOffset(21) + if got := a.GetOffset(); got != 21 { + t.Errorf("Set/Get offset: %d", got) + } + a.SetLength(10) + if got := a.GetLength(); got != 10 { + t.Errorf("Set/Get length: %d", got) + } + a.SetWeight(1.5) + if got := a.GetWeight(); got != 1.5 { + t.Errorf("Set/Get weight: %f", got) + } +} From be4b75da3b26757534fc82e00e061b983b38a8d6 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 11 Sep 2015 17:18:00 -0700 Subject: [PATCH 06/13] Test Hits Go bindings. 
--- go/lucy/search_test.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index b082c51b1..02881d2f4 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -359,6 +359,29 @@ type simpleTestDoc struct { Content string } +func TestHitsBasics(t *testing.T) { + index := createTestIndex("a", "b") + searcher, _ := OpenIndexSearcher(index) + topDocs := searcher.TopDocs(NewTermQuery("content", "a"), 10, nil) + hits := NewHits(searcher, topDocs, 0) + if got := hits.TotalHits(); got != topDocs.GetTotalHits() { + t.Errorf("TotalHits is off: %d", got) + } + var doc simpleTestDoc + if !hits.Next(&doc) { + t.Error("Hits.Next") + } + if doc.Content != "a" { + t.Errorf("Bad doc content after Next: %s", doc.Content) + } + if hits.Next(&doc) { + t.Error("Hits iterator should be exhausted"); + } + if err := hits.Error(); err != nil { + t.Error("Hits.Error() not nil: %v", err) + } +} + func TestSortSpecBasics(t *testing.T) { folder := NewRAMFolder("") schema := NewSchema() From ab62643f19ca164cb58781ea33812dd27dd1f71b Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 11 Sep 2015 18:06:56 -0700 Subject: [PATCH 07/13] Test MatchAllMatcher, NoMatchMatcher Go bindings. 
--- go/lucy/search_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 02881d2f4..d227e5f76 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -328,6 +328,37 @@ func TestSeriesMatcherBasics(t *testing.T) { checkMatcher(t, matcher, false) } +func TestMatchAllMatcherBasics(t *testing.T) { + matcher := NewMatchAllMatcher(1.5, 42) + matcher.Next() + if docID := matcher.Next(); docID != 2 { + t.Errorf("Unexpected return value for Next: %d", docID) + } + if docID := matcher.GetDocID(); docID != 2 { + t.Errorf("Unexpected return value for GetDocID: %d", docID) + } + if docID := matcher.Advance(42); docID != 42 { + t.Errorf("Advance returned %d", docID) + } + if score := matcher.Score(); score != 1.5 { + t.Errorf("Unexpected score: %f", score) + } + if matcher.Next() != 0 { + t.Error("Matcher should be exhausted") + } +} + +func TestNoMatchMatcherBasics(t *testing.T) { + matcher := NewNoMatchMatcher() + if matcher.Next() != 0 { + t.Error("Next should return false") + } + matcher = NewNoMatchMatcher() + if matcher.Advance(3) != 0 { + t.Error("Advance should return false") + } +} + func TestTopDocsBasics(t *testing.T) { matchDocs := []MatchDoc{ NewMatchDoc(42, 2.0, nil), From 888c080217fbaa7117ec1c41e989f7d775b476d3 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 11 Sep 2015 18:37:08 -0700 Subject: [PATCH 08/13] Test RangeMatcher Go bindings. 
--- go/lucy/lucy_test.go | 1 + go/lucy/search_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/go/lucy/lucy_test.go b/go/lucy/lucy_test.go index c0c743e86..8f11a3dbb 100644 --- a/go/lucy/lucy_test.go +++ b/go/lucy/lucy_test.go @@ -59,6 +59,7 @@ func createTestSchema() Schema { analyzer := NewStandardTokenizer() fieldType := NewFullTextType(analyzer) fieldType.SetHighlightable(true) + fieldType.SetSortable(true) schema.SpecField("content", fieldType) return schema } diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index d227e5f76..6d0c42d39 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -359,6 +359,31 @@ func TestNoMatchMatcherBasics(t *testing.T) { } } +func TestRangeMatcherBasics(t *testing.T) { + index := createTestIndex("d", "c", "b", "a", "a", "a", "a") + searcher, _ := OpenIndexSearcher(index) + segReaders := searcher.GetReader().SegReaders() + segReader := segReaders[0].(SegReader) + sortReader := segReader.Obtain("Lucy::Index::SortReader").(SortReader) + sortCache := sortReader.FetchSortCache("content") + matcher := NewRangeMatcher(0, 0, sortCache, segReader.DocMax()) + if docID := matcher.Next(); docID != 4 { + t.Errorf("Next: %d", docID) + } + if docID := matcher.GetDocID(); docID != 4 { + t.Errorf("GetDocID: %d", docID) + } + if score := matcher.Score(); score != 0.0 { + t.Errorf("Score: %f", score) + } + if docID := matcher.Advance(7); docID != 7 { + t.Errorf("Advance: %d", docID) + } + if docID := matcher.Next(); docID != 0 { + t.Errorf("Matcher should be exhausted: %d", docID) + } +} + func TestTopDocsBasics(t *testing.T) { matchDocs := []MatchDoc{ NewMatchDoc(42, 2.0, nil), From d850571684b0b5f4b68d3a5deed72feac9d7fb45 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 11 Sep 2015 19:20:12 -0700 Subject: [PATCH 09/13] Add missing declaration for BitColl_new. 
--- core/Lucy/Search/Collector.cfh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/Lucy/Search/Collector.cfh b/core/Lucy/Search/Collector.cfh index 3d88136ca..2bb44ddbc 100644 --- a/core/Lucy/Search/Collector.cfh +++ b/core/Lucy/Search/Collector.cfh @@ -85,6 +85,9 @@ public class Lucy::Search::Collector::BitCollector nickname BitColl BitVector *bit_vec; + public inert incremented BitCollector* + new(BitVector *bit_vector); + /** * @param bit_vector A Lucy::Object::BitVector. */ From 40a1eecbec1b212d6d2b7742bfeba7d546467d15 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 11 Sep 2015 19:21:05 -0700 Subject: [PATCH 10/13] Customize and test Collector Go bindings. * Custom binding for SortCollector's PopMatchDocs. * Tests for BitCollector, OffsetCollector, SortCollector --- go/build.go | 4 ++++ go/lucy/search.go | 14 ++++++++++++++ go/lucy/search_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/go/build.go b/go/build.go index 11615aa7a..15fbd212a 100644 --- a/go/build.go +++ b/go/build.go @@ -202,6 +202,10 @@ func specClasses(parcel *cfc.Parcel) { sortSpecBinding.SetSuppressCtor(true) sortSpecBinding.SpecMethod("Get_Rules", "GetRules() []SortRule") sortSpecBinding.Register() + + sortCollBinding := cfc.NewGoClass(parcel, "Lucy::Search::Collector::SortCollector") + sortCollBinding.SpecMethod("Pop_Match_Docs", "PopMatchDocs() []MatchDoc") + sortCollBinding.Register() } func build() { diff --git a/go/lucy/search.go b/go/lucy/search.go index 27232656f..b47b34088 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -18,6 +18,7 @@ package lucy /* #include "Lucy/Search/Collector.h" +#include "Lucy/Search/Collector/SortCollector.h" #include "Lucy/Search/Hits.h" #include "Lucy/Search/IndexSearcher.h" #include "Lucy/Search/Query.h" @@ -326,3 +327,16 @@ func newMockMatcher(docIDs []int32, scores []float32) MockMatcher { matcher := C.lucy_MockMatcher_new(docIDsCF, blob) return 
WRAPMockMatcher(unsafe.Pointer(matcher)) } + +func (sc *SortCollectorIMP) PopMatchDocs() []MatchDoc { + self := (*C.lucy_SortCollector)(clownfish.Unwrap(sc, "sc")) + matchDocsC := C.LUCY_SortColl_Pop_Match_Docs(self) + defer C.cfish_decref(unsafe.Pointer(matchDocsC)) + length := int(C.CFISH_Vec_Get_Size(matchDocsC)) + slice := make([]MatchDoc, length) + for i := 0; i < length; i++ { + elem := C.cfish_incref(unsafe.Pointer(C.CFISH_Vec_Fetch(matchDocsC, C.size_t(i)))) + slice[i] = WRAPMatchDoc(unsafe.Pointer(elem)) + } + return slice +} diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 6d0c42d39..d084f5365 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -529,3 +529,41 @@ func TestSpanBasics(t *testing.T) { t.Errorf("Set/Get weight: %f", got) } } + +func TestBitCollectorBasics(t *testing.T) { + index := createTestIndex("a", "b", "c", "a") + searcher, _ := OpenIndexSearcher(index) + bitVec := NewBitVector(5) + collector := NewBitCollector(bitVec) + searcher.Collect(NewTermQuery("content", "a"), collector) + expected := []bool{false, true, false, false, true, false, false, false} + if got := bitVec.ToArray(); !reflect.DeepEqual(got,expected) { + t.Errorf("Unexpected result set: %v", got) + } +} + +func TestOffsetCollectorBasics(t *testing.T) { + index := createTestIndex("a", "b", "c") + searcher, _ := OpenIndexSearcher(index) + bitVec := NewBitVector(64) + bitColl := NewBitCollector(bitVec) + offsetColl := NewOffsetCollector(bitColl, 40) + searcher.Collect(NewTermQuery("content", "b"), offsetColl) + if got := bitVec.NextHit(0); got != 42 { + t.Errorf("Unexpected docID: %d", got) + } +} + +func TestSortCollectorBasics(t *testing.T) { + index := createTestIndex("a", "b", "c", "a") + searcher, _ := OpenIndexSearcher(index) + collector := NewSortCollector(nil, nil, 1) + searcher.Collect(NewTermQuery("content", "a"), collector) + if totalHits := collector.GetTotalHits(); totalHits != 2 { + t.Errorf("Unexpected TotalHits: %d", totalHits) + 
} + matchDocs := collector.PopMatchDocs() + if docID := matchDocs[0].GetDocID(); docID != 1 { + t.Errorf("Weird MatchDoc: %d", docID) + } +} From 63fd5b8b89ba40cb51934fe087cd3d2c8e8ca3d8 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 15 Sep 2015 13:33:37 -0700 Subject: [PATCH 11/13] Generalize `query` param in `Hits`. Allow both Query objects and query strings. --- go/lucy/search.go | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/go/lucy/search.go b/go/lucy/search.go index b47b34088..e18eee7d9 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -83,25 +83,17 @@ func doClose(obj Searcher) error { }) } -func doHits(obj Searcher, query interface{}, offset uint32, numWanted uint32, +func doHits(s Searcher, query interface{}, offset uint32, numWanted uint32, sortSpec SortSpec) (hits Hits, err error) { - self := ((*C.lucy_Searcher)(unsafe.Pointer(obj.TOPTR()))) - var sortSpecC *C.lucy_SortSpec - if sortSpec != nil { - sortSpecC = (*C.lucy_SortSpec)(unsafe.Pointer(sortSpec.TOPTR())) - } - switch query.(type) { - case string: - queryStringC := clownfish.NewString(query.(string)) - err = clownfish.TrapErr(func() { - hitsC := C.LUCY_Searcher_Hits(self, - (*C.cfish_Obj)(unsafe.Pointer(queryStringC.TOPTR())), - C.uint32_t(offset), C.uint32_t(numWanted), sortSpecC) - hits = WRAPHits(unsafe.Pointer(hitsC)) - }) - default: - panic("TODO: support Query objects") - } + self := (*C.lucy_Searcher)(clownfish.Unwrap(s, "s")) + sortSpecC := (*C.lucy_SortSpec)(clownfish.UnwrapNullable(sortSpec)) + queryC := (*C.cfish_Obj)(clownfish.GoToClownfish(query, unsafe.Pointer(C.CFISH_OBJ), false)) + defer C.cfish_decref(unsafe.Pointer(queryC)) + err = clownfish.TrapErr(func() { + hitsC := C.LUCY_Searcher_Hits(self, queryC, + C.uint32_t(offset), C.uint32_t(numWanted), sortSpecC) + hits = WRAPHits(unsafe.Pointer(hitsC)) + }) return hits, err } From faab39e04c989d96b4e5fc6b069863e4ce06cb0d Mon Sep 17 00:00:00 2001 From: Marvin 
Humphrey Date: Tue, 15 Sep 2015 13:34:34 -0700 Subject: [PATCH 12/13] Test IndexSearcher Go bindings. --- go/lucy/search_test.go | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index d084f5365..00d68c1f6 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -567,3 +567,58 @@ func TestSortCollectorBasics(t *testing.T) { t.Errorf("Weird MatchDoc: %d", docID) } } + +func TestIndexSearcherMisc(t *testing.T) { + index := createTestIndex("a", "b", "c", "a a") + searcher, _ := OpenIndexSearcher(index) + if got := searcher.DocFreq("content", "a"); got != 2 { + t.Errorf("DocFreq expected 2, got %d", got) + } + if got := searcher.DocMax(); got != 4 { + t.Errorf("DocMax expected 4, got %d", got) + } + if _, ok := searcher.GetReader().(PolyReader); !ok { + t.Error("GetReader") + } + if _, ok := searcher.FetchDocVec(4).(DocVector); !ok { + t.Error("DocVector") + } +} + +func TestIndexSearcherOpenClose(t *testing.T) { + if _, err := OpenIndexSearcher(NewRAMFolder("")); err == nil { + t.Error("Open non-existent index") + } + if _, err := OpenIndexSearcher(42); err == nil { + t.Error("Garbage 'index' argument") + } + index := createTestIndex("a", "b", "c") + searcher, _ := OpenIndexSearcher(index) + searcher.Close() +} + +func TestIndexSearcherHits(t *testing.T) { + index := createTestIndex("a", "b", "c", "a a") + searcher, _ := OpenIndexSearcher(index) + if got, _ := searcher.Hits("a", 0, 1, nil); got.TotalHits() != 2 { + t.Errorf("Hits() with query string: %d", got.TotalHits()) + } + termQuery := NewTermQuery("content", "a") + if got, _ := searcher.Hits(termQuery, 0, 1, nil); got.TotalHits() != 2 { + t.Errorf("Hits() with TermQuery object: %d", got.TotalHits()) + } + + if _, err := searcher.Hits(42, 0, 1, nil); err == nil { + t.Error("Garbage 'query' argument") + } +} + +func TestIndexSearcherTopDocs(t *testing.T) { + index := createTestIndex("a", "b") + searcher, _ := 
OpenIndexSearcher(index) + topDocs := searcher.TopDocs(NewTermQuery("content", "b"), 10, nil) + matchDocs := topDocs.GetMatchDocs() + if docID := matchDocs[0].GetDocID(); docID != 2 { + t.Errorf("TopDocs expected 2, got %d", docID) + } +} From 2aa24c199b204a254705caf94bf8013011f98699 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 15 Sep 2015 16:50:57 -0700 Subject: [PATCH 13/13] Test Go bindings for MatchDoc. --- go/lucy/search_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index 00d68c1f6..b28147709 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -622,3 +622,34 @@ func TestIndexSearcherTopDocs(t *testing.T) { t.Errorf("TopDocs expected 2, got %d", docID) } } + +func TestMatchDocBasics(t *testing.T) { + matchDoc := NewMatchDoc(0, 1.0, nil) + matchDoc.SetDocID(42) + if got := matchDoc.GetDocID(); got != 42 { + t.Errorf("Set/GetDocID: %d", got) + } + matchDoc.SetScore(1.5) + if got := matchDoc.GetScore(); got != 1.5 { + t.Errorf("Set/GetScore: %f", got) + } + values := []interface{}{"foo", int64(42)} + matchDoc.SetValues(values) + if got := matchDoc.GetValues(); !reflect.DeepEqual(got, values) { + t.Error("Get/SetValues") + } +} + +func TestMatchDocSerialization(t *testing.T) { + values := []interface{}{"foo", int64(42)} + matchDoc := NewMatchDoc(100, 1.5, values) + folder := NewRAMFolder("") + outstream := folder.OpenOut("foo") + matchDoc.Serialize(outstream) + outstream.Close() + inStream := folder.OpenIn("foo") + dupe := clownfish.GetClass(matchDoc).MakeObj().(MatchDoc).Deserialize(inStream) + if got := dupe.GetValues(); !reflect.DeepEqual(got, values) { + t.Errorf("Failed round-trip serializetion of MatchDoc") + } +}