From 900fc89ed074ad9a126e959108ec3573efc1cb31 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 25 Sep 2015 17:16:08 -0700 Subject: [PATCH 1/8] Doc, HitDoc constructors Go bindings. Add `NewHitDoc`, move `NewDoc`. --- go/lucy/document.go | 34 ++++++++++++++++++++++++++++++++++ go/lucy/lucy.go | 5 ----- 2 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 go/lucy/document.go diff --git a/go/lucy/document.go b/go/lucy/document.go new file mode 100644 index 000000000..02a8f8259 --- /dev/null +++ b/go/lucy/document.go @@ -0,0 +1,34 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package lucy + +/* +#include "Lucy/Document/Doc.h" +#include "Lucy/Document/HitDoc.h" +*/ +import "C" +import "unsafe" + +func NewDoc(docID int32) Doc { + retvalCF := C.lucy_Doc_new(nil, C.int32_t(docID)) + return WRAPDoc(unsafe.Pointer(retvalCF)) +} + +func NewHitDoc(docID int32, score float32) HitDoc { + retvalCF := C.lucy_HitDoc_new(nil, C.int32_t(docID), C.float(score)) + return WRAPHitDoc(unsafe.Pointer(retvalCF)) +} diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go index 37d7f1ce7..e67ead2b9 100644 --- a/go/lucy/lucy.go +++ b/go/lucy/lucy.go @@ -249,11 +249,6 @@ func GOLUCY_RegexTokenizer_Tokenize_Utf8(rt *C.lucy_RegexTokenizer, str *C.char, } } -func NewDoc(docID int32) Doc { - retvalCF := C.lucy_Doc_new(nil, C.int32_t(docID)) - return WRAPDoc(unsafe.Pointer(retvalCF)) -} - //export GOLUCY_Doc_init func GOLUCY_Doc_init(d *C.lucy_Doc, fields unsafe.Pointer, docID C.int32_t) *C.lucy_Doc { ivars := C.lucy_Doc_IVARS(d) From a41f86cbcc63b711c8087a4861e70ec803a69ebf Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 15 Sep 2015 18:54:11 -0700 Subject: [PATCH 2/8] Change Doc to use registry internally. Instead of storing a pointer to a raw Clownfish Hash in a doc object, wrap the Hash in a Go struct, store it in the registry and store the ID returned by the registry in the Doc. This commit is a transition from using a Clownfish Hash as Doc's fields to using a Go `map[string]interface{}`. 
--- go/lucy/document.go | 16 ++++++++++++++++ go/lucy/index.go | 2 +- go/lucy/lucy.go | 45 ++++++++++++++++++++------------------------- go/lucy/search.go | 2 +- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/go/lucy/document.go b/go/lucy/document.go index 02a8f8259..3196986dd 100644 --- a/go/lucy/document.go +++ b/go/lucy/document.go @@ -17,11 +17,17 @@ package lucy /* +#define C_LUCY_DOC + #include "Lucy/Document/Doc.h" #include "Lucy/Document/HitDoc.h" */ import "C" import "unsafe" +import "fmt" + +import "git-wip-us.apache.org/repos/asf/lucy-clownfish.git/runtime/go/clownfish" + func NewDoc(docID int32) Doc { retvalCF := C.lucy_Doc_new(nil, C.int32_t(docID)) @@ -32,3 +38,13 @@ func NewHitDoc(docID int32, score float32) HitDoc { retvalCF := C.lucy_HitDoc_new(nil, C.int32_t(docID), C.float(score)) return WRAPHitDoc(unsafe.Pointer(retvalCF)) } + +func fetchDocFields(d *C.lucy_Doc) *C.cfish_Hash { + ivars := C.lucy_Doc_IVARS(d) + fieldsID := uintptr(ivars.fields) + fieldsGo, ok := registry.fetch(fieldsID).(clownfish.Hash) + if !ok { + panic(clownfish.NewErr(fmt.Sprintf("Failed to fetch doc %d from registry ", fieldsID))) + } + return (*C.cfish_Hash)(clownfish.Unwrap(fieldsGo, "fieldsGo")) +} diff --git a/go/lucy/index.go b/go/lucy/index.go index 4aab0a24f..6543c99a9 100644 --- a/go/lucy/index.go +++ b/go/lucy/index.go @@ -73,7 +73,7 @@ func (obj *IndexerIMP) Close() error { func (obj *IndexerIMP) AddDoc(doc interface{}) error { self := ((*C.lucy_Indexer)(unsafe.Pointer(obj.TOPTR()))) stockDoc := C.LUCY_Indexer_Get_Stock_Doc(self) - docFields := (*C.cfish_Hash)(C.LUCY_Doc_Get_Fields(stockDoc)) + docFields := fetchDocFields(stockDoc) C.CFISH_Hash_Clear(docFields) // TODO: Support map as doc in addition to struct as doc. 
diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go index e67ead2b9..50ac2c9c5 100644 --- a/go/lucy/lucy.go +++ b/go/lucy/lucy.go @@ -252,10 +252,11 @@ func GOLUCY_RegexTokenizer_Tokenize_Utf8(rt *C.lucy_RegexTokenizer, str *C.char, //export GOLUCY_Doc_init func GOLUCY_Doc_init(d *C.lucy_Doc, fields unsafe.Pointer, docID C.int32_t) *C.lucy_Doc { ivars := C.lucy_Doc_IVARS(d) - if fields != nil { - ivars.fields = unsafe.Pointer(C.cfish_inc_refcount(fields)) + if fields == nil { + fieldsID := registry.store(clownfish.NewHash(0)) + ivars.fields = unsafe.Pointer(fieldsID) } else { - ivars.fields = unsafe.Pointer(C.cfish_Hash_new(0)) + ivars.fields = fields } ivars.doc_id = docID return d @@ -263,30 +264,25 @@ func GOLUCY_Doc_init(d *C.lucy_Doc, fields unsafe.Pointer, docID C.int32_t) *C.l //export GOLUCY_Doc_Set_Fields func GOLUCY_Doc_Set_Fields(d *C.lucy_Doc, fields unsafe.Pointer) { - ivars := C.lucy_Doc_IVARS(d) - temp := ivars.fields - ivars.fields = unsafe.Pointer(C.cfish_inc_refcount(fields)) - C.cfish_decref(temp) + panic(clownfish.NewErr("Set_Fields unsupported in Go bindings")) } //export GOLUCY_Doc_Get_Size func GOLUCY_Doc_Get_Size(d *C.lucy_Doc) C.uint32_t { - ivars := C.lucy_Doc_IVARS(d) - hash := ((*C.cfish_Hash)(ivars.fields)) + hash := fetchDocFields(d) return C.uint32_t(C.CFISH_Hash_Get_Size(hash)) } //export GOLUCY_Doc_Store func GOLUCY_Doc_Store(d *C.lucy_Doc, field *C.cfish_String, value *C.cfish_Obj) { - ivars := C.lucy_Doc_IVARS(d) - hash := (*C.cfish_Hash)(ivars.fields) + hash := fetchDocFields(d) C.CFISH_Hash_Store(hash, field, C.cfish_inc_refcount(unsafe.Pointer(value))) } //export GOLUCY_Doc_Serialize func GOLUCY_Doc_Serialize(d *C.lucy_Doc, outstream *C.lucy_OutStream) { ivars := C.lucy_Doc_IVARS(d) - hash := (*C.cfish_Hash)(ivars.fields) + hash := fetchDocFields(d) C.lucy_Freezer_serialize_hash(hash, outstream) C.LUCY_OutStream_Write_C32(outstream, C.uint32_t(ivars.doc_id)) } @@ -294,23 +290,23 @@ func GOLUCY_Doc_Serialize(d *C.lucy_Doc, outstream 
*C.lucy_OutStream) { //export GOLUCY_Doc_Deserialize func GOLUCY_Doc_Deserialize(d *C.lucy_Doc, instream *C.lucy_InStream) *C.lucy_Doc { ivars := C.lucy_Doc_IVARS(d) - ivars.fields = unsafe.Pointer(C.lucy_Freezer_read_hash(instream)) + hash := unsafe.Pointer(C.lucy_Freezer_read_hash(instream)) + fieldsID := registry.store(clownfish.WRAPAny(hash)) + ivars.fields = unsafe.Pointer(fieldsID) ivars.doc_id = C.int32_t(C.LUCY_InStream_Read_C32(instream)) return d } //export GOLUCY_Doc_Extract func GOLUCY_Doc_Extract(d *C.lucy_Doc, field *C.cfish_String) *C.cfish_Obj { - ivars := C.lucy_Doc_IVARS(d) - hash := (*C.cfish_Hash)(ivars.fields) + hash := fetchDocFields(d) val := C.CFISH_Hash_Fetch(hash, field) return C.cfish_inc_refcount(unsafe.Pointer(val)) } //export GOLUCY_Doc_Field_Names func GOLUCY_Doc_Field_Names(d *C.lucy_Doc) *C.cfish_Vector { - ivars := C.lucy_Doc_IVARS(d) - hash := (*C.cfish_Hash)(ivars.fields) + hash := fetchDocFields(d) return C.CFISH_Hash_Keys(hash) } @@ -323,17 +319,16 @@ func GOLUCY_Doc_Equals(d *C.lucy_Doc, other *C.cfish_Obj) C.bool { if !C.cfish_Obj_is_a(other, C.LUCY_DOC) { return false } - ivars := C.lucy_Doc_IVARS(d) - ovars := C.lucy_Doc_IVARS(twin) - hash := (*C.cfish_Hash)(ivars.fields) - otherHash := (*C.cfish_Obj)(ovars.fields) + hash := fetchDocFields(d) + otherHash := (*C.cfish_Obj)(unsafe.Pointer(fetchDocFields(twin))) return C.CFISH_Hash_Equals(hash, otherHash) } //export GOLUCY_Doc_Destroy func GOLUCY_Doc_Destroy(d *C.lucy_Doc) { ivars := C.lucy_Doc_IVARS(d) - C.cfish_decref(unsafe.Pointer(ivars.fields)) + fieldsID := uintptr(ivars.fields) + registry.delete(fieldsID) C.cfish_super_destroy(unsafe.Pointer(d), C.LUCY_DOC) } @@ -432,15 +427,15 @@ func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader, } C.free(unsafe.Pointer(fieldName)) - retval := C.lucy_HitDoc_new(unsafe.Pointer(fields), docID, 0.0) - C.cfish_dec_refcount(unsafe.Pointer(fields)) + fieldsID := registry.store(clownfish.WRAPAny(unsafe.Pointer(fields))) + retval 
:= C.lucy_HitDoc_new(unsafe.Pointer(fieldsID), docID, 0.0) return retval } //export GOLUCY_Inverter_Invert_Doc func GOLUCY_Inverter_Invert_Doc(inverter *C.lucy_Inverter, doc *C.lucy_Doc) { ivars := C.lucy_Inverter_IVARS(inverter) - fields := (*C.cfish_Hash)(C.LUCY_Doc_Get_Fields(doc)) + fields := fetchDocFields(doc) // Prepare for the new doc. C.LUCY_Inverter_Set_Doc(inverter, doc) diff --git a/go/lucy/search.go b/go/lucy/search.go index e18eee7d9..7f6ce76df 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -149,7 +149,7 @@ func (obj *HitsIMP) Next(hit interface{}) bool { } defer C.cfish_dec_refcount(unsafe.Pointer(docC)) - fields := (*C.cfish_Hash)(unsafe.Pointer(C.LUCY_HitDoc_Get_Fields(docC))) + fields := fetchDocFields((*C.lucy_Doc)(unsafe.Pointer(docC))) iterator := C.cfish_HashIter_new(fields) defer C.cfish_dec_refcount(unsafe.Pointer(iterator)) for C.CFISH_HashIter_Next(iterator) { From 8223307d126898cee43286e1e7bafb6f6d149c25 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 15 Sep 2015 20:25:31 -0700 Subject: [PATCH 3/8] Switch Doc to `map[string]interface{}`. Change from Clownfish Hash to Go map. 
--- go/lucy/document.go | 6 +-- go/lucy/index.go | 24 +++++----- go/lucy/lucy.go | 107 +++++++++++++++++++++----------------------- go/lucy/search.go | 11 ++--- 4 files changed, 69 insertions(+), 79 deletions(-) diff --git a/go/lucy/document.go b/go/lucy/document.go index 3196986dd..1221810d6 100644 --- a/go/lucy/document.go +++ b/go/lucy/document.go @@ -39,12 +39,12 @@ func NewHitDoc(docID int32, score float32) HitDoc { return WRAPHitDoc(unsafe.Pointer(retvalCF)) } -func fetchDocFields(d *C.lucy_Doc) *C.cfish_Hash { +func fetchDocFields(d *C.lucy_Doc) map[string]interface{} { ivars := C.lucy_Doc_IVARS(d) fieldsID := uintptr(ivars.fields) - fieldsGo, ok := registry.fetch(fieldsID).(clownfish.Hash) + fieldsGo, ok := registry.fetch(fieldsID).(map[string]interface{}) if !ok { panic(clownfish.NewErr(fmt.Sprintf("Failed to fetch doc %d from registry ", fieldsID))) } - return (*C.cfish_Hash)(clownfish.Unwrap(fieldsGo, "fieldsGo")) + return fieldsGo } diff --git a/go/lucy/index.go b/go/lucy/index.go index 6543c99a9..1662d3e78 100644 --- a/go/lucy/index.go +++ b/go/lucy/index.go @@ -35,7 +35,7 @@ import "git-wip-us.apache.org/repos/asf/lucy-clownfish.git/runtime/go/clownfish" type IndexerIMP struct { clownfish.ObjIMP - fieldNames map[string]clownfish.String + fieldNames map[string]string } type OpenIndexerArgs struct { @@ -74,7 +74,9 @@ func (obj *IndexerIMP) AddDoc(doc interface{}) error { self := ((*C.lucy_Indexer)(unsafe.Pointer(obj.TOPTR()))) stockDoc := C.LUCY_Indexer_Get_Stock_Doc(self) docFields := fetchDocFields(stockDoc) - C.CFISH_Hash_Clear(docFields) + for field := range docFields { + delete(docFields, field) + } // TODO: Support map as doc in addition to struct as doc. 
@@ -96,11 +98,8 @@ func (obj *IndexerIMP) AddDoc(doc interface{}) error { for i := 0; i < docValue.NumField(); i++ { field := docType.Field(i).Name value := docValue.Field(i).String() - fieldC := obj.findFieldC(field) - valueC := clownfish.NewString(value) - C.CFISH_Hash_Store(docFields, - (*C.cfish_String)(unsafe.Pointer(fieldC)), - C.cfish_inc_refcount(unsafe.Pointer(valueC.TOPTR()))) + realField := obj.findRealField(field) + docFields[realField] = value } // TODO create an additional method AddDocWithBoost which allows the @@ -113,10 +112,10 @@ func (obj *IndexerIMP) AddDoc(doc interface{}) error { return err } -func (obj *IndexerIMP) findFieldC(name string) *C.cfish_String { +func (obj *IndexerIMP) findRealField(name string) string { self := ((*C.lucy_Indexer)(unsafe.Pointer(obj.TOPTR()))) if obj.fieldNames == nil { - obj.fieldNames = make(map[string]clownfish.String) + obj.fieldNames = make(map[string]string) } f, ok := obj.fieldNames[name] if !ok { @@ -127,13 +126,12 @@ func (obj *IndexerIMP) findFieldC(name string) *C.cfish_String { cfString := unsafe.Pointer(C.CFISH_Vec_Fetch(fieldList, C.size_t(i))) field := clownfish.CFStringToGo(cfString) if strings.EqualFold(name, field) { - C.cfish_inc_refcount(cfString) - f = clownfish.WRAPString(cfString) - obj.fieldNames[name] = f + f = field + obj.fieldNames[name] = field } } } - return (*C.cfish_String)(unsafe.Pointer(f.TOPTR())) + return f } func (obj *IndexerIMP) Commit() error { diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go index 50ac2c9c5..36868bd88 100644 --- a/go/lucy/lucy.go +++ b/go/lucy/lucy.go @@ -183,6 +183,7 @@ import "C" import "unsafe" import "fmt" import "regexp" +import "reflect" import "git-wip-us.apache.org/repos/asf/lucy-clownfish.git/runtime/go/clownfish" var registry *objRegistry @@ -253,7 +254,8 @@ func GOLUCY_RegexTokenizer_Tokenize_Utf8(rt *C.lucy_RegexTokenizer, str *C.char, func GOLUCY_Doc_init(d *C.lucy_Doc, fields unsafe.Pointer, docID C.int32_t) *C.lucy_Doc { ivars := 
C.lucy_Doc_IVARS(d) if fields == nil { - fieldsID := registry.store(clownfish.NewHash(0)) + fieldsGo := make(map[string]interface{}) + fieldsID := registry.store(fieldsGo) ivars.fields = unsafe.Pointer(fieldsID) } else { ivars.fields = fields @@ -269,21 +271,25 @@ func GOLUCY_Doc_Set_Fields(d *C.lucy_Doc, fields unsafe.Pointer) { //export GOLUCY_Doc_Get_Size func GOLUCY_Doc_Get_Size(d *C.lucy_Doc) C.uint32_t { - hash := fetchDocFields(d) - return C.uint32_t(C.CFISH_Hash_Get_Size(hash)) + fields := fetchDocFields(d) + return C.uint32_t(len(fields)) } //export GOLUCY_Doc_Store func GOLUCY_Doc_Store(d *C.lucy_Doc, field *C.cfish_String, value *C.cfish_Obj) { - hash := fetchDocFields(d) - C.CFISH_Hash_Store(hash, field, C.cfish_inc_refcount(unsafe.Pointer(value))) + fields := fetchDocFields(d) + fieldGo := clownfish.CFStringToGo(unsafe.Pointer(field)) + valGo := clownfish.ToGo(unsafe.Pointer(value)) + fields[fieldGo] = valGo } //export GOLUCY_Doc_Serialize func GOLUCY_Doc_Serialize(d *C.lucy_Doc, outstream *C.lucy_OutStream) { ivars := C.lucy_Doc_IVARS(d) - hash := fetchDocFields(d) - C.lucy_Freezer_serialize_hash(hash, outstream) + fields := fetchDocFields(d) + hash := clownfish.GoToClownfish(fields, unsafe.Pointer(C.CFISH_HASH), false) + defer C.cfish_decref(hash) + C.lucy_Freezer_serialize_hash((*C.cfish_Hash)(hash), outstream) C.LUCY_OutStream_Write_C32(outstream, C.uint32_t(ivars.doc_id)) } @@ -291,7 +297,9 @@ func GOLUCY_Doc_Serialize(d *C.lucy_Doc, outstream *C.lucy_OutStream) { func GOLUCY_Doc_Deserialize(d *C.lucy_Doc, instream *C.lucy_InStream) *C.lucy_Doc { ivars := C.lucy_Doc_IVARS(d) hash := unsafe.Pointer(C.lucy_Freezer_read_hash(instream)) - fieldsID := registry.store(clownfish.WRAPAny(hash)) + defer C.cfish_decref(hash) + fields := clownfish.ToGo(hash) + fieldsID := registry.store(fields) ivars.fields = unsafe.Pointer(fieldsID) ivars.doc_id = C.int32_t(C.LUCY_InStream_Read_C32(instream)) return d @@ -299,15 +307,20 @@ func GOLUCY_Doc_Deserialize(d 
*C.lucy_Doc, instream *C.lucy_InStream) *C.lucy_Do //export GOLUCY_Doc_Extract func GOLUCY_Doc_Extract(d *C.lucy_Doc, field *C.cfish_String) *C.cfish_Obj { - hash := fetchDocFields(d) - val := C.CFISH_Hash_Fetch(hash, field) - return C.cfish_inc_refcount(unsafe.Pointer(val)) + fields := fetchDocFields(d) + fieldGo := clownfish.CFStringToGo(unsafe.Pointer(field)) + return (*C.cfish_Obj)(clownfish.GoToClownfish(fields[fieldGo], + unsafe.Pointer(C.CFISH_OBJ), true)) } //export GOLUCY_Doc_Field_Names func GOLUCY_Doc_Field_Names(d *C.lucy_Doc) *C.cfish_Vector { - hash := fetchDocFields(d) - return C.CFISH_Hash_Keys(hash) + fields := fetchDocFields(d) + vec := clownfish.NewVector(len(fields)) + for key, _ := range fields { + vec.Push(key) + } + return (*C.cfish_Vector)(C.cfish_incref(clownfish.Unwrap(vec, "vec"))) } //export GOLUCY_Doc_Equals @@ -319,9 +332,10 @@ func GOLUCY_Doc_Equals(d *C.lucy_Doc, other *C.cfish_Obj) C.bool { if !C.cfish_Obj_is_a(other, C.LUCY_DOC) { return false } - hash := fetchDocFields(d) - otherHash := (*C.cfish_Obj)(unsafe.Pointer(fetchDocFields(twin))) - return C.CFISH_Hash_Equals(hash, otherHash) + fields := fetchDocFields(d) + otherFields := fetchDocFields(twin) + result := reflect.DeepEqual(fields, otherFields) + return C.bool(result) } //export GOLUCY_Doc_Destroy @@ -332,7 +346,10 @@ func GOLUCY_Doc_Destroy(d *C.lucy_Doc) { C.cfish_super_destroy(unsafe.Pointer(d), C.LUCY_DOC) } -func fetchEntry(ivars *C.lucy_InverterIVARS, field *C.cfish_String) *C.lucy_InverterEntry { +func fetchEntry(ivars *C.lucy_InverterIVARS, fieldGo string) *C.lucy_InverterEntry { + field := (*C.cfish_String)(clownfish.GoToClownfish(fieldGo, + unsafe.Pointer(C.CFISH_STRING), false)) + defer C.cfish_decref(unsafe.Pointer(field)) schema := ivars.schema fieldNum := C.LUCY_Seg_Field_Num(ivars.segment, field) if fieldNum == 0 { @@ -366,7 +383,7 @@ func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader, schema := ivars.schema datInstream := ivars.dat_in ixInstream 
:= ivars.ix_in - fields := C.cfish_Hash_new(1) + fields := make(map[string]interface{}) fieldNameCap := C.size_t(31) var fieldName *C.char = ((*C.char)(C.malloc(fieldNameCap + 1))) @@ -391,43 +408,40 @@ func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader, // inefficient. The solution should be to add a privte // Schema_Fetch_Type_Utf8 method which takes char* and size_t. fieldNameStr := C.cfish_Str_new_from_utf8(fieldName, fieldNameLen) + fieldNameGo := C.GoStringN(fieldName, C.int(fieldNameLen)) fieldType := C.LUCY_Schema_Fetch_Type(schema, fieldNameStr) C.cfish_dec_refcount(unsafe.Pointer(fieldNameStr)) // Read the field value. - var value *C.cfish_Obj switch C.LUCY_FType_Primitive_ID(fieldType) & C.lucy_FType_PRIMITIVE_ID_MASK { case C.lucy_FType_TEXT: valueLen := C.size_t(C.LUCY_InStream_Read_C32(datInstream)) buf := ((*C.char)(C.malloc(valueLen + 1))) C.LUCY_InStream_Read_Bytes(datInstream, buf, valueLen) - C.null_terminate_string(buf, valueLen) - value = ((*C.cfish_Obj)(C.cfish_Str_new_steal_utf8(buf, valueLen))) + val := C.GoStringN(buf, C.int(valueLen)) + fields[fieldNameGo] = val case C.lucy_FType_BLOB: valueLen := C.size_t(C.LUCY_InStream_Read_C32(datInstream)) buf := ((*C.char)(C.malloc(valueLen))) C.LUCY_InStream_Read_Bytes(datInstream, buf, valueLen) - value = ((*C.cfish_Obj)(C.cfish_Blob_new_steal(buf, valueLen))) + val := C.GoBytes(unsafe.Pointer(buf), C.int(valueLen)) + fields[fieldNameGo] = val case C.lucy_FType_FLOAT32: - value = ((*C.cfish_Obj)(C.cfish_Float_new(C.double(C.LUCY_InStream_Read_F32(datInstream))))) + fields[fieldNameGo] = float32(C.LUCY_InStream_Read_F32(datInstream)) case C.lucy_FType_FLOAT64: - value = ((*C.cfish_Obj)(C.cfish_Float_new(C.LUCY_InStream_Read_F64(datInstream)))) + fields[fieldNameGo] = float64(C.LUCY_InStream_Read_F64(datInstream)) case C.lucy_FType_INT32: - value = ((*C.cfish_Obj)(C.cfish_Int_new(C.int64_t(C.LUCY_InStream_Read_C32(datInstream))))) + fields[fieldNameGo] = 
int32(C.LUCY_InStream_Read_C32(datInstream)) case C.lucy_FType_INT64: - value = ((*C.cfish_Obj)(C.cfish_Int_new(C.int64_t(C.LUCY_InStream_Read_C64(datInstream))))) + fields[fieldNameGo] = int32(C.LUCY_InStream_Read_C64(datInstream)) default: - value = nil panic(clownfish.NewErr("Internal Lucy error: bad type id for field " + - C.GoStringN(fieldName, C.int(fieldNameLen)))) + fieldNameGo)) } - - // Store the value. - C.CFISH_Hash_Store_Utf8(fields, fieldName, fieldNameLen, value) } C.free(unsafe.Pointer(fieldName)) - fieldsID := registry.store(clownfish.WRAPAny(unsafe.Pointer(fields))) + fieldsID := registry.store(fields) retval := C.lucy_HitDoc_new(unsafe.Pointer(fieldsID), docID, 0.0) return retval } @@ -441,15 +455,7 @@ func GOLUCY_Inverter_Invert_Doc(inverter *C.lucy_Inverter, doc *C.lucy_Doc) { C.LUCY_Inverter_Set_Doc(inverter, doc) // Extract and invert the doc's fields. - iter := C.cfish_HashIter_new(fields) - for C.CFISH_HashIter_Next(iter) { - field := C.CFISH_HashIter_Get_Key(iter) - obj := C.CFISH_HashIter_Get_Value(iter) - if obj == nil { - mess := "Invalid nil value for field" + clownfish.CFStringToGo(unsafe.Pointer(field)) - panic(clownfish.NewErr(mess)) - } - + for field, val := range(fields) { inventry := fetchEntry(ivars, field) inventryIvars := C.lucy_InvEntry_IVARS(inventry) fieldType := inventryIvars._type @@ -470,22 +476,13 @@ func GOLUCY_Inverter_Invert_Doc(inverter *C.lucy_Inverter, doc *C.lucy_Doc) { case C.lucy_FType_FLOAT64: expectedType = C.CFISH_FLOAT default: - panic(clownfish.NewErr("Internal Lucy error: bad type id for field " + - clownfish.CFStringToGo(unsafe.Pointer(field)))) - } - if !C.cfish_Obj_is_a(obj, expectedType) { - className := C.cfish_Obj_get_class_name((*C.cfish_Obj)(unsafe.Pointer(fieldType))) - mess := fmt.Sprintf("Invalid type for field '%s': '%s'", - clownfish.CFStringToGo(unsafe.Pointer(field)), - clownfish.CFStringToGo(unsafe.Pointer(className))) - panic(clownfish.NewErr(mess)) - } - if inventryIvars.value != obj { - 
C.cfish_decref(unsafe.Pointer(inventryIvars.value)) - inventryIvars.value = C.cfish_inc_refcount(unsafe.Pointer(obj)) + panic(clownfish.NewErr("Internal Lucy error: bad type id for field " + field)) } + temp := inventryIvars.value + valCF := clownfish.GoToClownfish(val, unsafe.Pointer(expectedType), false) + inventryIvars.value = C.cfish_inc_refcount(valCF) + C.cfish_decref(unsafe.Pointer(temp)) C.LUCY_Inverter_Add_Field(inverter, inventry) } - C.cfish_dec_refcount(unsafe.Pointer(iter)) } diff --git a/go/lucy/search.go b/go/lucy/search.go index 7f6ce76df..ed1b5e489 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -150,19 +150,14 @@ func (obj *HitsIMP) Next(hit interface{}) bool { defer C.cfish_dec_refcount(unsafe.Pointer(docC)) fields := fetchDocFields((*C.lucy_Doc)(unsafe.Pointer(docC))) - iterator := C.cfish_HashIter_new(fields) - defer C.cfish_dec_refcount(unsafe.Pointer(iterator)) - for C.CFISH_HashIter_Next(iterator) { - keyC := C.CFISH_HashIter_Get_Key(iterator) - valC := C.CFISH_HashIter_Get_Value(iterator) - key := clownfish.CFStringToGo(unsafe.Pointer(keyC)) - val := clownfish.CFStringToGo(unsafe.Pointer(valC)) + for key, val := range fields { + stringVal := val.(string) // TODO type switch match := func(name string) bool { return strings.EqualFold(key, name) } structField := hitValue.FieldByNameFunc(match) if structField != (reflect.Value{}) { - structField.SetString(val) + structField.SetString(stringVal) } } return true From 2bec52c1c8314c68a0687b09a088406ddce3211c Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 18 Sep 2015 18:59:27 -0700 Subject: [PATCH 4/8] Allow multiple inputs to Indexer's AddDoc. 
* Doc object * Go struct * map[string]interface{} --- go/lucy/index.go | 80 ++++++++++++++++++++++++++++++++----------- go/lucy/index_test.go | 39 +++++++++++++++++++++ 2 files changed, 99 insertions(+), 20 deletions(-) create mode 100644 go/lucy/index_test.go diff --git a/go/lucy/index.go b/go/lucy/index.go index 1662d3e78..0ec4c9795 100644 --- a/go/lucy/index.go +++ b/go/lucy/index.go @@ -70,55 +70,95 @@ func (obj *IndexerIMP) Close() error { return nil // TODO catch errors } -func (obj *IndexerIMP) AddDoc(doc interface{}) error { - self := ((*C.lucy_Indexer)(unsafe.Pointer(obj.TOPTR()))) - stockDoc := C.LUCY_Indexer_Get_Stock_Doc(self) - docFields := fetchDocFields(stockDoc) +func (obj *IndexerIMP) addDocObj(doc Doc, boost float32) error { + self := (*C.lucy_Indexer)(clownfish.Unwrap(obj, "obj")) + d := (*C.lucy_Doc)(clownfish.Unwrap(doc, "doc")) + return clownfish.TrapErr(func() { + C.LUCY_Indexer_Add_Doc(self, d, C.float(boost)) + }) +} + +func (obj *IndexerIMP) addMapAsDoc(doc map[string]interface{}, boost float32) error { + self := (*C.lucy_Indexer)(clownfish.Unwrap(obj, "obj")) + d := C.LUCY_Indexer_Get_Stock_Doc(self) + docFields := fetchDocFields(d) for field := range docFields { delete(docFields, field) } + for key, value := range doc { + field, err := obj.findRealField(key) + if err != nil { + return err + } + docFields[field] = value + } + return clownfish.TrapErr(func() { + C.LUCY_Indexer_Add_Doc(self, d, C.float(boost)) + }) +} - // TODO: Support map as doc in addition to struct as doc. +func (obj *IndexerIMP) addStructAsDoc(doc interface{}, boost float32) error { + self := (*C.lucy_Indexer)(clownfish.Unwrap(obj, "obj")) + d := C.LUCY_Indexer_Get_Stock_Doc(self) + docFields := fetchDocFields(d) + for field := range docFields { + delete(docFields, field) + } // Get reflection value and type for the supplied struct. 
+ var docValue reflect.Value + var success bool if reflect.ValueOf(doc).Kind() == reflect.Ptr { temp := reflect.ValueOf(doc).Elem() if temp.Kind() == reflect.Struct { docValue = temp + success = true } } - if docValue == (reflect.Value{}) { - mess := fmt.Sprintf("Doc not struct pointer: %v", - reflect.TypeOf(doc)) + if !success { + mess := fmt.Sprintf("Unexpected type for doc: %T", doc) return clownfish.NewErr(mess) } - docType := docValue.Type() + // Copy field values into stockDoc. + docType := docValue.Type() for i := 0; i < docValue.NumField(); i++ { field := docType.Field(i).Name value := docValue.Field(i).String() - realField := obj.findRealField(field) + realField, err := obj.findRealField(field) + if err != nil { + return err + } docFields[realField] = value } + return clownfish.TrapErr(func() { + C.LUCY_Indexer_Add_Doc(self, d, C.float(boost)) + }) +} + +func (obj *IndexerIMP) AddDoc(doc interface{}) error { // TODO create an additional method AddDocWithBoost which allows the // client to supply `boost`. 
- boost := 1.0 - err := clownfish.TrapErr(func() { - C.LUCY_Indexer_Add_Doc(self, stockDoc, C.float(boost)) - }) + boost := float32(1.0) - return err + if suppliedDoc, ok := doc.(Doc); ok { + return obj.addDocObj(suppliedDoc, boost) + } else if m, ok := doc.(map[string]interface{}); ok { + return obj.addMapAsDoc(m, boost) + } else { + return obj.addStructAsDoc(doc, boost) + } } -func (obj *IndexerIMP) findRealField(name string) string { +func (obj *IndexerIMP) findRealField(name string) (string, error) { self := ((*C.lucy_Indexer)(unsafe.Pointer(obj.TOPTR()))) if obj.fieldNames == nil { obj.fieldNames = make(map[string]string) } - f, ok := obj.fieldNames[name] - if !ok { + if field, ok := obj.fieldNames[name]; ok { + return field, nil + } else { schema := C.LUCY_Indexer_Get_Schema(self) fieldList := C.LUCY_Schema_All_Fields(schema) defer C.cfish_dec_refcount(unsafe.Pointer(fieldList)) @@ -126,12 +166,12 @@ func (obj *IndexerIMP) findRealField(name string) string { cfString := unsafe.Pointer(C.CFISH_Vec_Fetch(fieldList, C.size_t(i))) field := clownfish.CFStringToGo(cfString) if strings.EqualFold(name, field) { - f = field obj.fieldNames[name] = field + return field, nil } } } - return f + return "", clownfish.NewErr(fmt.Sprintf("Unknown field: '%v'", name)) } func (obj *IndexerIMP) Commit() error { diff --git a/go/lucy/index_test.go b/go/lucy/index_test.go new file mode 100644 index 000000000..5967a317a --- /dev/null +++ b/go/lucy/index_test.go @@ -0,0 +1,39 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lucy + +import "testing" + +func TestIndexerAddDoc(t *testing.T) { + schema := createTestSchema() + index := NewRAMFolder("") + indexer, _ := OpenIndexer(&OpenIndexerArgs{ + Create: true, + Index: index, + Schema: schema, + }) + indexer.AddDoc(&testDoc{Content: "foo"}) + indexer.AddDoc(map[string]interface{}{"content": "foo"}) + doc := NewDoc(0) + doc.Store("content", "foo") + indexer.AddDoc(doc) + indexer.Commit() + searcher, _ := OpenIndexSearcher(index) + if got := searcher.DocFreq("content", "foo"); got != 3 { + t.Errorf("Didn't index all docs -- DocFreq: %d", got) + } +} From 776c9aab8bd775d7badb4fbfa50a4df3d731a239 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 22 Sep 2015 19:13:55 -0700 Subject: [PATCH 5/8] Refactor DocReader to accept diff doc types. Prepare to read into something other than a Doc object from DefaultDocReader. 
--- go/lucy/lucy.go | 112 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 14 deletions(-) diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go index 36868bd88..6a61cb154 100644 --- a/go/lucy/lucy.go +++ b/go/lucy/lucy.go @@ -182,6 +182,7 @@ null_terminate_string(char *string, size_t len) { import "C" import "unsafe" import "fmt" +import "strings" import "regexp" import "reflect" import "git-wip-us.apache.org/repos/asf/lucy-clownfish.git/runtime/go/clownfish" @@ -376,16 +377,72 @@ func fetchEntry(ivars *C.lucy_InverterIVARS, fieldGo string) *C.lucy_InverterEnt return (*C.lucy_InverterEntry)(unsafe.Pointer(entry)) } -//export GOLUCY_DefDocReader_Fetch_Doc -func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader, - docID C.int32_t) *C.lucy_HitDoc { - ivars := C.lucy_DefDocReader_IVARS(ddr) +func setMapField(store interface{}, field string, val interface{}) error { + m := store.(map[string]interface{}) + m[field] = val + return nil +} + +func setStructField(store interface{}, field string, val interface{}) error { + structStore := store.(reflect.Value) + stringVal := val.(string) // TODO type switch + match := func(name string) bool { + return strings.EqualFold(field, name) + } + structField := structStore.FieldByNameFunc(match) + if structField != (reflect.Value{}) { // TODO require match? + structField.SetString(stringVal) + } + return nil +} + +func doReadDocData(ddrC *C.lucy_DefaultDocReader, docID int32, doc interface{}) error { + + // Adapt for different types of "doc". 
+ var setField func(interface{}, string, interface{}) error + var fields interface{} + switch v := doc.(type) { + case Doc: + docC := (*C.lucy_Doc)(clownfish.Unwrap(v, "doc")) + fieldsMap := fetchDocFields(docC) + for field, _ := range fieldsMap { + delete(fieldsMap, field) + } + fields = fieldsMap + setField = setMapField + case map[string]interface{}: + for field, _ := range v { + delete(v, field) + } + fields = v + setField = setMapField + default: + // Get reflection value and type for the supplied struct. + var hitValue reflect.Value + if reflect.ValueOf(doc).Kind() == reflect.Ptr { + temp := reflect.ValueOf(doc).Elem() + if temp.Kind() == reflect.Struct { + if temp.CanSet() { + hitValue = temp + } + } + } + if hitValue == (reflect.Value{}) { + mess := fmt.Sprintf("Arg not writeable struct pointer: %v", + reflect.TypeOf(doc)) + return clownfish.NewErr(mess) + } + fields = hitValue + setField = setStructField + } + + ivars := C.lucy_DefDocReader_IVARS(ddrC) schema := ivars.schema datInstream := ivars.dat_in ixInstream := ivars.ix_in - fields := make(map[string]interface{}) fieldNameCap := C.size_t(31) var fieldName *C.char = ((*C.char)(C.malloc(fieldNameCap + 1))) + defer C.free(unsafe.Pointer(fieldName)) // Get data file pointer from index, read number of fields. 
C.LUCY_InStream_Seek(ixInstream, C.int64_t(docID*8)) @@ -419,28 +476,55 @@ func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader, buf := ((*C.char)(C.malloc(valueLen + 1))) C.LUCY_InStream_Read_Bytes(datInstream, buf, valueLen) val := C.GoStringN(buf, C.int(valueLen)) - fields[fieldNameGo] = val + err := setField(fields, fieldNameGo, val) + if err != nil { + return err + } case C.lucy_FType_BLOB: valueLen := C.size_t(C.LUCY_InStream_Read_C32(datInstream)) buf := ((*C.char)(C.malloc(valueLen))) C.LUCY_InStream_Read_Bytes(datInstream, buf, valueLen) val := C.GoBytes(unsafe.Pointer(buf), C.int(valueLen)) - fields[fieldNameGo] = val + err := setField(fields, fieldNameGo, val) + if err != nil { + return err + } case C.lucy_FType_FLOAT32: - fields[fieldNameGo] = float32(C.LUCY_InStream_Read_F32(datInstream)) + err := setField(fields, fieldNameGo, float32(C.LUCY_InStream_Read_F32(datInstream))) + if err != nil { + return err + } case C.lucy_FType_FLOAT64: - fields[fieldNameGo] = float64(C.LUCY_InStream_Read_F64(datInstream)) + err := setField(fields, fieldNameGo, float64(C.LUCY_InStream_Read_F64(datInstream))) + if err != nil { + return err + } case C.lucy_FType_INT32: - fields[fieldNameGo] = int32(C.LUCY_InStream_Read_C32(datInstream)) + err := setField(fields, fieldNameGo, int32(C.LUCY_InStream_Read_C32(datInstream))) + if err != nil { + return err + } case C.lucy_FType_INT64: - fields[fieldNameGo] = int32(C.LUCY_InStream_Read_C64(datInstream)) + err := setField(fields, fieldNameGo, int64(C.LUCY_InStream_Read_C64(datInstream))) + if err != nil { + return err + } default: - panic(clownfish.NewErr("Internal Lucy error: bad type id for field " + - fieldNameGo)) + return clownfish.NewErr( + "Internal Lucy error: bad type id for field " + fieldNameGo) } } - C.free(unsafe.Pointer(fieldName)) + return nil +} +//export GOLUCY_DefDocReader_Fetch_Doc +func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader, + docID C.int32_t) *C.lucy_HitDoc { + fields := 
make(map[string]interface{}) + err := doReadDocData(ddr, int32(docID), fields) + if err != nil { + panic(err) + } fieldsID := registry.store(fields) retval := C.lucy_HitDoc_new(unsafe.Pointer(fieldsID), docID, 0.0) return retval From 34ffe42cbc21d248508c76c3bfdd081eb48b23f9 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 25 Sep 2015 19:59:06 -0700 Subject: [PATCH 6/8] Private readDoc for DocReader Go bindings. Make it possible to supply multiple types to DocReader. --- go/lucy/lucy.go | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go index 6a61cb154..595f777b5 100644 --- a/go/lucy/lucy.go +++ b/go/lucy/lucy.go @@ -25,6 +25,7 @@ package lucy #define C_LUCY_DEFAULTDOCREADER #define C_LUCY_INVERTER #define C_LUCY_INVERTERENTRY +#define C_LUCY_POLYDOCREADER #include "lucy_parcel.h" #include "Lucy/Analysis/RegexTokenizer.h" @@ -46,9 +47,12 @@ package lucy #include "Lucy/Document/HitDoc.h" #include "Lucy/Plan/FieldType.h" #include "Lucy/Plan/Schema.h" +#include "Lucy/Index/DocReader.h" +#include "Lucy/Index/PolyReader.h" #include "Lucy/Index/Segment.h" #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" +#include "Lucy/Object/I32Array.h" #include "Lucy/Util/Freezer.h" extern lucy_RegexTokenizer* @@ -377,6 +381,42 @@ func fetchEntry(ivars *C.lucy_InverterIVARS, fieldGo string) *C.lucy_InverterEnt return (*C.lucy_InverterEntry)(unsafe.Pointer(entry)) } +func fetchDocFromDocReader(dr DocReader, docID int32, doc interface{}) error { + switch v := dr.(type) { + case *DefaultDocReaderIMP: + return v.readDoc(docID, doc) + case *PolyDocReaderIMP: + return v.readDoc(docID, doc) + default: + panic(clownfish.NewErr(fmt.Sprintf("Unexpected type: %T", v))) + } +} + +func (pdr *PolyDocReaderIMP) readDoc(docID int32, doc interface{}) error { + self := (*C.lucy_PolyDocReader)(clownfish.Unwrap(pdr, "pdr")) + ivars := C.lucy_PolyDocReader_IVARS(self) + segTick := 
C.lucy_PolyReader_sub_tick(ivars.offsets, C.int32_t(docID)) + offset := C.LUCY_I32Arr_Get(ivars.offsets, segTick) + defDocReader := (*C.lucy_DefaultDocReader)(C.CFISH_Vec_Fetch(ivars.readers, C.size_t(segTick))) + if (defDocReader == nil) { + return clownfish.NewErr(fmt.Sprintf("Invalid docID: %d", docID)) + } + if !C.cfish_Obj_is_a((*C.cfish_Obj)(unsafe.Pointer(defDocReader)), C.LUCY_DEFAULTDOCREADER) { + panic(clownfish.NewErr("Unexpected type")) // sanity check + } + adjustedDocID := docID - int32(offset) + err := doReadDocData(defDocReader, adjustedDocID, doc) + if docDoc, ok := doc.(Doc); ok { + docDoc.SetDocID(docID) + } + return err +} + +func (ddr *DefaultDocReaderIMP) readDoc(docID int32, doc interface{}) error { + self := (*C.lucy_DefaultDocReader)(clownfish.Unwrap(ddr, "ddr")) + return doReadDocData(self, docID, doc) +} + func setMapField(store interface{}, field string, val interface{}) error { m := store.(map[string]interface{}) m[field] = val From 853cf863a11be2d6543c90ad5830504081747c0f Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 25 Sep 2015 19:59:28 -0700 Subject: [PATCH 7/8] Add ReadDoc to Searcher Go bindings. Support multiple document types when fetching document data via a Searcher. 
--- go/build.go | 1 + go/lucy/search.go | 20 ++++++++++++++++++++ go/lucy/search_test.go | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/go/build.go b/go/build.go index 15fbd212a..e250aebde 100644 --- a/go/build.go +++ b/go/build.go @@ -152,6 +152,7 @@ func specClasses(parcel *cfc.Parcel) { searcherBinding.SpecMethod("Hits", "Hits(query interface{}, offset uint32, numWanted uint32, sortSpec SortSpec) (Hits, error)") searcherBinding.SpecMethod("Close", "Close() error") + searcherBinding.SpecMethod("", "ReadDoc(int32, interface{}) error") searcherBinding.Register() hitsBinding := cfc.NewGoClass(parcel, "Lucy::Search::Hits") diff --git a/go/lucy/search.go b/go/lucy/search.go index ed1b5e489..514c2eb5f 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -32,6 +32,7 @@ package lucy #include "Lucy/Search/SortSpec.h" #include "Lucy/Search/TopDocs.h" #include "Lucy/Document/HitDoc.h" +#include "Lucy/Index/IndexReader.h" #include "LucyX/Search/MockMatcher.h" #include "Clownfish/Blob.h" #include "Clownfish/Hash.h" @@ -76,6 +77,25 @@ func (obj *IndexSearcherIMP) Hits(query interface{}, offset uint32, numWanted ui return doHits(obj, query, offset, numWanted, sortSpec) } +// Read data into the supplied doc. 
+func (s *SearcherIMP) ReadDoc(docID int32, doc interface{}) error { + self := (*C.lucy_Searcher)(clownfish.Unwrap(s, "s")) + class := C.cfish_Obj_get_class((*C.cfish_Obj)(unsafe.Pointer(self))) + if class == C.LUCY_INDEXSEARCHER { + ixReader := C.LUCY_IxSearcher_Get_Reader((*C.lucy_IndexSearcher)(unsafe.Pointer(self))) + cfStr := (*C.cfish_String)(clownfish.GoToClownfish("Lucy::Index::DocReader", unsafe.Pointer(C.CFISH_STRING), false)) + defer C.cfish_decref(unsafe.Pointer(cfStr)) + docReader := C.LUCY_IxReader_Fetch(ixReader, cfStr) + if docReader == nil { + return clownfish.NewErr("No DocReader available") + } + docReaderGo := clownfish.WRAPAny(unsafe.Pointer(C.cfish_incref(unsafe.Pointer(docReader)))).(DocReader) + return fetchDocFromDocReader(docReaderGo, docID, doc) + } else { + return clownfish.NewErr("Support for ReadDoc not implemented") + } +} + func doClose(obj Searcher) error { self := ((*C.lucy_Searcher)(unsafe.Pointer(obj.TOPTR()))) return clownfish.TrapErr(func() { diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index b28147709..d0668ae11 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -623,6 +623,36 @@ func TestIndexSearcherTopDocs(t *testing.T) { } } +func TestIndexSearcherReadDoc(t *testing.T) { + index := createTestIndex("a", "b") + searcher, _ := OpenIndexSearcher(index) + docDoc := NewHitDoc(0, -1.0) + docStruct := &simpleTestDoc{} + docMap := make(map[string]interface{}) + var err error + err = searcher.ReadDoc(2, docDoc) + if err != nil { + t.Errorf("ReadDoc failed with HitDoc: %v", err) + } + err = searcher.ReadDoc(2, docStruct) + if err != nil { + t.Errorf("ReadDoc failed with struct: %v", err) + } + err = searcher.ReadDoc(2, docMap) + if err != nil { + t.Errorf("ReadDoc failed with map: %v", err) + } + if docDoc.Extract("content").(string) != "b" { + t.Error("Next with Doc object yielded bad data") + } + if docStruct.Content != "b" { + t.Error("Next with struct yielded bad data") + } + if 
docMap["content"].(string) != "b" { + t.Error("Next with map yielded bad data") + } +} + func TestMatchDocBasics(t *testing.T) { matchDoc := NewMatchDoc(0, 1.0, nil) matchDoc.SetDocID(42) From 1b97823de7df7a09826d16070d72dd1922b73072 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Fri, 25 Sep 2015 20:01:24 -0700 Subject: [PATCH 8/8] Allow diff doc types in Hits.Next() Go binding. Allow 3 different types of object to retrieve fields via Next() * Go struct * Lucy HitDoc * map[string]interface{} --- go/lucy/search.go | 73 +++++++++++++++++------------------------- go/lucy/search_test.go | 24 ++++++++++++++ 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/go/lucy/search.go b/go/lucy/search.go index 514c2eb5f..a8bab6f55 100644 --- a/go/lucy/search.go +++ b/go/lucy/search.go @@ -17,6 +17,9 @@ package lucy /* + +#define C_LUCY_HITS + #include "Lucy/Search/Collector.h" #include "Lucy/Search/Collector/SortCollector.h" #include "Lucy/Search/Hits.h" @@ -26,6 +29,7 @@ package lucy #include "Lucy/Search/ANDQuery.h" #include "Lucy/Search/ORQuery.h" #include "Lucy/Search/ANDMatcher.h" +#include "Lucy/Search/MatchDoc.h" #include "Lucy/Search/ORMatcher.h" #include "Lucy/Search/SeriesMatcher.h" #include "Lucy/Search/SortRule.h" @@ -46,9 +50,6 @@ float32_set(float *floats, size_t i, float value) { */ import "C" -import "fmt" -import "reflect" -import "strings" import "unsafe" import "git-wip-us.apache.org/repos/asf/lucy-clownfish.git/runtime/go/clownfish" @@ -135,52 +136,36 @@ func (obj *PolySearcherIMP) Hits(query interface{}, offset uint32, numWanted uin return doHits(obj, query, offset, numWanted, sortSpec) } -func (obj *HitsIMP) Next(hit interface{}) bool { - self := ((*C.lucy_Hits)(unsafe.Pointer(obj.TOPTR()))) - // TODO: accept a HitDoc object and populate score. - - // Get reflection value and type for the supplied struct. 
- var hitValue reflect.Value - if reflect.ValueOf(hit).Kind() == reflect.Ptr { - temp := reflect.ValueOf(hit).Elem() - if temp.Kind() == reflect.Struct { - if temp.CanSet() { - hitValue = temp - } - } - } - if hitValue == (reflect.Value{}) { - mess := fmt.Sprintf("Arg not writeable struct pointer: %v", - reflect.TypeOf(hit)) - obj.err = clownfish.NewErr(mess) - return false - } +type setScorer interface { + SetScore(float32) +} - var docC *C.lucy_HitDoc - errCallingNext := clownfish.TrapErr(func() { - docC = C.LUCY_Hits_Next(self) - }) - if errCallingNext != nil { - obj.err = errCallingNext - return false - } - if docC == nil { - return false - } - defer C.cfish_dec_refcount(unsafe.Pointer(docC)) +func (h *HitsIMP) Next(hit interface{}) bool { + self := (*C.lucy_Hits)(clownfish.Unwrap(h, "h")) + ivars := C.lucy_Hits_IVARS(self) + matchDoc := (*C.lucy_MatchDoc)(unsafe.Pointer( + C.CFISH_Vec_Fetch(ivars.match_docs, C.size_t(ivars.offset)))) + ivars.offset += 1 - fields := fetchDocFields((*C.lucy_Doc)(unsafe.Pointer(docC))) - for key, val := range fields { - stringVal := val.(string) // TODO type switch - match := func(name string) bool { - return strings.EqualFold(key, name) + if matchDoc == nil { + // Bail if there aren't any more *captured* hits. (There may be + // more total hits.) + return false + } else { + // Lazily fetch HitDoc, set score. 
+ searcher := clownfish.WRAPAny(unsafe.Pointer(C.cfish_incref( + unsafe.Pointer(ivars.searcher)))).(Searcher) + docID := int32(C.LUCY_MatchDoc_Get_Doc_ID(matchDoc)) + err := searcher.ReadDoc(docID, hit) + if err != nil { + h.err = err + return false } - structField := hitValue.FieldByNameFunc(match) - if structField != (reflect.Value{}) { - structField.SetString(stringVal) + if ss, ok := hit.(setScorer); ok { + ss.SetScore(float32(C.LUCY_MatchDoc_Get_Score(matchDoc))) } + return true } - return true } func (obj *HitsIMP) Error() error { diff --git a/go/lucy/search_test.go b/go/lucy/search_test.go index d0668ae11..ae4c35275 100644 --- a/go/lucy/search_test.go +++ b/go/lucy/search_test.go @@ -438,6 +438,30 @@ func TestHitsBasics(t *testing.T) { } } +func TestHitsNext(t *testing.T) { + index := createTestIndex("a x", "a y", "a z", "b") + searcher, _ := OpenIndexSearcher(index) + hits, _ := searcher.Hits("a", 0, 10, nil) + docDoc := NewHitDoc(0, -1.0) + docStruct := &simpleTestDoc{} + docMap := make(map[string]interface{}) + if !hits.Next(docDoc) || !hits.Next(docStruct) || !hits.Next(docMap) { + t.Errorf("Hits.Next returned false: %v", hits.Error()) + } + if hits.Next(&simpleTestDoc{}) { + t.Error("Hits iterator should be exhausted"); + } + if docDoc.Extract("content").(string) != "a x" { + t.Error("Next with Doc object yielded bad data") + } + if docStruct.Content != "a y" { + t.Error("Next with struct yielded bad data") + } + if docMap["content"].(string) != "a z" { + t.Error("Next with map yielded bad data") + } +} + func TestSortSpecBasics(t *testing.T) { folder := NewRAMFolder("") schema := NewSchema()