From d6fdddb0d4fa626c82c2a50bad04165c5f4b4d66 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 27 Sep 2015 00:22:12 +0200 Subject: [PATCH 1/7] Rewrite TextTermStepper Switch type of ivars->value from CharBuf to String. Before: ivars->value => CharBuf ivars->string => String After: ivars->value => String ivars->charbuf => CharBuf Only accept Strings in Set_Value and Write_Key_Frame (which erroneously assumed CharBufs before) and optimize accordingly. Only accept CharBufs in Write_Delta for now. --- core/Lucy/Plan/TextType.c | 108 +++++++++++++++++------------------- core/Lucy/Plan/TextType.cfh | 2 +- 2 files changed, 53 insertions(+), 57 deletions(-) diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c index a27020a5b..a9f66b245 100644 --- a/core/Lucy/Plan/TextType.c +++ b/core/Lucy/Plan/TextType.c @@ -49,83 +49,78 @@ TextTermStepper* TextTermStepper_init(TextTermStepper *self) { TermStepper_init((TermStepper*)self); TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - ivars->value = (Obj*)CB_new(0); - ivars->string = NULL; + ivars->charbuf = CB_new(0); return self; } void TextTermStepper_Destroy_IMP(TextTermStepper *self) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - DECREF(ivars->string); + DECREF(ivars->charbuf); SUPER_DESTROY(self, TEXTTERMSTEPPER); } +static void +S_set_value(TextTermStepper *self, Obj *value) { + TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); + if (ivars->value != value) { + DECREF(ivars->value); + ivars->value = INCREF(value); + } +} + void TextTermStepper_Set_Value_IMP(TextTermStepper *self, Obj *value) { - TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - CERTIFY(value, STRING); - CB_Mimic((CharBuf*)ivars->value, value); - // Invalidate string. - DECREF(ivars->string); - ivars->string = NULL; + S_set_value(self, CERTIFY(value, STRING)); } Obj* TextTermStepper_Get_Value_IMP(TextTermStepper *self) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - if (ivars->string == NULL) { - ivars->string = CB_To_String((CharBuf*)ivars->value); + if (ivars->value == NULL) { + ivars->value = (Obj*)CB_To_String(ivars->charbuf); } - return (Obj*)ivars->string; + return ivars->value; } void TextTermStepper_Reset_IMP(TextTermStepper *self) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - CB_Set_Size((CharBuf*)ivars->value, 0); - // Invalidate string. - DECREF(ivars->string); - ivars->string = NULL; + DECREF(ivars->value); + ivars->value = NULL; + CB_Set_Size(ivars->charbuf, 0); } void TextTermStepper_Write_Key_Frame_IMP(TextTermStepper *self, OutStream *outstream, Obj *value) { - TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - CharBuf *charbuf = (CharBuf*)ivars->value; - CB_Mimic(charbuf, value); - const char *buf = CB_Get_Ptr8(charbuf); - size_t size = CB_Get_Size(charbuf); + String *string = (String*)CERTIFY(value, STRING); + const char *buf = Str_Get_Ptr8(string); + size_t size = Str_Get_Size(string); OutStream_Write_C32(outstream, size); OutStream_Write_Bytes(outstream, buf, size); - // Invalidate string. - DECREF(ivars->string); - ivars->string = NULL; + + S_set_value(self, value); } void TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream, Obj *value) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - CharBuf *charbuf = (CharBuf*)ivars->value; - const char *last_text = CB_Get_Ptr8(charbuf); - size_t last_size = CB_Get_Size(charbuf); - const char *new_text = NULL; - size_t new_size = 0; - - if (Obj_is_a(value, STRING)) { - String *new_string = (String*)value; - new_text = Str_Get_Ptr8(new_string); - new_size = Str_Get_Size(new_string); - } - else if (Obj_is_a(value, CHARBUF)) { - CharBuf *new_charbuf = (CharBuf*)value; - new_text = CB_Get_Ptr8(new_charbuf); - new_size = CB_Get_Size(new_charbuf); + CharBuf *charbuf = (CharBuf*)CERTIFY(value, CHARBUF); + const char *new_text = CB_Get_Ptr8(charbuf); + size_t new_size = CB_Get_Size(charbuf); + + const char *last_text; + size_t last_size; + if (ivars->value) { + String *last_string = (String*)ivars->value; + last_text = Str_Get_Ptr8(last_string); + last_size = Str_Get_Size(last_string); } else { - THROW(ERR, "'value' must be a String or CharBuf"); + last_text = CB_Get_Ptr8(ivars->charbuf); + last_size = CB_Get_Size(ivars->charbuf); } // Count how many bytes the strings share at the top. @@ -139,11 +134,11 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream, OutStream_Write_String(outstream, diff_start_str, diff_len); // Update value. - CB_Mimic_Utf8(charbuf, new_text, new_size); + CB_Mimic_Utf8(ivars->charbuf, new_text, new_size); - // Invalidate string. - DECREF(ivars->string); - ivars->string = NULL; + // Invalidate string value. + DECREF(ivars->value); + ivars->value = NULL; } void @@ -153,12 +148,11 @@ TextTermStepper_Read_Key_Frame_IMP(TextTermStepper *self, const uint32_t text_len = InStream_Read_C32(instream); // Allocate space. - CharBuf *charbuf = (CharBuf*)ivars->value; - char *ptr = CB_Grow(charbuf, text_len); + char *ptr = CB_Grow(ivars->charbuf, text_len); // Set the value text. InStream_Read_Bytes(instream, ptr, text_len); - CB_Set_Size(charbuf, text_len); + CB_Set_Size(ivars->charbuf, text_len); if (!StrHelp_utf8_valid(ptr, text_len)) { THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64", InStream_Get_Filename(instream), @@ -168,9 +162,9 @@ TextTermStepper_Read_Key_Frame_IMP(TextTermStepper *self, // Null-terminate. ptr[text_len] = '\0'; - // Invalidate string. - DECREF(ivars->string); - ivars->string = NULL; + // Invalidate string value. + DECREF(ivars->value); + ivars->value = NULL; } void @@ -181,12 +175,14 @@ TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) { const uint32_t total_text_len = text_overlap + finish_chars_len; // Allocate space. - CharBuf *charbuf = (CharBuf*)ivars->value; - char *ptr = CB_Grow(charbuf, total_text_len); + if (ivars->value) { + CB_Mimic(ivars->charbuf, ivars->value); + } + char *ptr = CB_Grow(ivars->charbuf, total_text_len); // Set the value text. InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len); - CB_Set_Size(charbuf, total_text_len); + CB_Set_Size(ivars->charbuf, total_text_len); if (!StrHelp_utf8_valid(ptr, total_text_len)) { THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64", InStream_Get_Filename(instream), @@ -196,9 +192,9 @@ TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) { // Null-terminate. ptr[total_text_len] = '\0'; - // Invalidate string. - DECREF(ivars->string); - ivars->string = NULL; + // Invalidate string value. + DECREF(ivars->value); + ivars->value = NULL; } diff --git a/core/Lucy/Plan/TextType.cfh b/core/Lucy/Plan/TextType.cfh index b2bf0148a..934529c44 100644 --- a/core/Lucy/Plan/TextType.cfh +++ b/core/Lucy/Plan/TextType.cfh @@ -27,7 +27,7 @@ class Lucy::Plan::TextType inherits Lucy::Plan::FieldType { class Lucy::Index::TermStepper::TextTermStepper inherits Lucy::Index::TermStepper { - String *string; + CharBuf *charbuf; inert incremented TextTermStepper* new(); From ca73413a9a96f3bed1963320326544f396670892 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 10 Nov 2015 13:27:21 +0100 Subject: [PATCH 2/7] Switch PostingPool and TextTermStepper over to ByteBufs --- core/Lucy/Index/PostingPool.c | 18 +++++++++--------- core/Lucy/Plan/TextType.c | 32 ++++++++++++++++---------------- core/Lucy/Plan/TextType.cfh | 2 +- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/core/Lucy/Index/PostingPool.c b/core/Lucy/Index/PostingPool.c index 4ef1c3e59..5404742f0 100644 --- a/core/Lucy/Index/PostingPool.c +++ b/core/Lucy/Index/PostingPool.c @@ -22,7 +22,7 @@ #include "Lucy/Util/ToolSet.h" #include "Lucy/Index/PostingPool.h" -#include "Clownfish/CharBuf.h" +#include "Clownfish/ByteBuf.h" #include "Lucy/Analysis/Inversion.h" #include "Lucy/Plan/Architecture.h" #include "Lucy/Plan/FieldType.h" @@ -353,10 +353,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer, RawPosting *posting = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING); RawPostingIVARS *post_ivars = RawPost_IVARS(posting); - CharBuf *last_term_text - = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len); - const char *last_text_buf = CB_Get_Ptr8(last_term_text); - uint32_t last_text_size = CB_Get_Size(last_term_text); + ByteBuf *last_term_text + = BB_new_bytes(post_ivars->blob, post_ivars->content_len); + const char *last_text_buf = BB_Get_Buf(last_term_text); + uint32_t last_text_size = BB_Get_Size(last_term_text); SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0); // Initialize sentinel to be used on the last iter, using an empty string @@ -402,10 +402,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer, last_skip_filepos = tinfo_ivars->post_filepos; // Remember the term_text so we can write string diffs. - CB_Mimic_Utf8(last_term_text, post_ivars->blob, - post_ivars->content_len); - last_text_buf = CB_Get_Ptr8(last_term_text); - last_text_size = CB_Get_Size(last_term_text); + BB_Mimic_Bytes(last_term_text, post_ivars->blob, + post_ivars->content_len); + last_text_buf = BB_Get_Buf(last_term_text); + last_text_size = BB_Get_Size(last_term_text); } // Bail on last iter before writing invalid posting data. diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c index a9f66b245..a0f828105 100644 --- a/core/Lucy/Plan/TextType.c +++ b/core/Lucy/Plan/TextType.c @@ -21,7 +21,7 @@ #include "Lucy/Plan/TextType.h" #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" -#include "Clownfish/CharBuf.h" +#include "Clownfish/ByteBuf.h" #include "Clownfish/Util/StringHelper.h" TermStepper* @@ -49,14 +49,14 @@ TextTermStepper* TextTermStepper_init(TextTermStepper *self) { TermStepper_init((TermStepper*)self); TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - ivars->charbuf = CB_new(0); + ivars->bytebuf = BB_new(0); return self; } void TextTermStepper_Destroy_IMP(TextTermStepper *self) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - DECREF(ivars->charbuf); + DECREF(ivars->bytebuf); SUPER_DESTROY(self, TEXTTERMSTEPPER); } @@ -78,7 +78,7 @@ Obj* TextTermStepper_Get_Value_IMP(TextTermStepper *self) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); if (ivars->value == NULL) { - ivars->value = (Obj*)CB_To_String(ivars->charbuf); + ivars->value = (Obj*)BB_Trusted_Utf8_To_String(ivars->bytebuf); } return ivars->value; } @@ -88,7 +88,7 @@ TextTermStepper_Reset_IMP(TextTermStepper *self) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); DECREF(ivars->value); ivars->value = NULL; - CB_Set_Size(ivars->charbuf, 0); + BB_Set_Size(ivars->bytebuf, 0); } void @@ -107,9 +107,9 @@ void TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream, Obj *value) { TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self); - CharBuf *charbuf = (CharBuf*)CERTIFY(value, CHARBUF); - const char *new_text = CB_Get_Ptr8(charbuf); - size_t new_size = CB_Get_Size(charbuf); + ByteBuf *bytebuf = (ByteBuf*)CERTIFY(value, BYTEBUF); + const char *new_text = BB_Get_Buf(bytebuf); + size_t new_size = BB_Get_Size(bytebuf); const char *last_text; size_t last_size; @@ -119,8 +119,8 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream, last_size = Str_Get_Size(last_string); } else { - last_text = CB_Get_Ptr8(ivars->charbuf); - last_size = CB_Get_Size(ivars->charbuf); + last_text = BB_Get_Buf(ivars->bytebuf); + last_size = BB_Get_Size(ivars->bytebuf); } // Count how many bytes the strings share at the top. @@ -134,7 +134,7 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream, OutStream_Write_String(outstream, diff_start_str, diff_len); // Update value. - CB_Mimic_Utf8(ivars->charbuf, new_text, new_size); + BB_Mimic_Bytes(ivars->bytebuf, new_text, new_size); // Invalidate string value. DECREF(ivars->value); @@ -148,11 +148,11 @@ TextTermStepper_Read_Key_Frame_IMP(TextTermStepper *self, const uint32_t text_len = InStream_Read_C32(instream); // Allocate space. - char *ptr = CB_Grow(ivars->charbuf, text_len); + char *ptr = BB_Grow(ivars->bytebuf, text_len); // Set the value text. InStream_Read_Bytes(instream, ptr, text_len); - CB_Set_Size(ivars->charbuf, text_len); + BB_Set_Size(ivars->bytebuf, text_len); if (!StrHelp_utf8_valid(ptr, text_len)) { THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64", InStream_Get_Filename(instream), @@ -176,13 +176,13 @@ TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) { // Allocate space. if (ivars->value) { - CB_Mimic(ivars->charbuf, ivars->value); + BB_Mimic(ivars->bytebuf, ivars->value); } - char *ptr = CB_Grow(ivars->charbuf, total_text_len); + char *ptr = BB_Grow(ivars->bytebuf, total_text_len); // Set the value text. InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len); - CB_Set_Size(ivars->charbuf, total_text_len); + BB_Set_Size(ivars->bytebuf, total_text_len); if (!StrHelp_utf8_valid(ptr, total_text_len)) { THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64", InStream_Get_Filename(instream), diff --git a/core/Lucy/Plan/TextType.cfh b/core/Lucy/Plan/TextType.cfh index 934529c44..87bd125e6 100644 --- a/core/Lucy/Plan/TextType.cfh +++ b/core/Lucy/Plan/TextType.cfh @@ -27,7 +27,7 @@ class Lucy::Plan::TextType inherits Lucy::Plan::FieldType { class Lucy::Index::TermStepper::TextTermStepper inherits Lucy::Index::TermStepper { - CharBuf *charbuf; + ByteBuf *bytebuf; inert incremented TextTermStepper* new(); From e306700c71a9d8790c09be6e158683b304fcdc4d Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 10 Nov 2015 13:33:54 +0100 Subject: [PATCH 3/7] Use ByteBuf in S_extract_tv_cache --- core/Lucy/Index/DocVector.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/Lucy/Index/DocVector.c b/core/Lucy/Index/DocVector.c index 4868890af..54dfedd40 100644 --- a/core/Lucy/Index/DocVector.c +++ b/core/Lucy/Index/DocVector.c @@ -123,7 +123,7 @@ S_extract_tv_cache(Blob *field_buf) { Hash *tv_cache = Hash_new(0); const char *tv_string = Blob_Get_Buf(field_buf); int32_t num_terms = NumUtil_decode_c32(&tv_string); - CharBuf *text_buf = CB_new(0); + ByteBuf *text_buf = BB_new(0); // Read the number of highlightable terms in the field. for (int32_t i = 0; i < num_terms; i++) { @@ -131,8 +131,8 @@ S_extract_tv_cache(Blob *field_buf) { size_t len = NumUtil_decode_c32(&tv_string); // Decompress the term text. - CB_Set_Size(text_buf, overlap); - CB_Cat_Trusted_Utf8(text_buf, tv_string, len); + BB_Set_Size(text_buf, overlap); + BB_Cat_Bytes(text_buf, tv_string, len); tv_string += len; // Get positions & offsets string. @@ -147,7 +147,7 @@ S_extract_tv_cache(Blob *field_buf) { len = tv_string - bookmark_ptr; // Store the $text => $posdata pair in the output hash. - String *text = CB_To_String(text_buf); + String *text = BB_Trusted_Utf8_To_String(text_buf); Hash_Store(tv_cache, text, (Obj*)Blob_new(bookmark_ptr, len)); DECREF(text); } From dfb8f335a520ef34b232756b383218bd906d043e Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 10 Nov 2015 13:35:14 +0100 Subject: [PATCH 4/7] Switch over to CB_Clear --- core/Lucy/Highlight/Highlighter.c | 2 +- core/Lucy/Search/QueryParser.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/Lucy/Highlight/Highlighter.c b/core/Lucy/Highlight/Highlighter.c index f88b814ca..f25888f79 100644 --- a/core/Lucy/Highlight/Highlighter.c +++ b/core/Lucy/Highlight/Highlighter.c @@ -603,8 +603,8 @@ S_encode_entities(String *text, CharBuf *buf) { } } + CB_Clear(buf); CB_Grow(buf, space); - CB_Set_Size(buf, 0); DECREF(iter); iter = Str_Top(text); while (STR_OOB != (code_point = StrIter_Next(iter))) { diff --git a/core/Lucy/Search/QueryParser.c b/core/Lucy/Search/QueryParser.c index 32736d5a7..40ba9039d 100644 --- a/core/Lucy/Search/QueryParser.c +++ b/core/Lucy/Search/QueryParser.c @@ -830,7 +830,7 @@ S_unescape(QueryParser *self, String *orig, CharBuf *buf) { int32_t code_point; UNUSED_VAR(self); - CB_Set_Size(buf, 0); + CB_Clear(buf); CB_Grow(buf, Str_Get_Size(orig) + 4); while (STR_OOB != (code_point = StrIter_Next(iter))) { From c45db63a5a6845bbe6353866c064927d0be39067 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 10 Nov 2015 13:45:21 +0100 Subject: [PATCH 5/7] Only use CB_new constructor --- core/Lucy/Search/ANDQuery.c | 3 ++- core/Lucy/Search/Compiler.c | 3 ++- core/Lucy/Search/ORQuery.c | 3 ++- core/Lucy/Search/PhraseQuery.c | 3 ++- core/LucyX/Search/ProximityQuery.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/core/Lucy/Search/ANDQuery.c b/core/Lucy/Search/ANDQuery.c index 3e2f2d5b5..3178c4264 100644 --- a/core/Lucy/Search/ANDQuery.c +++ b/core/Lucy/Search/ANDQuery.c @@ -49,7 +49,8 @@ ANDQuery_To_String_IMP(ANDQuery *self) { uint32_t num_kids = Vec_Get_Size(ivars->children); if (!num_kids) { return Str_new_from_trusted_utf8("()", 2); } else { - CharBuf *buf = CB_new_from_trusted_utf8("(", 1); + CharBuf *buf = CB_new(0); + CB_Cat_Trusted_Utf8(buf, "(", 1); for (uint32_t i = 0; i < num_kids; i++) { String *kid_string = Obj_To_String(Vec_Fetch(ivars->children, i)); CB_Cat(buf, kid_string); diff --git a/core/Lucy/Search/Compiler.c b/core/Lucy/Search/Compiler.c index 6dc920ec8..cea70f6e3 100644 --- a/core/Lucy/Search/Compiler.c +++ b/core/Lucy/Search/Compiler.c @@ -109,7 +109,8 @@ String* Compiler_To_String_IMP(Compiler *self) { CompilerIVARS *const ivars = Compiler_IVARS(self); String *stringified_query = Query_To_String(ivars->parent); - CharBuf *buf = CB_new_from_trusted_utf8("compiler(", 9); + CharBuf *buf = CB_new(0); + CB_Cat_Trusted_Utf8(buf, "compiler(", 9); CB_Cat(buf, stringified_query); CB_Cat_Trusted_Utf8(buf, ")", 1); String *string = CB_Yield_String(buf); diff --git a/core/Lucy/Search/ORQuery.c b/core/Lucy/Search/ORQuery.c index e6264bad5..9b34d85cb 100644 --- a/core/Lucy/Search/ORQuery.c +++ b/core/Lucy/Search/ORQuery.c @@ -64,7 +64,8 @@ ORQuery_To_String_IMP(ORQuery *self) { uint32_t num_kids = Vec_Get_Size(ivars->children); if (!num_kids) { return Str_new_from_trusted_utf8("()", 2); } else { - CharBuf *buf = CB_new_from_trusted_utf8("(", 1); + CharBuf *buf = CB_new(0); + CB_Cat_Trusted_Utf8(buf, "(", 1); uint32_t last_kid = num_kids - 1; for (uint32_t i = 0; i < num_kids; i++) { String *kid_string = Obj_To_String(Vec_Fetch(ivars->children, i)); diff --git a/core/Lucy/Search/PhraseQuery.c b/core/Lucy/Search/PhraseQuery.c index bcaad8de7..a453cfdee 100644 --- a/core/Lucy/Search/PhraseQuery.c +++ b/core/Lucy/Search/PhraseQuery.c @@ -139,7 +139,8 @@ String* PhraseQuery_To_String_IMP(PhraseQuery *self) { PhraseQueryIVARS *const ivars = PhraseQuery_IVARS(self); uint32_t num_terms = Vec_Get_Size(ivars->terms); - CharBuf *buf = CB_new_from_str(ivars->field); + CharBuf *buf = CB_new(0); + CB_Cat(buf, ivars->field); CB_Cat_Trusted_Utf8(buf, ":\"", 2); for (uint32_t i = 0; i < num_terms; i++) { Obj *term = Vec_Fetch(ivars->terms, i); diff --git a/core/LucyX/Search/ProximityQuery.c b/core/LucyX/Search/ProximityQuery.c index 5ee051957..3e0337dd6 100644 --- a/core/LucyX/Search/ProximityQuery.c +++ b/core/LucyX/Search/ProximityQuery.c @@ -151,7 +151,8 @@ String* ProximityQuery_To_String_IMP(ProximityQuery *self) { ProximityQueryIVARS *const ivars = ProximityQuery_IVARS(self); uint32_t num_terms = Vec_Get_Size(ivars->terms); - CharBuf *buf = CB_new_from_str(ivars->field); + CharBuf *buf = CB_new(0); + CB_Cat(buf, ivars->field); CB_Cat_Trusted_Utf8(buf, ":\"", 2); for (uint32_t i = 0; i < num_terms; i++) { Obj *term = Vec_Fetch(ivars->terms, i); From 9cc9730276a808ba434746a8bce9a1bd41223a3c Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 12 Nov 2015 16:19:24 +0100 Subject: [PATCH 6/7] Don't use CB_newf for JSON errors --- core/Lucy/Util/Json.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/core/Lucy/Util/Json.c b/core/Lucy/Util/Json.c index 6afa9b94c..2d2669581 100644 --- a/core/Lucy/Util/Json.c +++ b/core/Lucy/Util/Json.c @@ -89,7 +89,7 @@ S_cat_whitespace(CharBuf *buf, int32_t depth); // Set the global error object, appending escaped JSON in the vicinity of the // error. static void -S_set_error(CharBuf *buf, const char *json, const char *limit, int line, +S_set_error(const char *mess, const char *json, const char *limit, int line, const char *func); #define SET_ERROR(_mess, _json, _end) \ S_set_error(_mess, _json, _end, __LINE__, CFISH_ERR_FUNC_MACRO) @@ -480,7 +480,7 @@ S_do_parse_json(void *json_parser, const char *json, size_t len) { } LucyParseJson(json_parser, token_type, value, &state); if (state.errors) { - SET_ERROR(CB_newf("JSON syntax error"), save, end); + SET_ERROR("JSON syntax error", save, end); return NULL; } } @@ -488,7 +488,7 @@ S_do_parse_json(void *json_parser, const char *json, size_t len) { // Finish up. LucyParseJson(json_parser, 0, NULL, &state); if (state.errors) { - SET_ERROR(CB_newf("JSON syntax error"), json, end); + SET_ERROR("JSON syntax error", json, end); return NULL; } return state.result; @@ -527,7 +527,7 @@ S_parse_number(const char **json_ptr, const char *limit) { } } if (!result) { - SET_ERROR(CB_newf("JSON syntax error"), top, limit); + SET_ERROR("JSON syntax error", top, limit); } return result; } @@ -554,7 +554,7 @@ S_parse_string(const char **json_ptr, const char *limit) { } } if (!end) { - SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit); + SET_ERROR("Unterminated string", *json_ptr, limit); return NULL; } @@ -628,12 +628,12 @@ S_unescape_text(const char *top, const char *end) { char *temp_ptr = temp; if (num_end != temp_ptr + 4 || code_point < 0) { FREEMEM(target_buf); - SET_ERROR(CB_newf("Invalid \\u escape"), text - 5, end); + SET_ERROR("Invalid \\u escape", text - 5, end); return NULL; } if (code_point >= 0xD800 && code_point <= 0xDFFF) { FREEMEM(target_buf); - SET_ERROR(CB_newf("Surrogate pairs not supported"), + SET_ERROR("Surrogate pairs not supported", text - 5, end); return NULL; } @@ -643,7 +643,7 @@ S_unescape_text(const char *top, const char *end) { break; default: FREEMEM(target_buf); - SET_ERROR(CB_newf("Illegal escape"), text - 1, end); + SET_ERROR("Illegal escape", text - 1, end); return NULL; } } @@ -674,8 +674,11 @@ SI_check_keyword(const char *json, const char* end, const char *keyword, } static void -S_set_error(CharBuf *buf, const char *json, const char *limit, int line, +S_set_error(const char *mess, const char *json, const char *limit, int line, const char *func) { + CharBuf *buf = CB_new(0); + CB_Cat_Utf8(buf, mess, strlen(mess)); + if (func) { CB_catf(buf, " at %s %s line %i32 near ", func, __FILE__, (int32_t)line); @@ -696,11 +699,11 @@ S_set_error(CharBuf *buf, const char *json, const char *limit, int line, String *snippet = SSTR_WRAP_UTF8(json, (size_t)len); S_append_json_string(snippet, buf); - String *mess = CB_Yield_String(buf); + String *full_mess = CB_Yield_String(buf); DECREF(buf); // Set global error object. - Err_set_error(Err_new(mess)); + Err_set_error(Err_new(full_mess)); } int64_t From 19dc916719792a949bb0a11732ff66d890ca3f1c Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 12 Nov 2015 16:41:41 +0100 Subject: [PATCH 7/7] Don't null-terminate ByteBuf --- core/Lucy/Plan/TextType.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c index a0f828105..be851baa8 100644 --- a/core/Lucy/Plan/TextType.c +++ b/core/Lucy/Plan/TextType.c @@ -159,9 +159,6 @@ TextTermStepper_Read_Key_Frame_IMP(TextTermStepper *self, InStream_Tell(instream) - text_len); } - // Null-terminate. - ptr[text_len] = '\0'; - // Invalidate string value. DECREF(ivars->value); ivars->value = NULL; @@ -189,9 +186,6 @@ TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) { InStream_Tell(instream) - finish_chars_len); } - // Null-terminate. - ptr[total_text_len] = '\0'; - // Invalidate string value. DECREF(ivars->value); ivars->value = NULL;