From 7e31cc6930fe9c78b9a21124765dc33a98d10759 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Wed, 30 Mar 2016 17:34:19 -0700 Subject: [PATCH 1/7] Change I32Array size to `size_t`. Only the core module -- all usage sites need to be changed next. --- core/Lucy/Object/I32Array.c | 20 +++++++++++--------- core/Lucy/Object/I32Array.cfh | 16 ++++++++-------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/core/Lucy/Object/I32Array.c b/core/Lucy/Object/I32Array.c index d82448fac..4d1172749 100644 --- a/core/Lucy/Object/I32Array.c +++ b/core/Lucy/Object/I32Array.c @@ -20,7 +20,7 @@ #include "Lucy/Object/I32Array.h" I32Array* -I32Arr_new(int32_t *ints, uint32_t size) { +I32Arr_new(int32_t *ints, size_t size) { I32Array *self = (I32Array*)Class_Make_Obj(I32ARRAY); int32_t *ints_copy = (int32_t*)MALLOCATE(size * sizeof(int32_t)); memcpy(ints_copy, ints, size * sizeof(int32_t)); @@ -28,20 +28,20 @@ I32Arr_new(int32_t *ints, uint32_t size) { } I32Array* -I32Arr_new_blank(uint32_t size) { +I32Arr_new_blank(size_t size) { I32Array *self = (I32Array*)Class_Make_Obj(I32ARRAY); int32_t *ints = (int32_t*)CALLOCATE(size, sizeof(int32_t)); return I32Arr_init(self, ints, size); } I32Array* -I32Arr_new_steal(int32_t *ints, uint32_t size) { +I32Arr_new_steal(int32_t *ints, size_t size) { I32Array *self = (I32Array*)Class_Make_Obj(I32ARRAY); return I32Arr_init(self, ints, size); } I32Array* -I32Arr_init(I32Array *self, int32_t *ints, uint32_t size) { +I32Arr_init(I32Array *self, int32_t *ints, size_t size) { I32ArrayIVARS *const ivars = I32Arr_IVARS(self); ivars->ints = ints; ivars->size = size; @@ -56,24 +56,26 @@ I32Arr_Destroy_IMP(I32Array *self) { } void -I32Arr_Set_IMP(I32Array *self, uint32_t tick, int32_t value) { +I32Arr_Set_IMP(I32Array *self, size_t tick, int32_t value) { I32ArrayIVARS *const ivars = I32Arr_IVARS(self); if (tick >= ivars->size) { - THROW(ERR, "Out of bounds: %u32 >= %u32", tick, ivars->size); + THROW(ERR, "Out of bounds: %u64 >= %u64", (uint64_t)tick, + (uint64_t)ivars->size); } ivars->ints[tick] = value; } int32_t -I32Arr_Get_IMP(I32Array *self, uint32_t tick) { +I32Arr_Get_IMP(I32Array *self, size_t tick) { I32ArrayIVARS *const ivars = I32Arr_IVARS(self); if (tick >= ivars->size) { - THROW(ERR, "Out of bounds: %u32 >= %u32", tick, ivars->size); + THROW(ERR, "Out of bounds: %u64 >= %u64", (uint64_t)tick, + (uint64_t)ivars->size); } return ivars->ints[tick]; } -uint32_t +size_t I32Arr_Get_Size_IMP(I32Array *self) { return I32Arr_IVARS(self)->size; } diff --git a/core/Lucy/Object/I32Array.cfh b/core/Lucy/Object/I32Array.cfh index 84f436da0..9ba091802 100644 --- a/core/Lucy/Object/I32Array.cfh +++ b/core/Lucy/Object/I32Array.cfh @@ -18,35 +18,35 @@ parcel Lucy; public class Lucy::Object::I32Array nickname I32Arr inherits Clownfish::Obj { int32_t *ints; - uint32_t size; + size_t size; public inert incremented I32Array* - new(int32_t *ints, uint32_t size); + new(int32_t *ints, size_t size); inert incremented I32Array* - new_steal(int32_t *ints, uint32_t size); + new_steal(int32_t *ints, size_t size); public inert incremented I32Array* - new_blank(uint32_t size); + new_blank(size_t size); public inert I32Array* - init(I32Array *self, int32_t *ints, uint32_t size); + init(I32Array *self, int32_t *ints, size_t size); /** Set the value at `tick`, or throw an error if * `tick` is out of bounds. */ public void - Set(I32Array *self, uint32_t tick, int32_t value); + Set(I32Array *self, size_t tick, int32_t value); /** Return the value at `tick`, or throw an error if * `tick` is out of bounds. */ public int32_t - Get(I32Array *self, uint32_t tick); + Get(I32Array *self, size_t tick); /** Accessor for 'size' member. */ - public uint32_t + public size_t Get_Size(I32Array *self); public void From 0c494085ac98a77d7969f1a385be1bf39936bb2e Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 5 Apr 2016 17:47:50 -0700 Subject: [PATCH 2/7] Adapt Perl bindings for I32Array size change. --- perl/buildlib/Lucy/Build/Binding/Object.pm | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/perl/buildlib/Lucy/Build/Binding/Object.pm b/perl/buildlib/Lucy/Build/Binding/Object.pm index fb9fd5afb..f24f9cd2e 100644 --- a/perl/buildlib/Lucy/Build/Binding/Object.pm +++ b/perl/buildlib/Lucy/Build/Binding/Object.pm @@ -78,18 +78,22 @@ CODE: ints_av = (AV*)SvRV(ints_sv); } if (ints_av && SvTYPE(ints_av) == SVt_PVAV) { - int32_t size = av_len(ints_av) + 1; - int32_t *ints = (int32_t*)CFISH_MALLOCATE(size * sizeof(int32_t)); - int32_t i; + int64_t size = av_len(ints_av) + 1; + int32_t *ints; + int64_t i; + if (size < 0 || size > INT32_MAX) { + THROW(CFISH_ERR, "Size out of range: %i64", size); + } + ints = (int32_t*)CFISH_MALLOCATE(size * sizeof(int32_t)); for (i = 0; i < size; i++) { - SV **const sv_ptr = av_fetch(ints_av, i, 0); + SV **const sv_ptr = av_fetch(ints_av, (I32)i, 0); ints[i] = (sv_ptr && XSBind_sv_defined(aTHX_ *sv_ptr)) ? SvIV(*sv_ptr) : 0; } self = (lucy_I32Array*)XSBind_new_blank_obj(aTHX_ either_sv); - lucy_I32Arr_init(self, ints, size); + lucy_I32Arr_init(self, ints, (size_t)size); } else { THROW(CFISH_ERR, "Required param 'ints' isn't an arrayref"); @@ -105,8 +109,8 @@ to_arrayref(self) CODE: { AV *out_av = newAV(); - uint32_t i; - uint32_t size = LUCY_I32Arr_Get_Size(self); + size_t i; + size_t size = LUCY_I32Arr_Get_Size(self); av_extend(out_av, size); for (i = 0; i < size; i++) { From bb4f6e44bc55664f7ac4ec5e3c318619ffa6f8f2 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 5 Apr 2016 19:02:38 -0700 Subject: [PATCH 3/7] Adapt PolyReader_sub_tick for changes to I32Array. --- core/Lucy/Index/PolyReader.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/core/Lucy/Index/PolyReader.c b/core/Lucy/Index/PolyReader.c index 5c807d298..37dfcec12 100644 --- a/core/Lucy/Index/PolyReader.c +++ b/core/Lucy/Index/PolyReader.c @@ -550,16 +550,19 @@ PolyReader_Get_Seg_Readers_IMP(PolyReader *self) { uint32_t PolyReader_sub_tick(I32Array *offsets, int32_t doc_id) { - int32_t size = I32Arr_Get_Size(offsets); + size_t size = I32Arr_Get_Size(offsets); if (size == 0) { return 0; } + else if (size > INT32_MAX) { + THROW(ERR, "Unexpectedly large offsets array: %u64", (uint64_t)size); + } int32_t lo = -1; - int32_t hi = size; + int32_t hi = (int32_t)size; while (hi - lo > 1) { int32_t mid = lo + ((hi - lo) / 2); - int32_t offset = I32Arr_Get(offsets, mid); + int32_t offset = I32Arr_Get(offsets, (size_t)mid); if (doc_id <= offset) { hi = mid; } @@ -567,12 +570,12 @@ PolyReader_sub_tick(I32Array *offsets, int32_t doc_id) { lo = mid; } } - if (hi == size) { + if (hi == (int32_t)size) { hi--; } while (hi > 0) { - int32_t offset = I32Arr_Get(offsets, hi); + int32_t offset = I32Arr_Get(offsets, (size_t)hi); if (doc_id <= offset) { hi--; } From ffb3c830ac57fd5d94b24b1ad438542238b8ac84 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 5 Apr 2016 19:05:43 -0700 Subject: [PATCH 4/7] Adapt PhraseQuery, ProximityQuery for I32Array. Adapt these two very similar Query subclasses for the I32Array size change. --- core/Lucy/Search/PhraseQuery.c | 20 ++++++++++---------- core/LucyX/Search/ProximityQuery.c | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/core/Lucy/Search/PhraseQuery.c b/core/Lucy/Search/PhraseQuery.c index a74547d87..48d7c1397 100644 --- a/core/Lucy/Search/PhraseQuery.c +++ b/core/Lucy/Search/PhraseQuery.c @@ -375,8 +375,8 @@ PhraseCompiler_Highlight_Spans_IMP(PhraseCompiler *self, Searcher *searcher, if (i == 0) { // Set initial positions from first term. I32Array *positions = TV_Get_Positions(term_vector); - for (uint32_t j = I32Arr_Get_Size(positions); j > 0; j--) { - BitVec_Set(posit_vec, I32Arr_Get(positions, j - 1)); + for (size_t j = I32Arr_Get_Size(positions); j > 0; j--) { + BitVec_Set(posit_vec, (uint32_t)I32Arr_Get(positions, j - 1)); } } else { @@ -384,8 +384,8 @@ PhraseCompiler_Highlight_Spans_IMP(PhraseCompiler *self, Searcher *searcher, I32Array *positions = TV_Get_Positions(term_vector); BitVec_Clear_All(other_posit_vec); - for (uint32_t j = I32Arr_Get_Size(positions); j > 0; j--) { - int32_t pos = I32Arr_Get(positions, j - 1) - i; + for (size_t j = I32Arr_Get_Size(positions); j > 0; j--) { + int32_t pos = I32Arr_Get(positions, j - 1) - (int32_t)i; if (pos >= 0) { BitVec_Set(other_posit_vec, pos); } @@ -406,24 +406,24 @@ PhraseCompiler_Highlight_Spans_IMP(PhraseCompiler *self, Searcher *searcher, I32Array *tv_end_offsets = TV_Get_End_Offsets(last_tv); uint32_t terms_max = num_terms - 1; I32Array *valid_posits = BitVec_To_Array(posit_vec); - uint32_t num_valid_posits = I32Arr_Get_Size(valid_posits); - uint32_t j = 0; + size_t num_valid_posits = I32Arr_Get_Size(valid_posits); + size_t j = 0; float weight = PhraseCompiler_Get_Weight(self); - uint32_t i = 0; + size_t i = 0; // Add only those starts/ends that belong to a valid position. - for (uint32_t posit_tick = 0; posit_tick < num_valid_posits; posit_tick++) { + for (size_t posit_tick = 0; posit_tick < num_valid_posits; posit_tick++) { int32_t valid_start_posit = I32Arr_Get(valid_posits, posit_tick); int32_t valid_end_posit = valid_start_posit + terms_max; int32_t start_offset = 0, end_offset = 0; - for (uint32_t max = I32Arr_Get_Size(tv_start_positions); i < max; i++) { + for (size_t max = I32Arr_Get_Size(tv_start_positions); i < max; i++) { if (I32Arr_Get(tv_start_positions, i) == valid_start_posit) { start_offset = I32Arr_Get(tv_start_offsets, i); break; } } - for (uint32_t max = I32Arr_Get_Size(tv_end_positions); j < max; j++) { + for (size_t max = I32Arr_Get_Size(tv_end_positions); j < max; j++) { if (I32Arr_Get(tv_end_positions, j) == valid_end_posit) { end_offset = I32Arr_Get(tv_end_offsets, j); break; diff --git a/core/LucyX/Search/ProximityQuery.c b/core/LucyX/Search/ProximityQuery.c index a4d42ccfb..2f319333e 100644 --- a/core/LucyX/Search/ProximityQuery.c +++ b/core/LucyX/Search/ProximityQuery.c @@ -404,8 +404,8 @@ ProximityCompiler_Highlight_Spans_IMP(ProximityCompiler *self, if (i == 0) { // Set initial positions from first term. I32Array *positions = TV_Get_Positions(term_vector); - for (uint32_t j = I32Arr_Get_Size(positions); j > 0; j--) { - BitVec_Set(posit_vec, I32Arr_Get(positions, j - 1)); + for (size_t j = I32Arr_Get_Size(positions); j > 0; j--) { + BitVec_Set(posit_vec, (uint32_t)I32Arr_Get(positions, j - 1)); } } else { @@ -413,8 +413,8 @@ ProximityCompiler_Highlight_Spans_IMP(ProximityCompiler *self, I32Array *positions = TV_Get_Positions(term_vector); BitVec_Clear_All(other_posit_vec); - for (uint32_t j = I32Arr_Get_Size(positions); j > 0; j--) { - int32_t pos = I32Arr_Get(positions, j - 1) - i; + for (size_t j = I32Arr_Get_Size(positions); j > 0; j--) { + int32_t pos = I32Arr_Get(positions, j - 1) - (int32_t)i; if (pos >= 0) { BitVec_Set(other_posit_vec, pos); } @@ -435,24 +435,24 @@ ProximityCompiler_Highlight_Spans_IMP(ProximityCompiler *self, I32Array *tv_end_offsets = TV_Get_End_Offsets(last_tv); uint32_t terms_max = num_terms - 1; I32Array *valid_posits = BitVec_To_Array(posit_vec); - uint32_t num_valid_posits = I32Arr_Get_Size(valid_posits); - uint32_t j = 0; + size_t num_valid_posits = I32Arr_Get_Size(valid_posits); + size_t j = 0; float weight = ProximityCompiler_Get_Weight(self); - uint32_t i = 0; + size_t i = 0; // Add only those starts/ends that belong to a valid position. - for (uint32_t posit_tick = 0; posit_tick < num_valid_posits; posit_tick++) { + for (size_t posit_tick = 0; posit_tick < num_valid_posits; posit_tick++) { int32_t valid_start_posit = I32Arr_Get(valid_posits, posit_tick); int32_t valid_end_posit = valid_start_posit + terms_max; int32_t start_offset = 0, end_offset = 0; - for (uint32_t max = I32Arr_Get_Size(tv_start_positions); i < max; i++) { + for (size_t max = I32Arr_Get_Size(tv_start_positions); i < max; i++) { if (I32Arr_Get(tv_start_positions, i) == valid_start_posit) { start_offset = I32Arr_Get(tv_start_offsets, i); break; } } - for (uint32_t max = I32Arr_Get_Size(tv_end_positions); j < max; j++) { + for (size_t max = I32Arr_Get_Size(tv_end_positions); j < max; j++) { if (I32Arr_Get(tv_end_positions, j) == valid_end_posit) { end_offset = I32Arr_Get(tv_end_offsets, j); break; From 3f0e59ba8f3f7c99f6ea8950d48c4fcd28a700b9 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 5 Apr 2016 19:09:54 -0700 Subject: [PATCH 5/7] Adapt calls into I32Array APIs for size change. Add/remove casts for functions where the width of a size argument has changed. --- core/Lucy/Index/BackgroundMerger.c | 4 ++-- core/Lucy/Index/DeletionsWriter.c | 2 +- core/Lucy/Index/DocWriter.c | 2 +- core/Lucy/Index/HighlightWriter.c | 2 +- core/Lucy/Index/PolyReader.c | 2 +- core/Lucy/Index/PostingPool.c | 2 +- core/Lucy/Index/SortFieldWriter.c | 2 +- core/Lucy/Search/IndexSearcher.c | 2 +- core/Lucy/Search/PolySearcher.c | 2 +- core/Lucy/Test/Object/TestBitVector.c | 2 +- core/Lucy/Test/Search/TestSeriesMatcher.c | 2 +- core/LucyX/Search/MockMatcher.c | 4 ++-- 12 files changed, 14 insertions(+), 14 deletions(-) diff --git a/core/Lucy/Index/BackgroundMerger.c b/core/Lucy/Index/BackgroundMerger.c index a7f86a003..14ffe1dae 100644 --- a/core/Lucy/Index/BackgroundMerger.c +++ b/core/Lucy/Index/BackgroundMerger.c @@ -324,7 +324,7 @@ S_merge_updated_deletions(BackgroundMerger *self) { = (SegReader*)Vec_Fetch(merge_seg_readers, i); if (SegReader_Get_Seg_Num(seg_reader) == merge_seg_num) { I32Array *offsets = PolyReader_Offsets(merge_polyreader); - seg_tick = (uint32_t)i; + seg_tick = i; offset = I32Arr_Get(offsets, seg_tick); DECREF(offsets); } @@ -345,7 +345,7 @@ S_merge_updated_deletions(BackgroundMerger *self) { // Find the slot where the deleted doc resides in the // rewritten segment. If the doc was already deleted when we // were merging, do nothing. - int32_t remapped = I32Arr_Get(doc_map, del); + int32_t remapped = I32Arr_Get(doc_map, (size_t)del); if (remapped) { // It's a new deletion, so carry it forward and zap it in // the rewritten segment. diff --git a/core/Lucy/Index/DeletionsWriter.c b/core/Lucy/Index/DeletionsWriter.c index 1b25462a4..e2b9b09b5 100644 --- a/core/Lucy/Index/DeletionsWriter.c +++ b/core/Lucy/Index/DeletionsWriter.c @@ -64,7 +64,7 @@ DelWriter_Generate_Doc_Map_IMP(DeletionsWriter *self, Matcher *deletions, } } - return I32Arr_new_steal(doc_map, doc_max + 1); + return I32Arr_new_steal(doc_map, (size_t)doc_max + 1); } int32_t DefDelWriter_current_file_format = 1; diff --git a/core/Lucy/Index/DocWriter.c b/core/Lucy/Index/DocWriter.c index 08f450c14..818f45bee 100644 --- a/core/Lucy/Index/DocWriter.c +++ b/core/Lucy/Index/DocWriter.c @@ -180,7 +180,7 @@ DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader, DEFAULTDOCREADER); for (int32_t i = 1, max = SegReader_Doc_Max(reader); i <= max; i++) { - if (I32Arr_Get(doc_map, i)) { + if (I32Arr_Get(doc_map, (size_t)i)) { int64_t start = OutStream_Tell(dat_out); // Copy record over. diff --git a/core/Lucy/Index/HighlightWriter.c b/core/Lucy/Index/HighlightWriter.c index 518750ce2..08ad85a88 100644 --- a/core/Lucy/Index/HighlightWriter.c +++ b/core/Lucy/Index/HighlightWriter.c @@ -233,7 +233,7 @@ HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader, for (orig = 1; orig <= doc_max; orig++) { // Skip deleted docs. - if (doc_map && !I32Arr_Get(doc_map, orig)) { + if (doc_map && !I32Arr_Get(doc_map, (size_t)orig)) { continue; } diff --git a/core/Lucy/Index/PolyReader.c b/core/Lucy/Index/PolyReader.c index 37dfcec12..4febdebe2 100644 --- a/core/Lucy/Index/PolyReader.c +++ b/core/Lucy/Index/PolyReader.c @@ -131,7 +131,7 @@ S_init_sub_readers(PolyReader *self, Vector *sub_readers) { } DECREF(iter); } - ivars->offsets = I32Arr_new_steal(starts, (uint32_t)num_sub_readers); + ivars->offsets = I32Arr_new_steal(starts, num_sub_readers); HashIterator *iter = HashIter_new(data_readers); while (HashIter_Next(iter)) { diff --git a/core/Lucy/Index/PostingPool.c b/core/Lucy/Index/PostingPool.c index a69c2299c..74b0e0cfc 100644 --- a/core/Lucy/Index/PostingPool.c +++ b/core/Lucy/Index/PostingPool.c @@ -513,7 +513,7 @@ PostPool_Refill_IMP(PostingPool *self) { // Skip deletions. if (doc_map != NULL) { const int32_t remapped - = I32Arr_Get(doc_map, rawpost_ivars->doc_id - doc_base); + = I32Arr_Get(doc_map, (size_t)(rawpost_ivars->doc_id - doc_base)); if (!remapped) { continue; } diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c index 12490da88..4118ece88 100644 --- a/core/Lucy/Index/SortFieldWriter.c +++ b/core/Lucy/Index/SortFieldWriter.c @@ -457,7 +457,7 @@ SortFieldWriter_Refill_IMP(SortFieldWriter *self) { int32_t ord = SortCache_Ordinal(sort_cache, raw_doc_id); if (ord != null_ord) { int32_t remapped = doc_map - ? I32Arr_Get(doc_map, raw_doc_id) + ? I32Arr_Get(doc_map, (size_t)raw_doc_id) : raw_doc_id; if (remapped) { Obj *val = SortCache_Value(sort_cache, ord); diff --git a/core/Lucy/Search/IndexSearcher.c b/core/Lucy/Search/IndexSearcher.c index 0274733a7..000d66cf9 100644 --- a/core/Lucy/Search/IndexSearcher.c +++ b/core/Lucy/Search/IndexSearcher.c @@ -147,7 +147,7 @@ IxSearcher_Collect_IMP(IndexSearcher *self, Query *query, Collector *collector) Matcher *matcher = Compiler_Make_Matcher(compiler, seg_reader, need_score); if (matcher) { - int32_t seg_start = I32Arr_Get(seg_starts, (uint32_t)i); + int32_t seg_start = I32Arr_Get(seg_starts, i); Matcher *deletions = DelReader_Iterator(del_reader); Coll_Set_Reader(collector, seg_reader); Coll_Set_Base(collector, seg_start); diff --git a/core/Lucy/Search/PolySearcher.c b/core/Lucy/Search/PolySearcher.c index 40cd20466..65f286d03 100644 --- a/core/Lucy/Search/PolySearcher.c +++ b/core/Lucy/Search/PolySearcher.c @@ -183,7 +183,7 @@ PolySearcher_Collect_IMP(PolySearcher *self, Query *query, I32Array *starts = ivars->starts; for (size_t i = 0, max = Vec_Get_Size(searchers); i < max; i++) { - int32_t start = I32Arr_Get(starts, (uint32_t)i); + int32_t start = I32Arr_Get(starts, i); Searcher *searcher = (Searcher*)Vec_Fetch(searchers, i); OffsetCollector *offset_coll = OffsetColl_new(collector, start); Searcher_Collect(searcher, query, (Collector*)offset_coll); diff --git a/core/Lucy/Test/Object/TestBitVector.c b/core/Lucy/Test/Object/TestBitVector.c index 1ad92faa1..c8c420349 100644 --- a/core/Lucy/Test/Object/TestBitVector.c +++ b/core/Lucy/Test/Object/TestBitVector.c @@ -418,7 +418,7 @@ test_To_Array(TestBatchRunner *runner) { // Create the array and compare it to the source. array = BitVec_To_Array(bit_vec); for (i = 0; i < num_unique; i++) { - if (I32Arr_Get(array, i) != (int32_t)source_ints[i]) { break; } + if (I32Arr_Get(array, (size_t)i) != (int32_t)source_ints[i]) { break; } } TEST_INT_EQ(runner, i, num_unique, "To_Array (%ld == %ld)", i, num_unique); diff --git a/core/Lucy/Test/Search/TestSeriesMatcher.c b/core/Lucy/Test/Search/TestSeriesMatcher.c index b6c949bd6..7becce430 100644 --- a/core/Lucy/Test/Search/TestSeriesMatcher.c +++ b/core/Lucy/Test/Search/TestSeriesMatcher.c @@ -71,7 +71,7 @@ S_generate_match_list(int32_t first, int32_t max, int32_t doc_inc) { } if (i != count) { THROW(ERR, "Screwed up somehow: %i32 %i32", i, count); } - return I32Arr_new_steal(doc_ids, count); + return I32Arr_new_steal(doc_ids, (size_t)count); } static void diff --git a/core/LucyX/Search/MockMatcher.c b/core/LucyX/Search/MockMatcher.c index 1cce7414a..308719bdf 100644 --- a/core/LucyX/Search/MockMatcher.c +++ b/core/LucyX/Search/MockMatcher.c @@ -58,7 +58,7 @@ MockMatcher_Next_IMP(MockMatcher* self) { ivars->tick--; return 0; } - return I32Arr_Get(ivars->doc_ids, ivars->tick); + return I32Arr_Get(ivars->doc_ids, (size_t)ivars->tick); } float @@ -74,7 +74,7 @@ MockMatcher_Score_IMP(MockMatcher* self) { int32_t MockMatcher_Get_Doc_ID_IMP(MockMatcher* self) { MockMatcherIVARS *const ivars = MockMatcher_IVARS(self); - return I32Arr_Get(ivars->doc_ids, ivars->tick); + return I32Arr_Get(ivars->doc_ids, (size_t)ivars->tick); } From e7b501caf3e6b6672f7c888e3fadecb30d07b360 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Tue, 5 Apr 2016 19:14:53 -0700 Subject: [PATCH 6/7] Various adaptations to I32Array size change. These changes are a grab bag, some of which are easy to review and some of which are more subtle. --- core/Lucy/Index/DeletionsWriter.c | 5 ++++- core/Lucy/Index/IndexManager.c | 6 +++--- core/Lucy/Index/SegWriter.c | 4 ++-- core/Lucy/Index/TermVector.c | 16 +++++++++------- core/Lucy/Index/TermVector.cfh | 2 +- core/Lucy/Search/SeriesMatcher.c | 2 +- core/Lucy/Search/TermQuery.c | 2 +- core/Lucy/Test/Object/TestI32Array.c | 8 ++++---- core/Lucy/Test/Search/TestSeriesMatcher.c | 17 ++++++++--------- 9 files changed, 33 insertions(+), 29 deletions(-) diff --git a/core/Lucy/Index/DeletionsWriter.c b/core/Lucy/Index/DeletionsWriter.c index e2b9b09b5..c646658c9 100644 --- a/core/Lucy/Index/DeletionsWriter.c +++ b/core/Lucy/Index/DeletionsWriter.c @@ -53,6 +53,9 @@ DelWriter_Generate_Doc_Map_IMP(DeletionsWriter *self, Matcher *deletions, int32_t *doc_map = (int32_t*)CALLOCATE(doc_max + 1, sizeof(int32_t)); int32_t next_deletion = deletions ? Matcher_Next(deletions) : INT32_MAX; UNUSED_VAR(self); + if (doc_max < 0) { + THROW(ERR, "Negative doc_max is invalid: %i32", doc_max); + } // 0 for a deleted doc, a new number otherwise for (int32_t i = 1, new_doc_id = 1; i <= doc_max; i++) { @@ -308,7 +311,7 @@ DefDelWriter_Delete_By_Doc_ID_IMP(DefaultDeletionsWriter *self, int32_t doc_id) DefaultDeletionsWriterIVARS *const ivars = DefDelWriter_IVARS(self); uint32_t sub_tick = PolyReader_sub_tick(ivars->seg_starts, doc_id); BitVector *bit_vec = (BitVector*)Vec_Fetch(ivars->bit_vecs, sub_tick); - uint32_t offset = I32Arr_Get(ivars->seg_starts, sub_tick); + int32_t offset = I32Arr_Get(ivars->seg_starts, sub_tick); int32_t seg_doc_id = doc_id - offset; if (!BitVec_Get(bit_vec, seg_doc_id)) { diff --git a/core/Lucy/Index/IndexManager.c b/core/Lucy/Index/IndexManager.c index af6205fa5..15425e930 100644 --- a/core/Lucy/Index/IndexManager.c +++ b/core/Lucy/Index/IndexManager.c @@ -192,14 +192,14 @@ uint32_t IxManager_Choose_Sparse_IMP(IndexManager *self, I32Array *doc_counts) { UNUSED_VAR(self); uint32_t threshold = 0; - uint32_t total_docs = 0; - const uint32_t num_candidates = I32Arr_Get_Size(doc_counts); + int32_t total_docs = 0; + const uint32_t num_candidates = (uint32_t)I32Arr_Get_Size(doc_counts); // Find sparsely populated segments. for (uint32_t i = 0; i < num_candidates; i++) { uint32_t num_segs_when_done = num_candidates - threshold + 1; total_docs += I32Arr_Get(doc_counts, i); - if (total_docs < S_fibonacci(num_segs_when_done + 5)) { + if (total_docs < (int32_t)S_fibonacci(num_segs_when_done + 5)) { threshold = i + 1; } } diff --git a/core/Lucy/Index/SegWriter.c b/core/Lucy/Index/SegWriter.c index e67df829e..1026b94cf 100644 --- a/core/Lucy/Index/SegWriter.c +++ b/core/Lucy/Index/SegWriter.c @@ -127,8 +127,8 @@ SegWriter_Add_Inverted_Doc_IMP(SegWriter *self, Inverter *inverter, static void S_adjust_doc_id(SegWriter *self, SegReader *reader, I32Array *doc_map) { SegWriterIVARS *const ivars = SegWriter_IVARS(self); - uint32_t doc_count = SegReader_Doc_Max(reader); - for (uint32_t i = 1, max = I32Arr_Get_Size(doc_map); i < max; i++) { + size_t doc_count = SegReader_Doc_Max(reader); + for (size_t i = 1, max = I32Arr_Get_Size(doc_map); i < max; i++) { if (I32Arr_Get(doc_map, i) == 0) { doc_count--; } } Seg_Increment_Count(ivars->segment, doc_count); diff --git a/core/Lucy/Index/TermVector.c b/core/Lucy/Index/TermVector.c index e8d5aabfd..b1a3391c1 100644 --- a/core/Lucy/Index/TermVector.c +++ b/core/Lucy/Index/TermVector.c @@ -46,8 +46,10 @@ TV_init(TermVector *self, String *field, String *text, if (I32Arr_Get_Size(start_offsets) != ivars->num_pos || I32Arr_Get_Size(end_offsets) != ivars->num_pos ) { - THROW(ERR, "Unbalanced arrays: %u32 %u32 %u32", ivars->num_pos, - I32Arr_Get_Size(start_offsets), I32Arr_Get_Size(end_offsets)); + THROW(ERR, "Unbalanced arrays: %u64 %u64 %u64", + (uint64_t)ivars->num_pos, + (uint64_t)I32Arr_Get_Size(start_offsets), + (uint64_t)I32Arr_Get_Size(end_offsets)); } return self; @@ -88,9 +90,9 @@ TV_Serialize_IMP(TermVector *self, OutStream *target) { Freezer_serialize_string(ivars->field, target); Freezer_serialize_string(ivars->text, target); - OutStream_Write_C32(target, ivars->num_pos); + OutStream_Write_C64(target, ivars->num_pos); - for (uint32_t i = 0; i < ivars->num_pos; i++) { + for (size_t i = 0; i < ivars->num_pos; i++) { OutStream_Write_C32(target, posits[i]); OutStream_Write_C32(target, starts[i]); OutStream_Write_C32(target, ends[i]); @@ -101,13 +103,13 @@ TermVector* TV_Deserialize_IMP(TermVector *self, InStream *instream) { String *field = Freezer_read_string(instream); String *text = Freezer_read_string(instream); - uint32_t num_pos = InStream_Read_C32(instream); + size_t num_pos = InStream_Read_C64(instream); // Read positional data. int32_t *posits = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t)); int32_t *starts = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t)); int32_t *ends = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t)); - for (uint32_t i = 0; i < num_pos; i++) { + for (size_t i = 0; i < num_pos; i++) { posits[i] = InStream_Read_C32(instream); starts[i] = InStream_Read_C32(instream); ends[i] = InStream_Read_C32(instream); @@ -142,7 +144,7 @@ TV_Equals_IMP(TermVector *self, Obj *other) { int32_t *const other_posits = I32Arr_IVARS(ovars->positions)->ints; int32_t *const other_starts = I32Arr_IVARS(ovars->start_offsets)->ints; int32_t *const other_ends = I32Arr_IVARS(ovars->start_offsets)->ints; - for (uint32_t i = 0; i < ivars->num_pos; i++) { + for (size_t i = 0; i < ivars->num_pos; i++) { if (posits[i] != other_posits[i]) { return false; } if (starts[i] != other_starts[i]) { return false; } if (ends[i] != other_ends[i]) { return false; } diff --git a/core/Lucy/Index/TermVector.cfh b/core/Lucy/Index/TermVector.cfh index 4242008f5..5e6eb7d31 100644 --- a/core/Lucy/Index/TermVector.cfh +++ b/core/Lucy/Index/TermVector.cfh @@ -24,7 +24,7 @@ class Lucy::Index::TermVector nickname TV String *field; String *text; - uint32_t num_pos; + size_t num_pos; I32Array *positions; I32Array *start_offsets; I32Array *end_offsets; diff --git a/core/Lucy/Search/SeriesMatcher.c b/core/Lucy/Search/SeriesMatcher.c index a36d8204c..4d7231bc0 100644 --- a/core/Lucy/Search/SeriesMatcher.c +++ b/core/Lucy/Search/SeriesMatcher.c @@ -71,7 +71,7 @@ SeriesMatcher_Advance_IMP(SeriesMatcher *self, int32_t target) { uint32_t next_offset = ivars->tick + 1 == ivars->num_matchers ? INT32_MAX - : I32Arr_Get(ivars->offsets, ivars->tick + 1); + : (uint32_t)I32Arr_Get(ivars->offsets, (size_t)(ivars->tick + 1)); ivars->current_matcher = (Matcher*)Vec_Fetch(ivars->matchers, ivars->tick); ivars->current_offset = ivars->next_offset; diff --git a/core/Lucy/Search/TermQuery.c b/core/Lucy/Search/TermQuery.c index 900303686..397f865dd 100644 --- a/core/Lucy/Search/TermQuery.c +++ b/core/Lucy/Search/TermQuery.c @@ -303,7 +303,7 @@ TermCompiler_Highlight_Spans_IMP(TermCompiler *self, Searcher *searcher, starts = TV_Get_Start_Offsets(term_vector); ends = TV_Get_End_Offsets(term_vector); - for (uint32_t i = 0, max = I32Arr_Get_Size(starts); i < max; i++) { + for (size_t i = 0, max = I32Arr_Get_Size(starts); i < max; i++) { int32_t start = I32Arr_Get(starts, i); int32_t length = I32Arr_Get(ends, i) - start; Vec_Push(spans, diff --git a/core/Lucy/Test/Object/TestI32Array.c b/core/Lucy/Test/Object/TestI32Array.c index 7661d7cd8..7236d7bb8 100644 --- a/core/Lucy/Test/Object/TestI32Array.c +++ b/core/Lucy/Test/Object/TestI32Array.c @@ -49,16 +49,16 @@ test_all(TestBatchRunner *runner) { break; } } - TEST_INT_EQ(runner, num_matched, num_ints, - "Matched all source ints with Get()"); + TEST_UINT_EQ(runner, num_matched, num_ints, + "Matched all source ints with Get()"); for (num_matched = 0; num_matched < num_ints; num_matched++) { if (source_ints[num_matched] != I32Arr_Get(stolen, num_matched)) { break; } } - TEST_INT_EQ(runner, num_matched, num_ints, - "Matched all source ints in stolen I32Array with Get()"); + TEST_UINT_EQ(runner, num_matched, num_ints, + "Matched all source ints in stolen I32Array with Get()"); DECREF(i32_array); DECREF(stolen); diff --git a/core/Lucy/Test/Search/TestSeriesMatcher.c b/core/Lucy/Test/Search/TestSeriesMatcher.c index 7becce430..d0ec08e6c 100644 --- a/core/Lucy/Test/Search/TestSeriesMatcher.c +++ b/core/Lucy/Test/Search/TestSeriesMatcher.c @@ -31,14 +31,13 @@ TestSeriesMatcher_new() { static SeriesMatcher* S_make_series_matcher(I32Array *doc_ids, I32Array *offsets, int32_t doc_max) { - int32_t num_doc_ids = I32Arr_Get_Size(doc_ids); - int32_t num_matchers = I32Arr_Get_Size(offsets); + size_t num_doc_ids = I32Arr_Get_Size(doc_ids); + size_t num_matchers = I32Arr_Get_Size(offsets); Vector *matchers = Vec_new(num_matchers); - int32_t tick = 0; - int32_t i; + size_t tick = 0; // Divvy up doc_ids by segment into BitVectors. - for (i = 0; i < num_matchers; i++) { + for (size_t i = 0; i < num_matchers; i++) { int32_t offset = I32Arr_Get(offsets, i); int32_t max = i == num_matchers - 1 ? doc_max + 1 @@ -83,16 +82,16 @@ S_do_test_matrix(TestBatchRunner *runner, int32_t doc_max, int32_t first_doc_id, = S_generate_match_list(0, doc_max, offset_inc); SeriesMatcher *series_matcher = S_make_series_matcher(doc_ids, offsets, doc_max); - uint32_t num_in_agreement = 0; + size_t num_in_agreement = 0; int32_t got; while (0 != (got = SeriesMatcher_Next(series_matcher))) { if (got != I32Arr_Get(doc_ids, num_in_agreement)) { break; } num_in_agreement++; } - TEST_INT_EQ(runner, num_in_agreement, I32Arr_Get_Size(doc_ids), - "doc_max=%d first_doc_id=%d doc_inc=%d offset_inc=%d", - doc_max, first_doc_id, doc_inc, offset_inc); + TEST_UINT_EQ(runner, num_in_agreement, I32Arr_Get_Size(doc_ids), + "doc_max=%d first_doc_id=%d doc_inc=%d offset_inc=%d", + doc_max, first_doc_id, doc_inc, offset_inc); DECREF(doc_ids); DECREF(offsets); From c07e85462f3d763fa5b2cbfc291eb03c5d52ebe2 Mon Sep 17 00:00:00 2001 From: Marvin Humphrey Date: Wed, 6 Apr 2016 15:28:24 -0700 Subject: [PATCH 7/7] Adapt Go bindings for I32Array size change. --- go/lucy/lucy.go | 2 +- go/lucy/object.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go index e92540d5c..138b599c8 100644 --- a/go/lucy/lucy.go +++ b/go/lucy/lucy.go @@ -386,7 +386,7 @@ func fetchEntry(ivars *C.lucy_InverterIVARS, fieldGo string) *C.lucy_InverterEnt func readDocPolyDR(pdr *C.lucy_PolyDocReader, docID int32, doc interface{}) error { ivars := C.lucy_PolyDocReader_IVARS(pdr) segTick := C.lucy_PolyReader_sub_tick(ivars.offsets, C.int32_t(docID)) - offset := C.LUCY_I32Arr_Get(ivars.offsets, segTick) + offset := C.LUCY_I32Arr_Get(ivars.offsets, C.size_t(segTick)) defDocReader := (*C.lucy_DefaultDocReader)(C.CFISH_Vec_Fetch(ivars.readers, C.size_t(segTick))) if (defDocReader == nil) { return clownfish.NewErr(fmt.Sprintf("Invalid docID: %d", docID)) diff --git a/go/lucy/object.go b/go/lucy/object.go index b8585a4fc..356f255cf 100644 --- a/go/lucy/object.go +++ b/go/lucy/object.go @@ -40,12 +40,12 @@ func (bv *BitVectorIMP) ToArray() []bool { func NewI32Array(nums []int32) I32Array { size := len(nums) - if int(C.uint32_t(size)) != size { + if int(C.size_t(size)) != size { panic(clownfish.NewErr("input too large")) } - obj := C.lucy_I32Arr_new_blank(C.uint32_t(size)) + obj := C.lucy_I32Arr_new_blank(C.size_t(size)) for i := 0; i < size; i++ { - C.LUCY_I32Arr_Set(obj, C.uint32_t(i), C.int32_t(nums[i])) + C.LUCY_I32Arr_Set(obj, C.size_t(i), C.int32_t(nums[i])) } return WRAPI32Array(unsafe.Pointer(obj)) } @@ -54,7 +54,7 @@ func i32ArrayToSlice(a *C.lucy_I32Array) []int32 { size := int(C.LUCY_I32Arr_Get_Size(a)) nums := make([]int32, size) for i := 0; i < size; i++ { - nums[i] = int32(C.LUCY_I32Arr_Get(a, C.uint32_t(i))) + nums[i] = int32(C.LUCY_I32Arr_Get(a, C.size_t(i))) } return nums }