diff --git a/numba/_helpermod.c b/numba/_helpermod.c index 33966e5dce6..971bd9a9278 100644 --- a/numba/_helpermod.c +++ b/numba/_helpermod.c @@ -149,6 +149,7 @@ build_c_helpers_dict(void) declmethod(list_getitem); declmethod(list_append); declmethod(list_pop); + declmethod(list_delete_slice); declmethod(list_iter_sizeof); declmethod(list_iter); declmethod(list_iter_next); diff --git a/numba/_listobject.c b/numba/_listobject.c index 056fe0c924e..4982503877d 100644 --- a/numba/_listobject.c +++ b/numba/_listobject.c @@ -32,8 +32,9 @@ * - Setting an item numba_list_getitem * - Poping an item numba_list_pop * - Resizing the list numba_list_resize + * - Deleting a slice numba_list_delete_slice * - * As you can see, no functions related to slicing have been implemented. This + * As you can see, only a single function for slices is implemented. The rest * is all done entirely at the compiler level which then calls the c functions * to mutate the list accordingly. Since slicing allows for replace, insert and * delete operations over multiple items, we can simply implement those using @@ -384,6 +385,104 @@ numba_list_resize(NB_List *lp, Py_ssize_t newsize) { return LIST_OK; } +/* Delete a slice + * + * start: the start index of ths slice + * stop: the stop index of the slice (not included) + * step: the step to take + * + * This function assumes that the start and stop were clipped appropriately. + * I.e. if step > 0 start >= 0 and stop <= len(l) and + * if step < 0 start <= length and stop >= -1 + * step != 0 and no Python negative indexing allowed. + * + * This code was copied and edited from the relevant section in + * list_ass_subscript from the cpython implementation. + */ +int +numba_list_delete_slice(NB_List *lp, + Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step) { + int result, i, slicelength, new_length; + char *loc, *new_loc; + Py_ssize_t leftover_bytes, cur, lim; + // calculate the slicelength, taken from PySlice_AdjustIndices + if (step > 0) { + slicelength = start < stop ? (stop - start - 1) / step + 1 : 0; + } else { + slicelength = stop < start ? (start - stop - 1) / -step + 1 : 0; + } + if (slicelength <= 0){ + return LIST_OK; + } + new_length = lp->size - slicelength; + // reverse step and indices + if (step < 0) { + stop = start + 1; + start = stop + step * (slicelength - 1) - 1; + step = -step; + } + if (step == 1) { + // decref if needed + if (lp->methods.item_decref) { + for (i = start ; i < stop ; i++){ + loc = lp->items + lp->item_size * i; + lp->methods.item_decref(loc); + } + } + // memove items into place + leftover_bytes = (lp->size - stop) * lp->item_size; + loc = lp->items + lp->item_size * start; + new_loc = lp->items + lp->item_size * stop; + memmove(loc, new_loc, leftover_bytes); + } + else { // step != 1 + /* drawing pictures might help understand these for + * loops. Basically, we memmove the parts of the + * list that are *not* part of the slice: step-1 + * items for each item that is part of the slice, + * and then tail end of the list that was not + * covered by the slice + * + * */ + for (cur = start, // index of item to be deleted + i = 0; // counter of total items deleted so far + cur < stop; + cur += step, + i++) { + lim = step - 1; // number of leftover items after deletion of item + // clip limit, in case we are at the end of the slice, and there + // are now less than step-1 items to be moved + if (cur + step >= lp->size) { + lim = lp->size - cur - 1; + } + // decref item being removed + loc = lp->items + lp->item_size * cur; + list_decref_item(lp, loc); + /* memmove the aforementiond step-1 (or less) items + * dst : index of deleted item minus total deleted sofar + * src : index of deleted item plus one (next item) + */ + memmove(lp->items + lp->item_size * (cur - i), + lp->items + lp->item_size * (cur + 1), + lim * lp->item_size); + } + // memmove tail of the list + cur = start + slicelength * step; + if (cur < lp->size) { + memmove(lp->items + lp->item_size * (cur - slicelength), + lp->items + lp->item_size * cur, + (lp->size - cur) * lp->item_size); + } + } + // resize to correct size + result = numba_list_resize(lp, new_length); + if(result < LIST_OK) { + // Since we are decreasing the size, this should never happen + return result; + } + return LIST_OK; +} + /* Return the size of the list iterator (NB_ListIter) struct. */ @@ -446,6 +545,7 @@ numba_test_list(void) { NB_ListIter iter; char got_item[4] = "\x00\x00\x00\x00"; const char *test_items_1 = NULL, *test_items_2 = NULL; + char *test_items_3 = NULL; puts("test_list"); @@ -610,11 +710,156 @@ numba_test_list(void) { case 1: CHECK(lp->allocated == 0); break; } } + // free existing list + numba_list_free(lp); + + + // Setup list for testing delete_slice + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + for (i = 0; i < 17 ; i++) { + status = numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + } + CHECK(lp->size == 17); + test_items_3 = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_3, 17) == 0); + + // delete multiple elements from the middle + status = numba_list_delete_slice(lp, 2, 5, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 14); + test_items_3 = "\x00\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_3, 14) == 0); + + // delete single element from start + status = numba_list_delete_slice(lp, 0, 1, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 13); + test_items_3 = "\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_3, 13) == 0); + + // delete single element from end + status = numba_list_delete_slice(lp, 12, 13, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 12); + test_items_3 = "\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"; + CHECK(memcmp(lp->items, test_items_3, 12) == 0); + + // delete single element from middle + status = numba_list_delete_slice(lp, 4, 5, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 11); + test_items_3 = "\x01\x05\x06\x07\x09\x0a\x0b\x0c\x0d\x0e\x0f"; + CHECK(memcmp(lp->items, test_items_3, 11) == 0); + + // delete all elements except first and last + status = numba_list_delete_slice(lp, 1, 10, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 2); + test_items_3 = "\x01\x0f"; + CHECK(memcmp(lp->items, test_items_3, 2) == 0); + + // delete all remaining elements + status = numba_list_delete_slice(lp, 0, lp->size, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 0); + test_items_3 = ""; + CHECK(memcmp(lp->items, test_items_3, 0) == 0); + + // free existing list + numba_list_free(lp); + + // Setup list for testing delete_slice with non unary step + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + for (i = 0; i < 17 ; i++) { + status = numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + } + CHECK(lp->size == 17); + + // delete all items with odd index + status = numba_list_delete_slice(lp, 0, 17, 2); + CHECK(status == LIST_OK); + CHECK(lp->size == 8); + test_items_3 = "\x01\x03\x05\x07\x09\x0b\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 8) == 0); + + // delete with a step of 4, starting at index 1 + status = numba_list_delete_slice(lp, 1, 8, 4); + CHECK(status == LIST_OK); + CHECK(lp->size == 6); + test_items_3 = "\x01\x05\x07\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 6) == 0); + + // delete with a step of 2, but finish before end of list + status = numba_list_delete_slice(lp, 0, 4, 2); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + test_items_3 = "\x05\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 4) == 0); + + // no-op on empty slice + status = numba_list_delete_slice(lp, 0, 0, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + test_items_3 = "\x05\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 4) == 0); + + // no-op on empty slice, non-zero index + status = numba_list_delete_slice(lp, 2, 2, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + test_items_3 = "\x05\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 4) == 0); + + // free list and return 0 + numba_list_free(lp); + + // Setup list for testing delete_slice with negative step + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + for (i = 0; i < 17 ; i++) { + status = numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + } + CHECK(lp->size == 17); + + // delete all items using unary negative slice + status = numba_list_delete_slice(lp, 16, -1, -1); + CHECK(status == LIST_OK); + CHECK(lp->size == 0); + + // refill list + for (i = 0; i < 17 ; i++) { + status = numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + } + + // delete all items using unary negative slice + // need to start at index of last item (16) and + // go beyond first item, i.e. -1 in Cd + status = numba_list_delete_slice(lp, 16, -1, -2); + CHECK(status == LIST_OK); + CHECK(lp->size == 8); + test_items_3 = "\x01\x03\x05\x07\x09\x0b\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 8) == 0); // free list and return 0 numba_list_free(lp); return 0; + } #undef CHECK diff --git a/numba/_listobject.h b/numba/_listobject.h index d25b72d501d..a530d6fcf91 100644 --- a/numba/_listobject.h +++ b/numba/_listobject.h @@ -85,6 +85,10 @@ numba_list_pop(NB_List *lp, Py_ssize_t index, char *out); NUMBA_EXPORT_FUNC(int) numba_list_resize(NB_List *lp, Py_ssize_t newsize); +NUMBA_EXPORT_FUNC(int) +numba_list_delete_slice(NB_List *lp, + Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step); + NUMBA_EXPORT_FUNC(size_t) numba_list_iter_sizeof(void); diff --git a/numba/listobject.py b/numba/listobject.py index f6920929b4d..e156688ae75 100644 --- a/numba/listobject.py +++ b/numba/listobject.py @@ -739,6 +739,38 @@ def impl(l, index=-1): raise TypingError("argument for pop must be a signed integer") +@intrinsic +def _list_delete_slice(typingctx, l, start, stop, step): + """Wrap numba_list_delete_slice + """ + resty = types.int32 + sig = resty(l, start, stop, step) + + def codegen(context, builder, sig, args): + fnty = ir.FunctionType( + ll_status, + [ll_list_type, ll_ssize_t, ll_ssize_t, ll_ssize_t], + ) + [l, start, stop, step] = args + [tl, tstart, tstop, tstep] = sig.args + fn = builder.module.get_or_insert_function(fnty, + name='numba_list_delete_slice') + + lp = _list_get_data(context, builder, tl, l) + status = builder.call( + fn, + [ + lp, + start, + stop, + step, + ], + ) + return status + + return sig, codegen + + @overload(operator.delitem) def impl_delitem(l, index): if not isinstance(l, types.ListType): @@ -752,8 +784,11 @@ def integer_impl(l, index): elif isinstance(index, types.SliceType): def slice_impl(l, index): - raise NotImplementedError - + slice_range = handle_slice(l, index) + _list_delete_slice(l, + slice_range.start, + slice_range.stop, + slice_range.step) return slice_impl else: diff --git a/numba/tests/test_listimpl.py b/numba/tests/test_listimpl.py index 64147615b3b..9973df81dfd 100644 --- a/numba/tests/test_listimpl.py +++ b/numba/tests/test_listimpl.py @@ -54,6 +54,9 @@ def __getitem__(self, i): def __iter__(self): return ListIter(self) + def __delitem__(self, i): + self.list_delitem(i) + def append(self, item): self.list_append(item) @@ -108,6 +111,18 @@ def list_pop(self, i): self.tc.assertEqual(status, LIST_OK) return item_out_buffer.raw + def list_delitem(self, i): + # special case slice + if isinstance(i, slice): + status = self.tc.numba_list_delete_slice(self.lp, + i.start, + i.stop, + i.step) + self.tc.assertEqual(status, LIST_OK) + # must be an intger, deferr to pop + else: + self.list_pop(i) + def list_iter(self, itptr): self.tc.numba_list_iter(itptr, self.lp) @@ -204,6 +219,15 @@ def wrap(name, restype, argtypes=()): ctypes.c_int, [list_t, ctypes.c_ssize_t, ctypes.c_char_p], ) + # numba_list_delete_slice(NB_List *l, + # Py_ssize_t start, + # Py_ssize_t stop, + # Py_ssize_t step) + self.numba_list_delete_slice = wrap( + 'list_delete_slice', + ctypes.c_int, + [list_t, ctypes.c_ssize_t, ctypes.c_ssize_t, ctypes.c_ssize_t], + ) # numba_list_iter_sizeof() self.numba_list_iter_sizeof = wrap( 'list_iter_sizeof', @@ -333,6 +357,34 @@ def test_pop_byte(self): received = [j for j in l] self.assertEqual(received, expected) + def test_delete_slice(self): + l = List(self, 1, 0) + values = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'] + for i in values: + l.append(i) + self.assertEqual(len(l), 8) + + # delete every second item + # no slice default normalization here, be explict about start anb stop + del l[0:8:2] + self.assertEqual(len(l), 4) + self.assertEqual(list(l), values[1:8:2]) + + # delete first item + del l[0:1:1] + self.assertEqual(len(l), 3) + self.assertEqual(list(l), [b'd', b'f', b'h']) + + # delete last item + del l[2:3:1] + self.assertEqual(len(l), 2) + self.assertEqual(list(l), [b'd', b'f']) + + # delete all left items + del l[0:2:1] + self.assertEqual(len(l), 0) + self.assertEqual(list(l), []) + def check_sizing(self, item_size, nmax): # Helper to verify different item_sizes l = List(self, item_size, 0) diff --git a/numba/tests/test_listobject.py b/numba/tests/test_listobject.py index d8672f3b6e1..541aa16c9cd 100644 --- a/numba/tests/test_listobject.py +++ b/numba/tests/test_listobject.py @@ -672,7 +672,77 @@ class TestListObjectDelitem(MemoryLeakMixin, TestCase): """Test list delitem. """ - def test_list_delitem(self): + def test_list_singleton_delitem_index(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[0] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_singleton_delitem_slice_defaults(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[:] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_singleton_delitem_slice_start(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[0:] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_singleton_delitem_slice_stop(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[:1] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_singleton_delitem_slice_start_stop(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[0:1] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_singleton_delitem_slice_start_step(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[0::1] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_singleton_delitem_slice_start_stop_step(self): + + @njit + def foo(): + l = listobject.new_list(int32) + l.append(0) + del l[0:1:1] + return len(l) + self.assertEqual(foo(), 0) + + def test_list_multiple_delitem(self): @njit def foo(): @@ -683,10 +753,7 @@ def foo(): return len(l), l[0], l[1] self.assertEqual(foo(), (2, 11, 12)) - @unittest.expectedFailure - def test_list_delitem_slice(self): - # remove when tests no longer fails - self.disable_leak_check() + def test_list_multiple_delitem_slice(self): @njit def foo(): @@ -697,6 +764,22 @@ def foo(): return len(l) self.assertEqual(foo(), 0) + def test_list_multiple_delitem_off_by_one(self): + # this was exposing a nasty off-by-one error, leaving it in to detect + # and regressions + @njit + def foo(): + l = listobject.new_list(int32) + for j in range(10, 20): + l.append(j) + k = listobject.new_list(int32) + for j in range(10, 20): + k.append(j) + # should be a no-op + del l[-9:-20] + return k == l + self.assertTrue(foo()) + class TestContains(MemoryLeakMixin, TestCase): """Test list contains. """ diff --git a/numba/tests/test_typedlist.py b/numba/tests/test_typedlist.py index db3cd461a64..2dc2efcc9d1 100644 --- a/numba/tests/test_typedlist.py +++ b/numba/tests/test_typedlist.py @@ -292,6 +292,90 @@ def test_setitem_slice_value_error(self): str(raises.exception), ) + def test_delitem_slice(self): + """ Test delitem using a slice. + + This tests suffers from combinatorial explosion, so we parametrize it + and compare results agains the regular list in a quasi fuzzing approach. + + """ + + def setup(start=10, stop=20): + # initialize regular list + rl_ = list(range(start, stop)) + # intialize typed list + tl_ = List.empty_list(int32) + # populate typed list + for i in range(start, stop): + tl_.append(i) + # check they are the same + self.assertEqual(rl_, list(tl_)) + return rl_, tl_ + + def to_tl(l): + tl = List.empty_list(int32) + for k in l: + tl.append(k) + return tl + + # define the ranges + start_range = list(range(-20, 30)) + stop_range = list(range(-20, 30)) + step_range = [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5] + + rl, tl = setup() + # check that they are the same initially + self.assertEqual(rl, list(tl)) + # check that deletion of the whole list by slice works + del rl[:] + del tl[:] + self.assertEqual(rl, list(tl)) + + # start only + for sa in start_range: + rl, tl = setup() + del rl[sa:] + del tl[sa:] + self.assertEqual(rl, list(tl)) + # stop only + for so in stop_range: + rl, tl = setup() + del rl[:so] + del tl[:so] + self.assertEqual(rl, list(tl)) + # step only + for se in step_range: + rl, tl = setup() + del rl[::se] + del tl[::se] + self.assertEqual(rl, list(tl)) + + # start and stop + for sa, so in product(start_range, stop_range): + rl, tl = setup() + del rl[sa:so] + del tl[sa:so] + self.assertEqual(rl, list(tl)) + # start and step + for sa, se in product(start_range, step_range): + rl, tl = setup() + del rl[sa::se] + del tl[sa::se] + self.assertEqual(rl, list(tl)) + # stop and step + for so, se in product(stop_range, step_range): + rl, tl = setup() + del rl[:so:se] + del tl[:so:se] + self.assertEqual(rl, list(tl)) + + # start, stop and step + for sa, so, se in product(start_range, stop_range, step_range): + rl, tl = setup() + del rl[sa:so:se] + del tl[sa:so:se] + self.assertEqual(rl, list(tl)) + class TestExtend(MemoryLeakMixin, TestCase): diff --git a/numba/typed/typedlist.py b/numba/typed/typedlist.py index cb1d457a926..44488a13606 100644 --- a/numba/typed/typedlist.py +++ b/numba/typed/typedlist.py @@ -64,6 +64,11 @@ def _pop(l, i): return l.pop(i) +@njit +def _delitem(l, i): + del l[i] + + @njit def _extend(l, iterable): return l.extend(iterable) @@ -204,7 +209,7 @@ def __contains__(self, item): return _contains(self, item) def __delitem__(self, i): - _pop(self, i) + _delitem(self, i) def insert(self, i, item): _insert(self, i, item)