Skip to content

Commit

Permalink
Merge pull request #3533 from cjnolet/fea-08-sparse-setting
Browse files Browse the repository at this point in the history
[REVIEW] Compressed Sparse __setitem__
  • Loading branch information
asi1024 committed Sep 17, 2020
1 parent e9d7ef3 commit 69e88ca
Show file tree
Hide file tree
Showing 4 changed files with 695 additions and 24 deletions.
234 changes: 214 additions & 20 deletions cupyx/scipy/sparse/_index.py
Expand Up @@ -94,25 +94,29 @@ def _get_csr_submatrix_minor_axis(Ax, Aj, Ap, start, stop):

def _csr_row_index(Ax, Aj, Ap, rows):
    """Extract whole rows of a CSR matrix by row index.

    Args:
        Ax (cupy.ndarray): data array from input sparse matrix
        Aj (cupy.ndarray): indices array from input sparse matrix
        Ap (cupy.ndarray): indptr array from input sparse matrix
        rows (cupy.ndarray): index array of rows to populate

    Returns:
        Bx (cupy.ndarray): data array of output sparse matrix
        Bj (cupy.ndarray): indices array of output sparse matrix
        Bp (cupy.ndarray): indptr array for output sparse matrix
    """
    # Per-row nonzero counts of the input, gathered in output row order,
    # give the output indptr via an inclusive prefix sum.
    nnz_per_row = cupy.diff(Ap)
    Bp = cupy.empty(rows.size + 1, dtype=Ap.dtype)
    Bp[0] = 0
    cupy.cumsum(nnz_per_row[rows], out=Bp[1:])
    total_nnz = int(Bp[-1])

    # Expand the output indptr into a COO-style row array, then gather
    # the column indices and values for every selected row.
    coo_rows = _csr_indptr_to_coo_rows(total_nnz, Bp)
    Bj, Bx = _csr_row_index_ker(coo_rows, rows, Ap, Aj, Ax, Bp)
    return Bx, Bj, Bp


def _csr_indptr_to_coo_rows(nnz, Bp):
out_rows = cupy.empty(nnz, dtype=numpy.int32)

# Build a COO row array from output CSR indptr.
Expand All @@ -123,13 +127,149 @@ def _csr_row_index(Ax, Aj, Ap, rows):
handle, Bp.data.ptr, nnz, Bp.size-1, out_rows.data.ptr,
cusparse.CUSPARSE_INDEX_BASE_ZERO)

Bj, Bx = _csr_row_index_ker(out_rows, rows, Ap, Aj, Ax, Bp)
return Bx, Bj, Bp
return out_rows


def _select_last_indices(i, j, x, idx_dtype):
    """Drop duplicate (row, col) pairs, keeping only the last occurrence.

    Args:
        i: row indices of the entries
        j: column indices of the entries
        x: values of the entries
        idx_dtype: dtype to use for the index arrays

    Returns:
        Tuple of (rows, cols, values) with duplicates removed.
    """
    rows = cupy.asarray(i, dtype=idx_dtype)
    cols = cupy.asarray(j, dtype=idx_dtype)

    # lexsort uses the last key as the primary one, so this sorts by
    # row first and column second.
    order = cupy.lexsort(cupy.stack([cols, rows])).astype(idx_dtype)

    sorted_rows = rows[order]
    sorted_cols = cols[order]
    sorted_data = x[order]

    # Adjacent equal (row, col) pairs are duplicates; keep the one with
    # the larger original position (i.e. the last one specified).
    keep = cupy.ones(sorted_rows.size, dtype='bool')
    _unique_mask_kern(sorted_rows, sorted_cols, order, keep,
                      size=sorted_rows.size - 1)

    return sorted_rows[keep], sorted_cols[keep], sorted_data[keep]


_insert_many_populate_arrays = core.ElementwiseKernel(
'''raw I insert_indices, raw T insert_values, raw I insertion_indptr,
raw I Ap, raw I Aj, raw T Ax, raw I Bp''',
'raw I Bj, raw T Bx', '''
const I input_row_start = Ap[i];
const I input_row_end = Ap[i+1];
const I input_count = input_row_end - input_row_start;
const I insert_row_start = insertion_indptr[i];
const I insert_row_end = insertion_indptr[i+1];
const I insert_count = insert_row_end - insert_row_start;
I input_offset = 0;
I insert_offset = 0;
I output_n = Bp[i];
I cur_existing_index = -1;
T cur_existing_value = -1;
I cur_insert_index = -1;
T cur_insert_value = -1;
if(input_offset < input_count) {
cur_existing_index = Aj[input_row_start+input_offset];
cur_existing_value = Ax[input_row_start+input_offset];
}
if(insert_offset < insert_count) {
cur_insert_index = insert_indices[insert_row_start+insert_offset];
cur_insert_value = insert_values[insert_row_start+insert_offset];
}
for(I jj = 0; jj < input_count + insert_count; jj++) {
// if we have both available, use the lowest one.
if(input_offset < input_count &&
insert_offset < insert_count) {
if(cur_existing_index < cur_insert_index) {
Bj[output_n] = cur_existing_index;
Bx[output_n] = cur_existing_value;
++input_offset;
if(input_offset < input_count) {
cur_existing_index = Aj[input_row_start+input_offset];
cur_existing_value = Ax[input_row_start+input_offset];
}
} else {
Bj[output_n] = cur_insert_index;
Bx[output_n] = cur_insert_value;
++insert_offset;
if(insert_offset < insert_count) {
cur_insert_index =
insert_indices[insert_row_start+insert_offset];
cur_insert_value =
insert_values[insert_row_start+insert_offset];
}
}
} else if(input_offset < input_count) {
Bj[output_n] = cur_existing_index;
Bx[output_n] = cur_existing_value;
++input_offset;
if(input_offset < input_count) {
cur_existing_index = Aj[input_row_start+input_offset];
cur_existing_value = Ax[input_row_start+input_offset];
}
} else {
Bj[output_n] = cur_insert_index;
Bx[output_n] = cur_insert_value;
++insert_offset;
if(insert_offset < insert_count) {
cur_insert_index =
insert_indices[insert_row_start+insert_offset];
cur_insert_value =
insert_values[insert_row_start+insert_offset];
}
}
output_n++;
}
''', 'csr_copy_existing_indices_kern', no_return=True)


# Create a filter mask based on the lowest value of order
_unique_mask_kern = core.ElementwiseKernel(
'''raw I rows, raw I cols, raw I order''',
'''raw bool mask''',
"""
I cur_row = rows[i];
I next_row = rows[i+1];
I cur_col = cols[i];
I next_col = cols[i+1];
I cur_order = order[i];
I next_order = order[i+1];
if(cur_row == next_row && cur_col == next_col) {
if(cur_order < next_order)
mask[i] = false;
else
mask[i+1] = false;
}
""", no_return=True
)


def _csr_sample_values(n_row, n_col,
                       Ap, Aj, Ax,
                       Bi, Bj, not_found_val=0):
    """Populate data array for a set of rows and columns.

    Fix: this span contained leftover pre-change diff lines (the old
    signature and the old body allocating/returning ``Bx``) interleaved
    with the new implementation; only the updated version is kept.

    Args:
        n_row (int): total number of rows in input array
        n_col (int): total number of columns in input array
        Ap (cupy.ndarray): indptr array from input sparse matrix
        Aj (cupy.ndarray): indices array from input sparse matrix
        Ax (cupy.ndarray): data array from input sparse matrix
        Bi (cupy.ndarray): row indices to sample
        Bj (cupy.ndarray): column indices to sample
        not_found_val (int): value written for (row, col) pairs with no
            stored entry in the input matrix (0 by default)

    Returns:
        Bx (cupy.ndarray): sampled values, one per (Bi, Bj) pair
    """
    # Wrap negative indices around, matching standard indexing semantics.
    Bi[Bi < 0] += n_row
    Bj[Bj < 0] += n_col

    # The kernel allocates and returns Bx (size=Bi.size elements).
    return _csr_sample_values_kern(n_row, n_col,
                                   Ap, Aj, Ax,
                                   Bi, Bj,
                                   not_found_val,
                                   size=Bi.size)


# Gather kernel for _csr_sample_values: one thread per sampled (row, col)
# pair.  Sums all stored entries at that coordinate (CSR may hold
# duplicates) and writes `not_found_val` when the coordinate has no
# stored entry, so "explicit zero" and "missing" can be distinguished.
#
# Fix: this span contained leftover pre-change diff lines (the old input
# signature string and the old `Bx[i] = x; ... no_return=True)` tail after
# the call was already closed), which made the module unparseable; only
# the updated kernel definition is kept.
_csr_sample_values_kern = core.ElementwiseKernel(
    '''I n_row, I n_col, raw I Ap, raw I Aj, raw T Ax,
    raw I Bi, raw I Bj, I not_found_val''',
    'raw T Bx', '''
    const I j = Bi[i]; // sample row
    const I k = Bj[i]; // sample column
    const I row_start = Ap[j];
    const I row_end = Ap[j+1];
    T x = 0;
    bool val_found = false;
    for(I jj = row_start; jj < row_end; jj++) {
        if (Aj[jj] == k) {
            x += Ax[jj];
            val_found = true;
        }
    }
    Bx[i] = val_found ? x : not_found_val;
    ''', 'csr_sample_values_kern')


class IndexMixin(object):
Expand Down Expand Up @@ -227,6 +370,57 @@ def __getitem__(self, key):
return self.__class__(cupy.atleast_2d(row).shape, dtype=self.dtype)
return self._get_arrayXarray(row, col)

    def __setitem__(self, key, x):
        # Assign `x` to the entries selected by `key`, mirroring the
        # dispatch structure of scipy.sparse's IndexMixin.__setitem__.
        # Delegates the actual writes to the subclass hooks
        # _set_intXint / _set_arrayXarray_sparse / _set_arrayXarray.
        row, col = self._parse_indices(key)

        # Fast path: a single (int, int) coordinate.
        if isinstance(row, _int_scalar_types) and\
                isinstance(col, _int_scalar_types):
            x = cupy.asarray(x, dtype=self.dtype)
            if x.size != 1:
                raise ValueError('Trying to assign a sequence to an item')
            self._set_intXint(row, col, x.flat[0])
            return

        # Expand slices into index arrays shaped for outer indexing:
        # rows as a column vector, columns as a row vector.
        if isinstance(row, slice):
            row = cupy.arange(*row.indices(self.shape[0]))[:, None]
        else:
            row = cupy.atleast_1d(row)

        if isinstance(col, slice):
            col = cupy.arange(*col.indices(self.shape[1]))[None, :]
            if row.ndim == 1:
                # Pair a 1-D row index array with the column row-vector.
                row = row[:, None]
        else:
            col = cupy.atleast_1d(col)

        i, j = cupy.broadcast_arrays(row, col)
        if i.shape != j.shape:
            raise IndexError('number of row and column indices differ')

        if isspmatrix(x):
            if i.ndim == 1:
                # Inner indexing, so treat them like row vectors.
                i = i[None]
                j = j[None]
            # A sparse RHS with a length-1 axis broadcasts along the
            # matching axis of the index arrays.
            broadcast_row = x.shape[0] == 1 and i.shape[0] != 1
            broadcast_col = x.shape[1] == 1 and i.shape[1] != 1
            if not ((broadcast_row or x.shape[0] == i.shape[0]) and
                    (broadcast_col or x.shape[1] == i.shape[1])):
                raise ValueError('shape mismatch in assignment')
            if x.size == 0:
                # Nothing to write for an empty sparse RHS.
                return
            # COO with summed duplicates gives one value per coordinate.
            x = x.tocoo(copy=True)
            x.sum_duplicates()
            self._set_arrayXarray_sparse(i, j, x)
        else:
            # Make x and i into the same shape
            x = cupy.asarray(x, dtype=self.dtype)
            x, _ = cupy.broadcast_arrays(x, i)
            if x.size == 0:
                return
            x = x.reshape(i.shape)
            self._set_arrayXarray(i, j, x)

def _is_scalar(self, index):
if isinstance(index, (cupy.ndarray, numpy.ndarray)) and \
index.ndim == 0 and index.size == 1:
Expand All @@ -237,15 +431,15 @@ def _parse_indices(self, key):
M, N = self.shape
row, col = _unpack_index(key)

# Scipy calls sputils.isintlike() rather than
# isinstance(x, _int_scalar_types). Comparing directly to int
# here to minimize the impact of nested exception catching

if self._is_scalar(row):
row = row.item()
if self._is_scalar(col):
col = col.item()

# Scipy calls sputils.isintlike() rather than
# isinstance(x, _int_scalar_types). Comparing directly to int
# here to minimize the impact of nested exception catching

if isinstance(row, _int_scalar_types):
row = _normalize_index(row, M, 'row')
elif not isinstance(row, slice):
Expand Down

0 comments on commit 69e88ca

Please sign in to comment.