-
-
Notifications
You must be signed in to change notification settings - Fork 833
/
coo.py
506 lines (416 loc) · 17.5 KB
/
coo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
import numpy
try:
import scipy.sparse
_scipy_available = True
except ImportError:
_scipy_available = False
import cupy
from cupy import _core
from cupy import cusparse
from cupyx.scipy.sparse import base
from cupyx.scipy.sparse import csc
from cupyx.scipy.sparse import csr
from cupyx.scipy.sparse import data as sparse_data
from cupyx.scipy.sparse import _util
class coo_matrix(sparse_data._data_matrix):

    """COOrdinate format sparse matrix.

    This can be instantiated in several ways.

    ``coo_matrix(D)``
        ``D`` is a rank-2 :class:`cupy.ndarray`.

    ``coo_matrix(S)``
        ``S`` is another sparse matrix. It is equivalent to ``S.tocoo()``.

    ``coo_matrix((M, N), [dtype])``
        It constructs an empty matrix whose shape is ``(M, N)``. Default dtype
        is float64.

    ``coo_matrix((data, (row, col)))``
        All ``data``, ``row`` and ``col`` are one-dimensional
        :class:`cupy.ndarray`.

    Args:
        arg1: Arguments for the initializer.
        shape (tuple): Shape of a matrix. Its length must be two.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given data are always used.

    .. seealso::
       :class:`scipy.sparse.coo_matrix`

    """

    format = 'coo'

    # Marks, for every entry of an already sorted (row, col) sequence, whether
    # it starts a new coordinate (1) or repeats the previous one (0). The
    # first entry is always 0 so that a cumulative sum of the output yields
    # the destination slot of each entry. Note that ``&&`` binds tighter than
    # ``||`` in the condition below.
    _sum_duplicates_diff = _core.ElementwiseKernel(
        'raw T row, raw T col',
        'T diff',
        '''
        T diff_out = 1;
        if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) {
          diff_out = 0;
        }
        diff = diff_out;
        ''', 'cupyx_scipy_sparse_coo_sum_duplicates_diff')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Initializes the matrix from one of the supported inputs.

        See the class docstring for the accepted forms of ``arg1``.

        Raises:
            ValueError: If ``shape`` is not two-dimensional, if the index and
                data arrays are not 1-D arrays of equal length, if the dtype
                is not one of float32, float64, complex64 or complex128, if
                the shape cannot be inferred, or if an index is negative or
                exceeds the matrix dimensions.
            TypeError: If ``arg1`` is not in a supported format.
        """
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            row = x.row
            col = x.col

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            # Test against None explicitly instead of relying on the
            # truthiness of a dtype object; the requested dtype is applied
            # again by the ``astype`` call below in any case.
            data = cupy.zeros(0, 'd' if dtype is None else dtype)
            row = cupy.zeros(0, dtype='i')
            col = cupy.zeros(0, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

            self.has_canonical_format = True

        elif _scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.tocoo()
            data = cupy.array(x.data)
            row = cupy.array(x.row, dtype='i')
            col = cupy.array(x.col, dtype='i')
            # The arrays above are freshly created on the device.
            copy = False
            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            try:
                data, (row, col) = arg1
            except (TypeError, ValueError):
                raise TypeError('invalid input format')

            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(row) and row.ndim == 1 and
                    base.isdense(col) and col.ndim == 1):
                raise ValueError('row, column, and data arrays must be 1-D')
            if not (len(data) == len(row) == len(col)):
                raise ValueError(
                    'row, column, and data array must all be the same length')

            # User-supplied triples may be unsorted or contain duplicates.
            self.has_canonical_format = False

        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            dense = cupy.atleast_2d(arg1)
            row, col = dense.nonzero()
            data = dense[row, col]
            shape = dense.shape

            self.has_canonical_format = True

        else:
            raise TypeError('invalid input format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128'
                ' are supported')

        data = data.astype(dtype, copy=copy)
        row = row.astype('i', copy=copy)
        col = col.astype('i', copy=copy)

        if shape is None:
            if len(row) == 0 or len(col) == 0:
                raise ValueError(
                    'cannot infer dimensions from zero sized index arrays')
            shape = (int(row.max()) + 1, int(col.max()) + 1)

        if len(data) > 0:
            if row.max() >= shape[0]:
                raise ValueError('row index exceeds matrix dimensions')
            if col.max() >= shape[1]:
                raise ValueError('column index exceeds matrix dimensions')
            if row.min() < 0:
                raise ValueError('negative row index found')
            if col.min() < 0:
                raise ValueError('negative column index found')

        sparse_data._data_matrix.__init__(self, data)
        self.row = row
        self.col = col
        if not _util.isshape(shape):
            raise ValueError('invalid shape (must be a 2-tuple of int)')
        self._shape = int(shape[0]), int(shape[1])

    def _with_data(self, data, copy=True):
        """Returns a matrix with the same sparsity structure as self,
        but with different data.  By default the index arrays
        (i.e. .row and .col) are copied.

        Args:
            data (cupy.ndarray): Values of the new matrix.
            copy (bool): If ``True``, ``self.row`` and ``self.col`` are
                copied before being handed to the new matrix.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Matrix with the shape of ``self``
            and the dtype of ``data``.
        """
        if copy:
            return coo_matrix(
                (data, (self.row.copy(), self.col.copy())),
                shape=self.shape, dtype=data.dtype)
        else:
            return coo_matrix(
                (data, (self.row, self.col)), shape=self.shape,
                dtype=data.dtype)

    def diagonal(self, k=0):
        """Returns the k-th diagonal of the matrix.

        Args:
            k (int, optional): Which diagonal to get, corresponding to elements
                a[i, i+k]. Default: 0 (the main diagonal).

        Returns:
            cupy.ndarray : The k-th diagonal.
        """
        rows, cols = self.shape
        if k <= -rows or k >= cols:
            # The requested diagonal lies completely outside the matrix.
            return cupy.empty(0, dtype=self.data.dtype)
        diag = cupy.zeros(min(rows + min(k, 0), cols - max(k, 0)),
                          dtype=self.dtype)
        diag_mask = (self.row + k) == self.col

        if self.has_canonical_format:
            row = self.row[diag_mask]
            data = self.data[diag_mask]
        else:
            # Entries sharing a coordinate have to be added up before they
            # are scattered into ``diag``. This class defines no
            # ``_sum_duplicates`` helper, so the selected triples are put
            # into a temporary matrix and the public ``sum_duplicates`` is
            # used on it instead.
            diag_coo = coo_matrix(
                (self.data[diag_mask],
                 (self.row[diag_mask], self.col[diag_mask])),
                shape=self.shape)
            diag_coo.sum_duplicates()
            row = diag_coo.row
            data = diag_coo.data
        # For k < 0 the position along the diagonal equals the column index,
        # i.e. ``row + k``; for k >= 0 it equals the row index.
        diag[row + min(k, 0)] = data

        return diag

    def setdiag(self, values, k=0):
        """Set diagonal or off-diagonal elements of the array.

        Args:
            values (ndarray): New values of the diagonal elements. Values may
                have any length. If the diagonal is longer than values, then
                the remaining diagonal entries will not be set. If values are
                longer than the diagonal, then the remaining values are
                ignored. If a scalar value is given, all of the diagonal is set
                to it.
            k (int, optional): Which off-diagonal to set, corresponding to
                elements a[i,i+k]. Default: 0 (the main diagonal).

        Raises:
            ValueError: If ``k`` exceeds the matrix dimensions.
        """
        M, N = self.shape
        if (k > 0 and k >= N) or (k < 0 and -k >= M):
            raise ValueError("k exceeds matrix dimensions")
        # Accept Python scalars and sequences as documented; an existing
        # cupy.ndarray is passed through unchanged.
        values = cupy.asarray(values)
        if values.ndim and not len(values):
            return
        idx_dtype = self.row.dtype

        # Determine which triples to keep and where to put the new ones.
        full_keep = self.col - self.row != k
        if k < 0:
            max_index = min(M + k, N)
            if values.ndim:
                max_index = min(max_index, len(values))
            keep = cupy.logical_or(full_keep, self.col >= max_index)
            new_row = cupy.arange(-k, -k + max_index, dtype=idx_dtype)
            new_col = cupy.arange(max_index, dtype=idx_dtype)
        else:
            max_index = min(M, N - k)
            if values.ndim:
                max_index = min(max_index, len(values))
            keep = cupy.logical_or(full_keep, self.row >= max_index)
            new_row = cupy.arange(max_index, dtype=idx_dtype)
            new_col = cupy.arange(k, k + max_index, dtype=idx_dtype)

        # Define the array of data consisting of the entries to be added.
        if values.ndim:
            new_data = values[:max_index]
        else:
            new_data = cupy.empty(max_index, dtype=self.dtype)
            new_data[:] = values

        # Update the internal structure.
        self.row = cupy.concatenate((self.row[keep], new_row))
        self.col = cupy.concatenate((self.col[keep], new_col))
        self.data = cupy.concatenate((self.data[keep], new_data))
        # The new entries are appended at the end, so the triples are no
        # longer known to be sorted.
        self.has_canonical_format = False

    def eliminate_zeros(self):
        """Removes zero entries in place."""
        ind = self.data != 0
        self.data = self.data[ind]
        self.row = self.row[ind]
        self.col = self.col[ind]

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Only ``None`` is supported.

        Returns:
            int: The number of stored values.

        Raises:
            ValueError: If ``axis`` is not ``None``.
        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError('getnnz over an axis is not supported')

    def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.coo_matrix: Copy of the array on host memory.

        Raises:
            RuntimeError: If SciPy could not be imported.
        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')

        data = self.data.get(stream)
        row = self.row.get(stream)
        col = self.col.get(stream)
        return scipy.sparse.coo_matrix(
            (data, (row, col)), shape=self.shape)

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. warning::
            When sorting the indices, CuPy follows the convention of cuSPARSE,
            which is different from that of SciPy. Therefore, the order of the
            output indices may differ:

            .. code-block:: python

                >>> #     1 0 0
                >>> # A = 1 1 0
                >>> #     1 1 1
                >>> data = cupy.array([1, 1, 1, 1, 1, 1], 'f')
                >>> row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
                >>> col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
                >>> A = cupyx.scipy.sparse.coo_matrix((data, (row, col)),
                ...                                   shape=(3, 3))
                >>> a = A.get()
                >>> A.sum_duplicates()
                >>> a.sum_duplicates()  # a is scipy.sparse.coo_matrix
                >>> A.row
                array([0, 1, 1, 2, 2, 2], dtype=int32)
                >>> a.row
                array([0, 1, 2, 1, 2, 2], dtype=int32)
                >>> A.col
                array([0, 0, 1, 0, 1, 2], dtype=int32)
                >>> a.col
                array([0, 0, 0, 1, 1, 2], dtype=int32)

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.coo_matrix.sum_duplicates`

        """
        if self.has_canonical_format:
            return
        # Note: The sorting order below follows the cuSPARSE convention (first
        # row then col, so-called row-major) and differs from that of SciPy, as
        # the cuSPARSE functions such as cusparseSpMV() assume this sorting
        # order.
        # See https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
        keys = cupy.stack([self.col, self.row])
        order = cupy.lexsort(keys)
        src_data = self.data[order]
        src_row = self.row[order]
        src_col = self.col[order]
        diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

        if diff[1:].all():
            # All elements have different indices.
            data = src_data
            row = src_row
            col = src_col
        else:
            # TODO(leofang): move the kernels outside this method
            # ``index`` is the destination slot of every sorted entry;
            # entries with equal coordinates share one slot.
            index = cupy.cumsum(diff, dtype='i')
            size = int(index[-1]) + 1
            data = cupy.zeros(size, dtype=self.data.dtype)
            row = cupy.empty(size, dtype='i')
            col = cupy.empty(size, dtype='i')
            if self.data.dtype.kind == 'f':
                cupy.ElementwiseKernel(
                    'T src_data, int32 src_row, int32 src_col, int32 index',
                    'raw T data, raw int32 row, raw int32 col',
                    '''
                    atomicAdd(&data[index], src_data);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'cupyx_scipy_sparse_coo_sum_duplicates_assign'
                )(src_data, src_row, src_col, index, data, row, col)
            elif self.data.dtype.kind == 'c':
                # Real and imaginary parts are accumulated separately.
                cupy.ElementwiseKernel(
                    'T src_real, T src_imag, int32 src_row, int32 src_col, '
                    'int32 index',
                    'raw T real, raw T imag, raw int32 row, raw int32 col',
                    '''
                    atomicAdd(&real[index], src_real);
                    atomicAdd(&imag[index], src_imag);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'cupyx_scipy_sparse_coo_sum_duplicates_assign_complex'
                )(src_data.real, src_data.imag, src_row, src_col, index,
                  data.real, data.imag, row, col)

        self.data = data
        self.row = row
        self.col = col
        self.has_canonical_format = True

    def toarray(self, order=None, out=None):
        """Returns a dense matrix representing the same value.

        The conversion goes through :meth:`tocsr`; ``order`` and ``out`` are
        forwarded to the ``toarray`` method of the CSR matrix.

        Args:
            order (str): Not supported.
            out: Not supported.

        Returns:
            cupy.ndarray: Dense array representing the same value.

        .. seealso:: :meth:`scipy.sparse.coo_matrix.toarray`

        """
        return self.tocsr().toarray(order=order, out=out)

    def tocoo(self, copy=False):
        """Converts the matrix to COOrdinate format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Converted matrix.

        """
        if copy:
            return self.copy()
        else:
            return self

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csc conversion.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return csc.csc_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'c')
        x = cusparse.coo2csc(x)
        x.has_canonical_format = True
        return x

    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csr conversion.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return csr.csr_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'r')
        x = cusparse.coo2csr(x)
        x.has_canonical_format = True
        return x

    def transpose(self, axes=None, copy=False):
        """Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, a returned matrix shares no data.
                Otherwise, it shares data arrays as much as possible.

        Returns:
            cupyx.scipy.sparse.spmatrix: Transpose matrix.

        Raises:
            ValueError: If ``axes`` is not ``None``.

        """
        if axes is not None:
            raise ValueError(
                'Sparse matrices do not support an \'axes\' parameter because '
                'swapping dimensions is the only logical permutation.')
        shape = self.shape[1], self.shape[0]
        # Swapping the roles of the row and column indices transposes the
        # matrix without touching the values.
        return coo_matrix(
            (self.data, (self.col, self.row)), shape=shape, copy=copy)
def isspmatrix_coo(x):
    """Tells whether an object is a COO format sparse matrix.

    Args:
        x: Object to inspect.

    Returns:
        bool: ``True`` if ``x`` is an instance of
        :class:`cupyx.scipy.sparse.coo_matrix`, ``False`` otherwise.

    """
    is_coo = isinstance(x, coo_matrix)
    return is_coo