/
dia.py
182 lines (137 loc) · 5.66 KB
/
dia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
try:
import scipy.sparse
_scipy_available = True
except ImportError:
_scipy_available = False
import cupy
from cupy import core
from cupy.sparse import csc
from cupy.sparse import data
class dia_matrix(data._data_matrix):

    """Sparse matrix with DIAgonal storage.

    Now it has only one initializer format below:

    ``dia_matrix((data, offsets))``

    ``data`` holds one stored diagonal per row and ``offsets`` holds the
    offset of each of those diagonals; both are converted to
    :class:`cupy.ndarray` by the constructor.

    Args:
        arg1: Arguments for the initializer. It must be a tuple
            ``(data, offsets)``.
        shape (tuple): Shape of a matrix. Its length must be two. It is
            required for the ``(data, offsets)`` form.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given arrays are always used.

    Raises:
        ValueError: If ``arg1`` is not a tuple, ``shape`` is missing, the
            arrays have an unexpected rank, the number of diagonals does
            not match the number of offsets, or an offset is duplicated.

    .. seealso::
       :class:`scipy.sparse.dia_matrix`

    """

    format = 'dia'

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if not isinstance(arg1, tuple):
            raise ValueError(
                'unrecognized form for dia_matrix constructor')

        diagonals, offsets = arg1
        if shape is None:
            raise ValueError('expected a shape argument')

        diagonals = cupy.array(diagonals, dtype=dtype, copy=copy)
        diagonals = cupy.atleast_2d(diagonals)
        offsets = cupy.array(offsets, dtype='i', copy=copy)
        offsets = cupy.atleast_1d(offsets)

        if offsets.ndim != 1:
            raise ValueError('offsets array must have rank 1')

        if diagonals.ndim != 2:
            raise ValueError('data array must have rank 2')

        if diagonals.shape[0] != len(offsets):
            raise ValueError(
                'number of diagonals (%d) does not match the number of '
                'offsets (%d)'
                % (diagonals.shape[0], len(offsets)))

        # After sorting, duplicated offsets end up next to each other, so
        # comparing neighbours is enough to detect them.
        sorted_offsets = cupy.sort(offsets)
        if (sorted_offsets[:-1] == sorted_offsets[1:]).any():
            raise ValueError('offset array contains duplicate values')

        self.data = diagonals
        self.offsets = offsets
        self._shape = shape

    def _with_data(self, data):
        """Returns a matrix with the same offsets and shape but new data."""
        return dia_matrix((data, self.offsets), shape=self.shape)

    def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.dia_matrix: Copy of the array on host memory.

        Raises:
            RuntimeError: If SciPy could not be imported.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')
        host_data = self.data.get(stream)
        host_offsets = self.offsets.get(stream)
        return scipy.sparse.dia_matrix(
            (host_data, host_offsets), shape=self._shape)

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        Raises:
            NotImplementedError: If ``axis`` is not ``None``.

        """
        if axis is not None:
            raise NotImplementedError(
                'getnnz over an axis is not implemented for DIA format')

        m, n = self.shape
        # Each diagonal with offset k contributes min(m, n - k) when k > 0
        # and min(m + k, n) otherwise; the kernel sums these terms.
        # NOTE(review): an offset lying completely outside the matrix makes
        # its term zero or negative; the formula does not clamp it.
        nnz = core.ReductionKernel(
            'int32 offsets, int32 m, int32 n', 'int32 nnz',
            'offsets > 0 ? min(m, n - offsets) : min(m + offsets, n)',
            'a + b', 'nnz = a', '0', 'dia_nnz')(self.offsets, m, n)
        return int(nnz)

    def toarray(self, order=None, out=None):
        """Returns a dense matrix representing the same value.

        The conversion goes through :meth:`tocsc`; ``order`` and ``out`` are
        forwarded unchanged to the CSC matrix's ``toarray``.

        """
        return self.tocsc().toarray(order=order, out=out)

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Entries whose stored value equals zero, and entries that fall
        outside the matrix bounds, are not carried over to the result.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in dia to csc conversion.

        Returns:
            cupy.sparse.csc_matrix: Converted matrix.

        """
        if self.data.size == 0:
            return csc.csc_matrix(self.shape, dtype=self.dtype)

        num_rows, num_cols = self.shape
        offset_len = self.data.shape[1]

        # For every stored element compute its row index and whether it is
        # a valid, non-zero entry of the matrix. The column index of an
        # element is its position along the second axis of ``self.data``.
        row, mask = core.ElementwiseKernel(
            'int32 offset_len, int32 offsets, int32 num_rows, '
            'int32 num_cols, T data',
            'int32 row, bool mask',
            '''
int offset_inds = i % offset_len;
row = offset_inds - offsets;
mask = (row >= 0 && row < num_rows && offset_inds < num_cols
&& data != 0);
''',
            'dia_tocsc')(offset_len, self.offsets[:, None], num_rows,
                         num_cols, self.data)

        indptr = cupy.zeros(num_cols + 1, dtype='i')
        # The stored diagonals may be wider than the matrix. The mask is
        # False for every column index >= num_cols, so truncating the
        # per-column counts loses nothing and keeps the assignment below
        # shape-compatible with ``indptr``.
        col_counts = mask.sum(axis=0)[:num_cols]
        indptr[1: offset_len + 1] = cupy.cumsum(col_counts)
        if offset_len < num_cols:
            # Columns beyond the stored width are empty: repeat the last
            # pointer value for them.
            indptr[offset_len + 1:] = indptr[offset_len]
        indices = row.T[mask.T].astype('i', copy=False)
        values = self.data.T[mask.T]
        return csc.csc_matrix(
            (values, indices, indptr), shape=self.shape, dtype=self.dtype)

    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        The conversion is done by converting to CSC first and then from CSC
        to CSR.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in dia to csr conversion.

        Returns:
            cupy.sparse.csr_matrix: Converted matrix.

        """
        return self.tocsc().tocsr()
def isspmatrix_dia(x):
    """Tells whether ``x`` is a sparse matrix in DIA format.

    Args:
        x: Object to test.

    Returns:
        bool: ``True`` when ``x`` is an instance of
        :class:`cupy.sparse.dia_matrix`, ``False`` otherwise.

    """
    is_dia = isinstance(x, dia_matrix)
    return is_dia