## A test to convert numpy.ndarray to a sparse matrix and back again

In [189]:
import numpy as np
from scipy import sparse

### 1. With a completely dense ndarray

In [190]:
# Dense 5x5 integer test matrix holding 0..24 in row-major order.
# It is deliberately left as integers: ndarray.astype() returns a new array
# instead of converting in place, so a bare `x.astype(...)` statement has no
# effect on `x`.
x = np.arange(25).reshape(5, 5)
print(x)
print(type(x))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
<class 'numpy.ndarray'>


In [191]:
# Convert the dense array to Compressed Sparse Column (CSC) format.
# Only non-zero values are stored, so the single 0 at (0, 0) is dropped.
a = sparse.csc_matrix(x)
a

<5x5 sparse matrix of type '<class 'numpy.int64'>'
	with 24 stored elements in Compressed Sparse Column format>

In [192]:
# Printing a sparse matrix lists each stored entry as (row, column) value.
print(a)

  (1, 0)	5
  (2, 0)	10
  (3, 0)	15
  (4, 0)	20
  (0, 1)	1
  (1, 1)	6
  (2, 1)	11
  (3, 1)	16
  (4, 1)	21
  (0, 2)	2
  (1, 2)	7
  (2, 2)	12
  (3, 2)	17
  (4, 2)	22
  (0, 3)	3
  (1, 3)	8
  (2, 3)	13
  (3, 3)	18
  (4, 3)	23
  (0, 4)	4
  (1, 4)	9
  (2, 4)	14
  (3, 4)	19
  (4, 4)	24


In [193]:
# Round trip: expand the sparse matrix back into a dense numpy.ndarray.
y = a.toarray()
print(y)
print(type(y))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
<class 'numpy.ndarray'>


In [194]:
# Element-wise comparison of the original and the round-tripped array.
x == y

array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

### 2. With a zero-padded ndarray

In [215]:
# Build a zero-padded variant of x: the outer rows and columns become 0.
# Work on a copy -- plain `i = x` would only alias x, and the in-place
# assignments below would then silently overwrite x as well.
i = x.copy()
i[:, 0] = 0    # first column
i[:, -1] = 0   # last column
i[0, :] = 0    # first row
i[-1, :] = 0   # last row
print(type(i))
i

<class 'numpy.ndarray'>


array([[ 0,  0,  0,  0,  0],
       [ 0,  6,  7,  8,  0],
       [ 0, 11, 12, 13,  0],
       [ 0, 16, 17, 18,  0],
       [ 0,  0,  0,  0,  0]])

In [216]:
# Convert the zero-padded array to CSC; the zero border is not stored.
b = sparse.csc_matrix(i)
b

<5x5 sparse matrix of type '<class 'numpy.int64'>'
	with 9 stored elements in Compressed Sparse Column format>

In [217]:
# List the stored (row, column) value entries of the zero-padded matrix.
print(b)

  (1, 1)	6
  (2, 1)	11
  (3, 1)	16
  (1, 2)	7
  (2, 2)	12
  (3, 2)	17
  (1, 3)	8
  (2, 3)	13
  (3, 3)	18


In [218]:
# Round trip: expand the sparse matrix back to a dense array; the zero
# padding is restored by toarray().
j = b.toarray()
print(type(j))
j

<class 'numpy.ndarray'>


array([[ 0,  0,  0,  0,  0],
       [ 0,  6,  7,  8,  0],
       [ 0, 11, 12, 13,  0],
       [ 0, 16, 17, 18,  0],
       [ 0,  0,  0,  0,  0]])

In [219]:
# Element-wise comparison of the zero-padded original and its round trip.
i == j

array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

### 3. With a nan-padded ndarray

In [244]:
# NaN only exists for floating-point dtypes, so start from a float copy of x
# and overwrite the whole border with NaN.
k = x.astype(np.float64)
k[[0, -1], :] = np.nan   # first and last row
k[:, [0, -1]] = np.nan   # first and last column
print(type(k))
k

<class 'numpy.ndarray'>


array([[nan, nan, nan, nan, nan],
       [nan,  6.,  7.,  8., nan],
       [nan, 11., 12., 13., nan],
       [nan, 16., 17., 18., nan],
       [nan, nan, nan, nan, nan]])

In [245]:
# Replace every NaN with 0 in a new array (k itself is left untouched) so
# that the padding is dropped when converting to a sparse matrix.
k0 = np.where(np.isnan(k), 0.0, k)
k0

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  6.,  7.,  8.,  0.],
       [ 0., 11., 12., 13.,  0.],
       [ 0., 16., 17., 18.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [246]:
# Convert the NaN-free (zero-filled) float array to CSC format.
c = sparse.csc_matrix(k0)
c

<5x5 sparse matrix of type '<class 'numpy.float64'>'
	with 9 stored elements in Compressed Sparse Column format>

In [247]:
# List the stored (row, column) value entries of the float matrix.
print(c)

  (1, 1)	6.0
  (2, 1)	11.0
  (3, 1)	16.0
  (1, 2)	7.0
  (2, 2)	12.0
  (3, 2)	17.0
  (1, 3)	8.0
  (2, 3)	13.0
  (3, 3)	18.0


In [248]:
# Round trip for THIS section: expand `c` (built from the zero-filled float
# array k0), not `b` from the zero-padded integer section. `c` already holds
# float64 data, so no dtype conversion is needed.
l = c.toarray()
print(type(l))
l

<class 'numpy.ndarray'>


array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  6.,  7.,  8.,  0.],
       [ 0., 11., 12., 13.,  0.],
       [ 0., 16., 17., 18.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [249]:
# Restore the NaN padding in a new array by turning every 0 back into NaN.
# Note that any genuine 0 in the data would become NaN as well.
l0 = np.where(l == 0, np.nan, l)
l0

array([[nan, nan, nan, nan, nan],
       [nan,  6.,  7.,  8., nan],
       [nan, 11., 12., 13., nan],
       [nan, 16., 17., 18., nan],
       [nan, nan, nan, nan, nan]])

In [261]:
# Show the original NaN-padded array and the reconstructed one, then compare.
print(k,  end='\n\n')
print(l0)
k == l0 # the border is False because NaN never compares equal to NaN; np.array_equal(k, l0, equal_nan=True) would treat the NaNs as equal

[[nan nan nan nan nan]
 [nan  6.  7.  8. nan]
 [nan 11. 12. 13. nan]
 [nan 16. 17. 18. nan]
 [nan nan nan nan nan]]

[[nan nan nan nan nan]
 [nan  6.  7.  8. nan]
 [nan 11. 12. 13. nan]
 [nan 16. 17. 18. nan]
 [nan nan nan nan nan]]


array([[False, False, False, False, False],
       [False,  True,  True,  True, False],
       [False,  True,  True,  True, False],
       [False,  True,  True,  True, False],
       [False, False, False, False, False]])

#### Why the NaNs must be replaced first: `csc_matrix` stores NaN as explicit entries

In [207]:
# Convert the NaN-padded array directly, without zero-filling first.
# NOTE: this rebinds `c`, which is also assigned from k0 in another cell.
# NaN is not zero, so every NaN is kept as an explicitly stored element.
c = sparse.csc_matrix(k)
c

<5x5 sparse matrix of type '<class 'numpy.float64'>'
	with 25 stored elements in Compressed Sparse Column format>

In [208]:
# List the stored entries; the NaN border shows up as stored values.
print(c)

  (0, 0)	nan
  (1, 0)	nan
  (2, 0)	nan
  (3, 0)	nan
  (4, 0)	nan
  (0, 1)	nan
  (1, 1)	6.0
  (2, 1)	11.0
  (3, 1)	16.0
  (4, 1)	nan
  (0, 2)	nan
  (1, 2)	7.0
  (2, 2)	12.0
  (3, 2)	17.0
  (4, 2)	nan
  (0, 3)	nan
  (1, 3)	8.0
  (2, 3)	13.0
  (3, 3)	18.0
  (4, 3)	nan
  (0, 4)	nan
  (1, 4)	nan
  (2, 4)	nan
  (3, 4)	nan
  (4, 4)	nan


##### A little aside, because I want the NaNs gone, not still stored in the CSC matrix!

###### Boolean-mask indexing returns a copy; it doesn't create a view

In [209]:
# Boolean-mask indexing returns a new, flattened 1-D array holding only the
# non-NaN values.
k2 = k[~np.isnan(k)]
print(k2)
print(type(k2))
print(k2.base)  # None: k2 owns its data, i.e. it is a copy, not a view of k
# The 1-D input is turned into a single-row sparse matrix.
print(sparse.csc_matrix(k2))

[ 6.  7.  8. 11. 12. 13. 16. 17. 18.]
<class 'numpy.ndarray'>
None
  (0, 0)	6.0
  (0, 1)	7.0
  (0, 2)	8.0
  (0, 3)	11.0
  (0, 4)	12.0
  (0, 5)	13.0
  (0, 6)	16.0
  (0, 7)	17.0
  (0, 8)	18.0


In [210]:
# Any comparison with NaN evaluates to False, so this mask drops the NaNs
# too. The mask still produces a copy; .view() merely creates a view of that
# temporary copy, which is why k3.base is the filtered array rather than k.
k3 = k[k < 1000000000].view()
print(k3)
print(k3.base)
print(sparse.csc_matrix(k3))

[ 6.  7.  8. 11. 12. 13. 16. 17. 18.]
[ 6.  7.  8. 11. 12. 13. 16. 17. 18.]
  (0, 0)	6.0
  (0, 1)	7.0
  (0, 2)	8.0
  (0, 3)	11.0
  (0, 4)	12.0
  (0, 5)	13.0
  (0, 6)	16.0
  (0, 7)	17.0
  (0, 8)	18.0


In [211]:
# Same experiment starting from the already NaN-free k2: the boolean mask
# copies the selected values and .view() only views that temporary copy.
k4 = k2[k2 > 7].view()
print(k4)
print(k4.base)
print(sparse.csc_matrix(k4))

[ 8. 11. 12. 13. 16. 17. 18.]
[ 8. 11. 12. 13. 16. 17. 18.]
  (0, 0)	8.0
  (0, 1)	11.0
  (0, 2)	12.0
  (0, 3)	13.0
  (0, 4)	16.0
  (0, 5)	17.0
  (0, 6)	18.0


In [212]:
# Try a masked array instead. NOTE: this rebinds the name `k4`.
# Masking only hides the NaNs when the array is displayed: the underlying
# data (k4.base) still contains them, and csc_matrix stores them as well.
k4 = np.ma.masked_invalid(k, copy=False)
print(k4)
print(k4.base)
print(sparse.csc_matrix(k4))

[[-- -- -- -- --]
 [-- 6.0 7.0 8.0 --]
 [-- 11.0 12.0 13.0 --]
 [-- 16.0 17.0 18.0 --]
 [-- -- -- -- --]]
[[nan nan nan nan nan]
 [nan  6.  7.  8. nan]
 [nan 11. 12. 13. nan]
 [nan 16. 17. 18. nan]
 [nan nan nan nan nan]]
  (0, 0)	nan
  (1, 0)	nan
  (2, 0)	nan
  (3, 0)	nan
  (4, 0)	nan
  (0, 1)	nan
  (1, 1)	6.0
  (2, 1)	11.0
  (3, 1)	16.0
  (4, 1)	nan
  (0, 2)	nan
  (1, 2)	7.0
  (2, 2)	12.0
  (3, 2)	17.0
  (4, 2)	nan
  (0, 3)	nan
  (1, 3)	8.0
  (2, 3)	13.0
  (3, 3)	18.0
  (4, 3)	nan
  (0, 4)	nan
  (1, 4)	nan
  (2, 4)	nan
  (3, 4)	nan
  (4, 4)	nan


#### Done with my aside... but disappointed: none of these attempts dropped the NaNs while keeping the 2-D shape!

In [213]:
# Round trip of the NaN-containing sparse matrix back to a dense array.
# NOTE: this rebinds `k` to the array rebuilt from `c`.
k = c.toarray()
print(type(k))
k

<class 'numpy.ndarray'>


array([[nan, nan, nan, nan, nan],
       [nan,  6.,  7.,  8., nan],
       [nan, 11., 12., 13., nan],
       [nan, 16., 17., 18., nan],
       [nan, nan, nan, nan, nan]])