# Sparse Matrix and its representations

For more details refer to my article: https://limitlessdatascience.wordpress.com/2020/11/26/sparse-matrix-in-machine-learning/

### Simple Example: Convert Dense matrix to Sparse matrix and then again back to Dense matrix

In [1]:
import sys                 # Return the size of an object in bytes
import numpy as np

In [2]:
from scipy.sparse import csr_matrix, csc_matrix
# csr_matrix: used to create compressed sparse row matrix from Matrix
# csc_matrix: used to create compressed sparse column matrix from Matrix

In [3]:
# create a 2-D representation of the matrix
A = np.array([[1, 0, 0, 0, 0, 0], [0, 0, 2, 0, 0, 1],\
 [0, 0, 0, 2, 0, 0]])
print("Dense matrix representation: \n", A)
print("Memory utilised (bytes): ", sys.getsizeof(A))
print("Type of the object", type(A))

Dense matrix representation: 
 [[1 0 0 0 0 0]
 [0 0 2 0 0 1]
 [0 0 0 2 0 0]]
Memory utilised (bytes):  184
Type of the object <class 'numpy.ndarray'>


In [4]:
# convert to Compressed sparse row matrix representation 
S = csr_matrix(A)
print("Sparse 'row' matrix: \n",S)
print("Memory utilised (bytes): ", sys.getsizeof(S))
print("Type of the object", type(S))

Sparse 'row' matrix: 
   (0, 0)	1
  (1, 2)	2
  (1, 5)	1
  (2, 3)	2
Memory utilised (bytes):  56
Type of the object <class 'scipy.sparse.csr.csr_matrix'>


In [5]:
# convert to Compressed sparse column matrix representation 
S = csc_matrix(A)
print("Sparse 'column' matrix: \n",S)
print("Memory utilised (bytes): ", sys.getsizeof(S))
print("Type of the object", type(S))

Sparse 'column' matrix: 
   (0, 0)	1
  (1, 2)	2
  (2, 3)	2
  (1, 5)	1
Memory utilised (bytes):  56
Type of the object <class 'scipy.sparse.csc.csc_matrix'>


# Convert back Sparse matrix to 2-D representation of the matrix

In [6]:
B = S.todense()
print("Dense matrix: \n", B)
print("Memory utilised (bytes): ", sys.getsizeof(B))
print("Type of the object", type(B))

Dense matrix: 
 [[1 0 0 0 0 0]
 [0 0 2 0 0 1]
 [0 0 0 2 0 0]]
Memory utilised (bytes):  136
Type of the object <class 'numpy.matrix'>


#### Note the reason for 184 Vs 136 is because of numpy.ndarray and numpy.matrix

In [7]:
B = S.toarray()
print("Dense matrix: \n", B)
print("Memory utilised (bytes): ", sys.getsizeof(B))
print("Type of the object", type(B))

Dense matrix: 
 [[1 0 0 0 0 0]
 [0 0 2 0 0 1]
 [0 0 0 2 0 0]]
Memory utilised (bytes):  184
Type of the object <class 'numpy.ndarray'>


# Example 2

In [8]:
X = np.random.uniform(size=(6, 6))
print(X)
print("Memory utilised (bytes): ", sys.getsizeof(X))

[[0.45948034 0.77151356 0.97124281 0.49015045 0.54030134 0.47465488]
 [0.10439909 0.93797568 0.27714433 0.54126204 0.64281374 0.22220006]
 [0.43202329 0.05291794 0.71129842 0.40946858 0.55398491 0.93432567]
 [0.95533057 0.76651525 0.02970801 0.49983595 0.49378451 0.24335626]
 [0.27167082 0.21165409 0.41305648 0.18596697 0.17038109 0.26334191]
 [0.98262283 0.40976662 0.72299496 0.63035364 0.59889638 0.78087513]]
Memory utilised (bytes):  400


In [9]:
#Then we need to zero out a majority of the matrix elements, making it sparse.
X[X < 0.7] = 0
print(X)
print("Memory utilised (bytes): ", sys.getsizeof(X))

[[0.         0.77151356 0.97124281 0.         0.         0.        ]
 [0.         0.93797568 0.         0.         0.         0.        ]
 [0.         0.         0.71129842 0.         0.         0.93432567]
 [0.95533057 0.76651525 0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.        ]
 [0.98262283 0.         0.72299496 0.         0.         0.78087513]]
Memory utilised (bytes):  400


In [10]:
X_csr = csr_matrix(X)
print(X_csr)
print("Memory utilised (bytes): ", sys.getsizeof(X_csr))

  (0, 1)	0.7715135624055367
  (0, 2)	0.9712428096352037
  (1, 1)	0.9379756815413223
  (2, 2)	0.7112984215174241
  (2, 5)	0.9343256743390597
  (3, 0)	0.9553305681966052
  (3, 1)	0.7665152460404666
  (5, 0)	0.9826228333873722
  (5, 2)	0.722994961401296
  (5, 5)	0.7808751293630022
Memory utilised (bytes):  56


In [11]:
X_csc = csc_matrix(X)
print(X_csc)
print("Memory utilised (bytes): ", sys.getsizeof(X_csc))

  (3, 0)	0.9553305681966052
  (5, 0)	0.9826228333873722
  (0, 1)	0.7715135624055367
  (1, 1)	0.9379756815413223
  (3, 1)	0.7665152460404666
  (0, 2)	0.9712428096352037
  (2, 2)	0.7112984215174241
  (5, 2)	0.722994961401296
  (2, 5)	0.9343256743390597
  (5, 5)	0.7808751293630022
Memory utilised (bytes):  56


# To Identify whether Matrix is Sparse matrix 

In [12]:
from scipy.sparse import issparse, isspmatrix

In [32]:
isspmatrix(X_csc)

True

In [41]:
issparse(X_csc)

True