In [2]:
import numpy as np
from scipy.sparse import csr_matrix

# Dense 4x5 example with six non-zero entries; row 2 is entirely zero.
x = np.asarray([
    [1, 0, 0, 0, 2],
    [0, 3, 0, 4, 0],
    [0, 0, 0, 0, 0],
    [5, 0, 0, 6, 0],
])
# Compressed Sparse Row view of the same data.
csr = csr_matrix(x)

In [4]:
csr.todense()

matrix([[1, 0, 0, 0, 2],
        [0, 3, 0, 4, 0],
        [0, 0, 0, 0, 0],
        [5, 0, 0, 6, 0]], dtype=int64)

In [16]:
csr.data

array([1, 2, 3, 4, 5, 6], dtype=int64)

In [15]:
csr.indices

array([0, 4, 1, 3, 0, 3], dtype=int32)

In [13]:
csr.indptr

array([0, 2, 4, 4, 6], dtype=int32)

In [10]:
from scipy.sparse import dok_matrix

dok = dok_matrix(x)
dok.values()

dict_values([1, 2, 3, 4, 5, 6])

In [14]:
# Walk the CSR structure row by row: row i owns the stored entries at
# positions indptr[i] .. indptr[i + 1] - 1 of `indices` / `data`.
for i in range(len(csr.indptr) - 1):
    b, e = csr.indptr[i], csr.indptr[i + 1]
    for idx in range(b, e):
        j, d = csr.indices[idx], csr.data[idx]
        print('({}, {}) = {}'.format(i, j, d))

(0, 0) = 1
(0, 4) = 2
(1, 1) = 3
(1, 3) = 4
(3, 0) = 5
(3, 3) = 6


In [39]:
def csr_mat_slicing(x, row, col, row_slicing=False):
    """Look up stored values of a CSR matrix straight from its raw arrays.

    Parameters
    ----------
    x : object exposing CSR ``data``, ``indices`` and ``indptr`` arrays
    row : int
        Row to read.
    col : int
        Column to read; ignored when ``row_slicing`` is true.
    row_slicing : bool, default False
        When true, return every stored value of ``row`` instead of a single cell.

    Returns
    -------
    numpy.ndarray
        With ``row_slicing``: the stored (non-zero) values of the row, in storage
        order. Otherwise: the stored value(s) at ``(row, col)``, which is an empty
        array when nothing is stored there.
    """
    values, col_ids, row_ptr = x.data, x.indices, x.indptr

    # Stored entries of `row` live in the half-open range [start, stop).
    start, stop = row_ptr[row], row_ptr[row + 1]
    stored_in_row = values[start:stop]
    if row_slicing:
        return stored_in_row

    # Positions (within the row's slice) whose column index equals `col`.
    hit_positions = np.where(col_ids[start:stop] == col)[0]
    return stored_in_row[hit_positions]

def csr_row_slicing(x, row):
    """Return the stored (non-zero) values of one row of a CSR matrix.

    Parameters
    ----------
    x : object exposing CSR ``data`` and ``indptr`` arrays
    row : int
        Row to read.

    Returns
    -------
    numpy.ndarray
        The values stored for ``row`` in storage order; empty when the row has
        no stored entries. Implicit zeros are not included.
    """
    data = x.data
    indptr = x.indptr

    # Row `row` owns data[indptr[row]:indptr[row + 1]].
    b_indx = indptr[row]
    e_indx = indptr[row+1]
    # The original stopped here and implicitly returned None.
    return data[b_indx:e_indx]

csr_mat_slicing(csr, 1,2)

array([], dtype=int64)

In [43]:
from sklearn.feature_extraction.text import CountVectorizer

In [47]:
# Two short sentences used as the example corpus.
example = ['hi my name is eden hazard',
          'my name is englo kante']

# Fit a CountVectorizer (default settings) on the corpus; `cv_fit` keeps the
# value returned by fit_transform, and the learned word -> column mapping is
# available afterwards as `cv.vocabulary_`.
cv = CountVectorizer()
cv_fit = cv.fit_transform(example)

In [50]:
cv.vocabulary_

{'hi': 3,
 'my': 6,
 'name': 7,
 'is': 4,
 'eden': 0,
 'hazard': 2,
 'englo': 1,
 'kante': 5}

In [73]:
from scipy.sparse import csr_matrix

# Coordinate-style construction: data[k] is placed at (rows[k], cols[k]).
rows = [0, 0, 1, 1, 3, 3]
cols = [0, 4, 1, 3, 0, 3]
data = [1, 2, 3, 4, 5, 6]
# No explicit shape is passed; the resulting matrix shown by toarray() is 4x5.
csr = csr_matrix((data, (rows, cols)))

In [74]:
csr.toarray()

array([[1, 0, 0, 0, 2],
       [0, 3, 0, 4, 0],
       [0, 0, 0, 0, 0],
       [5, 0, 0, 6, 0]], dtype=int64)

In [54]:
a = csr_matrix((4,5))

In [55]:
a

<4x5 sparse matrix of type '<class 'numpy.float64'>'
	with 0 stored elements in Compressed Sparse Row format>

In [63]:
csr.toarray()

array([[2, 0, 0, 0, 3],
       [0, 3, 0, 4, 0],
       [0, 0, 0, 0, 0],
       [5, 0, 0, 6, 0]], dtype=int64)

In [66]:
csr.indices[csr.indptr[1]:csr.indptr[2]]

array([1, 3], dtype=int32)

In [98]:
# Start from an empty 4x5 CSR matrix and plug the three raw CSR arrays of
# `csr` into it to show that they fully determine the matrix contents.
ex = csr_matrix((4,5))
# NOTE(review): these are plain attribute assignments, so `ex` presumably
# references the same array objects as `csr` rather than copies -- confirm
# before mutating either matrix in place.
ex.indptr = csr.indptr
ex.indices = csr.indices
ex.data = csr.data
ex.toarray()

array([[1, 0, 0, 0, 2],
       [0, 3, 0, 4, 0],
       [0, 0, 0, 0, 0],
       [5, 0, 0, 6, 0]], dtype=int64)

In [100]:
csr.toarray()

array([[1, 0, 0, 0, 2],
       [0, 3, 0, 4, 0],
       [0, 0, 0, 0, 0],
       [5, 0, 0, 6, 0]], dtype=int64)

In [104]:
csr.data

array([1, 2, 3, 4, 5, 6], dtype=int64)

In [93]:
csr.indices

array([0, 4, 1, 3, 0, 3], dtype=int32)

In [94]:
csr.indptr

array([0, 2, 4, 4, 6], dtype=int32)

In [8]:
# Hand-build a 5x5 CSR matrix whose only populated row is row 1 (five ones).
# Row r owns data[indptr[r]:indptr[r + 1]], so indptr has to be non-decreasing
# and finish at the number of stored entries; the previous [0,0,5,1,1,1]
# violated both rules and only rendered correctly by accident.
indptr = np.array([0, 0, 5, 5, 5, 5])
indices = np.arange(5)
data = np.repeat(1, 5)
# Going through the constructor lets scipy validate the three arrays.
ex = csr_matrix((data, indices, indptr), shape=(5, 5))

In [9]:
ex.toarray()

array([[0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [25]:
def make_sparse_Nd_mat(word_nonzero_indices, V, K):
    """Build a V x K CSR matrix whose selected rows are filled with ones.

    Parameters
    ----------
    word_nonzero_indices : iterable of int
        Row indices (0 <= index < V) that should hold K stored ones each.
        Duplicates are ignored and order does not matter.
    V : int
        Number of rows.
    K : int
        Number of columns.

    Returns
    -------
    scipy.sparse.csr_matrix
        float64 matrix of shape (V, K); every row listed in
        ``word_nonzero_indices`` is all ones, every other row stores nothing.

    Raises
    ------
    ValueError
        If any requested row index lies outside ``[0, V)``.
    """
    rows = np.unique(np.asarray(list(word_nonzero_indices), dtype=np.int64))
    if rows.size and (rows[0] < 0 or rows[-1] >= V):
        raise ValueError(
            'row indices must lie in [0, {}), got range [{}, {}]'.format(
                V, rows[0], rows[-1]))

    # Stored entries per row: K for the selected rows, 0 elsewhere. The
    # cumulative sum gives a valid row pointer of length V + 1 for any gap
    # pattern (the previous loop never emitted the empty rows and padded the
    # tail with zeros).
    per_row = np.zeros(V, dtype=np.int64)
    per_row[rows] = K
    indptr = np.concatenate(([0], np.cumsum(per_row)))

    # Each populated row stores columns 0 .. K-1, all with value 1.
    indices = np.tile(np.arange(K), rows.size)
    data = np.ones(rows.size * K)

    # The constructor validates the arrays and chooses the index dtype.
    return csr_matrix((data, indices, indptr), shape=(V, K))

In [101]:
import numpy as np
from scipy.sparse import csr_matrix

# Rows of the V x K matrix that are fully populated with ones.
# The construction below assumes this list is sorted and free of duplicates.
word_nonzero_indices = [3,6,10,15,19]
# Number of all-zero rows between each populated row and the next one
# (0 after the last populated row).
index_minus = [nxt - cur - 1
               for cur, nxt in zip(word_nonzero_indices, word_nonzero_indices[1:])]
index_minus.append(0)

V = 20
K = 5

# Row r owns the stored entries indptr[r] .. indptr[r + 1] - 1, so the final
# row pointer needs V + 1 entries.
indptr = [0]

for i,j in zip(word_nonzero_indices, index_minus):
    # A populated row adds K stored entries (previously hard-coded as 5) ...
    indptr.append(indptr[-1] + K)
    # ... and every empty row that follows repeats the running total.
    indptr += [indptr[-1]] * j

# Empty rows before the first populated row all start at offset 0.
begin_append = [0] * min(word_nonzero_indices)
# Empty rows after the last populated row must repeat the final total;
# padding them with zeros produced an invalid (decreasing) row pointer.
end_append = [indptr[-1]] * (V - max(word_nonzero_indices) - 1)

indptr = begin_append + indptr + end_append
data = np.ones(indptr[-1])
indices = np.tile(range(K), len(word_nonzero_indices))

# Build through the constructor so scipy validates the arrays and chooses an
# index dtype wide enough for them; the earlier explicit int8 cast overflowed
# as soon as more than 127 entries were stored.
mat = csr_matrix((data, indices, np.asarray(indptr)), shape=(V, K))

In [103]:
a = {}
a[0] = mat

In [108]:
a[0][1,:] /= 10000

In [109]:
a[0].A

array([[0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.001, 0.001, 0.001, 0.001, 0.001],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [1.   , 1.   , 1.   , 1.   , 1.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [1.   , 1.   , 1.   , 1.   , 1.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [1.   , 1.   , 1.   , 1.   , 1.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [1.   , 1.   , 1.   , 1.   , 1.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   ],
       [1.   , 1.   , 1.   , 1.   , 1.   ]])

In [93]:
mat.data[:5] = 10

In [94]:
# Coordinates of every non-zero entry of `mat`, looked up once.
rows, cols = mat.nonzero()
# Compare the values at those coordinates with 3 and keep the first row of
# the comparison result as the boolean mask.
nonzero_mask = np.array(mat[rows, cols] < 3)[0]
# Keep only the coordinates whose value is below 3.
rows = rows[nonzero_mask]
cols = cols[nonzero_mask]

In [95]:
mat[rows,cols] *= 100
mat

<20x5 sparse matrix of type '<class 'numpy.float64'>'
	with 25 stored elements in Compressed Sparse Row format>

In [99]:
mat[1,:]=100