In [1]:
import numpy as np
from scipy import sparse
import sys
import os
import time
from scipy.sparse import csc_matrix, csr_matrix, coo_matrix

In [2]:
# Seed NumPy's legacy global RNG so the np.random.randint draws in later cells are reproducible.
np.random.seed(0)

## Sparse .npz (compressed) takes the least disk space regardless of density
Uncompressed sparse matrices take more disk space than the dense `.npy` files when the matrices are very dense (e.g. `CCpGeAeGaD` at 97.8% density).

In [3]:
# For each matrix: report shape and density, write compressed and uncompressed
# CSC copies next to the dense file, then compare the three on-disk sizes.
for name in ['CCpGeAeGaD', 'DaGpBP', 'MFpGdCcSE', 'GiGpBP']:
    dense_path = f'data/{name}-dense-uncompressed.npy'
    compressed_path = f'data/{name}-sparse-compressed.npz'
    uncompressed_path = f'data/{name}-sparse-uncompressed.npz'

    dense = np.load(dense_path)
    print(name, dense.shape)

    # Fraction of entries that are nonzero.
    nonzero_fraction = np.count_nonzero(dense) / dense.size
    print(f'density: {nonzero_fraction:.3%}')

    as_csc = sparse.csc_matrix(dense, copy=True)
    for target, compress in ((compressed_path, True), (uncompressed_path, False)):
        sparse.save_npz(target, as_csc, compressed=compress)

    # File sizes in MB (10**6 bytes).
    dense_mb, compressed_mb, uncompressed_mb = (
        os.path.getsize(path) / 1000000
        for path in (dense_path, compressed_path, uncompressed_path)
    )
    print(f'dense-uncompressed: {dense_mb :.4g} MB\nsparse-compressed: {compressed_mb :.4g} MB\nsparse-uncompressed: {uncompressed_mb :.4g} MB\n')

CCpGeAeGaD (1391, 137)
density: 97.807%
dense-uncompressed: 1.525 MB
sparse-compressed: 1.347 MB
sparse-uncompressed: 2.238 MB

DaGpBP (137, 11381)
density: 21.031%
dense-uncompressed: 12.47 MB
sparse-compressed: 2.529 MB
sparse-uncompressed: 3.982 MB

MFpGdCcSE (2884, 5734)
density: 14.093%
dense-uncompressed: 132.3 MB
sparse-compressed: 17.79 MB
sparse-uncompressed: 27.99 MB

GiGpBP (20945, 11381)
density: 4.650%
dense-uncompressed: 1907 MB
sparse-compressed: 73.81 MB
sparse-uncompressed: 133.1 MB



## Memory-map reduces matrix-vector multiplication time for lower-density matrices

Performing a normal (full) load before a memory-mapped load decreases the memory-mapped load time (compare MMAP with MMAP2 below). However, performing the memory-mapped load first does not speed up the normal load. Note that this effect only appears for the very dense matrix, which is also the smallest one here. For fair comparisons, we should always perform the memory-mapped load first in the sequence.

In [4]:
def _time_dense_product(path, vec, mmap_mode):
    """Time one load-and-multiply round trip for a dense ``.npy`` matrix.

    Parameters
    ----------
    path : str
        Location of the ``.npy`` file.
    vec : numpy.ndarray
        Left operand of the product ``vec @ matrix``.
    mmap_mode : str or None
        Forwarded to ``np.load``; ``None`` reads the whole file into memory,
        ``'r'`` opens it as a read-only memory map.

    Returns
    -------
    float
        Elapsed seconds covering both ``np.load`` and the multiplication.
    """
    # perf_counter is monotonic and high resolution; time.time() is a wall
    # clock that can be too coarse (or jump) for sub-millisecond timings.
    start = time.perf_counter()
    matrix = np.load(path, mmap_mode=mmap_mode)
    _ = vec @ matrix
    # The matrix and the product are released when the function returns,
    # i.e. outside the timed region.
    return time.perf_counter() - start


for i in ['CCpGeAeGaD', 'DaGpBP', 'MFpGdCcSE', 'GiGpBP']:
    print(i)
    dense_path = f'data/{i}-dense-uncompressed.npy'
    mat = np.load(dense_path)

    density = 100 * (mat != 0).sum() / np.prod(mat.shape)
    print(f'density: {density :.3} %')

    # Create a vector to multiply
    vector_size = mat.shape[0]
    vec = np.zeros((1, vector_size))
    # 4 search nodes
    indices = np.random.randint(0, high=vector_size, size=4)
    vec[0, indices] = 1
    del mat

    # Measurement order matters: memory-map first, then a normal load,
    # then a second memory-map for comparison.
    time_2 = _time_dense_product(dense_path, vec, mmap_mode='r')
    time_1 = _time_dense_product(dense_path, vec, mmap_mode=None)
    time_3 = _time_dense_product(dense_path, vec, mmap_mode='r')

    print(f'Normal: {1000 * time_1 :.3g} ms\nMMAP: {1000 * time_2 :.3g} ms\nMMAP2: {1000 * time_3 :.3g} ms\n')

CCpGeAeGaD
density: 97.8 %
Normal: 0.395 ms
MMAP: 0.706 ms
MMAP2: 0.267 ms

DaGpBP
density: 21.0 %
Normal: 2.2 ms
MMAP: 0.833 ms
MMAP2: 0.869 ms

MFpGdCcSE
density: 14.1 %
Normal: 38.7 ms
MMAP: 5.19 ms
MMAP2: 21.7 ms

GiGpBP
density: 4.65 %
Normal: 538 ms
MMAP: 69.2 ms
MMAP2: 70.1 ms



## Sparse (uncompressed/compressed) vs Dense

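Uncompressed sparse matrices load and multiply in roughly the same time as dense matrices (somewhat slower for the small, dense matrices and faster for the large, sparse ones). The order of the matrix reads does not affect the timings for sparse matrices read from `.npz` files. Compressed sparse matrices take several times longer to load and multiply than uncompressed sparse matrices, and up to roughly an order of magnitude longer than dense matrices for the smallest matrix.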

In [5]:
def _time_load_and_multiply(loader, path, vec):
    """Time loading a matrix with ``loader(path)`` and computing ``vec @ matrix``.

    Parameters
    ----------
    loader : callable
        Function that takes a file path and returns the matrix
        (``np.load`` or ``sparse.load_npz`` in this cell).
    path : str
        File to load.
    vec : numpy.ndarray
        Left operand of the product.

    Returns
    -------
    float
        Elapsed seconds covering both the load and the multiplication.
    """
    # perf_counter is monotonic and high resolution; time.time() is a wall
    # clock that can be too coarse (or jump) for millisecond-scale timings.
    start = time.perf_counter()
    matrix = loader(path)
    _ = vec @ matrix
    # Loaded matrix and product are released on return, outside the timed region.
    return time.perf_counter() - start


for i in ['CCpGeAeGaD', 'DaGpBP', 'MFpGdCcSE', 'GiGpBP']:
    print(i)
    dense_path = f'data/{i}-dense-uncompressed.npy'
    mat = np.load(dense_path)

    density = 100 * (mat != 0).sum() / np.prod(mat.shape)
    print(f'density: {density :.3} %')

    # Create a vector to multiply
    vector_size = mat.shape[0]
    vec = np.zeros((1, vector_size))
    # 4 search nodes
    indices = np.random.randint(0, high=vector_size, size=4)
    vec[0, indices] = 1
    del mat

    # Same measurement order as before: dense, compressed sparse, uncompressed sparse.
    time_1 = _time_load_and_multiply(np.load, dense_path, vec)
    time_2 = _time_load_and_multiply(sparse.load_npz, f'data/{i}-sparse-compressed.npz', vec)
    time_3 = _time_load_and_multiply(sparse.load_npz, f'data/{i}-sparse-uncompressed.npz', vec)

    print(f'Dense No Memmap: {1000 * time_1 :.4g} ms\nCompressed Sparse: {1000 * time_2 :.4g} ms\nUncompressed Sparse: {1000 * time_3 :.4g} ms\n')

CCpGeAeGaD
density: 97.8 %
Dense No Memmap: 1.171 ms
Compressed Sparse: 9.154 ms
Uncompressed Sparse: 3.733 ms

DaGpBP
density: 21.0 %
Dense No Memmap: 2.751 ms
Compressed Sparse: 15.2 ms
Uncompressed Sparse: 4.849 ms

MFpGdCcSE
density: 14.1 %
Dense No Memmap: 38.17 ms
Compressed Sparse: 105.5 ms
Uncompressed Sparse: 25.24 ms

GiGpBP
density: 4.65 %
Dense No Memmap: 537.7 ms
Compressed Sparse: 513.9 ms
Uncompressed Sparse: 143.5 ms



## Subset matrix memmap based on nonzero rows of the search vector

In [6]:
def _time_memmap_product(path, product, mmap_mode='r'):
    """Time loading a dense ``.npy`` matrix and applying ``product`` to it.

    Parameters
    ----------
    path : str
        Location of the ``.npy`` file.
    product : callable
        Takes the loaded matrix and performs the multiplication being measured.
    mmap_mode : str or None, optional
        Forwarded to ``np.load``; ``'r'`` (default) memory-maps the file,
        ``None`` reads it fully into memory.

    Returns
    -------
    float
        Elapsed seconds covering both ``np.load`` and ``product(matrix)``.
    """
    # perf_counter is monotonic and high resolution; time.time() is a wall
    # clock that can be too coarse (or jump) for millisecond-scale timings.
    start = time.perf_counter()
    matrix = np.load(path, mmap_mode=mmap_mode)
    _ = product(matrix)
    # Matrix and result are released on return, outside the timed region.
    return time.perf_counter() - start


for i in ['CCpGeAeGaD', 'DaGpBP', 'MFpGdCcSE', 'GiGpBP']:
    print(i)
    dense_path = f'data/{i}-dense-uncompressed.npy'
    mat = np.load(dense_path)
    print(mat.shape)

    # Only the row count is needed from the full load.
    vector_size = mat.shape[0]
    del mat

    # 4 and 10 search nodes.
    # NOTE: randint samples with replacement, so a repeated index leaves vec4/vec10
    # with fewer nonzeros, while the "subset" variant below adds that row twice.
    indices4 = np.random.randint(0, high=vector_size, size=4)
    indices10 = np.random.randint(0, high=vector_size, size=10)
    vec4 = np.zeros(vector_size)
    vec10 = np.zeros(vector_size)
    vec4[indices4] = 1
    vec10[indices10] = 1

    # Sparse vectors
    csc_vec4 = csc_matrix(vec4)
    csc_vec10 = csc_matrix(vec10)
    csr_vec4 = csr_matrix(vec4)
    csr_vec10 = csr_matrix(vec10)
    coo_vec4 = coo_matrix(vec4)
    coo_vec10 = coo_matrix(vec10)

    # No memmap: absolute baselines that every other measurement is divided by.
    time_1 = _time_memmap_product(dense_path, lambda m: vec4 @ m, mmap_mode=None)
    time_2 = _time_memmap_product(dense_path, lambda m: vec10 @ m, mmap_mode=None)
    print_str = (
        f'no memmap4: {1000 * time_1 :.4g} ms\n'
        f'no memmap10: {1000 * time_2 :.4g} ms\n'
    )

    # (label, product applied to the memory-mapped matrix, matching baseline).
    # Every "10" entry uses the 10-node operand; the sparse "10" rows
    # previously reused the 4-node vectors by mistake.
    comparisons = [
        # Simple memmap
        ('memmap4', lambda m: vec4 @ m, time_1),
        ('memmap10', lambda m: vec10 @ m, time_2),
        # Select the search rows, then multiply by a new vector of ones
        ('subset 4', lambda m: np.ones(4) @ m[indices4], time_1),
        ('subset 10', lambda m: np.ones(10) @ m[indices10], time_2),
        # Multiply by a sparse vector: CSC, CSR, COO
        ('sparse-csc 4', lambda m: csc_vec4 @ m, time_1),
        ('sparse-csc 10', lambda m: csc_vec10 @ m, time_2),
        ('sparse-csr 4', lambda m: csr_vec4 @ m, time_1),
        ('sparse-csr 10', lambda m: csr_vec10 @ m, time_2),
        ('sparse-coo 4', lambda m: coo_vec4 @ m, time_1),
        ('sparse-coo 10', lambda m: coo_vec10 @ m, time_2),
    ]
    for label, product, baseline in comparisons:
        elapsed = _time_memmap_product(dense_path, product)
        print_str += f'{label}: {elapsed / baseline :.3} times no-memmap\n'

    print(print_str)

CCpGeAeGaD
(1391, 137)
no memmap4: 0.4876 ms
no memmap10: 0.3936 ms
memmap4: 0.617 times no-memmap
memmap10: 0.693 times no-memmap
subset 4: 0.603 times no-memmap
subset 10: 0.742 times no-memmap
sparse-csc 4: 1.14 times no-memmap
sparse-csc 10: 1.14 times no-memmap
sparse-csr 4: 0.829 times no-memmap
sparse-csr 10: 0.996 times no-memmap
sparse-coo 4: 1.05 times no-memmap
sparse-coo 10: 1.27 times no-memmap

DaGpBP
(137, 11381)
no memmap4: 3.015 ms
no memmap10: 2.264 ms
memmap4: 0.317 times no-memmap
memmap10: 0.31 times no-memmap
subset 4: 0.519 times no-memmap
subset 10: 0.698 times no-memmap
sparse-csc 4: 1.73 times no-memmap
sparse-csc 10: 2.0 times no-memmap
sparse-csr 4: 1.78 times no-memmap
sparse-csr 10: 3.04 times no-memmap
sparse-coo 4: 6.88 times no-memmap
sparse-coo 10: 9.84 times no-memmap

MFpGdCcSE
(2884, 5734)
no memmap4: 38.59 ms
no memmap10: 51.75 ms
memmap4: 0.272 times no-memmap
memmap10: 0.265 times no-memmap
subset 4: 0.0846 times no-memmap
subset 10: 0.0817 times

## Ensure that different forms give the same results

In [7]:
# Persist a small 3x3 integer matrix (1..9, row-major) for the equivalence checks.
mat = np.arange(1, 10).reshape(3, 3)
np.save('test_mat.npy', mat)
# Drop the in-memory copy; it is read back from disk afterwards.
del mat

In [8]:
# Re-open the saved test matrix as a read-only memory map.
mat = np.load('test_mat.npy', mmap_mode='r')

In [9]:
# 1x3 boolean row vector with the first two entries set.
vec = np.array([[1,1,0]], dtype=np.bool_)

# Dense product with the memory-mapped matrix: sums the rows selected by `vec`.
vec @ mat

array([[5, 7, 9]])

In [10]:
# Same selection vector stored in CSC format.
csc_vec = csc_matrix(vec)

# Sparse @ memory-mapped dense product; should match the dense `vec @ mat` result.
csc_vec @ mat

array([[5, 7, 9]], dtype=int64)

In [11]:
# Same selection vector stored in CSR format.
csr_vec = csr_matrix(vec)

# Sparse @ memory-mapped dense product; should match the dense `vec @ mat` result.
csr_vec @ mat

array([[5, 7, 9]], dtype=int64)

In [12]:
# Same selection vector stored in COO format.
coo_vec = coo_matrix(vec)

# Sparse @ memory-mapped dense product; should match the dense `vec @ mat` result.
coo_vec @ mat

array([[5, 7, 9]], dtype=int64)

In [13]:
# Build the boolean 1x3 selection vector directly from (values, (rows, cols))
# triplets instead of converting a dense array.
sparse_vec = coo_matrix(
    (
        [1, 1],            # stored values
        ([0, 0], [0, 1]),  # (row indices, column indices)
    ),
    shape=(1, 3),
    dtype=np.bool_,
)

In [14]:
# Densify the COO vector to inspect which entries are set.
sparse_vec.toarray()

array([[ True,  True, False]])

In [15]:
# Multiply the directly constructed COO vector by the memory-mapped matrix.
sparse_vec @ mat

array([[5, 7, 9]], dtype=int64)

In [16]:
# Dense search vectors sized to the row count of the test matrix.
vector_size = mat.shape[0]

# NOTE: despite the `4`/`10` suffixes, only 1 and 2 search nodes are drawn here.
indices4 = np.random.randint(0, high=vector_size, size=1)
indices10 = np.random.randint(0, high=vector_size, size=2)

vec4 = np.zeros(vector_size)
vec4[indices4] = 1
vec10 = np.zeros(vector_size)
vec10[indices10] = 1

In [17]:
# Display the dense float search vector.
vec10

array([0., 1., 0.])

In [18]:
# Convert the float search vector to a boolean CSR matrix, then multiply by the memory-mapped matrix.
csr_matrix(vec10, dtype=np.bool_) @ mat

array([[4, 5, 6]], dtype=int64)

### TODO: 
* Test different dtypes for both mat and vec
* Compare sparse vector multiplication to dense, etc.
* Compare different types of sparse vectors