In [1]:
import numpy as np
import random

### Framework:
- $A$ is a sparse matrix of size $n \times m$, with each row having at most $L$ nonzero entries
- We assume that the entries of $A$ have been scaled to $[-1,1]$

#### First tests: find the limiting sizes $(n,m)$ beyond which computing $A^TA$ is no longer feasible

In [2]:
def sparse_generator(n,m,L=1):
    """
    Function that generates a random matrix with n rows and m columns, each row having at most L nonzero entries.
    The result is stored as a dense numpy array, so memory use grows with n*m even though few entries are nonzero.
    input:
    n: number of rows
    m: number of columns
    L: maximum number of nonzero entries per row (values larger than m are capped at m)
    output:
    random sparse matrix A, a numpy array of shape (n,m) whose filled entries are drawn uniformly from [-1,1)
    raises:
    ValueError if L is negative
    """
    if L < 0:
        raise ValueError("L must be non-negative")
    # a row has only m positions; without the cap random.sample raises when L > m
    per_row = min(L, m)
    A = np.zeros((n,m))
    for row in range(n):
        # values are drawn before positions; keep this order so results for a given random.seed stay stable
        new_value = [2*random.random()-1 for _ in range(per_row)] # random.random() is in [0,1), so values are in [-1,1)
        index = random.sample(range(m),per_row) # distinct column indexes of the filled entries
        for (v,i) in zip(new_value, index):
            A[row,i] = v
    return A

In [3]:
def naive_mult_np(A): # relies on numpy's matrix-product operator
    """
    Compute the product of the transpose of A with A using the @ operator
    input:
    A: matrix
    output:
    A^T A: result of A.T @ A
    """
    A_transposed = A.T
    product = A_transposed @ A
    return product

In [4]:
def naive_mult(A): # function that avoids numpy's matrix product (uses only elementwise operations and stacking)
    """
    Function that computes A^T A row by row, without calling a matrix-product routine
    input:
    A: 2-D numpy array of shape (n,m)
    output:
    A^T A: numpy array of shape (m,m); row r is the sum over i of A[i][r]*A[i]
    """
    (n,m) = A.shape
    if n == 0 or m == 0:
        # no rows to accumulate, or no columns to produce: the product is an all-zero (m,m) array
        return np.zeros((m,m), dtype=A.dtype)
    # row r of A^T A is the combination of the rows of A weighted by column r of A
    rows = [sum(A[i][r]*A[i] for i in range(n)) for r in range(m)]
    # stacking once keeps the result 2-D even when m == 1 and avoids re-copying the partial result on every iteration
    return np.vstack(rows)

In [5]:
A = sparse_generator(n=10, m=3, L=1) # small 10x3 example, at most one nonzero entry per row

In [6]:
# display A^T A as computed by the loop-based implementation
naive_mult(A)

array([[2.10128915, 0.        , 0.        ],
       [0.        , 0.07994209, 0.        ],
       [0.        , 0.        , 0.55157068]])

In [7]:
# display A^T A as computed with numpy's @ operator, for comparison with naive_mult
naive_mult_np(A)

array([[2.10128915, 0.        , 0.        ],
       [0.        , 0.07994209, 0.        ],
       [0.        , 0.        , 0.55157068]])

In [8]:
# NOTE(review): m and n are not referenced by any visible cell - presumably the target problem size; confirm or remove
m, n = 10**13, 10**4

In [14]:
# n = 10**2: can compute until m = 10**7
# m = 10**8 asks for a dense (100, 100000000) float64 array (74.5 GiB on the recorded run).
# The failure is the result being demonstrated, so report it instead of aborting a Run All.
try:
    A = sparse_generator(10**2,10**8,L=1)
except MemoryError as err:
    print("MemoryError:", err)

MemoryError: Unable to allocate 74.5 GiB for an array with shape (100, 100000000) and data type float64

In [26]:
# n = 10**2: can multiply until m = 10**4
A = sparse_generator(n=10**2, m=10**4, L=1)

naive_mult_np(A)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [17]:
# n = 10**3: can compute until m = 10**6
# m = 10**7 asks for a dense (1000, 10000000) float64 array (74.5 GiB on the recorded run).
# The failure is the result being demonstrated, so report it instead of aborting a Run All.
try:
    A = sparse_generator(10**3,10**7,L=1)
except MemoryError as err:
    print("MemoryError:", err)

MemoryError: Unable to allocate 74.5 GiB for an array with shape (1000, 10000000) and data type float64

In [24]:
# n = 10**3: can multiply until m = 10**4
A = sparse_generator(n=10**3, m=10**4, L=1)

naive_mult_np(A)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])