In [1]:
from scipy import sparse
import numpy as np
import numba
from scipy.sparse import linalg as sla

In [79]:
data = np.load('Ab.npz')
A = data['A']
b = data['b']

A_sparse = sparse.csc_matrix(A)
b_sparse = sparse.csc_matrix(b)
inv_A_sparse = sparse.linalg.inv(A_sparse)
inv_A = inv_A_sparse.toarray()
lu = sla.splu(A_sparse)

In [3]:
%timeit sparse.linalg.inv(A_sparse)

1 loop, best of 3: 7.08 s per loop


In [4]:
%timeit sla.splu(A_sparse)

100 loops, best of 3: 3.99 ms per loop


## Fast inverse

In [5]:
def inv_splu(A_sparse):
    N = A_sparse.shape[0]
    lu = sla.splu(A_sparse)
    return lu.solve(np.eye(N,dtype=np.complex128))

In [6]:
%timeit inv_splu(A_sparse)

1 loop, best of 3: 1.21 s per loop


In [7]:
def solve_sparse(A,b):   
    return sla.spsolve(A,b)

In [8]:
%timeit solve_sparse(A_sparse,b_sparse)

100 loops, best of 3: 6.14 ms per loop


## Using sparse LU

In [9]:
def solve_splu(lu,b):
    return lu.solve(b)

In [10]:
%timeit solve_splu(lu,b)

1000 loops, best of 3: 1.4 ms per loop


## Using Inverse

In [11]:
@numba.jit(nopython=True, cache=True)
def solve_inv(inv_A,b):
    return inv_A @ b

In [12]:
%timeit solve_inv(inv_A,b)

The slowest run took 14.31 times longer than the fastest. This could mean that an intermediate result is being cached.
100 loops, best of 3: 7.38 ms per loop


In [13]:
def solve_np(A,b):
    return np.linalg.solve(A,b)

In [14]:
%timeit solve_np(A,b)

1 loop, best of 3: 2.21 s per loop


In [15]:
@numba.jit(nopython=True, cache=True)
def lu_solver(L,U,b):
    '''
    Ax = b -> LUx = b. Then y is defined to be Ux
    ''' 
    N = b.shape[0]   
    x = np.zeros(b.shape,dtype=np.complex128)
    y = np.zeros(b.shape,dtype=np.complex128)
    
    # Forward solve Ly = b
    for i in range(N):
        y[i] = b[i]
        for j in range(N):
            y[i] -= L[i,j] * y[j]
        y[i] /= L[i,i]


    # Backward solve Ux = y
    for i in range(N-1,0):
        x[i] = y[i]

        for j in range(N+1):
        
            x[i] -= U[i,j] * x[j]
        

        x[i] /= U[i][i]
    

    return x


In [16]:
%timeit lu_solver(lu.L.toarray(),lu.U.toarray(),b)

1 loop, best of 3: 1.5 s per loop


In [17]:
L=lu.L.toarray()

In [18]:
@numba.vectorize([numba.float64[:](numba.int32[:], numba.int32[:], numba.float64[:], numba.float64[:])])
def mult_numbav(rows,cols,data,x):
    y = np.zeros(x.shape)
    it = 0
    for it in range(len(data)):
        y[rows[it],0] += data[it]*x[cols[it],0]
       
    return y

NotImplementedError: array(int32, 1d, A) cannot be represented as a Numpy dtype

In [None]:
y_numba = mult_numbav(A_row,A_col,A_data,x)
%timeit mult_numbav(A_row,A_col,A_data,x)

In [None]:
b[np.abs(b)>0].shape

In [None]:
@numba.jit(nopython=True,cache=True)
def solve_npbz(inv_A,b,N_bz):
    N = b.shape[0]
    X = np.zeros((N,1), dtype=np.complex128)
    for irow in range(N):
        X[irow,0] = inv_A[irow,0:N_bz] @ b[0:N_bz,0]
    #X = inv_A  @ b 
    

In [None]:
%timeit solve_npbz(inv_A,b,60)

In [None]:
X_sp = solve_sparse(A_sparse,b_sparse)

In [None]:
mask=(b==0)

In [None]:
mask

In [None]:
b[0:100]

In [213]:
error=X_sp-X[:,0]

In [214]:
np.max(np.abs(error))

6.2471947356980855e-11

In [183]:
X[:,0].shape

(2718,)

In [215]:
1440*83e-6

0.11952

In [186]:
error

array([  1.42108547e-12 +1.70530257e-13j,
        -3.96127575e-13 -1.42108547e-13j,
         2.07478479e-12 +4.97379915e-13j, ...,
         1.98951966e-12 -1.05160325e-12j,
         3.39284156e-13 -8.52651283e-14j,  -1.98951966e-13 -1.27897692e-13j])

In [42]:
import pyklu
from pyklu import solve_linear_system

In [21]:
A = np.array([[2, 3, 0, 0, 0], [3, 0, 4, 0, 6],
                 [0, -1, -3, 2, 0], [0, 0, 1, 0, 0],
                 [0, 4, 2, 0, 1]], dtype=np.double)
b = np.array([8, 45, -3, 3, 19], dtype=np.double)
x=pyklu.solve_linear_system(A, b)

In [22]:
x

array([ 1.,  2.,  3.,  4.,  5.])

In [29]:
indptr = np.array([0, 2, 5, 9, 10, 12])
indices = np.array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4])
data = np.array([2, 3, 3, -1, 4, 4, -3, 1, 2, 2, 6, 1])
A_test=sparse.csc_matrix((data, indices, indptr), shape=(5, 5))
b_test = [8, 45, -3, 3, 19]

In [31]:
A_test.indices

array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4], dtype=int32)

In [32]:
A_test

<5x5 sparse matrix of type '<class 'numpy.int64'>'
	with 12 stored elements in Compressed Sparse Column format>

In [53]:
c = np.copy(b[:,0].real)
x=pyklu.solve_linear_system(A.real, c)

In [57]:
%timeit pyklu.solve_linear_system(A.real, c)

1 loop, best of 3: 1.97 s per loop


In [43]:
solve_linear_system(5,indptr,indices,data,b_test)

TypeError: solve_linear_system() takes 2 positional arguments but 5 were given

In [44]:
A_test=sparse.csc_matrix((data, indices, indptr), shape=(5, 5))
b_test = [8, 45, -3, 3, 19]

solve_linear_system(A_test,b_test)

ValueError: data, indices, and indptr should be 1-D

In [58]:
import ctypes

In [59]:
libklu = ctypes.cdll.LoadLibrary('libpyklu.so')


In [74]:
n = 5
Ap = np.array([0, 2, 5, 9, 10, 12])
Ai = np.array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4])
Ax = np.array([2, 3, 3, -1, 4, 4, -3, 1, 2, 2, 6, 1.0])
b = np.array([8, 45, -3, 3, 190.])

c_Ap = np.ctypeslib.as_ctypes(Ap)
c_Ai = np.ctypeslib.as_ctypes(Ai)
c_Ax = np.ctypeslib.as_ctypes(Ax)
c_b = np.ctypeslib.as_ctypes(b)
    


libklu.solve_linear_system(n, c_Ap, c_Ai, c_Ax, c_b)

0

In [86]:
n = A_sparse.shape[0]
Ap = A_sparse.indptr
Ai = A_sparse.indices
Ax = np.copy(A_sparse.data.real)
#b = np.array([8, 45, -3, 3, 190.])

c_Ap = np.ctypeslib.as_ctypes(Ap)
c_Ai = np.ctypeslib.as_ctypes(Ai)
c_Ax = np.ctypeslib.as_ctypes(Ax)
c_b = np.ctypeslib.as_ctypes(np.copy(b.real))
    




0

In [88]:
%timeit libklu.solve_linear_system(n, c_Ap, c_Ai, c_Ax, c_b)

The slowest run took 4.30 times longer than the fastest. This could mean that an intermediate result is being cached.
100 loops, best of 3: 1.33 ms per loop
