# Init for pyqcu.

In [3]:
import cupy as cp
import functools
import cupyx.scipy.sparse as cpx_sparse
from pyqcu import eigen, bistabcg


    @@@@@@######QCU NOTES START######@@@@@@@
    0. Required: MPI(e.g. 4.1.2), CUDA(e.g. 12.4), CMAKE(e.g. 3.22.1), GCC(e.g. 11.4.0), HDF5-MPI(e.g. 1.10.7,'apt install libhdf5-mpi-dev && export HDF5_MPI="ON" && pip install --no-binary=h5py h5py').
    1. The libqcu.so was compiled when pyqcu setup in download_path/PyQCU/lib, please add this path to your LD_LIBRARY_PATH.
    2. The QCU(PyQCU) splite grid by x->y->z->t, lattice by x->y->z->t->p->d->c->c or x->y->z->t->c->s(->p) and x->y->z->t->c->s->c->s(->p).
    3. The QUDA(PyQUDA) splite grid by t->z->y->x, lattice by c->c->x->y->z->t->p->d or c->s->x->y->z->t(->p) and c->s->c->s->x->y->z->t(->p).
    4. The QCU input params in numpy array(dtype=np.int32), argv in  numpy array(dtype=np.float32 or float64) array, set_ptrs in numpy array(dtype=np.int64), other in cupy array(dtype=cp.complex64 or complex128).
    5. The smallest lattice size is (x=4,y=4,z=4,t=8) that QCU support.
    @@@@@@######QCU NOTES END######@@@@@@@
    


# Give matvec.

In [4]:
index = -1
n = 16**3*3


def generate_sparse_complex_psd_matrix(n, density=0.1):
    real_part = cpx_sparse.random(
        n, n, density=density, format="csr", dtype=cp.float32)
    imag_part = cpx_sparse.random(
        n, n, density=density, format="csr", dtype=cp.float32)
    A = real_part + 1j * imag_part
    A_hermitian = A + A.getH()
    A_psd = A_hermitian + n * cpx_sparse.identity(n, dtype=cp.complex64)
    return A_psd


A = generate_sparse_complex_psd_matrix(n)
print(A.shape)


def matvec(src):
    return A@src

(12288, 12288)


# Give guage's eigenvalues and eigenvectors

In [5]:
eigen_solver = eigen.solver(
    n=n, k=10,matvec=matvec,dtype=A.dtype)
eigenvalues, eigenvectors = eigen_solver.run()

eigen_index: 0, iter: 0, alpha: 0.000000000, beta: 1.000000000, tol: inf, lambda: 12295.275390625, degree: 20
eigen_index: 0, iter: 1, alpha: 6147.637695312, beta: 1.000000000, tol: 5.470701e-02, lambda: 13006.840820312, degree: 30
eigen_index: 0, iter: 2, alpha: 6147.637695312, beta: 1.000000000, tol: 3.771340e-02, lambda: 13516.597656250, degree: 30
eigen_index: 0, iter: 3, alpha: 6147.637695312, beta: 1.000000000, tol: 1.031611e-04, lambda: 13517.992187500, degree: 30
eigen_index: 0, iter: 4, alpha: 6147.637695312, beta: 1.000000000, tol: 1.444833e-07, lambda: 13517.994140625, degree: 30
eigen_index: 0, time: 0.62s
eigen_index: 1, iter: 0, alpha: 6147.637695312, beta: 12295.275390625, tol: inf, lambda: 12331.339843750, degree: 30
eigen_index: 1, iter: 1, alpha: 6165.669921875, beta: 12295.275390625, tol: 3.209952e-04, lambda: 12327.382812500, degree: 45
eigen_index: 1, iter: 2, alpha: 6165.669921875, beta: 12295.275390625, tol: 4.582540e-04, lambda: 12321.736328125, degree: 45
eigen

In [6]:
print(eigenvalues)

[13517.994+0.j 12321.73 +0.j 12321.729+0.j 12321.729+0.j 12321.729+0.j
 12321.729+0.j 12321.729+0.j 12321.727+0.j 12321.729+0.j 12321.727+0.j]


# Run matvec(eigenvector[.]) ?= eigenvalue[.]*eigenvector[.] for eigen test.

In [7]:
for i, ev in enumerate(eigenvalues):
    print(f"λ_{i} = {ev:.2e}")
    # Verify eigenvector
    v = eigenvectors[i]
    w = cp.zeros_like(v)
    w = matvec(v)
    error = cp.linalg.norm(w - ev * v) / cp.linalg.norm(w)
    print(f"Relative error: {error:.2e}")
    j = i+1
    if j == len(eigenvalues):
        j = 0
    print(
        f"Diff between λ_{i} and λ_{j}: {cp.linalg.norm(eigenvectors[i] - eigenvectors[j])/cp.linalg.norm(eigenvectors[i]):.2e}")

λ_0 = 1.35e+04+0.00e+00j
Relative error: 5.11e-06
Diff between λ_0 and λ_1: 1.41e+00
λ_1 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_1 and λ_2: 3.77e-01
λ_2 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_2 and λ_3: 1.22e+00
λ_3 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_3 and λ_4: 1.96e+00
λ_4 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_4 and λ_5: 9.58e-01
λ_5 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_5 and λ_6: 1.55e+00
λ_6 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_6 and λ_7: 2.00e+00
λ_7 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_7 and λ_8: 1.20e+00
λ_8 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_8 and λ_9: 1.98e+00
λ_9 = 1.23e+04+0.00e+00j
Relative error: 2.57e-03
Diff between λ_9 and λ_0: 1.41e+00


# Sovle (A-a)x+b = b by BISTABCG

In [8]:
a = eigenvalues[index]
print(a)
b = cp.ones(n, dtype=A.dtype)

(12321.727+0j)


In [9]:
def _matvec(src, a,b):
    return matvec(src)-a+b

In [10]:

bistabcg_solver = bistabcg.slover(
    b=b, matvec=functools.partial(_matvec, a=a, b=b), max_iter=10000, tol=1e-4)
x = bistabcg_solver.run()

Iteration 0: Residual = 5.603251e+12, Time = 0.005901 s
Iteration 1: Residual = 2.344625e+07, Time = 0.001344 s
Iteration 2: Residual = 8.258773e+03, Time = 0.000751 s
Iteration 3: Residual = 1.085775e+04, Time = 0.000622 s
Iteration 4: Residual = 7.676576e+01, Time = 0.000627 s
Iteration 5: Residual = 6.147352e+02, Time = 0.000779 s
Iteration 6: Residual = 6.734724e+02, Time = 0.000701 s
Iteration 7: Residual = 7.973093e+03, Time = 0.000744 s
Iteration 8: Residual = 1.902619e+03, Time = 0.000721 s
Iteration 9: Residual = 9.343336e+03, Time = 0.000676 s
Iteration 10: Residual = 2.137592e+03, Time = 0.000628 s
Iteration 11: Residual = 7.359327e+03, Time = 0.000596 s
Iteration 12: Residual = 5.325643e+02, Time = 0.001140 s
Iteration 13: Residual = 4.537795e+03, Time = 0.000580 s
Iteration 14: Residual = 2.435121e+03, Time = 0.000625 s
Iteration 15: Residual = 1.019555e+04, Time = 0.000680 s
Iteration 16: Residual = 1.446562e+03, Time = 0.001334 s
Iteration 17: Residual = 9.563970e+03, Ti

KeyboardInterrupt: 

In [12]:
b.flatten()[:50]

array([1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j], dtype=complex64)

In [11]:
print(b.shape)
bistabcg_solver = bistabcg.slover(
    b=b, matvec=matvec, max_iter=10000, tol=1e-9)
_x = bistabcg_solver.run()

(12288,)
Iteration 0: Residual = 3.738230e+12, Time = 0.002200 s
Iteration 1: Residual = 1.285230e+04, Time = 0.000892 s
Iteration 2: Residual = 3.782762e-04, Time = 0.000801 s
Iteration 3: Residual = 5.442041e-12, Time = 0.001421 s
Converged at iteration 3 with residual 5.442041e-12

Performance Statistics:
Total time: 0.377390 s
Average time per iteration: 0.001328 s


# Verify above

In [None]:
x.flatten()[:50]

In [None]:
eigenvectors[index].flatten()[:50]

In [None]:
Ax = matvec(x)
Ax.flatten()[:50]

In [None]:
ax = a*x
ax.flatten()[:50]

In [None]:
print(cp.linalg.norm(Ax-ax)/cp.linalg.norm(ax))

# End

In [None]:
# bistabcg_solver.end()
