# Init for pyqcu.

In [1]:
import cupy as cp
import functools
import cupyx.scipy.sparse as cpx_sparse
from pyqcu import eigen, bistabcg


    @@@@@@######QCU NOTES START######@@@@@@@
    0. Required: MPI(e.g. 4.1.2), CUDA(e.g. 12.4), CMAKE(e.g. 3.22.1), GCC(e.g. 11.4.0), HDF5-MPI(e.g. 1.10.7,'apt install libhdf5-mpi-dev && export HDF5_MPI="ON" && pip install --no-binary=h5py h5py').
    1. The libqcu.so was compiled when pyqcu setup in download_path/PyQCU/lib, please add this path to your LD_LIBRARY_PATH.
    2. The QCU(PyQCU) splite grid by x->y->z->t, lattice by x->y->z->t->p->d->c->c or x->y->z->t->c->s(->p) and x->y->z->t->c->s->c->s(->p).
    3. The QUDA(PyQUDA) splite grid by t->z->y->x, lattice by c->c->x->y->z->t->p->d or c->s->x->y->z->t(->p) and c->s->c->s->x->y->z->t(->p).
    4. The QCU input params in numpy array(dtype=np.int32), argv in  numpy array(dtype=np.float32 or float64) array, set_ptrs in numpy array(dtype=np.int64), other in cupy array(dtype=cp.complex64 or complex128).
    5. The smallest lattice size is (x=4,y=4,z=4,t=8) that QCU support.
    @@@@@@######QCU NOTES END######@@@@@@@
    


# Give matvec.

In [None]:
index = -1
n = 16**3*3


def generate_sparse_complex_psd_matrix(n, density=0.1):
    real_part = cpx_sparse.random(
        n, n, density=density, format="csr", dtype=cp.float32)
    imag_part = cpx_sparse.random(
        n, n, density=density, format="csr", dtype=cp.float32)
    A = real_part + 1j * imag_part
    A_hermitian = A + A.getH()
    A_psd = A_hermitian + n * cpx_sparse.identity(n, dtype=cp.complex64)
    return A_psd


A = generate_sparse_complex_psd_matrix(n)
print(A.shape)


def matvec(src):
    return A@src

(12288, 12288)


# Give guage's eigenvalues and eigenvectors

In [3]:
eigen_solver = eigen.solver(
    n=n, k=10,matvec=matvec,dtype=A.dtype)
eigenvalues, eigenvectors = eigen_solver.run()

eigen_index: 0, iter: 0, alpha: 0.000000000, beta: 1.000000000, tol: inf, lambda: 12307.875000000, degree: 20
eigen_index: 0, iter: 1, alpha: 6153.937500000, beta: 1.000000000, tol: 8.000148e-02, lambda: 13378.146484375, degree: 30
eigen_index: 0, iter: 2, alpha: 6153.937500000, beta: 1.000000000, tol: 1.031856e-02, lambda: 13517.628906250, degree: 30
eigen_index: 0, iter: 3, alpha: 6153.937500000, beta: 1.000000000, tol: 1.871075e-05, lambda: 13517.881835938, degree: 30
eigen_index: 0, iter: 4, alpha: 6153.937500000, beta: 1.000000000, tol: 7.224228e-08, lambda: 13517.880859375, degree: 30
eigen_index: 0, time: 0.63s
eigen_index: 1, iter: 0, alpha: 6153.937500000, beta: 12307.875000000, tol: inf, lambda: 12331.843750000, degree: 30
eigen_index: 1, iter: 1, alpha: 6165.921875000, beta: 12307.875000000, tol: 3.794655e-04, lambda: 12327.166015625, degree: 45
eigen_index: 1, iter: 2, alpha: 6165.921875000, beta: 12307.875000000, tol: 4.294795e-04, lambda: 12321.874023438, degree: 45
eigen

In [4]:
print(eigenvalues)

[13517.881+0.j 12364.526+0.j 12364.527+0.j 12364.273+0.j 12364.358+0.j
 12364.392+0.j 12364.611+0.j 12364.299+0.j 12364.189+0.j 12363.988+0.j]


# Run matvec(eigenvector[.]) ?= eigenvalue[.]*eigenvector[.] for eigen test.

In [5]:
for i, ev in enumerate(eigenvalues):
    print(f"λ_{i} = {ev:.2e}")
    # Verify eigenvector
    v = eigenvectors[i]
    w = cp.zeros_like(v)
    w = matvec(v)
    error = cp.linalg.norm(w - ev * v) / cp.linalg.norm(w)
    print(f"Relative error: {error:.2e}")
    j = i+1
    if j == len(eigenvalues):
        j = 0
    print(
        f"Diff between λ_{i} and λ_{j}: {cp.linalg.norm(eigenvectors[i] - eigenvectors[j])/cp.linalg.norm(eigenvectors[i]):.2e}")

λ_0 = 1.35e+04+0.00e+00j
Relative error: 2.17e-06
Diff between λ_0 and λ_1: 1.41e+00
λ_1 = 1.24e+04+0.00e+00j
Relative error: 8.32e-05
Diff between λ_1 and λ_2: 1.41e+00
λ_2 = 1.24e+04+0.00e+00j
Relative error: 8.23e-05
Diff between λ_2 and λ_3: 1.41e+00
λ_3 = 1.24e+04+0.00e+00j
Relative error: 8.73e-05
Diff between λ_3 and λ_4: 1.41e+00
λ_4 = 1.24e+04+0.00e+00j
Relative error: 8.96e-05
Diff between λ_4 and λ_5: 1.41e+00
λ_5 = 1.24e+04+0.00e+00j
Relative error: 9.09e-05
Diff between λ_5 and λ_6: 1.41e+00
λ_6 = 1.24e+04+0.00e+00j
Relative error: 8.62e-05
Diff between λ_6 and λ_7: 1.41e+00
λ_7 = 1.24e+04+0.00e+00j
Relative error: 8.25e-05
Diff between λ_7 and λ_8: 1.41e+00
λ_8 = 1.24e+04+0.00e+00j
Relative error: 8.55e-05
Diff between λ_8 and λ_9: 1.41e+00
λ_9 = 1.24e+04+0.00e+00j
Relative error: 8.81e-05
Diff between λ_9 and λ_0: 1.41e+00


# Sovle (A-a)x+b = b by BISTABCG

In [None]:
a = eigenvalues[index]
print(a)
b = cp.ones(n, dtype=A.dtype)

(12363.988+0j)


In [17]:
def _matvec(src, a,b):
    return matvec(src)-a+b

In [18]:

bistabcg_solver = bistabcg.slover(
    b=b, matvec=functools.partial(_matvec, a=a, b=b), max_iter=10000, tol=1e-4)
x = bistabcg_solver.run()

Iteration 0: Residual = 5.638789e+12, Time = 0.002756 s
Iteration 1: Residual = 1.179412e+10, Time = 0.000985 s
Iteration 2: Residual = 2.373721e+07, Time = 0.001278 s
Iteration 3: Residual = 6.301638e+06, Time = 0.000836 s
Iteration 4: Residual = 2.927048e+05, Time = 0.001809 s
Iteration 5: Residual = 2.011423e+03, Time = 0.001413 s
Iteration 6: Residual = 9.527966e+02, Time = 0.000877 s
Iteration 7: Residual = 3.931972e+02, Time = 0.001225 s
Iteration 8: Residual = 6.485209e+02, Time = 0.000986 s
Iteration 9: Residual = 5.845440e+02, Time = 0.001347 s
Iteration 10: Residual = 2.101310e+03, Time = 0.002147 s
Iteration 11: Residual = 1.729602e+03, Time = 0.001158 s
Iteration 12: Residual = 1.834833e+03, Time = 0.001302 s
Iteration 13: Residual = 2.531275e+03, Time = 0.006221 s
Iteration 14: Residual = 1.184557e+03, Time = 0.007631 s
Iteration 15: Residual = 9.978875e+02, Time = 0.004315 s
Iteration 16: Residual = 1.651738e+03, Time = 0.009576 s
Iteration 17: Residual = 1.552989e+03, Ti

# Verify above

In [22]:
x.flatten()[:50]

array([ 0.9352578 +0.17432903j, -0.09809227+0.51266515j,
        0.3494737 +0.1870849j ,  0.05165943+0.78011644j,
        0.0710963 -0.4887783j , -0.06398796+0.06514393j,
        0.9241129 +0.39381865j, -0.08921528+0.65430605j,
        0.7588276 +0.37275597j, -0.31486225-0.4141964j ,
        0.64618415-0.55979717j,  1.002003  -0.5487255j ,
        0.63146096+0.44902462j,  0.6046878 -0.4885354j ,
        0.61724764-0.841704j  ,  1.2617434 +0.32975316j,
        0.35284364-0.2508625j ,  0.49964425-0.29717842j,
       -0.6251878 -0.6205671j ,  1.2348243 +0.6293509j ,
       -0.49272984-0.5138372j ,  0.6097956 +0.04002363j,
        0.5344017 -0.163739j  ,  0.6504715 +0.3774693j ,
        0.5730125 +0.16133827j,  0.49275345-0.5025145j ,
        0.7993311 -1.0734435j ,  0.5607499 -0.66957384j,
        0.13732591+0.4772532j ,  0.33621034-0.5139005j ,
        1.3831662 +0.03789226j, -0.08831639-0.2975732j ,
        0.31111386+0.2456625j ,  0.65722436-0.01432219j,
        0.6438467 -0.44909933j,

In [24]:
eigenvectors[index].flatten()[:50]

array([ 0.00315608-0.00270784j, -0.00554961-0.00174139j,
       -0.00410563+0.00730266j,  0.00417216-0.00306396j,
       -0.00110378-0.00764759j, -0.00191253+0.0075335j ,
        0.00815901+0.00191157j,  0.00063553-0.00498729j,
       -0.0005487 +0.00356006j,  0.00081361-0.00692248j,
        0.00626485-0.006209j  , -0.00153926-0.00121986j,
       -0.00587304+0.00154753j, -0.00678094+0.00286404j,
       -0.00338536+0.01073748j,  0.00182666+0.00124793j,
        0.00235274+0.01024462j,  0.00428433+0.00012084j,
        0.00172741+0.0015106j , -0.00253865-0.00981667j,
        0.00705946+0.00345645j, -0.00428487-0.0072746j ,
        0.00099771-0.0009542j , -0.00928381-0.00969608j,
       -0.00198894-0.00310139j,  0.0012423 -0.00450908j,
       -0.00719493-0.00904782j,  0.00053922-0.01459711j,
        0.00648047+0.00721284j, -0.00598371+0.00123216j,
       -0.00399207-0.00244097j,  0.0161885 -0.0043939j ,
       -0.00076177-0.00382011j,  0.00333099+0.00544337j,
       -0.01618743+0.00415071j,

In [None]:
Ax = matvec(x)
Ax.flatten()[:50]

array([ 12011.987   +2133.6372j ,   -672.66406 +6261.053j  ,
         4842.4395  +2303.1445j ,   1209.1838  +9572.414j  ,
         1441.798   -6025.685j  ,   -253.2507   +804.2609j ,
        11899.895   +4850.2544j ,   -542.088   +8018.943j  ,
         9874.2705  +4577.5107j ,  -3402.2405  -5128.4727j ,
         8475.592   -6853.448j  ,  12897.714   -6716.3613j ,
         8331.053   +5478.143j  ,   8013.0864  -6020.137j  ,
         8126.3564 -10382.615j  ,  16040.745   +4024.9868j ,
         4877.9673  -3089.917j  ,   6696.19    -3647.9539j ,
        -7113.668   -7662.6104j ,  15722.433   +7734.3433j ,
        -5506.0186  -6318.8276j ,   8034.96     +495.06088j,
         7094.9253  -2025.2281j ,   8597.082   +4598.2217j ,
         7616.2065  +1969.8658j ,   6619.573   -6206.2485j ,
        10389.942  -13210.417j  ,   7450.11    -8231.737j  ,
         2256.9521  +5851.51j   ,   4676.015   -6367.512j  ,
        17525.434    +465.35175j,   -543.153   -3635.6892j ,
         4357.1714  +300

In [26]:
ax = a*x
ax.flatten()[:50]

array([ 11563.517   +2155.402j  ,  -1212.8116  +6338.586j  ,
         4320.8887  +2313.1155j ,    638.71655 +9645.351j  ,
          879.0338  -6043.249j  ,   -791.1463   +805.4388j ,
        11425.722   +4869.169j  ,  -1103.0566  +8089.8325j ,
         9382.136   +4608.7505j ,  -3892.9531  -5121.1196j ,
         7989.413   -6921.3257j ,  12388.753   -6784.4355j ,
         7807.376   +5551.7354j ,   7476.353   -6040.246j  ,
         7631.6426 -10406.818j  ,  15600.181   +4077.0642j ,
         4362.5547  -3101.6611j ,   6177.5957  -3674.3105j ,
        -7729.815   -7672.684j  ,  15267.354   +7781.287j  ,
        -6092.106   -6353.077j  ,   7539.5054   +494.85168j,
         6607.3364  -2024.467j  ,   8042.422   +4667.026j  ,
         7084.719   +1994.7844j ,   6092.398   -6213.083j  ,
         9882.921  -13272.043j  ,   6933.105   -8278.604j  ,
         1697.896   +5900.753j  ,   4156.901   -6353.86j   ,
        17101.451    +468.49945j,  -1091.9427  -3679.1917j ,
         3846.6082  +303

In [27]:
print(cp.linalg.norm(Ax-ax)/cp.linalg.norm(ax))

0.050476365


# End

In [None]:
# bistabcg_solver.end()
