# Init for pyqcu.

In [95]:
import cupy as cp
import numpy as np
import functools
from scipy.sparse.linalg import LinearOperator, eigsh
import cupyx.scipy.sparse as cpx_sparse
from pyqcu.cuda import define
from pyqcu.cuda import io
from pyqcu.cuda import qcu
from pyqcu.cuda import eigen, cg, bistabcg
from opt_einsum import contract
from pyqcu.cuda.set import params, argv, set_ptrs
params[define._NODE_RANK_] = define.rank
params[define._NODE_SIZE_] = define.size
index = -1
params[define._LAT_X_] = 8
params[define._LAT_Y_] = 8
params[define._LAT_Z_] = 4
params[define._LAT_T_] = 8
n = int(params[define._LAT_X_] / define._LAT_P_ * params[define._LAT_Y_] *
        params[define._LAT_Z_] * params[define._LAT_T_] *
        define._LAT_S_*define._LAT_C_)
params[define._MG_X_] = 2
params[define._MG_Y_] = 2
params[define._MG_Z_] = 2
params[define._MG_T_] = 2
print(params)

[      8       8       4       8 1048576       1       1       1       1
       0       0       1       0   10000       0       0       0       2
       2       2       2]


# Give matvec.

In [96]:
def generate_sparse_complex_psd_matrix(n, density=0.1):
    real_part = cpx_sparse.random(
        n, n, density=density, format="csr", dtype=cp.float32)
    imag_part = cpx_sparse.random(
        n, n, density=density, format="csr", dtype=cp.float32)
    A = real_part + 1j * imag_part
    A_hermitian = A + A.getH()
    A_psd = A_hermitian + n * cpx_sparse.identity(n, dtype=cp.complex64)
    return A_psd


A = generate_sparse_complex_psd_matrix(n)


def matvec(src):
    return (A@src.flatten()).reshape(src.shape)

# Give guage's eigenvalues and eigenvectors to hdf5 files. (pass, don't run this)

In [97]:
eigenvalues, eigenvectors = eigen.solver(
    n=n, k=params[define._LAT_E_], matvec=matvec, dtype=A.dtype)
eigenvectors = io.eigenvectors2esctzyx(eigenvectors, params)

eigen_index: 0, iter: 0, alpha: 0.000000000, beta: 1.000000000, tol: inf, lambda: 12293.986328125, degree: 20
eigen_index: 0, iter: 1, alpha: 6146.993164062, beta: 1.000000000, tol: 3.775226e-02, lambda: 12776.321289062, degree: 30
eigen_index: 0, iter: 2, alpha: 6146.993164062, beta: 1.000000000, tol: 5.465662e-02, lambda: 13515.005859375, degree: 30
eigen_index: 0, iter: 3, alpha: 6146.993164062, beta: 1.000000000, tol: 2.220707e-04, lambda: 13518.007812500, degree: 30
eigen_index: 0, iter: 4, alpha: 6146.993164062, beta: 1.000000000, tol: 1.444832e-07, lambda: 13518.009765625, degree: 30
eigen_index: 0, time: 0.43s
eigen_index: 1, iter: 0, alpha: 6146.993164062, beta: 12293.986328125, tol: inf, lambda: 12331.649414062, degree: 30
eigen_index: 1, iter: 1, alpha: 6165.824707031, beta: 12293.986328125, tol: 7.999197e-04, lambda: 12321.792968750, degree: 45
eigen_index: 1, iter: 2, alpha: 6165.824707031, beta: 12293.986328125, tol: 3.883641e-05, lambda: 12321.314453125, degree: 45
eigen

# Run matvec(eigenvector[.]) ?= eigenvalue[.]*eigenvector[.] for eigen test. (pass, don't run this)

In [98]:
for i, ev in enumerate(eigenvalues):
    print(f"λ_{i} = {ev:.2e}")
    # Verify eigenvector
    v = eigenvectors[i]
    w = cp.zeros_like(v)
    w = matvec(v)
    error = cp.linalg.norm(w - ev * v) / cp.linalg.norm(w)
    print(f"Relative error: {error:.2e}")
    j = i+1
    if j == len(eigenvalues):
        j = 0
    print(
        f"Diff between λ_{i} and λ_{j}: {cp.linalg.norm(eigenvectors[i] - eigenvectors[j])/cp.linalg.norm(eigenvectors[i]):.2e}")

λ_0 = 1.35e+04+0.00e+00j
Relative error: 7.48e-06
Diff between λ_0 and λ_1: 1.41e+00
λ_1 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_1 and λ_2: 1.97e+00
λ_2 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_2 and λ_3: 1.56e+00
λ_3 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_3 and λ_4: 2.00e+00
λ_4 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_4 and λ_5: 1.40e+00
λ_5 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_5 and λ_6: 4.72e-01
λ_6 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_6 and λ_7: 1.45e+00
λ_7 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_7 and λ_8: 1.93e+00
λ_8 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_8 and λ_9: 6.81e-02
λ_9 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_9 and λ_10: 2.52e-01
λ_10 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff between λ_10 and λ_11: 1.77e+00
λ_11 = 1.23e+04+0.00e+00j
Relative error: 2.54e-03
Diff betwe

# Give guage's orth_eigenvectors to hdf5 files. (pass, don't run this)

In [99]:
params

array([      8,       8,       4,       8, 1048576,       1,       1,
             1,       1,       0,       0,       1,       0,   10000,
             0,       0,       0,       2,       2,       2,       2],
      dtype=int32)

In [100]:
_eigenvectors = io.xxxtzyx2mg_xxxtzyx(input_array=eigenvectors, params=params)
_eigenvectors.shape  # escTtZzYyXx


def orthogonalize(eigenvectors):
    _eigenvectors = eigenvectors.copy()
    size_e, size_s, size_c, size_T, size_t, size_Z, size_z, size_Y, size_y, size_X, size_x = eigenvectors.shape
    print(size_e, size_s, size_c, size_T, size_t,
          size_Z, size_z, size_Y, size_y, size_X, size_x)
    for T in range(size_T):
        for Z in range(size_Z):
            for Y in range(size_Y):
                for X in range(size_X):
                    origin_matrix = eigenvectors[:,
                                                 :, :, T, :, Z, :, Y, :, X, :]
                    _shape = origin_matrix.shape
                    _origin_matrix = origin_matrix.reshape(size_e, -1)
                    condition_number = np.linalg.cond(_origin_matrix.get())
                    print(f"矩阵条件数: {condition_number}")
                    a = _origin_matrix[:, 0]
                    b = _origin_matrix[:, -1]
                    print(cp.dot(a.conj(), b))
                    Q = cp.linalg.qr(_origin_matrix.T)[0]
                    condition_number = np.linalg.cond(Q.get())
                    print(f"矩阵条件数: {condition_number}")
                    a = Q[:, 0]
                    b = Q[:, -1]
                    print(cp.dot(a.conj(), b))
                    _eigenvectors[:, :, :, T, :, Z, :, Y, :, X, :] = Q.T.reshape(
                        _shape)
    return _eigenvectors


orth_eigenvectors = orthogonalize(_eigenvectors)

Input Array Shape: (24, 4, 3, 8, 4, 8, 4)
Dest Shape: (24, 4, 3, 2, 4, 2, 2, 2, 4, 2, 2)
24 4 3 2 4 2 2 2 4 2 2
矩阵条件数: 93627.3046875
(6.498998e-05+0.0005486412j)
矩阵条件数: 1.000000238418579
(-2.188608e-08-2.561137e-09j)
矩阵条件数: 87661.3125
(0.0012322702-0.0024430056j)
矩阵条件数: 1.0000004768371582
(-3.0035153e-08-1.8626451e-09j)
矩阵条件数: 90922.3203125
(-0.00017779038+0.0004957713j)
矩阵条件数: 1.0000004768371582
(9.313226e-10-2.3283064e-10j)
矩阵条件数: 93629.6484375
(0.00038410607+0.0007837266j)
矩阵条件数: 1.0000004768371582
(8.381903e-09+4.656613e-10j)
矩阵条件数: 89215.3515625
(0.0018776364+0.00020112828j)
矩阵条件数: 1.000000238418579
(-1.5832484e-08+0j)
矩阵条件数: 91480.390625
(0.0013916817-0.004995555j)
矩阵条件数: 1.0000003576278687
(-1.8626451e-09-3.259629e-09j)
矩阵条件数: 88613.359375
(0.0004833568+4.0280393e-06j)
矩阵条件数: 1.0000003576278687
(4.656613e-10+1.3969839e-09j)
矩阵条件数: 91490.0234375
(0.0019504572-0.00028018386j)
矩阵条件数: 1.0000003576278687
(7.450581e-09+3.608875e-09j)
矩阵条件数: 90266.921875
(0.00019055343+0.00088627415j)


# MultiGrid - give grids.

In [101]:
testvectors = orth_eigenvectors
_src = io.xxxtzyx2mg_xxxtzyx(io.fermion2sctzyx(
    cp.ones(n, dtype=A.dtype), params), params)

Input Array Shape: (4, 3, 8, 4, 8, 4)
Dest Shape: (4, 3, 2, 4, 2, 2, 2, 4, 2, 2)


In [102]:
_src.shape

(4, 3, 2, 4, 2, 2, 2, 4, 2, 2)

<!-- # MultiGrid - R*vector.
![](./image0-dev40.png) -->

In [103]:
r_src = _src


def r_vec(src):
    return contract("escTtZzYyXx,scTtZzYyXx->eTZYX", testvectors, src)


r_dest = r_vec(r_src)

In [104]:
r_dest.shape

(24, 2, 2, 2, 2)

<!-- # MultiGrid - P*vector.
![](./image1-dev40.png) -->


In [105]:
p_src = r_dest


def p_vec(src):
    return contract("escTtZzYyXx,eTZYX->scTtZzYyXx", cp.conj(testvectors), src)


p_dest = p_vec(p_src)

In [106]:
p_dest.shape

(4, 3, 2, 4, 2, 2, 2, 4, 2, 2)

<!-- # MultiGrid - verify above.
![](./image2-dev40.png) -->

In [107]:
print(cp.linalg.norm(r_src))
print(cp.linalg.norm(p_dest))

110.85125
110.79802


In [108]:
print(cp.linalg.norm(r_src-p_dest)/cp.linalg.norm(r_src))

0.030987112


In [109]:
print(cp.linalg.norm(r_src-p_vec(r_vec(r_src)))/cp.linalg.norm(r_src))

0.030987112


In [110]:
r_src.flatten()[:50]

array([1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j, 1.+0.j,
       1.+0.j, 1.+0.j], dtype=complex64)

In [111]:
p_dest.flatten()[:50]

array([1.0029211 -0.04422641j, 1.0492618 +0.00337351j,
       1.0254331 +0.02966752j, 0.9959664 -0.00024045j,
       1.0109321 -0.00957111j, 0.96688414-0.00106187j,
       0.9742395 +0.02075455j, 1.0383718 +0.02606378j,
       1.0121154 -0.00706885j, 1.0123215 +0.02512319j,
       1.0099604 -0.0018152j , 0.9943094 +0.01234987j,
       1.0275712 -0.00189825j, 0.9684179 -0.00100992j,
       1.0281025 +0.00345031j, 0.98409444-0.02159757j,
       0.96370393+0.00463179j, 1.0057769 +0.00320191j,
       0.9571289 -0.02385962j, 1.001062  -0.02123885j,
       1.0082954 +0.03938396j, 1.010008  +0.02224625j,
       0.97985625-0.00603013j, 0.98128134-0.01392784j,
       0.97778386+0.02059032j, 0.9983289 -0.0048799j ,
       1.0003128 -0.01196393j, 0.9823538 +0.04579271j,
       0.9972085 +0.03265612j, 0.99566   +0.0078535j ,
       1.0023263 -0.01307611j, 1.035683  -0.0106416j ,
       0.9510639 +0.00209002j, 1.0062172 -0.02300397j,
       1.0078493 +0.01626898j, 1.0202907 +0.02064342j,
       1.0

In [112]:
cp.linalg.norm(r_src-p_dest)/cp.linalg.norm(r_src)

array(0.03098711, dtype=float32)

In [113]:
p_vec(r_vec(p_vec(r_vec(p_vec(r_vec(p_vec(r_vec(r_src)))))))).flatten()[:50]

array([1.0029235 -0.04422658j, 1.0492628 +0.00337346j,
       1.0254325 +0.02966752j, 0.9959657 -0.00024046j,
       1.0109326 -0.00957122j, 0.96688473-0.0010618j ,
       0.97423905+0.0207546j , 1.0383713 +0.02606381j,
       1.012116  -0.0070688j , 1.0123227 +0.02512308j,
       1.0099598 -0.00181518j, 0.994309  +0.01234985j,
       1.0275722 -0.00189833j, 0.96841896-0.00101005j,
       1.0281019 +0.00345036j, 0.984094  -0.0215975j ,
       0.9637039 +0.00463184j, 1.005777  +0.0032019j ,
       0.9571306 -0.02385959j, 1.0010636 -0.02123877j,
       1.0082957 +0.03938403j, 1.010008  +0.02224617j,
       0.9798576 -0.00603001j, 0.98128265-0.01392786j,
       0.97778386+0.02059011j, 0.9983291 -0.00487991j,
       1.0003139 -0.01196382j, 0.9823554 +0.04579278j,
       0.9972085 +0.03265619j, 0.99565995+0.00785352j,
       1.0023278 -0.01307627j, 1.0356843 -0.01064163j,
       0.95106465+0.0020902j , 1.0062184 -0.02300404j,
       1.0078489 +0.01626895j, 1.0202904 +0.02064345j,
       1.0

In [114]:
cp.linalg.norm(r_src-p_vec(r_vec(p_vec(r_vec(p_vec(r_vec(p_vec(r_vec(r_src))))))))
               )/cp.linalg.norm(r_src)  # ???

array(0.03098711, dtype=float32)

In [115]:
# _mat = contract("escTtZzYyXx,escTtZzYyXx->scTtZzYyXx",
#                 testvectors, cp.conj(testvectors)).flatten()
# print(cp.linalg.norm(_mat))
# print(_mat[:100])

# MultiGrid - R*matvec\*P.

In [116]:
def _r_matvec_p(src, matvec):
    return r_vec(matvec(p_vec(io.xxx2eTZYX(src, params))))


def r_matvec_p(src, matvec):
    return io.array2xxx(_r_matvec_p(src, matvec))

# MultiGrid - verify above.

In [117]:
D_r_src = matvec(r_src)

In [118]:
D_r_src.flatten()[:50]

array([13525.614 +51.138943j , 13583.947  -4.588804j ,
       13548.742 -39.923496j , 13512.052  -8.778692j ,
       13532.334  +4.2114744j, 13480.943  +4.652454j ,
       13484.333 -20.017454j , 13561.258 -25.284828j ,
       13534.023  +1.2555346j, 13534.026 -28.53246j  ,
       13532.782  +4.2668753j, 13507.11  -12.883554j ,
       13553.625  +6.029895j , 13484.212  +2.8756678j,
       13553.09   -4.635832j , 13497.422 +26.32704j  ,
       13470.243  -6.270824j , 13526.808  -1.9159756j,
       13467.607 +27.729565j , 13518.85  +18.462395j ,
       13528.192 -47.67793j  , 13532.38  -31.966732j ,
       13499.602  +7.874366j , 13500.487 +14.594929j ,
       13483.637 -21.05067j  , 13514.058 +10.918467j ,
       13516.571  +6.3392677j, 13498.871 -49.803234j ,
       13514.086 -37.47057j  , 13507.614 -11.730472j ,
       13521.812 +10.665878j , 13568.066  +2.172226j ,
       13456.82   -8.041367j , 13530.816 +30.60871j  ,
       13531.329 -22.532068j , 13543.28  -27.0723j   ,
       135

In [119]:
p_r_D_p_r_dest = p_vec(_r_matvec_p(r_dest, matvec=matvec))

Input Array Shape: (24, 2, 2, 2, 2)
Dest Shape: (24, 2, 2, 2, 2)


In [120]:
p_r_D_p_r_dest.flatten()[:50]

array([13561.351-609.2026j   , 14185.31  +46.03563j  ,
       13858.736+397.37634j  , 13464.096  -5.0390778j,
       13665.685-129.34029j  , 13074.423  -6.1583195j,
       13171.72 +280.82437j  , 14034.33 +354.1926j   ,
       13683.701 -98.89694j  , 13687.681+346.22906j  ,
       13651.487 -20.188673j , 13437.725+165.42598j  ,
       13890.799 -23.612625j , 13098.53  -13.75946j  ,
       13900.523 +53.170708j , 13301.632-294.76755j  ,
       13023.556 +63.433895j , 13590.285 +43.439693j ,
       12942.388-321.62238j  , 13535.248-291.70718j  ,
       13628.302+531.01526j  , 13647.746+294.26624j  ,
       13245.591 -82.04655j  , 13262.193-187.3691j   ,
       13207.511+275.586j    , 13489.073 -67.28671j  ,
       13521.109-161.3211j   , 13281.59 +623.2127j   ,
       13479.552+437.74686j  , 13459.414+103.25305j  ,
       13549.839-175.43552j  , 14003.306-145.58801j  ,
       12852.241 +22.628292j , 13607.764-312.35168j  ,
       13627.06 +216.44722j  , 13793.492+279.4859j   ,
       135

In [121]:
cp.linalg.norm(D_r_src-p_r_D_p_r_dest)/cp.linalg.norm(D_r_src)

array(0.03107861, dtype=float32)

# MultiGrid - CG (BUG!!!)

In [125]:
b = r_src.flatten()
print(b)

[1.+0.j 1.+0.j 1.+0.j ... 1.+0.j 1.+0.j 1.+0.j]


In [126]:
x = cg.slover(b=b, matvec=matvec,
              tol=1e-5, max_iter=1000000)
print(x)
print(cp.linalg.norm(b-matvec(x))/cp.linalg.norm(b))

Iteration 0: Residual = 1.548605e+03, Time = 0.000431 s
Iteration 1: Residual = 3.674324e-01, Time = 0.000486 s
Iteration 2: Residual = 1.219477e-04, Time = 0.000391 s
Iteration 3: Residual = 1.271803e-09, Time = 0.000754 s
Converged at iteration 3 with residual 1.271803e-09

Performance Statistics:
Total time: 0.016833 s
Average time per iteration: 0.000516 s
[7.3928444e-05-3.0772469e-07j 7.3579220e-05+2.9038402e-08j
 7.3789728e-05+2.3994161e-07j ... 7.3610303e-05-1.7673683e-07j
 7.4077027e-05+2.3668535e-07j 7.4016505e-05+4.2214591e-08j]
1.7421447e-05


In [127]:
x = bistabcg.slover(b=b, matvec=matvec,
                    tol=1e-5, max_iter=1000000)
print(x)
print(cp.linalg.norm(b-matvec(x))/cp.linalg.norm(b))

Iteration 0: Residual = 1.510119e+08, Time = 0.001031 s
Iteration 1: Residual = 1.067015e+00, Time = 0.000943 s
Iteration 2: Residual = 8.722468e-08, Time = 0.001166 s
Converged at iteration 2 with residual 8.722468e-08

Performance Statistics:
Total time: 0.022789 s
Average time per iteration: 0.001047 s
[7.3929965e-05-3.0735094e-07j 7.3579475e-05+2.7462988e-08j
 7.3790303e-05+2.3977185e-07j ... 7.3610587e-05-1.7662802e-07j
 7.4076081e-05+2.3658636e-07j 7.4017546e-05+4.1518472e-08j]
1.7694832e-05


In [128]:
mg_b = r_dest.flatten()
print(mg_b)

[-2.76370354e+01+1.85634279e+00j -2.76366310e+01+1.85960412e+00j
 -2.76373291e+01+1.84840429e+00j -2.76362362e+01+1.86809182e+00j
 -2.76372337e+01+1.85425413e+00j -2.76368237e+01+1.85138071e+00j
 -2.76364250e+01+1.85615802e+00j -2.76376534e+01+1.84096825e+00j
 -2.76382523e+01+1.83374929e+00j -2.76387730e+01+1.83146691e+00j
 -2.76392326e+01+1.83316517e+00j -2.76392593e+01+1.81448472e+00j
 -2.76380272e+01+1.83026910e+00j -2.76368313e+01+1.85209405e+00j
 -2.76371193e+01+1.84883261e+00j -2.76394882e+01+1.81558049e+00j
  1.13806129e-03-2.11866498e-02j -2.88379192e-03+3.41542810e-03j
  7.00849295e-03+9.30881500e-03j -4.43624556e-02+2.85775065e-02j
  1.63322389e-02+1.42837614e-02j -2.07075775e-02+3.19457501e-02j
  1.13123655e-02+1.56589001e-02j -2.44826972e-02+1.26793534e-02j
 -1.43474340e-03-4.84739244e-03j  2.86423415e-02-3.23337317e-03j
  6.75889850e-03+8.15093517e-03j -4.91782874e-02+3.34555581e-02j
 -8.03947449e-03-1.47098750e-02j -1.75133646e-02+8.34026933e-03j
 -6.11889362e-03+2.744296

In [129]:
mg_x = cg.slover(b=mg_b, matvec=functools.partial(r_matvec_p, matvec=matvec),
                 tol=1e-5, max_iter=1000000)
print(mg_x) 
print(cp.linalg.norm(mg_b-r_matvec_p(mg_x, matvec=matvec))/cp.linalg.norm(mg_b))

Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Iteration 0: Residual = 3.070158e+03, Time = 0.001096 s
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Iteration 1: Residual = 3.847058e-01, Time = 0.001343 s
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Iteration 2: Residual = 1.241405e-07, Time = 0.001261 s
Converged at iteration 2 with residual 1.241405e-07

Performance Statistics:
Total time: 0.013567 s
Average time per iteration: 0.001233 s
[-2.0443527e-03+1.37222945e-04j -2.0446451e-03+1.37464260e-04j
 -2.0450400e-03+1.36740302e-04j -2.0446628e-03+1.38032847e-04j
 -2.0448694e-03+1.37119263e-04j -2.0449078e-03+1.36932227e-04j
 -2.0447888e-03+1.37239287e-04j -2.0446496e-03+1.36185088e-04j
 -2.0448170e-03+1.35752009e-04j -2.0452156e-03+1.35608250e-04j
 -2.0449981e-03+1.35708018e-04j -2.0453406e-03+1.34493283e-04j
 -2.0451404e-03+1.35549693e-04j -2.0446447e-03+1.36940347e-04j
 -2.0445697e-03+1.36746006e-04j 

In [130]:
mg_x = bistabcg.slover(b=mg_b, matvec=functools.partial(r_matvec_p, matvec=matvec),
                 tol=1e-5, max_iter=1000000)
print(mg_x) 
print(cp.linalg.norm(mg_b-r_matvec_p(mg_x, matvec=matvec))/cp.linalg.norm(mg_b))

Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Iteration 0: Residual = 1.509700e+08, Time = 0.002312 s
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Iteration 1: Residual = 3.513710e-01, Time = 0.002770 s
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Input Array Shape: (384,)
Dest Shape: (24, 2, 2, 2, 2)
Iteration 2: Residual = 2.802487e-08, Time = 0.002727 s
Converged at iteration 2 with residual 2.802487e-08

Performance Statistics:
Total time: 0.023480 s
Average time per iteration: 0.002603 s
[-2.04435945e-03+1.37220544e-04j -2.04465911e-03+1.37467257e-04j
 -2.04503443e-03+1.36736911e-04j -2.04466563e-03+1.38032570e-04j
 -2.04486446e-03+1.37115509e-04j -2.04490940e-03+1.36924922e-04j
 -2.04478460e-03+1.37237235e-04j -2.04465236e-03+1.36198272e-04j
 -2.04481231e-03+1.35749768e-04j -2.04522023e-03

# End for pyqcu. (pass, don't run this)

In [131]:
# qcu.applyEndQcu(set_ptrs, params)
# qcu.applyEndQcu(set_ptrs, wilson_dslash_eo_params)
# qcu.applyEndQcu(set_ptrs, wilson_dslash_oe_params)
# qcu.applyEndQcu(set_ptrs, wilson_dslash_eo_dag_params)
# qcu.applyEndQcu(set_ptrs, wilson_dslash_oe_dag_params)