In [1]:
import cupy as cp
from pyqcu import define
from pyqcu import io
from pyqcu import qcu
from pyqcu import eigen
from pyqcu.set import params, argv, set_ptrs
# Report the rank exposed by pyqcu.define for this process.
print("My rank is ", define.rank)
params[define._SET_PLAN_] = 1

# Parameter slots that make up the data-file name, in the order they appear.
_gauge_name_fields = (
    define._LAT_X_,
    define._LAT_Y_,
    define._LAT_Z_,
    define._LAT_T_,
    define._LAT_XYZT_,
    define._GRID_X_,
    define._GRID_Y_,
    define._GRID_Z_,
    define._GRID_T_,
    define._PARITY_,
    define._NODE_RANK_,
    define._NODE_SIZE_,
    define._DAGGER_,
)
# The name is assembled from the parameter values as they stand right here,
# i.e. before the rank/size entries are overwritten just below.
# NOTE(review): confirm that this ordering is intentional.
gauge_filename = (
    "quda_wilson-bistabcg-gauge_-"
    + "-".join(f"{params[field]}" for field in _gauge_name_fields)
    + "-f.h5"
)

# Record this process's rank and the total process count in the parameters.
params[define._NODE_RANK_] = define.rank
params[define._NODE_SIZE_] = define.size
print("Parameters:", params)



    @@@@@@######QCU NOTES START######@@@@@@@
    0. Required: MPI(e.g. 4.1.2), CUDA(e.g. 12.4), CMAKE(e.g. 3.22.1), GCC(e.g. 11.4.0), HDF5-MPI(e.g. 1.10.7,'apt install libhdf5-mpi-dev && export HDF5_MPI="ON" && pip install --no-binary=h5py h5py').
    1. The libqcu.so was compiled when pyqcu setup in download_path/PyQCU/lib, please add this path to your LD_LIBRARY_PATH.
    2. The QCU(PyQCU) splite grid by x->y->z->t, lattice by x->y->z->t->p->d->c->c or x->y->z->t->c->s(->p) and x->y->z->t->c->s->c->s(->p).
    3. The QUDA(PyQUDA) splite grid by t->z->y->x, lattice by c->c->x->y->z->t->p->d or c->s->x->y->z->t(->p) and c->s->c->s->x->y->z->t(->p).
    4. The QCU input params in numpy array(dtype=np.int32), argv in  numpy array(dtype=np.float32 or float64) array, set_ptrs in numpy array(dtype=np.int64), other in cupy array(dtype=cp.complex64 or complex128).
    5. The smallest lattice size is (x=4,y=4,z=4,t=8) that QCU support.
    @@@@@@######QCU NOTES END######@@@@@@@
    
Parameter

In [2]:
# Hand the pointer, parameter and argument arrays from pyqcu.set to QCU's init entry point.
# NOTE(review): the recorded output lists host_params[_LAT_X_] as 16 after this call while
# the gauge file name built earlier used 32, so `params` appears to be updated in place —
# confirm against the pyqcu sources.
qcu.applyInitQcu(set_ptrs, params, argv)

gridDim.x               :4096
blockDim.x              :128
host_params[_LAT_X_]    :16
host_params[_LAT_Y_]    :32
host_params[_LAT_Z_]    :32
host_params[_LAT_T_]    :32
host_params[_LAT_XYZT_] :524288
host_params[_GRID_X_]   :1
host_params[_GRID_Y_]   :1
host_params[_GRID_Z_]   :1
host_params[_GRID_T_]   :1
host_params[_PARITY_]   :0
host_params[_NODE_RANK_]:0
host_params[_NODE_SIZE_]:1
host_params[_DAGGER_]   :0
host_params[_MAX_ITER_] :10000
host_params[_SET_INDEX_]:2
host_params[_SET_PLAN_] :1
host_argv[_MASS_]       :0.000000e+00
host_argv[_TOL_]        :1.000000e-09
lat_2dim[_XY_]          :512
lat_2dim[_XZ_]          :512
lat_2dim[_XT_]          :512
lat_2dim[_YZ_]          :1024
lat_2dim[_YT_]          :1024
lat_2dim[_ZT_]          :1024
lat_3dim[_YZT_]         :32768
lat_3dim[_XZT_]         :16384
lat_3dim[_XYT_]         :16384
lat_3dim[_XYZ_]         :16384
lat_4dim                :524288
grid_2dim[_XY_]         :1
grid_2dim[_XZ_]         :1
grid_2dim[_XT_]         :1
grid_2

In [3]:
# The fermion file names are derived from the gauge file name by swapping one token.
fermion_in_filename = gauge_filename.replace("gauge", "fermion-in")
fermion_out_filename = gauge_filename.replace("gauge", "fermion-out")

# Gauge field.
print("Gauge filename:", gauge_filename)
gauge = io.hdf5_xxxtzyx2grid_xxxtzyx(params, gauge_filename)

# Input fermion field.
print("Fermion in filename:", fermion_in_filename)
fermion_in = io.hdf5_xxxtzyx2grid_xxxtzyx(params, fermion_in_filename)

# Stored output fermion field; compared against the QCU result in a later cell.
print("Fermion out filename:", fermion_out_filename)
quda_fermion_out = io.hdf5_xxxtzyx2grid_xxxtzyx(params, fermion_out_filename)

# Zero-filled destination matching the input fermion's shape and dtype.
fermion_out = cp.zeros_like(fermion_in)
print("Fermion out data:", fermion_out.data)
print("Fermion out shape:", fermion_out.shape)

Gauge filename: quda_wilson-bistabcg-gauge_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Grid Index T: 0, Grid Index Z: 0, Grid Index Y: 0, Grid Index X: 0
Grid Lat T: 32, Grid Lat Z: 32, Grid Lat Y: 32, Grid Lat X: 16
All Dset Shape: (3, 3, 4, 2, 32, 32, 32, 16)
Dest Shape: (3, 3, 4, 2, 32, 32, 32, 16)
Fermion in filename: quda_wilson-bistabcg-fermion-in_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Grid Index T: 0, Grid Index Z: 0, Grid Index Y: 0, Grid Index X: 0
Grid Lat T: 32, Grid Lat Z: 32, Grid Lat Y: 32, Grid Lat X: 16
All Dset Shape: (2, 4, 3, 32, 32, 32, 16)
Dest Shape: (2, 4, 3, 32, 32, 32, 16)
Fermion out filename: quda_wilson-bistabcg-fermion-out_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Grid Index T: 0, Grid Index Z: 0, Grid Index Y: 0, Grid Index X: 0
Grid Lat T: 32, Grid Lat Z: 32, Grid Lat Y: 32, Grid Lat X: 16
All Dset Shape: (2, 4, 3, 32, 32, 32, 16)
Dest Shape: (2, 4, 3, 32, 32, 32, 16)
Fermion out data: <MemoryPointer 0xb26200000 device=0 mem=<cupy.cuda.memory.PooledMemor

In [4]:
# Run the QCU solver; fermion_out is the buffer passed in first position.
qcu.applyWilsonBistabCgQcu(fermion_out, fermion_in, gauge, set_ptrs, params)

print("Fermion out data:", fermion_out.data)
print("Fermion out shape:", fermion_out.shape)
print("QUDA Fermion out data:", quda_fermion_out.data)
print("QUDA Fermion out shape:", quda_fermion_out.shape)

# Relative deviation of the QCU result from the stored QUDA field.
deviation_norm = cp.linalg.norm(fermion_out - quda_fermion_out)
reference_norm = cp.linalg.norm(quda_fermion_out)
print("Difference:", deviation_norm / reference_norm)

Fermion out data: <MemoryPointer 0xb26200000 device=0 mem=<cupy.cuda.memory.PooledMemory object at 0x7fd810909e70>>
Fermion out shape: (2, 4, 3, 32, 32, 32, 16)
QUDA Fermion out data: <MemoryPointer 0xb20200000 device=0 mem=<cupy.cuda.memory.PooledMemory object at 0x7fd8282429f0>>
QUDA Fermion out shape: (2, 4, 3, 32, 32, 32, 16)
##RANK:0##LOOP:118##Residual:(2.27222e-10,1.97371e-23i)
multi-gpu wilson bistabcg total time: (without malloc free memcpy) :1.838457088 sec
######TIME  :2842.18######
##RANK      :0
##LOOP      :999
##tmp0      :(1.03257e-11,2.49512e-12i)
##tmp1      :(4.79284e-12,-2.12052e-23i)
##rho_prev  :(-2.31288e-06,4.83391e-06i)
##rho       :(-2.31288e-06,4.83391e-06i)
##alpha     :(0.629024,-0.434716i)
##beta      :(0.059529,-0.0243195i)
##omega     :(2.1544,0.520593i)
##send_tmp  :(0.00984323,0i)
##norm2_tmp :(4.97484e+07,0.000224118i)
##diff_tmp  :(1.9786e-10,-8.91365e-22i)
##lat_4dim  :(524288,0i)
Difference: 3.056118e-07


In [5]:
def matvec(src):
    """Return the result of ``qcu.applyWilsonCgDslashQcu`` applied to ``src``.

    A fresh zero-filled array shaped like ``src`` is allocated and passed as
    the first argument of the QCU call, together with the notebook-level
    ``gauge``, ``set_ptrs`` and ``params``; that array is then returned.
    ``src`` itself is only passed through to the QCU call.
    """
    result = cp.zeros_like(src)
    qcu.applyWilsonCgDslashQcu(result, src, gauge, set_ptrs, params)
    return result
# Length of the vectors the solver works on: lattice volume entry times the
# _LAT_HALF_SC_ constant from pyqcu.define.
eigen_vector_length = params[define._LAT_XYZT_] * define._LAT_HALF_SC_

# Build the solver around `matvec`, requesting define._LAT_Ne_ eigenpairs
# in the gauge field's dtype, then run it.
eigen_solver = eigen.solver(
    n=eigen_vector_length,
    k=define._LAT_Ne_,
    matvec=matvec,
    dtype=gauge.dtype,
)
eigenvalues, eigenvectors = eigen_solver.run()

eigen_index: 0, iter: 0, alpha: 0.000000000, beta: 1.000000000, tol: inf, lambda: 1.529646873, degree: 20
eigen_index: 0, iter: 1, alpha: 0.764823437, beta: 1.000000000, tol: 8.186196e+01, lambda: 0.018460182, degree: 30
eigen_index: 0, iter: 2, alpha: 0.764823437, beta: 1.000000000, tol: 4.725693e-01, lambda: 0.012536037, degree: 30
eigen_index: 0, iter: 3, alpha: 0.764823437, beta: 1.000000000, tol: 4.357962e-01, lambda: 0.008731070, degree: 30
eigen_index: 0, iter: 4, alpha: 0.764823437, beta: 1.000000000, tol: 4.755091e-01, lambda: 0.005917327, degree: 30
eigen_index: 0, iter: 5, alpha: 0.764823437, beta: 1.000000000, tol: 5.143479e-01, lambda: 0.003907508, degree: 30
eigen_index: 0, iter: 6, alpha: 0.764823437, beta: 1.000000000, tol: 8.526384e-01, lambda: 0.002109159, degree: 45
eigen_index: 0, iter: 7, alpha: 0.764823437, beta: 1.000000000, tol: 6.700020e-01, lambda: 0.001262968, degree: 45
eigen_index: 0, iter: 8, alpha: 0.764823437, beta: 1.000000000, tol: 4.058056e-01, lambda

In [6]:
# Output file names follow the gauge file name with the "gauge" token swapped out.
eigenvalues_path = gauge_filename.replace("gauge", "eigenvalues")
eigenvectors_path = gauge_filename.replace("gauge", "eigenvectors")

# Write eigenvalues first, then eigenvectors.
io.xxx2hdf5_xxx(eigenvalues, params, eigenvalues_path)
io.xxx2hdf5_xxx(eigenvectors, params, eigenvectors_path)

Input Array Shape: (24,)
Dest Shape: (24,)
Data is saved to quda_wilson-bistabcg-eigenvalues_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Input Array Shape: (24, 6291456)
Dest Shape: (24, 6291456)
Data is saved to quda_wilson-bistabcg-eigenvectors_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5


In [7]:
# Read both arrays back from the files named after the gauge file,
# replacing the in-memory eigenvalues/eigenvectors.
eigenvalues = io.hdf5_xxx2xxx(
    file_name=gauge_filename.replace("gauge", "eigenvalues"),
)
eigenvectors = io.hdf5_xxx2xxx(
    file_name=gauge_filename.replace("gauge", "eigenvectors"),
)

Dest Shape: (24,)
Dest Shape: (24, 6291456)


In [8]:
# Display the eigenvalue array as it was read back from HDF5 in the previous cell.
eigenvalues

array([0.00064598+0.j, 0.00064602+0.j, 0.00064618+0.j, 0.00064629+0.j,
       0.00064608+0.j, 0.00064623+0.j, 0.00064613+0.j, 0.00064618+0.j,
       0.00064627+0.j, 0.00064627+0.j, 0.00064618+0.j, 0.00064605+0.j,
       0.01015083+0.j, 0.01014141+0.j, 0.01013882+0.j, 0.01014776+0.j,
       0.010137  +0.j, 0.01014778+0.j, 0.0101433 +0.j, 0.0101457 +0.j,
       0.0101325 +0.j, 0.01015073+0.j, 0.01014026+0.j, 0.01014756+0.j],
      dtype=complex64)

In [9]:
# Display the eigenvector array as it was read back from HDF5 (the repr is abbreviated for large arrays).
eigenvectors

array([[-4.4538436e-05-3.3594368e-04j,  4.6027075e-05-3.1315003e-04j,
         4.6916168e-05-3.2615598e-04j, ...,
        -3.3052737e-04+4.0879022e-04j, -2.7962244e-04+3.8240026e-04j,
        -2.4751833e-04+4.2669827e-04j],
       [-8.9250709e-05+2.4262266e-04j, -1.1718755e-04+2.7178347e-04j,
        -2.9075480e-05+2.5365170e-04j, ...,
        -2.3033735e-05-6.7307708e-05j, -4.8489022e-05-6.6539767e-05j,
        -5.0821574e-05-5.3635384e-05j],
       [-3.2359327e-04+4.4108809e-05j, -3.6798188e-04+7.6118940e-06j,
        -4.0128696e-04+2.6989912e-05j, ...,
         5.5418408e-04+3.3377189e-06j,  5.5267330e-04+4.8368697e-06j,
         5.3884793e-04-3.8933526e-05j],
       ...,
       [-9.7565317e-05+2.7066070e-04j, -6.8591937e-05+2.9019025e-04j,
         2.7334419e-05+3.2862771e-04j, ...,
        -3.0471353e-04-3.4614233e-05j, -3.1315640e-04-8.8596164e-05j,
        -3.3918300e-04-1.2381286e-04j],
       [ 2.1402600e-04+3.6907059e-04j,  1.4567052e-04+3.7702126e-04j,
         1.0803329e-04

In [10]:
# Check every eigenpair against the operator and compare each eigenvector
# with its cyclic successor.
num_pairs = len(eigenvalues)
for i, ev in enumerate(eigenvalues):
    print(f"λ_{i} = {ev:.2e}")
    # Verify eigenvector: relative residual ||A v - λ v|| / ||A v||.
    v = eigenvectors[i]
    # matvec allocates its own output buffer, so no pre-allocation is needed here.
    w = matvec(v)
    error = cp.linalg.norm(w - ev * v) / cp.linalg.norm(w)
    print(f"Relative error: {error:.2e}")
    # Successor index, wrapping from the last pair back to the first.
    j = (i + 1) % num_pairs
    # Despite the "λ" in the label, the value printed is the relative distance
    # between the two eigenvectors, not between the eigenvalues. The label text
    # is left unchanged so it keeps matching previously recorded output.
    # NOTE(review): a value near sqrt(2) ≈ 1.41 is what mutually orthogonal
    # vectors of equal norm would produce — confirm that is the intended check.
    print(
        f"Diff between λ_{i} and λ_{j}: {cp.linalg.norm(v - eigenvectors[j])/cp.linalg.norm(v):.2e}")

λ_0 = 6.46e-04+0.00e+00j
Relative error: 8.08e-03
Diff between λ_0 and λ_1: 1.41e+00
λ_1 = 6.46e-04+0.00e+00j
Relative error: 9.35e-03
Diff between λ_1 and λ_2: 1.41e+00
λ_2 = 6.46e-04+0.00e+00j
Relative error: 1.03e-02
Diff between λ_2 and λ_3: 1.41e+00
λ_3 = 6.46e-04+0.00e+00j
Relative error: 9.62e-03
Diff between λ_3 and λ_4: 1.41e+00
λ_4 = 6.46e-04+0.00e+00j
Relative error: 1.00e-02
Diff between λ_4 and λ_5: 1.41e+00
λ_5 = 6.46e-04+0.00e+00j
Relative error: 9.52e-03
Diff between λ_5 and λ_6: 1.41e+00
λ_6 = 6.46e-04+0.00e+00j
Relative error: 9.83e-03
Diff between λ_6 and λ_7: 1.41e+00
λ_7 = 6.46e-04+0.00e+00j
Relative error: 9.38e-03
Diff between λ_7 and λ_8: 1.41e+00
λ_8 = 6.46e-04+0.00e+00j
Relative error: 9.36e-03
Diff between λ_8 and λ_9: 1.41e+00
λ_9 = 6.46e-04+0.00e+00j
Relative error: 1.06e-02
Diff between λ_9 and λ_10: 1.41e+00
λ_10 = 6.46e-04+0.00e+00j
Relative error: 1.00e-02
Diff between λ_10 and λ_11: 1.41e+00
λ_11 = 6.46e-04+0.00e+00j
Relative error: 1.12e-02
Diff betwe

In [11]:
# qcu.applyEndQcu(set_ptrs, params)