In [1]:
import cupy as cp
from pyqcu import define
from pyqcu import io
from pyqcu import qcu
from pyqcu import eigen
from pyqcu.set import params, argv, set_ptrs
print('My rank is ', define.rank)
params[define._SET_PLAN_] = 1
gauge_filename = f"quda_wilson-bistabcg-gauge_-{params[define._LAT_X_]}-{params[define._LAT_Y_]}-{params  [define._LAT_Z_]}-{params[define._LAT_T_]}-{params[define._LAT_XYZT_]}-{params[define._GRID_X_]}-{params[define._GRID_Y_]}-{params[define._GRID_Z_]}-{params[define._GRID_T_]}-{params[define._PARITY_]}-{params[define._NODE_RANK_]}-{params[define._NODE_SIZE_]}-{params[define._DAGGER_]}-f.h5"
params[define._NODE_RANK_] = define.rank
params[define._NODE_SIZE_] = define.size
print("Parameters:", params)



    @@@@@@######QCU NOTES START######@@@@@@@
    0. Required: MPI(e.g. 4.1.2), CUDA(e.g. 12.4), CMAKE(e.g. 3.22.1), GCC(e.g. 11.4.0), HDF5-MPI(e.g. 1.10.7,'apt install libhdf5-mpi-dev && export HDF5_MPI="ON" && pip install --no-binary=h5py h5py').
    1. The libqcu.so was compiled when pyqcu setup in download_path/PyQCU/lib, please add this path to your LD_LIBRARY_PATH.
    2. The QCU(PyQCU) splite grid by x->y->z->t, lattice by x->y->z->t->p->d->c->c or x->y->z->t->c->s(->p) and x->y->z->t->c->s->c->s(->p).
    3. The QUDA(PyQUDA) splite grid by t->z->y->x, lattice by c->c->x->y->z->t->p->d or c->s->x->y->z->t(->p) and c->s->c->s->x->y->z->t(->p).
    4. The QCU input params in numpy array(dtype=np.int32), argv in  numpy array(dtype=np.float32 or float64) array, set_ptrs in numpy array(dtype=np.int64), other in cupy array(dtype=cp.complex64 or complex128).
    5. The smallest lattice size is (x=4,y=4,z=4,t=8) that QCU support.
    @@@@@@######QCU NOTES END######@@@@@@@
    
Parameter

In [2]:
qcu.applyInitQcu(set_ptrs, params, argv)

gridDim.x               :4096
blockDim.x              :128
host_params[_LAT_X_]    :16
host_params[_LAT_Y_]    :32
host_params[_LAT_Z_]    :32
host_params[_LAT_T_]    :32
host_params[_LAT_XYZT_] :524288
host_params[_GRID_X_]   :1
host_params[_GRID_Y_]   :1
host_params[_GRID_Z_]   :1
host_params[_GRID_T_]   :1
host_params[_PARITY_]   :0
host_params[_NODE_RANK_]:0
host_params[_NODE_SIZE_]:1
host_params[_DAGGER_]   :0
host_params[_MAX_ITER_] :10000
host_params[_SET_INDEX_]:2
host_params[_SET_PLAN_] :1
host_argv[_MASS_]       :0.000000e+00
host_argv[_TOL_]        :1.000000e-09
lat_2dim[_XY_]          :512
lat_2dim[_XZ_]          :512
lat_2dim[_XT_]          :512
lat_2dim[_YZ_]          :1024
lat_2dim[_YT_]          :1024
lat_2dim[_ZT_]          :1024
lat_3dim[_YZT_]         :32768
lat_3dim[_XZT_]         :16384
lat_3dim[_XYT_]         :16384
lat_3dim[_XYZ_]         :16384
lat_4dim                :524288
grid_2dim[_XY_]         :1
grid_2dim[_XZ_]         :1
grid_2dim[_XT_]         :1
grid_2

In [3]:
print("Gauge filename:", gauge_filename)
gauge = io.hdf5_xxxtzyx2grid_xxxtzyx(params, gauge_filename)
fermion_in_filename = gauge_filename.replace("gauge", "fermion-in")
print("Fermion in filename:", fermion_in_filename)
fermion_in = io.hdf5_xxxtzyx2grid_xxxtzyx(params, fermion_in_filename)
fermion_out_filename = gauge_filename.replace("gauge", "fermion-out")
print("Fermion out filename:", fermion_out_filename)
quda_fermion_out = io.hdf5_xxxtzyx2grid_xxxtzyx(params, fermion_out_filename)
fermion_out = cp.zeros_like(fermion_in)
print("Fermion out data:", fermion_out.data)
print("Fermion out shape:", fermion_out.shape)

Gauge filename: quda_wilson-bistabcg-gauge_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Grid Index T: 0, Grid Index Z: 0, Grid Index Y: 0, Grid Index X: 0
Grid Lat T: 32, Grid Lat Z: 32, Grid Lat Y: 32, Grid Lat X: 16
All Dset Shape: (3, 3, 4, 2, 32, 32, 32, 16)
Dest Shape: (3, 3, 4, 2, 32, 32, 32, 16)
Fermion in filename: quda_wilson-bistabcg-fermion-in_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Grid Index T: 0, Grid Index Z: 0, Grid Index Y: 0, Grid Index X: 0
Grid Lat T: 32, Grid Lat Z: 32, Grid Lat Y: 32, Grid Lat X: 16
All Dset Shape: (2, 4, 3, 32, 32, 32, 16)
Dest Shape: (2, 4, 3, 32, 32, 32, 16)
Fermion out filename: quda_wilson-bistabcg-fermion-out_-32-32-32-32-1048576-1-1-1-1-0-0-1-0-f.h5
Grid Index T: 0, Grid Index Z: 0, Grid Index Y: 0, Grid Index X: 0
Grid Lat T: 32, Grid Lat Z: 32, Grid Lat Y: 32, Grid Lat X: 16
All Dset Shape: (2, 4, 3, 32, 32, 32, 16)
Dest Shape: (2, 4, 3, 32, 32, 32, 16)
Fermion out data: <MemoryPointer 0xb26200000 device=0 mem=<cupy.cuda.memory.PooledMemor

In [4]:
qcu.applyWilsonBistabCgQcu(fermion_out, fermion_in, gauge, set_ptrs, params)
print("Fermion out data:", fermion_out.data)
print("Fermion out shape:", fermion_out.shape)
print("QUDA Fermion out data:", quda_fermion_out.data)
print("QUDA Fermion out shape:", quda_fermion_out.shape)
print("Difference:", cp.linalg.norm(fermion_out -
      quda_fermion_out)/cp.linalg.norm(quda_fermion_out))

##RANK:0Fermion out data: <MemoryPointer 0xb26200000 device=0 mem=<cupy.cuda.memory.PooledMemory object at 0x7f137cd08eb0>>
Fermion out shape: (2, 4, 3, 32, 32, 32, 16)
QUDA Fermion out data: <MemoryPointer 0xb20200000 device=0 mem=<cupy.cuda.memory.PooledMemory object at 0x7f131adbd2f0>>
QUDA Fermion out shape: (2, 4, 3, 32, 32, 32, 16)
##LOOP:118##Residual:(2.27222e-10,1.97371e-23i)
multi-gpu wilson bistabcg total time: (without malloc free memcpy) :1.681214080 sec
######TIME  :2500.24######
##RANK      :0
##LOOP      :999
##tmp0      :(1.03257e-11,2.49512e-12i)
##tmp1      :(4.79284e-12,-2.12052e-23i)
##rho_prev  :(-2.31288e-06,4.83391e-06i)
##rho       :(-2.31288e-06,4.83391e-06i)
##alpha     :(0.629024,-0.434716i)
##beta      :(0.059529,-0.0243195i)
##omega     :(2.1544,0.520593i)
##send_tmp  :(0.00984323,0i)
##norm2_tmp :(4.97484e+07,0.000224118i)
##diff_tmp  :(1.9786e-10,-8.91365e-22i)
##lat_4dim  :(524288,0i)
Difference: 3.056118e-07


In [5]:
def matvec(src):
    dest = cp.zeros_like(src)
    qcu.applyWilsonCgDslashQcu(
        dest, src, gauge, set_ptrs, params)
    return dest
eigen_solver = eigen.solver(
    n=params[define._LAT_XYZT_] * define._LAT_HALF_SC_, k=define._LAT_Ne_,matvec=matvec,dtype=gauge.dtype)
eigenvalues, eigenvectors = eigen_solver.run()

In [16]:
eigenvalues

array([ 0.0000000e+00+0.77017j   ,  0.0000000e+00+0.770178j  ,
       -0.0000000e+00+0.7702047j , -3.6893488e+19+0.7702561j ,
        0.0000000e+00+0.77019674j, -2.0000000e+00+0.7702283j ,
        2.0000000e+00+0.77025986j,  1.0842022e-19+0.7701675j ,
       -2.0000000e+00+0.7702226j , -3.6893488e+19+0.7702001j ,
       -0.0000000e+00+0.77018124j, -3.6893488e+19+0.77022904j,
       -2.0000000e+00+1.0372533j , -3.6893488e+19+1.0371692j ,
       -2.0000000e+00+1.037208j  ,  2.0000000e+00+1.0373142j ,
       -0.0000000e+00+1.0373944j ,  0.0000000e+00+1.0371494j ,
        3.6893488e+19+1.0373195j , -1.0842022e-19+1.0372378j ,
       -0.0000000e+00+1.0374273j , -0.0000000e+00+1.0373198j ,
       -2.0000000e+00+1.0374235j , -1.0842022e-19+1.0372444j ],
      dtype=complex64)

In [None]:
io.xxx2hdf5_xxx(
    eigenvalues, params, gauge_filename.replace("gauge", "eigenvalues"))
io.xxx2hdf5_xxx(
    eigenvectors, params, gauge_filename.replace("gauge", "eigenvectors"))

In [10]:
eigenvalues=io.hdf5_xxx2xxx(file_name=gauge_filename.replace("gauge", "eigenvalues"))
eigenvectors=io.hdf5_xxx2xxx(file_name=gauge_filename.replace("gauge", "eigenvectors"))

Dest Shape: (24,)
Dest Shape: (150994944,)


In [12]:
eigenvalues

array([-8.43454181e-05, -7.73657302e-05, -8.43242015e-05, -8.33449230e-05,
       -7.76277448e-05, -3.34389188e-05, -8.27592012e-05, -6.90734159e-05,
       -5.00938986e-05, -3.59291153e-06, -7.95493688e-05, -3.78615659e-05,
       -1.43339275e-04, -1.03388760e-04, -8.32715596e-05, -7.68202153e-06,
       -7.76611996e-05, -2.53519684e-05, -3.90986534e-05, -4.24507307e-05,
       -1.12990121e-04, -6.47837005e-05, -1.04293125e-04, -1.18923810e-04],
      dtype=float32)

In [13]:
eigenvectors

array([-0.00038408-0.00026665j, -0.00033693-0.00030289j,
       -0.00032133-0.00034195j, ...,  0.00030356-0.00026165j,
        0.00037381-0.00027764j,  0.00039316-0.00026242j], dtype=complex64)

In [14]:
for i, ev in enumerate(eigenvalues):
    print(f"λ_{i} = {ev:.2e}")
    # Verify eigenvector
    v = eigenvectors[i]
    w = cp.zeros_like(v)
    w = matvec(v)
    error = cp.linalg.norm(w - ev * v) / cp.linalg.norm(w)
    print(f"Relative error: {error:.2e}")
    j = i+1
    if j == len(eigenvalues):
        j = 0
    print(
        f"Diff between λ_{i} and λ_{j}: {cp.linalg.norm(eigenvectors[i] - eigenvectors[j])/cp.linalg.norm(eigenvectors[i]):.2e}")

λ_0 = -8.43e-05
Relative error: 1.00e+00
Diff between λ_0 and λ_1: 1.27e-01
λ_1 = -3.88e-05
Relative error: 1.00e+00
Diff between λ_1 and λ_2: 9.28e-02
λ_2 = -7.36e-05
Relative error: 1.00e+00
Diff between λ_2 and λ_3: 5.39e-02
λ_3 = -8.14e-05
Relative error: 1.00e+00
Diff between λ_3 and λ_4: 1.58e-01
λ_4 = -5.82e-05
Relative error: 1.00e+00
Diff between λ_4 and λ_5: 8.16e-02
λ_5 = -1.73e-05
Relative error: 1.00e+00
Diff between λ_5 and λ_6: 8.69e-02
λ_6 = -6.73e-05
Relative error: 1.00e+00
Diff between λ_6 and λ_7: 1.10e-01
λ_7 = -3.88e-05
Relative error: 1.00e+00
Diff between λ_7 and λ_8: 1.20e-01
λ_8 = -4.60e-05
Relative error: 1.00e+00
Diff between λ_8 and λ_9: 1.38e-01
λ_9 = -2.83e-06
Relative error: 1.00e+00
Diff between λ_9 and λ_10: 9.93e-02
λ_10 = -1.10e-04
Relative error: 1.01e+00
Diff between λ_10 and λ_11: 2.36e-01
λ_11 = -2.33e-05
Relative error: 1.00e+00
Diff between λ_11 and λ_12: 1.24e-01
λ_12 = -1.13e-04
Relative error: 1.00e+00
Diff between λ_12 and λ_13: 1.20e-01
λ_

In [8]:
# qcu.applyEndQcu(set_ptrs, params)