In [1]:
import torch
from pyqcu.ascend import dslash
from pyqcu.ascend.include import *
# --- Run configuration ------------------------------------------------------
# Lattice extents used for every object created in this notebook.
# Other sizes that have been tried:
#   (2, 2, 2, 2), (4, 4, 4, 4), (4, 8, 8, 8), (8, 8, 8, 4), (16, 8, 8, 8),
#   (8, 8, 8, 16), (16, 16, 16, 16), (16, 16, 16, 32), (32, 32, 16, 16),
#   (32, 32, 32, 32), (32, 32, 32, 64)
latt_size = (8, 8, 8, 8)

# Mass parameter; in this cell it is only used to derive kappa.
# Assigned exactly once so the effective value is unambiguous.
# Other values that have been tried: -3.5, -0.8, -0.5, 0.0, 0.05
mass = -0.05

# kappa is derived from the mass. Fixed values that have been tried instead:
# 0.4, 0.125, 0.5
kappa = 1 / (2 * mass + 8)

# Complex dtype of all tensors; torch.complex64 is the alternative that has
# been tried.
dtype = torch.complex128

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# Build the Wilson (multigrid-capable) and clover operator objects with the
# same lattice size, kappa, dtype and device.
wilson = dslash.wilson_mg(
    latt_size=latt_size,
    kappa=kappa,
    dtype=dtype,
    device=device,
    verbose=False
)
clover = dslash.clover(
    latt_size=latt_size,
    kappa=kappa,
    dtype=dtype,
    device=device,
    verbose=False
)

# Gauge field generated with a fixed seed.
U = wilson.generate_gauge_field(sigma=0.1, seed=42)

# Run the library's SU(3) check on U; its return value (if any) is not
# inspected here.
wilson.check_su3(U)
clover_term = clover.make_clover(U=U)

# Seed torch's global RNG so the random source vector below is the same on
# every "Restart & Run All"; the gauge field above is already seeded.
torch.manual_seed(42)
# Random source vector with leading dimensions (4, 3) followed by the lattice
# extents in reversed order (latt_size[3] first, latt_size[0] last).
b = torch.randn(4, 3, latt_size[3], latt_size[2], latt_size[1], latt_size[0],
                dtype=dtype, device=device)

# Verbosity flag passed to the solver calls in this notebook.
verbose = True


    @@@@@@######QCU NOTES START######@@@@@@@
    Guide:
    0. Required: MPI(e.g. 4.1.2), CUDA(e.g. 12.4), CMAKE(e.g. 3.22.1), GCC(e.g. 11.4.0), HDF5-MPI(e.g. 1.10.7,'apt install libhdf5-mpi-dev && export HDF5_MPI="ON" && pip install --no-binary=h5py h5py').
    1. The libqcu.so was compiled when pyqcu setup in download_path/PyQCU/lib, please add this path to your LD_LIBRARY_PATH.
    2. The QCU(PyQCU) splite grid by x->y->z->t, lattice by x->y->z->t->p->d->c->c or x->y->z->t->c->s(->p) and x->y->z->t->c->s->c->s(->p).
    3. The QUDA(PyQUDA) splite grid by t->z->y->x, lattice by c->c->x->y->z->t->p->d or c->s->x->y->z->t(->p) and c->s->c->s->x->y->z->t(->p).
    4. The QCU input params in numpy array(dtype=np.int32), argv in  numpy array(dtype=np.float32 or float64) array, set_ptrs in numpy array(dtype=np.int64), other in cupy array(dtype=cp.complex64 or complex128).
    5. The smallest lattice size is (wilson:x=4,y=4,z=4,t=4;clover:x=8,y=8,z=8,t=8) that QCU support (when '#define _B

In [2]:
# Collect the "plus" and "minus" hopping terms for each of the four values of
# `ward`, calling plus then minus for every ward in turn.
hopping_plus_list, hopping_minus_list = [], []
for ward in range(4):
    hopping_plus_list += [wilson.give_hopping_plus(ward=ward, U=U)]
    hopping_minus_list += [wilson.give_hopping_minus(ward=ward, U=U)]

# Reference result: the library routine applied in one call, with with_I=False.
Ab = wilson.give_wilson(src=b, U=U, with_I=False)

# Same quantity rebuilt term by term from the stored hopping terms.
_Ab = torch.zeros_like(b)
lattice_dims = list(U.shape[-4:])
# The per-ward routines are fed the source with (4, 3) merged into 12.
b_merged = b.reshape([12] + lattice_dims)
for ward, (hop_plus, hop_minus) in enumerate(
        zip(hopping_plus_list, hopping_minus_list)):
    plus_term = wilson.give_wilson_plus(
        ward=ward, src=b_merged, hopping=hop_plus)
    _Ab += plus_term.reshape([4, 3] + lattice_dims)
    minus_term = wilson.give_wilson_minus(
        ward=ward, src=b_merged, hopping=hop_minus)
    _Ab += minus_term.reshape([4, 3] + lattice_dims)

# Relative difference between the two results.
relative_error = torch.norm(Ab - _Ab).item() / torch.norm(Ab).item()
print(relative_error)

2.1652886022778008e-16


In [None]:
from pyqcu.ascend import inverse
# Create the multigrid object from the source vector, both operator objects,
# the gauge field and the clover term after clover.add_I has been applied.
mg = inverse.mg(
    b=b,
    wilson=wilson,
    U=U,
    clover=clover,
    clover_term=clover.add_I(clover_term=clover_term),
    verbose=verbose,
    max_iter=200,
    max_levels=2,
)

self.dof_list:[12, 12, 12, 12, 8, 8, 4, 12, 12, 12, 8, 4, 2, 4, 4, 24, 12, 12, 12, 4, 4, 4, 4, 4]
Building grid list:
  Level 0: 8x8x8x8
  Level 1: 4x4x4x4
  Level 2: 2x2x2x2
self.grid_list:[[8, 8, 8, 8], [4, 4, 4, 4], [2, 2, 2, 2]]
Norm of b:1.12115637175306
Norm of r:1.12115637175306
Norm of x0:0.0
BICGSTAB-Iteration 0: Residual = 1.929314e-01, Time = 0.015756 s
BICGSTAB-Iteration 1: Residual = 6.371150e-02, Time = 0.005033 s
BICGSTAB-Iteration 2: Residual = 1.329530e-01, Time = 0.003667 s
BICGSTAB-Iteration 3: Residual = 2.145065e-02, Time = 0.003185 s
BICGSTAB-Iteration 4: Residual = 1.614341e-02, Time = 0.003623 s
BICGSTAB-Iteration 5: Residual = 9.394413e-03, Time = 0.002780 s
BICGSTAB-Iteration 6: Residual = 6.988029e-03, Time = 0.002738 s
BICGSTAB-Iteration 7: Residual = 1.605667e-02, Time = 0.002826 s
BICGSTAB-Iteration 8: Residual = 6.915419e-03, Time = 0.002919 s
BICGSTAB-Iteration 9: Residual = 6.725968e-03, Time = 0.003600 s
BICGSTAB-Iteration 10: Residual = 3.368002e-03, 

In [4]:
def matvec(src: torch.Tensor, U: torch.Tensor = U, clover_term: torch.Tensor = clover_term) -> torch.Tensor:
    """Apply the Wilson operator plus the clover term to ``src``.

    Args:
        src: Vector the operator is applied to.
        U: Gauge field; defaults to the notebook-level ``U`` as it was when
            this function was defined.
        clover_term: Clover term; defaults to the notebook-level
            ``clover_term`` as it was when this function was defined.

    Returns:
        ``wilson.give_wilson(src, U)`` plus
        ``clover.give_clover(clover_term=clover_term, src=src)``.
    """
    wilson_part = wilson.give_wilson(src, U)
    clover_part = clover.give_clover(clover_term=clover_term, src=src)
    return wilson_part + clover_part


def _matvec(src: torch.Tensor) -> torch.Tensor:
    """Apply the first operator stored in the multigrid object to ``src``.

    Args:
        src: Vector the operator is applied to.

    Returns:
        The result of ``mg.op_list[0].matvec(src=src)``.
    """
    first_op = mg.op_list[0]
    return first_op.matvec(src=src)

In [5]:
%%time
# Compare the hand-assembled operator (matvec) with the multigrid object's
# first operator (_matvec) on the same vector. Each call is timed on its own
# with %time; the %%time on the first line times the whole cell.
# NOTE(review): if `device` is CUDA the work may still be in flight when
# %time returns — confirm whether a synchronize is needed for fair timings.
%time Ab = matvec(b)
%time _Ab = _matvec(b)
# Norms of the inputs and of both results.
print(torch.norm(U).item())
print(torch.norm(clover_term).item())
print(torch.norm(Ab).item())
print(torch.norm(_Ab).item())
# Relative difference between the two results.
print(torch.norm(Ab-_Ab).item()/torch.norm(_Ab).item())

CPU times: user 2.28 ms, sys: 302 μs, total: 2.58 ms
Wall time: 2.33 ms
CPU times: user 1.09 ms, sys: 144 μs, total: 1.23 ms
Wall time: 1.18 ms


221.70250336881628
10.62561944948037
248.38478708613687
248.38478708613687
1.7505229586402284e-16
CPU times: user 19.2 ms, sys: 512 μs, total: 19.7 ms
Wall time: 18.2 ms


In [6]:
# Time both operator applications once more; Ab and _Ab are overwritten.
%time Ab = matvec(b)
%time _Ab = _matvec(b)

CPU times: user 0 ns, sys: 3.36 ms, total: 3.36 ms
Wall time: 2.82 ms
CPU times: user 0 ns, sys: 1.21 ms, total: 1.21 ms
Wall time: 1.08 ms


In [7]:
# Disabled check (kept for reference): restrict mg.b_list[0] with
# mg.lonv_list[0], prolong the result, restrict it again, and print the
# element-wise ratio of the first 100 entries of the two restricted vectors.
# b0 = mg.b_list[0]
# b1 = inverse.restrict(
#     local_ortho_null_vecs=mg.lonv_list[0], fine_vec=b0)
# _b0 = inverse.prolong(local_ortho_null_vecs=mg.lonv_list[0], coarse_vec=b1)
# _b1 = inverse.restrict(
#     local_ortho_null_vecs=mg.lonv_list[0], fine_vec=_b0)
# print(_b1.flatten()[:100]/b1.flatten()[:100])

In [8]:
# Reference solution _x: inverse.bicgstab driven by _matvec.
# Alternatives kept for experimentation:
#   _x = inverse.cg(b=b, matvec=matvec, verbose=verbose)
#   _x = inverse.bicgstab(b=b, matvec=mg.op_list[0].matvec, verbose=verbose)
_x = inverse.bicgstab(
    b=b,
    matvec=_matvec,
    verbose=verbose,
)

Norm of b:221.3797633073902
Norm of r:333.0332119879319
Norm of x0:221.83559935819497
BICGSTAB-Iteration 0: Residual = 7.209077e+01, Time = 0.003198 s
BICGSTAB-Iteration 1: Residual = 3.155898e+01, Time = 0.006532 s
BICGSTAB-Iteration 2: Residual = 3.079086e+01, Time = 0.004847 s
BICGSTAB-Iteration 3: Residual = 2.113155e+01, Time = 0.004148 s
BICGSTAB-Iteration 4: Residual = 1.305621e+01, Time = 0.004034 s
BICGSTAB-Iteration 5: Residual = 1.361682e+01, Time = 0.004713 s
BICGSTAB-Iteration 6: Residual = 8.146999e+00, Time = 0.003348 s
BICGSTAB-Iteration 7: Residual = 1.951110e+01, Time = 0.003041 s
BICGSTAB-Iteration 8: Residual = 5.464250e+00, Time = 0.003139 s
BICGSTAB-Iteration 9: Residual = 5.198128e+00, Time = 0.003122 s
BICGSTAB-Iteration 10: Residual = 4.383360e+00, Time = 0.003355 s
BICGSTAB-Iteration 11: Residual = 4.398059e+00, Time = 0.003212 s
BICGSTAB-Iteration 12: Residual = 7.948375e+00, Time = 0.003275 s
BICGSTAB-Iteration 13: Residual = 8.262339e+00, Time = 0.003229 s


In [None]:
# Run the multigrid solve and keep its return value as x.
x = mg.solve()
# Call the multigrid object's plotting routine
# (presumably the convergence history — confirm against pyqcu).
mg.plot()

Norm of b:221.3797633073902
Norm of r:334.37554553826436
Norm of x0:222.05283536782224
restrict:shape,coarse_dof:(torch.Size([12, 12, 4, 2, 4, 2, 4, 2, 4, 2]), 12)
EeTtZzYyXx,eTtZzYyXx->ETZYX
Norm of b:31.421349535746064
Norm of r:31.421349535746064
Norm of x0:0.0
restrict:shape,coarse_dof:(torch.Size([12, 12, 2, 2, 2, 2, 2, 2, 2, 2]), 12)
EeTtZzYyXx,eTtZzYyXx->ETZYX
Norm of b:9.57503342110517
Norm of r:9.57503342110517
Norm of x0:0.0
MG-2-BICGSTAB-Iteration 0: Residual = 7.292981e+01, Time = 0.004500 s
MG-2-BICGSTAB-Iteration 1: Residual = 1.227724e+01, Time = 0.004246 s
MG-2-BICGSTAB-Iteration 2: Residual = 3.134962e+01, Time = 0.004301 s
MG-2-BICGSTAB-Iteration 3: Residual = 1.186968e+01, Time = 0.004380 s
MG-2-BICGSTAB-Iteration 4: Residual = 9.607066e+00, Time = 0.003042 s
MG-2-BICGSTAB-Iteration 5: Residual = 6.092040e+01, Time = 0.002003 s
MG-2-BICGSTAB-Iteration 6: Residual = 4.089392e+00, Time = 0.002000 s
MG-2-BICGSTAB-Iteration 7: Residual = 1.789031e+01, Time = 0.002037 s
M

In [None]:
# First 100 entries of mg.b_list[0] minus mg.op_list[0] applied to x, where x
# is reshaped to a leading dimension of 12 followed by its last four dimensions.
(
    mg.b_list[0]
    - mg.op_list[0].matvec(
        x.reshape([12] + list(x.shape[-4:]))
    )
).flatten()[:100]

tensor([-6.3882e-05-8.6569e-05j, -3.5880e-05-1.1102e-04j,
        -4.2567e-06-6.9358e-05j, -2.7868e-05-4.5128e-05j,
        -6.8099e-05-9.7133e-05j, -8.7099e-06-4.2026e-05j,
        -5.8169e-05-3.4580e-05j, -7.6738e-06-6.9094e-05j,
         1.0885e-05-1.0660e-04j,  8.7736e-06-1.0538e-04j,
        -3.9891e-05-8.5230e-05j, -3.3898e-05-8.7374e-05j,
         3.1298e-05-9.5241e-05j, -1.7502e-05-8.8250e-05j,
        -1.3153e-05-4.8138e-05j, -3.1346e-05-4.4187e-05j,
         7.0443e-06-1.4172e-04j,  6.2169e-06-1.6170e-04j,
         4.0331e-06-1.0166e-04j,  7.4467e-05-1.0721e-04j,
        -2.5926e-05-4.9286e-05j, -6.3130e-06-5.7179e-05j,
        -4.3078e-05-8.8690e-05j,  2.4698e-05-8.6217e-05j,
        -7.0389e-06-1.4694e-04j, -4.5504e-06-1.2572e-04j,
        -4.3045e-05-1.6463e-04j, -1.5958e-05-6.9900e-05j,
        -1.6466e-05-1.3614e-04j, -1.5733e-05-1.1964e-04j,
         2.4516e-05-1.3010e-04j,  4.8226e-06-1.0214e-04j,
         8.4446e-05-1.0443e-04j,  1.1894e-05-1.0864e-04j,
        -9.331

In [None]:
# Relative difference between x and _x, normalised by the norm of _x.
print(torch.norm(x-_x).item()/torch.norm(_x).item())

0.017860814859554368


In [None]:
# Display the num_levels attribute of the multigrid object.
mg.num_levels

2

In [None]:
# Position in the multigrid lists to solve on; -1 selects the last entry.
index = -1
# Solve with an all-ones right-hand side shaped like mg.b_list[index], using
# the operator at the same position, and store the result in mg.u_list[index].
# `index` is used for all three subscripts so that changing it above changes
# them together.
mg.u_list[index] = inverse.bicgstab(
    b=torch.ones_like(mg.b_list[index]),
    matvec=mg.op_list[index].matvec,
    verbose=verbose,
    max_iter=10000,
)

Norm of b:55.42562584220407
Norm of r:166.65695203238522
Norm of x0:55.594670663712115
BICGSTAB-Iteration 0: Residual = 2.060376e+02, Time = 0.004771 s
BICGSTAB-Iteration 1: Residual = 1.567245e+02, Time = 0.004627 s
BICGSTAB-Iteration 2: Residual = 8.830880e+01, Time = 0.002182 s
BICGSTAB-Iteration 3: Residual = 8.771006e+01, Time = 0.002644 s
BICGSTAB-Iteration 4: Residual = 3.908746e+01, Time = 0.002076 s
BICGSTAB-Iteration 5: Residual = 3.708626e+01, Time = 0.002745 s
BICGSTAB-Iteration 6: Residual = 8.358217e+01, Time = 0.004183 s
BICGSTAB-Iteration 7: Residual = 3.086688e+01, Time = 0.004426 s
BICGSTAB-Iteration 8: Residual = 1.700525e+02, Time = 0.003405 s
BICGSTAB-Iteration 9: Residual = 2.762352e+01, Time = 0.004660 s
BICGSTAB-Iteration 10: Residual = 4.056666e+03, Time = 0.004300 s
BICGSTAB-Iteration 11: Residual = 2.461206e+01, Time = 0.004352 s
BICGSTAB-Iteration 12: Residual = 6.506208e+01, Time = 0.002043 s
BICGSTAB-Iteration 13: Residual = 1.453077e+01, Time = 0.001969 s