In [1]:
import torch
from pyqcu.ascend import dslash
from pyqcu.ascend.include import *
# --- Run configuration -------------------------------------------------------
# Lattice extents. Other sizes that have been tried in this notebook:
#   (16, 16, 16, 32), (16, 16, 16, 16), (32, 32, 32, 32), (32, 32, 32, 64),
#   (4, 8, 8, 8), (8, 8, 8, 4), (16, 8, 8, 8), (8, 8, 16, 16), (4, 4, 4, 4)
latt_size = (8, 8, 8, 8)
# Mass parameter. Other values that have been tried: -3.5, -0.8, -0.5, 0.05, 0.0
mass = -0.05
# kappa is derived from the mass; fixed values tried instead: 0.4, 0.125, 0.5
kappa = 1 / (2 * mass + 8)
dtype = torch.complex128  # alternative: torch.complex64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# --- Operators ---------------------------------------------------------------
# Both operators are built from the same lattice/kappa/dtype/device settings.
wilson = dslash.wilson_parity(
    latt_size=latt_size,
    kappa=kappa,
    dtype=dtype,
    device=device,
    verbose=False
)
clover = dslash.clover_parity(
    latt_size=latt_size,
    kappa=kappa,
    dtype=dtype,
    device=device,
    verbose=False
)

# --- Gauge field and clover term ---------------------------------------------
U = wilson.generate_gauge_field(sigma=0.1, seed=42)

wilson.check_su3(U)
clover_term = clover.make_clover(U=U)

# --- Right-hand side ---------------------------------------------------------
# Seed torch's global RNG so that `b` (and anything drawn from the global
# stream afterwards) is identical after Restart & Run All.  The value is
# deliberately different from the gauge-field seed (42) so that `b` cannot
# replay the same random stream, in case generate_gauge_field seeds one with
# that value -- NOTE(review): library internals not visible here.
torch.manual_seed(0)
# Shape is (4, 3, latt_size[3], latt_size[2], latt_size[1], latt_size[0]),
# i.e. the lattice extents in reversed order.
# Presumably the leading 4 and 3 are spin and colour -- TODO confirm.
b = torch.randn(4, 3, *latt_size[::-1], dtype=dtype, device=device)

# --- Layout conversion -------------------------------------------------------
# xxxtzyx2pxxxtzyx presumably converts to a parity-split layout (the results
# are named *_eo) -- TODO confirm against pyqcu.ascend.include.
U_eo = xxxtzyx2pxxxtzyx(input_array=U)
# clover.add_I is applied to the clover term before conversion (original
# note: "to make sitting term"); presumably it adds the identity -- TODO confirm.
clover_eo = xxxtzyx2pxxxtzyx(input_array=clover.add_I(clover=clover_term))

# Verbosity flag passed to the solver cells below.
verbose = True


    @@@@@@######QCU NOTES START######@@@@@@@
    Guide:
    0. Required: MPI(e.g. 4.1.2), CUDA(e.g. 12.4), CMAKE(e.g. 3.22.1), GCC(e.g. 11.4.0), HDF5-MPI(e.g. 1.10.7,'apt install libhdf5-mpi-dev && export HDF5_MPI="ON" && pip install --no-binary=h5py h5py').
    1. The libqcu.so was compiled when pyqcu setup in download_path/PyQCU/lib, please add this path to your LD_LIBRARY_PATH.
    2. The QCU(PyQCU) splite grid by x->y->z->t, lattice by x->y->z->t->p->d->c->c or x->y->z->t->c->s(->p) and x->y->z->t->c->s->c->s(->p).
    3. The QUDA(PyQUDA) splite grid by t->z->y->x, lattice by c->c->x->y->z->t->p->d or c->s->x->y->z->t(->p) and c->s->c->s->x->y->z->t(->p).
    4. The QCU input params in numpy array(dtype=np.int32), argv in  numpy array(dtype=np.float32 or float64) array, set_ptrs in numpy array(dtype=np.int64), other in cupy array(dtype=cp.complex64 or complex128).
    5. The smallest lattice size is (wilson:x=4,y=4,z=4,t=4;clover:x=8,y=8,z=8,t=8) that QCU support (when '#define _B

In [None]:
from pyqcu.ascend import inverse

# Build the multigrid solver object from the right-hand side, both operators
# and the converted (*_eo) gauge/clover fields prepared in the setup cell.
mg = inverse.mg(
    b=b,
    wilson=wilson,
    U_eo=U_eo,
    clover=clover,
    clover_eo=clover_eo,
    verbose=verbose,
    max_iter=20,
    max_restarts=5,
    max_levels=2,
)

self.dof_list:[12, 12, 12, 12, 8, 8, 4, 12, 12, 12, 8, 4, 2, 4, 4, 24, 12, 12, 12, 4, 4, 4, 4, 4]
Building grid list:
  Level 0: 8x8x8x8
  Level 1: 4x4x4x4
  Level 2: 2x2x2x2
self.grid_list:[[8, 8, 8, 8], [4, 4, 4, 4], [2, 2, 2, 2]]
Norm of b:1.12115637175306
Norm of r:1.12115637175306
Norm of x0:0.0
BICGSTAB-Iteration 0: Residual = 1.929314e-01, Time = 0.036751 s
BICGSTAB-Iteration 1: Residual = 6.371150e-02, Time = 0.033227 s
BICGSTAB-Iteration 2: Residual = 1.329530e-01, Time = 0.033338 s
BICGSTAB-Iteration 3: Residual = 2.145065e-02, Time = 0.034425 s
BICGSTAB-Iteration 4: Residual = 1.614341e-02, Time = 0.039026 s
BICGSTAB-Iteration 5: Residual = 9.394413e-03, Time = 0.033659 s
BICGSTAB-Iteration 6: Residual = 6.988029e-03, Time = 0.033661 s
BICGSTAB-Iteration 7: Residual = 1.605667e-02, Time = 0.032199 s
BICGSTAB-Iteration 8: Residual = 6.915419e-03, Time = 0.033199 s
BICGSTAB-Iteration 9: Residual = 6.725968e-03, Time = 0.032759 s
BICGSTAB-Iteration 10: Residual = 3.368002e-03, 

In [None]:
%%time

def matvec(src: torch.Tensor, U: torch.Tensor = U, clover_term: torch.Tensor = clover_term) -> torch.Tensor:
    """Apply the reference operator to ``src``.

    The result is ``wilson.give_wilson(src, U)`` plus
    ``clover.give_clover(clover=clover_term, src=src)``.

    Args:
        src: Field the operator is applied to.
        U: Gauge field; defaults to the notebook-level ``U``.
        clover_term: Clover term; defaults to the notebook-level ``clover_term``.

    Returns:
        The sum of the Wilson and clover contributions.

    Note:
        The defaults are bound when this cell runs, so re-run the cell after
        regenerating ``U`` or ``clover_term``.
    """
    wilson_part = wilson.give_wilson(src, U)
    clover_part = clover.give_clover(clover=clover_term, src=src)
    return wilson_part + clover_part


def _matvec(src: torch.Tensor) -> torch.Tensor:
    """Apply the first operator stored in the multigrid object to ``src``.

    Delegates to ``mg.op_list[0].matvec``; used below to cross-check the
    result against ``matvec``.
    """
    first_op = mg.op_list[0]
    return first_op.matvec(src=src)

%time Ab = matvec(b)
%time _Ab = _matvec(b)
print(torch.norm(U).item())
print(torch.norm(clover_term).item())
print(torch.norm(Ab).item())
print(torch.norm(_Ab).item())
print(torch.norm(Ab-_Ab).item()/torch.norm(_Ab).item())

CPU times: user 15.7 ms, sys: 0 ns, total: 15.7 ms
Wall time: 15.7 ms
CPU times: user 19.4 ms, sys: 0 ns, total: 19.4 ms
Wall time: 19.4 ms
221.70250336881628
10.62561944948037
248.38478708613687
248.3847870861369
1.5373286404702776e-16
CPU times: user 47.4 ms, sys: 9.62 ms, total: 57 ms
Wall time: 57 ms


In [None]:
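# Disabled round-trip check using mg.lonv_list[0]: restrict b0, prolong the
# result, restrict again, then print the element-wise ratio of the two coarse
# vectors (first 100 entries).
# b0 = mg.b_list[0]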
# b1 = inverse.restrict(
#     local_ortho_null_vecs=mg.lonv_list[0], fine_vec=b0)
# _b0 = inverse.prolong(local_ortho_null_vecs=mg.lonv_list[0], coarse_vec=b1)
# _b1 = inverse.restrict(
#     local_ortho_null_vecs=mg.lonv_list[0], fine_vec=_b0)
# print(_b1.flatten()[:100]/b1.flatten()[:100])

In [None]:
# Reference solution: BiCGStab on the full right-hand side with the
# hand-written operator `matvec`.
# Alternatives kept for experimentation:
#   _x = inverse.cg(b=b, matvec=matvec, verbose=verbose)
#   _x = inverse.bicgstab(b=b, matvec=mg.op_list[0].matvec, verbose=verbose)
_x = inverse.bicgstab(
    b=b,
    matvec=matvec,
    verbose=verbose,
)

Norm of b:221.3797633073902
Norm of r:334.6221023775494
Norm of x0:222.09112893478067
BICGSTAB-Iteration 0: Residual = 7.178554e+01, Time = 0.028549 s
BICGSTAB-Iteration 1: Residual = 3.155911e+01, Time = 0.026419 s
BICGSTAB-Iteration 2: Residual = 3.141660e+01, Time = 0.026672 s
BICGSTAB-Iteration 3: Residual = 2.030351e+01, Time = 0.027364 s
BICGSTAB-Iteration 4: Residual = 1.264354e+01, Time = 0.029387 s
BICGSTAB-Iteration 5: Residual = 1.587883e+01, Time = 0.028441 s
BICGSTAB-Iteration 6: Residual = 7.264899e+00, Time = 0.028432 s
BICGSTAB-Iteration 7: Residual = 5.471070e+00, Time = 0.028234 s
BICGSTAB-Iteration 8: Residual = 1.345958e+01, Time = 0.027831 s
BICGSTAB-Iteration 9: Residual = 3.986343e+00, Time = 0.027903 s
BICGSTAB-Iteration 10: Residual = 4.821581e+00, Time = 0.027564 s
BICGSTAB-Iteration 11: Residual = 4.500271e+00, Time = 0.028343 s
BICGSTAB-Iteration 12: Residual = 6.530433e+01, Time = 0.028898 s
BICGSTAB-Iteration 13: Residual = 1.249997e+01, Time = 0.028102 s


In [None]:
# Run the multigrid solve and keep its result in `x`, then call mg.plot().
# NOTE(review): the output recorded for this cell ends with
# "IndexError: list index out of range" after the first "MG:Iteration 1"
# header; the failing line is inside the library and not visible here.
# `x` is needed by the later cell that compares it with `_x`.
x = mg.solve()
mg.plot()


MG:Iteration 1:
Norm of b:221.3797633073902
Norm of r:334.37554553826436
Norm of x0:222.05283536782224


IndexError: list index out of range

In [None]:
# Level to solve on, as an index into mg.u_list / mg.b_list / mg.op_list
# (-1 selects the last entry of each list).
index = -1
# Solve on that level with an all-ones right-hand side shaped like the stored
# right-hand side, and store the result in the matching u_list slot.
# `index` is used for all three lists so that changing it switches the level
# consistently.
mg.u_list[index] = inverse.bicgstab(
    b=torch.ones_like(mg.b_list[index]),
    matvec=mg.op_list[index].matvec,
    verbose=verbose,
    max_iter=10000,
)

Norm of b:221.70250336881628
Norm of r:333.1680735607465
Norm of x0:221.451346053286
BICGSTAB-Iteration 0: Residual = 2.329600e+02, Time = 0.036290 s
BICGSTAB-Iteration 1: Residual = 1.787446e+03, Time = 0.036590 s
BICGSTAB-Iteration 2: Residual = 1.145856e+03, Time = 0.037481 s
BICGSTAB-Iteration 3: Residual = 7.163388e+02, Time = 0.031953 s
BICGSTAB-Iteration 4: Residual = 3.072688e+02, Time = 0.034425 s
BICGSTAB-Iteration 5: Residual = 1.962980e+02, Time = 0.035388 s
BICGSTAB-Iteration 6: Residual = 1.719798e+02, Time = 0.040762 s
BICGSTAB-Iteration 7: Residual = 1.222064e+02, Time = 0.038263 s
BICGSTAB-Iteration 8: Residual = 1.124191e+02, Time = 0.034015 s
BICGSTAB-Iteration 9: Residual = 1.019038e+02, Time = 0.033211 s
BICGSTAB-Iteration 10: Residual = 8.384537e+01, Time = 0.033292 s
BICGSTAB-Iteration 11: Residual = 6.490974e+01, Time = 0.037013 s
BICGSTAB-Iteration 12: Residual = 7.940376e+01, Time = 0.035929 s
BICGSTAB-Iteration 13: Residual = 6.959360e+01, Time = 0.032928 s
B

In [None]:
# Relative difference between the multigrid solution `x` and the BiCGStab
# reference `_x`, measured against the norm of `_x`.
print(torch.norm(x - _x).item() / torch.norm(_x).item())

In [None]:
# Show the first 100 entries of the last right-hand side in mg.b_list, flattened.
torch.flatten(mg.b_list[-1])[:100]