In [1]:
import torch
from pyqcu.ascend import dslash
from pyqcu.ascend.define import *
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Lattice extents. Note that `b` below is allocated with these in reverse
# order (latt_size[3] first, latt_size[0] last).
# Alternatives tried: (32, 32, 32, 64), (32, 32, 32, 32), (32, 32, 16, 16),
# (16, 16, 16, 32), (16, 16, 16, 16), (8, 16, 16, 32), (16, 8, 8, 8),
# (8, 8, 8, 16), (8, 8, 8, 8), (8, 8, 8, 4), (4, 8, 8, 8), (4, 4, 4, 4),
# (2, 2, 2, 2).
latt_size = (16, 16, 8, 8)

# Mass parameter. Alternatives tried: -3.5, -0.8, -0.5, 0.0, 0.05.
mass = -0.05

# kappa is derived from the mass; fixed values tried instead: 0.4, 0.125, 0.5.
kappa = 1 / (2 * mass + 8)

# Working precision. Alternative tried: torch.complex64.
dtype = torch.complex128

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# ---------------------------------------------------------------------------
# Operators and gauge field
# ---------------------------------------------------------------------------
# Both operators share the same lattice, kappa, dtype and device.
wilson = dslash.wilson_mg(
    latt_size=latt_size,
    kappa=kappa,
    dtype=dtype,
    device=device,
    verbose=False
)
clover = dslash.clover(
    latt_size=latt_size,
    kappa=kappa,
    dtype=dtype,
    device=device,
    verbose=False
)
U = wilson.generate_gauge_field(sigma=0.1, seed=42)

wilson.check_su3(U)
clover_term = clover.make_clover(U=U)
# clover_term = torch.zeros_like(clover_term) # just for test, just wilson

# ---------------------------------------------------------------------------
# Right-hand side and solver settings
# ---------------------------------------------------------------------------
# Draw b from a dedicated, explicitly seeded generator so that it (and every
# norm/residual printed further down) is the same after a kernel restart,
# independent of whatever state the global RNG is in at this point.
rhs_seed = 42
rhs_generator = torch.Generator(device=device).manual_seed(rhs_seed)
b = torch.randn(4, 3, latt_size[3], latt_size[2], latt_size[1], latt_size[0],
                generator=rhs_generator, dtype=dtype, device=device)
verbose = True
tol = 1e-6

Using device: cuda


In [2]:
# Build the forward/backward hopping terms for each of the four directions,
# evaluating plus then minus per direction.
hopping_pairs = [
    (wilson.give_hopping_plus(ward=ward, U=U),
     wilson.give_hopping_minus(ward=ward, U=U))
    for ward in range(4)
]
hopping_plus_list = [pair[0] for pair in hopping_pairs]
hopping_minus_list = [pair[1] for pair in hopping_pairs]

# Reference result: the full operator applied in one call (with_I=False).
Ab = wilson.give_wilson(src=b, U=U, with_I=False)

# Same result assembled direction by direction from the precomputed hoppings.
# The per-direction routines take a 12-component source, so b is viewed as
# [12, *lattice] on the way in and the result is viewed back as [4, 3, *lattice].
lattice_dims = list(U.shape[-4:])
b_flat = b.reshape([12] + lattice_dims)
_Ab = torch.zeros_like(b)
for ward, (hop_plus, hop_minus) in enumerate(zip(hopping_plus_list, hopping_minus_list)):
    forward_term = wilson.give_wilson_plus(
        ward=ward, src=b_flat, hopping=hop_plus)
    _Ab += forward_term.reshape([4, 3] + lattice_dims)
    backward_term = wilson.give_wilson_minus(
        ward=ward, src=b_flat, hopping=hop_minus)
    _Ab += backward_term.reshape([4, 3] + lattice_dims)

# Relative difference between the two ways of applying the operator.
relative_error = torch.norm(Ab - _Ab).item() / torch.norm(Ab).item()
print(relative_error)

2.172392291371309e-16


In [3]:
from pyqcu.ascend import inverse
# Construct the multigrid solver for right-hand side b on gauge field U,
# handing it both operators and the precomputed clover term.
mg = inverse.mg(
    b=b,
    wilson=wilson,
    U=U,
    clover=clover,
    clover_term=clover_term,
    tol=tol,
    verbose=verbose,
)
# Run the solver's setup step (with verbose=True it logs the grid hierarchy).
mg.init()

self.dof_list:[12, 24, 24, 24, 24, 4, 4, 24, 12, 12, 12, 24, 24, 24, 24, 48, 48, 24, 8, 8, 8, 4, 12, 12, 12, 8, 4, 2, 4, 4, 24, 12, 12, 12, 4, 4, 4, 4, 4]
Building grid list:
  Level 0: 16x16x8x8
  Level 1: 8x8x4x4
  Level 2: 4x4x2x2
self.grid_list:[[8, 8, 16, 16], [4, 4, 8, 8], [2, 2, 4, 4]]
Norm of b:497.2407917253912
Norm of r:702.6616497734033
Norm of x0:442.665398691082
BICGSTAB-Iteration 0: Residual = 1.209971e+02, Time = 0.013475 s
BICGSTAB-Iteration 1: Residual = 3.998150e+01, Time = 0.012782 s
BICGSTAB-Iteration 2: Residual = 7.410494e+01, Time = 0.012733 s
BICGSTAB-Iteration 3: Residual = 1.342343e+01, Time = 0.012458 s
BICGSTAB-Iteration 4: Residual = 1.179542e+01, Time = 0.012395 s
BICGSTAB-Iteration 5: Residual = 5.891327e+00, Time = 0.012484 s
BICGSTAB-Iteration 6: Residual = 4.151641e+00, Time = 0.012537 s
BICGSTAB-Iteration 7: Residual = 8.868268e+00, Time = 0.012895 s
BICGSTAB-Iteration 8: Residual = 4.632159e+00, Time = 0.012459 s
BICGSTAB-Iteration 9: Residual = 3.71

In [4]:
def matvec(src: torch.Tensor, U: torch.Tensor = U, clover_term: torch.Tensor = clover_term) -> torch.Tensor:
    """Return ``wilson.give_wilson(src, U)`` plus the clover contribution on ``src``.

    The defaults for ``U`` and ``clover_term`` are the notebook globals as they
    were when this function was defined; rebinding those globals later does not
    change the defaults.
    """
    wilson_part = wilson.give_wilson(src, U)
    clover_part = clover.give_clover(clover_term=clover_term, src=src)
    return wilson_part + clover_part


def _matvec(src: torch.Tensor) -> torch.Tensor:
    """Apply the first operator in ``mg.op_list`` to ``src``."""
    first_level_op = mg.op_list[0]
    return first_level_op.matvec(src=src)

In [5]:
%%time
%time Ab = matvec(b)
%time _Ab = _matvec(b)
print(torch.norm(U).item())
print(torch.norm(clover_term).item())
print(torch.norm(Ab).item())
print(torch.norm(_Ab).item())
print(torch.norm(Ab-_Ab).item()/torch.norm(_Ab).item())

CPU times: user 2.45 ms, sys: 134 μs, total: 2.59 ms
Wall time: 2.34 ms
CPU times: user 1.08 ms, sys: 59 μs, total: 1.14 ms
Wall time: 1.04 ms
443.40500673763256
21.33381650433473
497.493563254175
497.493563254175
1.739681673800062e-16
CPU times: user 64.1 ms, sys: 226 μs, total: 64.4 ms
Wall time: 63.2 ms


In [6]:
%time Ab = matvec(b)
%time _Ab = _matvec(b)

CPU times: user 3.23 ms, sys: 177 μs, total: 3.41 ms
Wall time: 2.82 ms
CPU times: user 1.37 ms, sys: 75 μs, total: 1.44 ms
Wall time: 1.37 ms


In [7]:
# Round-trip check on level 0: restrict -> prolong -> restrict.
# If the second restriction reproduces the first, the printed ratios are 1.
level0_lonv = mg.lonv_list[0]
b0 = mg.b_list[0]
b1 = inverse.restrict(local_ortho_null_vecs=level0_lonv, fine_vec=b0)
_b0 = inverse.prolong(local_ortho_null_vecs=level0_lonv, coarse_vec=b1)
_b1 = inverse.restrict(local_ortho_null_vecs=level0_lonv, fine_vec=_b0)
# Element-wise ratio of the first 100 coarse entries, second pass over first.
roundtrip_ratio = _b1.flatten()[:100] / b1.flatten()[:100]
print(roundtrip_ratio)

restrict:shape,coarse_dof:(torch.Size([24, 12, 4, 2, 4, 2, 8, 2, 8, 2]), 24)
EeTtZzYyXx,eTtZzYyXx->ETZYX
prolong:shape,fine_dof:(torch.Size([24, 12, 4, 2, 4, 2, 8, 2, 8, 2]), 12)
EeTtZzYyXx,ETZYX->eTtZzYyXx
restrict:shape,coarse_dof:(torch.Size([24, 12, 4, 2, 4, 2, 8, 2, 8, 2]), 24)
EeTtZzYyXx,eTtZzYyXx->ETZYX
tensor([1.0000-1.8371e-16j, 1.0000+7.5208e-16j, 1.0000-4.6671e-16j,
        1.0000+2.5687e-16j, 1.0000+2.3789e-16j, 1.0000-4.9437e-17j,
        1.0000+2.7901e-16j, 1.0000+2.5448e-16j, 1.0000-3.6578e-16j,
        1.0000-4.7636e-17j, 1.0000+1.0201e-17j, 1.0000+3.1485e-16j,
        1.0000-1.3197e-16j, 1.0000-8.3309e-17j, 1.0000+4.6016e-18j,
        1.0000+3.6738e-17j, 1.0000+2.5476e-18j, 1.0000+8.3125e-17j,
        1.0000+4.6411e-15j, 1.0000+5.1056e-17j, 1.0000-1.3107e-16j,
        1.0000-2.9002e-16j, 1.0000-4.7693e-17j, 1.0000-3.8142e-16j,
        1.0000+2.9224e-17j, 1.0000+3.4551e-16j, 1.0000-6.5806e-17j,
        1.0000-1.1147e-16j, 1.0000+4.7913e-16j, 1.0000+1.4819e-16j,
        

In [23]:
# Reference solution: plain BiCGStab driven by the level-0 multigrid operator.
# Alternatives tried here:
#   _x = inverse.cg(b=b, matvec=matvec, tol=tol, verbose=verbose)
#   _x = inverse.bicgstab(b=b, matvec=mg.op_list[0].matvec, tol=tol, verbose=verbose)
_x = inverse.bicgstab(
    b=b,
    matvec=_matvec,
    tol=tol,
    verbose=verbose,
)

Norm of b:442.82798787465305
Norm of r:666.571846911041
Norm of x0:443.74610024546007
BICGSTAB-Iteration 0: Residual = 1.438821e+02, Time = 0.013314 s
BICGSTAB-Iteration 1: Residual = 6.460841e+01, Time = 0.013444 s
BICGSTAB-Iteration 2: Residual = 6.174098e+01, Time = 0.012596 s
BICGSTAB-Iteration 3: Residual = 4.266300e+01, Time = 0.013760 s
BICGSTAB-Iteration 4: Residual = 2.673146e+01, Time = 0.012853 s
BICGSTAB-Iteration 5: Residual = 2.324308e+01, Time = 0.012411 s
BICGSTAB-Iteration 6: Residual = 2.434831e+01, Time = 0.012359 s
BICGSTAB-Iteration 7: Residual = 2.010660e+01, Time = 0.012563 s
BICGSTAB-Iteration 8: Residual = 1.612315e+01, Time = 0.012651 s
BICGSTAB-Iteration 9: Residual = 1.038880e+01, Time = 0.012448 s
BICGSTAB-Iteration 10: Residual = 7.211959e+00, Time = 0.012317 s
BICGSTAB-Iteration 11: Residual = 6.965224e+00, Time = 0.012792 s
BICGSTAB-Iteration 12: Residual = 6.202597e+00, Time = 0.012813 s
BICGSTAB-Iteration 13: Residual = 5.704536e+00, Time = 0.012457 s


In [22]:
# Override the solver's level count before solving.
# NOTE(review): mg.init() listed three grid levels, so this presumably limits
# the solve to the two finest ones — confirm against pyqcu.
mg.num_levels = 2
# Run the multigrid solve; the result is kept in `x` for the checks in later cells.
x = mg.solve()
# Solver-provided plot; presumably the convergence history — confirm against pyqcu.
mg.plot()

MG-0:Norm of b:442.82798787465305
MG-0:Norm of r:442.82798787465305
MG-0:Norm of x0:0.0
B-MG-0-BICGSTAB-Iteration 0: Residual = 1.137885e+02
restrict:shape,coarse_dof:(torch.Size([24, 12, 4, 2, 4, 2, 8, 2, 8, 2]), 24)
EeTtZzYyXx,eTtZzYyXx->ETZYX
MG-1:Norm of b:66.07399744584725
MG-1:Norm of r:66.07399744584725
MG-1:Norm of x0:0.0
B-MG-1-BICGSTAB-Iteration 0: Residual = 2.555943e+01
F-MG-1-BICGSTAB-Iteration 0: Residual = 2.555943e+01, Time = 0.006658 s
B-MG-1-BICGSTAB-Iteration 1: Residual = 1.399430e+01
F-MG-1-BICGSTAB-Iteration 1: Residual = 1.399430e+01, Time = 0.007098 s
B-MG-1-BICGSTAB-Iteration 2: Residual = 2.259892e+01
F-MG-1-BICGSTAB-Iteration 2: Residual = 2.259892e+01, Time = 0.006675 s
B-MG-1-BICGSTAB-Iteration 3: Residual = 1.088963e+01
F-MG-1-BICGSTAB-Iteration 3: Residual = 1.088963e+01, Time = 0.007382 s
B-MG-1-BICGSTAB-Iteration 4: Residual = 8.647826e+00
F-MG-1-BICGSTAB-Iteration 4: Residual = 8.647826e+00, Time = 0.006654 s
B-MG-1-BICGSTAB-Iteration 5: Residual = 6.7

In [10]:
# Shape of mg.lonv_list[0], the tensor passed as local_ortho_null_vecs to
# inverse.restrict / inverse.prolong in the round-trip check.
mg.lonv_list[0].shape

torch.Size([24, 12, 4, 2, 4, 2, 8, 2, 8, 2])

In [11]:
# Residual of the multigrid solution on level 0: b_0 - A_0 x.
# The level-0 operator takes a 12-component vector, so x is viewed as [12, *lattice].
x_as_level0 = x.reshape([12] + list(x.shape[-4:]))
level0_residual = mg.b_list[0] - mg.op_list[0].matvec(x_as_level0)
# Display the first 100 flattened entries.
level0_residual.flatten()[:100]

tensor([ 9.9657e-10+2.8013e-10j,  9.9771e-10+2.3946e-10j,
         2.0577e-09+1.9556e-09j, -5.0749e-10+8.2253e-10j,
         1.4770e-09+1.1545e-09j, -8.4456e-10+7.5069e-11j,
        -3.0523e-10-9.4027e-10j, -1.2779e-09+3.5768e-10j,
        -6.3425e-10+1.4493e-09j, -1.2140e-09+7.2135e-10j,
         1.5105e-10+1.5525e-09j, -1.4937e-09+1.4642e-09j,
        -1.4048e-09+1.1646e-09j, -1.7218e-09+8.0959e-10j,
        -1.2647e-09+1.2248e-09j,  1.6409e-10+8.5415e-10j,
         8.8484e-10+2.3965e-10j,  2.5331e-10+1.7208e-09j,
         2.5264e-11+5.2928e-10j, -1.0779e-10+6.3010e-10j,
        -5.6109e-10+3.8707e-10j,  7.7213e-10+1.2246e-09j,
        -8.5518e-10-1.3061e-09j, -2.0364e-10+3.5057e-10j,
        -1.0548e-09+8.2596e-10j,  7.2645e-10+1.3622e-09j,
        -6.3591e-11+2.7686e-09j,  3.2417e-10+9.4679e-10j,
        -1.0065e-09+1.2693e-09j, -1.0214e-10-7.1026e-10j,
        -8.0306e-10+2.3862e-10j,  2.1875e-10+5.1494e-10j,
         1.1188e-09+4.1783e-10j,  8.4400e-10+5.6871e-10j,
        -4.939

In [12]:
# Relative difference between the multigrid solution x and the reference _x.
solution_rel_diff = torch.norm(x - _x).item() / torch.norm(_x).item()
print(solution_rel_diff)

5.837216543333661e-08


In [13]:
# Show the current value of mg.num_levels.
mg.num_levels

2

In [14]:
# Probe the null vectors of one multigrid level: for each vector v, print the
# first ten entries of the element-wise ratio (A v) / v.
index = 0
null_vecs = mg.nv_list[index]
# NOTE: this rebinds the notebook-level name `matvec` (previously the
# Wilson+clover helper function) to this level's operator; the following cells
# rely on the rebinding.
matvec = mg.op_list[index].matvec
for null_vec in null_vecs:
    ratio_head = (matvec(null_vec) / null_vec).flatten()[:10]
    print(f"(matvec(null_vecs[i])/null_vecs[i]).flatten()[:10]:{ratio_head}")

(matvec(null_vecs[i])/null_vecs[i]).flatten()[:10]:tensor([0.0068+0.0004j, 0.0070+0.0006j, 0.0072+0.0004j, 0.0076+0.0002j,
        0.0078+0.0004j, 0.0075+0.0002j, 0.0072+0.0007j, 0.0072+0.0004j,
        0.0072+0.0005j, 0.0071+0.0007j], device='cuda:0',
       dtype=torch.complex128)
(matvec(null_vecs[i])/null_vecs[i]).flatten()[:10]:tensor([0.0112-0.0005j, 0.0126+0.0003j, 0.0129-0.0010j, 0.0153+0.0009j,
        0.0145-0.0031j, 0.0117-0.0023j, 0.0098-0.0022j, 0.0103-0.0016j,
        0.0071-0.0005j, 0.0065-0.0006j], device='cuda:0',
       dtype=torch.complex128)
(matvec(null_vecs[i])/null_vecs[i]).flatten()[:10]:tensor([ 9.6876e-03+0.0022j,  6.0471e-03+0.0071j,  3.8554e-03+0.0058j,
         8.0699e-03+0.0190j,  7.1146e-03+0.0131j, -1.3352e-02+0.0114j,
        -3.7976e-02+0.0017j,  8.8516e-05+0.0271j, -9.7693e-02-0.1213j,
         2.5410e-01-0.1453j], device='cuda:0', dtype=torch.complex128)
(matvec(null_vecs[i])/null_vecs[i]).flatten()[:10]:tensor([ 0.0103-0.0013j,  0.0058-0.0030j,  0.0

In [15]:
# First 100 flattened entries of the last tensor in mg.nv_list.
mg.nv_list[-1].flatten()[:100]

tensor([-0.0110-0.0032j, -0.0121-0.0037j, -0.0121-0.0034j, -0.0117-0.0035j,
        -0.0123-0.0039j, -0.0131-0.0027j, -0.0128-0.0028j, -0.0112-0.0025j,
        -0.0104-0.0036j, -0.0113-0.0039j, -0.0111-0.0034j, -0.0112-0.0042j,
        -0.0110-0.0050j, -0.0122-0.0039j, -0.0117-0.0026j, -0.0111-0.0032j,
        -0.0112-0.0018j, -0.0114-0.0027j, -0.0112-0.0026j, -0.0106-0.0022j,
        -0.0105-0.0038j, -0.0115-0.0042j, -0.0118-0.0029j, -0.0124-0.0013j,
        -0.0119-0.0006j, -0.0101-0.0020j, -0.0105-0.0029j, -0.0110-0.0017j,
        -0.0105-0.0027j, -0.0112-0.0030j, -0.0114-0.0018j, -0.0108-0.0020j,
        -0.0107-0.0011j, -0.0120-0.0014j, -0.0110-0.0032j, -0.0120-0.0036j,
        -0.0115-0.0030j, -0.0105-0.0025j, -0.0113-0.0025j, -0.0108-0.0020j,
        -0.0122-0.0016j, -0.0124-0.0014j, -0.0114-0.0033j, -0.0122-0.0036j,
        -0.0117-0.0039j, -0.0113-0.0028j, -0.0117-0.0017j, -0.0118-0.0017j,
        -0.0131-0.0025j, -0.0137-0.0022j, -0.0125-0.0029j, -0.0117-0.0043j,
        -0.0

In [16]:
# Build a candidate near-null vector by hand: start from a random vector v and
# subtract an approximate solution e of A e = A v, leaving v - e.
_null_vec = torch.randn_like(null_vecs[0])

# Apply the operator once and reuse the result for both diagnostics and the
# solve (the vector is unchanged until the in-place subtraction below).
applied_null_vec = matvec(_null_vec)
print(torch.norm(applied_null_vec))
print(applied_null_vec.flatten()[:20])

# Loose tolerance (5e-5): the solve is deliberately inexact, so the remainder
# v - e is small but not zero.
_null_vec -= inverse.bicgstab(b=applied_null_vec, matvec=matvec, tol=5e-5)
print(torch.norm(_null_vec))
print(_null_vec.flatten()[:20])

# Normalise to unit norm, then inspect the element-wise ratio (A v) / v.
_null_vec /= torch.norm(_null_vec).item()
print(
    f"(matvec(_null_vec)/_null_vec).flatten()[:10]:{(matvec(_null_vec)/_null_vec).flatten()[:10]}")

tensor(496.0772, device='cuda:0', dtype=torch.float64)
tensor([-0.9637+0.1624j,  1.0497-1.0570j,  0.0871+1.7358j,  0.5495-1.1345j,
        -0.0685+0.6246j, -0.6139-2.5904j, -0.7475+0.5982j, -0.0673+0.1713j,
        -0.1051-0.0893j,  0.1405+0.5951j, -0.7144-0.2735j,  0.5137-0.0632j,
         0.9069+0.3658j, -0.5006+1.0500j, -0.1709-0.7850j, -0.1347+0.4834j,
         1.7745-0.0144j,  0.8418-0.7627j, -0.1055+0.3573j, -0.5661+1.0749j],
       device='cuda:0', dtype=torch.complex128)
Norm of b:496.0772284172597
Norm of r:701.090681300054
Norm of x0:442.8708318604005
BICGSTAB-Iteration 0: Residual = 1.208747e+02, Time = 0.012877 s
BICGSTAB-Iteration 1: Residual = 3.980901e+01, Time = 0.013194 s
BICGSTAB-Iteration 2: Residual = 7.653912e+01, Time = 0.012839 s
BICGSTAB-Iteration 3: Residual = 1.336006e+01, Time = 0.013096 s
BICGSTAB-Iteration 4: Residual = 1.042262e+01, Time = 0.013488 s
BICGSTAB-Iteration 5: Residual = 5.677389e+00, Time = 0.012980 s
BICGSTAB-Iteration 6: Residual = 4.157304e

In [17]:
# Sanity check of BiCGStab on the current `matvec`: solve A x = b for a random
# right-hand side shaped like a null vector, then inspect the true residual.
_b = torch.randn_like(null_vecs[0])
_x = inverse.bicgstab(b=_b, matvec=matvec, tol=1e-8)
true_residual = _b - matvec(_x)
print(true_residual.flatten()[:100])
# Norms of the right-hand side and of the solution, in that order.
for vec in (_b, _x):
    print(torch.norm(vec))

Norm of b:443.7193747128637
Norm of r:667.4432101594302
Norm of x0:443.6490785219461
BICGSTAB-Iteration 0: Residual = 1.443843e+02, Time = 0.014116 s
BICGSTAB-Iteration 1: Residual = 6.986188e+01, Time = 0.013254 s
BICGSTAB-Iteration 2: Residual = 6.174083e+01, Time = 0.013236 s
BICGSTAB-Iteration 3: Residual = 4.290146e+01, Time = 0.012827 s
BICGSTAB-Iteration 4: Residual = 2.713945e+01, Time = 0.013220 s
BICGSTAB-Iteration 5: Residual = 2.088906e+01, Time = 0.013293 s
BICGSTAB-Iteration 6: Residual = 1.737192e+01, Time = 0.012977 s
BICGSTAB-Iteration 7: Residual = 3.546751e+01, Time = 0.012890 s
BICGSTAB-Iteration 8: Residual = 1.101402e+01, Time = 0.012985 s
BICGSTAB-Iteration 9: Residual = 8.841156e+00, Time = 0.013412 s
BICGSTAB-Iteration 10: Residual = 8.683400e+00, Time = 0.013192 s
BICGSTAB-Iteration 11: Residual = 9.334867e+00, Time = 0.013065 s
BICGSTAB-Iteration 12: Residual = 5.844311e+00, Time = 0.012766 s
BICGSTAB-Iteration 13: Residual = 5.274800e+00, Time = 0.012829 s
B

In [18]:
# First 100 flattened entries of _null_vec in its current state.
_null_vec.flatten()[:100]

tensor([-0.0003-6.9729e-05j, -0.0006+1.4785e-04j, -0.0005+3.3954e-04j,
        -0.0003+4.2791e-04j, -0.0004+4.8406e-04j, -0.0005+2.9796e-04j,
        -0.0006+4.3477e-04j, -0.0003+5.9396e-04j, -0.0004+2.7453e-04j,
        -0.0004+4.1049e-04j, -0.0002+3.8888e-04j, -0.0005+1.4199e-04j,
        -0.0002+2.6116e-04j, -0.0004+4.1884e-04j, -0.0001+3.4321e-04j,
        -0.0003+3.7868e-04j, -0.0004-1.9007e-04j, -0.0006+2.4059e-04j,
        -0.0007+1.1751e-05j, -0.0003+2.9892e-04j, -0.0006+3.9048e-04j,
        -0.0005+2.2113e-04j, -0.0004+2.1093e-04j, -0.0005+3.6058e-04j,
        -0.0005+4.1892e-04j, -0.0006+2.5627e-04j, -0.0002+1.4222e-04j,
        -0.0004+2.6776e-04j, -0.0005+3.2364e-04j, -0.0005+3.2462e-04j,
        -0.0006+4.2986e-06j, -0.0007-1.3664e-04j, -0.0006+6.0036e-05j,
        -0.0003-4.5947e-05j, -0.0004-1.4776e-05j, -0.0005-2.3810e-04j,
        -0.0006+8.5368e-05j, -0.0007-7.1898e-05j, -0.0005-1.3842e-04j,
        -0.0007+7.0448e-05j, -0.0007+1.4166e-04j, -0.0006+1.0611e-04j,
      