# Initialize pyqcu.

In [None]:
import cupy as cp
import numpy as np
import functools
from pyqcu import define, io, qcu, eigen, cg, bistabcg, amg, linalg, gauge, demo
from time import perf_counter
from opt_einsum import contract
from pyqcu.set import params, argv
# Lattice extent: 8 sites along each of X, Y, Z and T.
params[define._LAT_X_] = 8
params[define._LAT_Y_] = 8
params[define._LAT_Z_] = 8
params[define._LAT_T_] = 8
# Total number of lattice sites, stored as the product of the four extents.
params[define._LAT_XYZT_] = params[define._LAT_X_] * \
    params[define._LAT_Y_] * params[define._LAT_Z_] * params[define._LAT_T_]
# Select the _LAT_C64_ data-type constant
# (presumably complex64 -- confirm against pyqcu.define).
params[define._DATA_TYPE_] = define._LAT_C64_
# sigma and seed are forwarded unchanged to demo.give below.
sigma = 1.0
seed = 12138
# Rank and size are taken from pyqcu.define.
params[define._NODE_RANK_] = define.rank
params[define._NODE_SIZE_] = define.size
# Tolerance entry of argv.
argv[define._TOL_] = 1e-12
# kappa = 1 / (2 * m + 8), with m read from argv[define._MASS_].
# NOTE(review): argv[define._MASS_] is not assigned in this cell, so kappa
# depends on whatever value pyqcu.set.argv already holds -- confirm.
kappa = 1 / (2 * argv[define._MASS_] + 8)
# demo.give returns the gauge field U, the src/dest fermion fields, set_ptrs,
# and the Wilson parameter sets that the later cells pass to qcu.
U, src, dest, set_ptrs, wilson_cg_params, wilson_dslash_eo_params, wilson_dslash_oe_params, wilson_dslash_eo_dag_params, wilson_dslash_oe_dag_params = demo.give(
    params=params, sigma=sigma, seed=seed)

# Define the Dslash operators used by CG & BISTABCG.
> src_o - kappa**2 * dslash_oe(dslash_eo(src_o))

In [None]:
def pdslash_no_dag(src):
    """Return ``src - kappa**2 * dslash_oe(dslash_eo(src))`` (no dagger).

    The Wilson dslash is applied twice through ``qcu.applyWilsonDslashQcu``:
    first with ``wilson_dslash_eo_params``, then with
    ``wilson_dslash_oe_params`` on the intermediate result.
    """
    after_eo = cp.zeros_like(src)
    after_oe = cp.zeros_like(src)
    # First hop: src -> after_eo.
    qcu.applyWilsonDslashQcu(after_eo, src, U, set_ptrs,
                             wilson_dslash_eo_params)
    # Second hop: after_eo -> after_oe.
    qcu.applyWilsonDslashQcu(after_oe, after_eo, U, set_ptrs,
                             wilson_dslash_oe_params)
    kappa_sq = kappa**2
    return src - kappa_sq * after_oe
def pdslash_dag(src):
    """Dagger counterpart of ``pdslash_no_dag``.

    Applies ``qcu.applyWilsonDslashQcu`` with ``wilson_dslash_eo_dag_params``
    and then with ``wilson_dslash_oe_dag_params``, and returns
    ``src - kappa**2 * result``.
    """
    after_eo_dag = cp.zeros_like(src)
    after_oe_dag = cp.zeros_like(src)
    # First hop with the eo dagger parameter set.
    qcu.applyWilsonDslashQcu(after_eo_dag, src, U, set_ptrs,
                             wilson_dslash_eo_dag_params)
    # Second hop with the oe dagger parameter set.
    qcu.applyWilsonDslashQcu(after_oe_dag, after_eo_dag, U, set_ptrs,
                             wilson_dslash_oe_dag_params)
    kappa_sq = kappa**2
    return src - kappa_sq * after_oe_dag
def cg_dslash(src):
    """Apply ``pdslash_no_dag`` to ``src`` and then ``pdslash_dag`` to the result."""
    forward = pdslash_no_dag(src)
    return pdslash_dag(forward)
def dslash_no_dag(src):
    """Apply a single Wilson dslash using ``wilson_dslash_eo_params``.

    A zero-filled array shaped like ``src`` is passed as the first argument
    of ``qcu.applyWilsonDslashQcu`` and returned afterwards.
    """
    # Local buffer; deliberately not named ``dest`` so that it does not
    # shadow the notebook-level ``dest`` field.
    out = cp.zeros_like(src)
    qcu.applyWilsonDslashQcu(out, src, U, set_ptrs, wilson_dslash_eo_params)
    return out
def dslash_dag(src):
    """Apply a single Wilson dslash using ``wilson_dslash_eo_dag_params``.

    A zero-filled array shaped like ``src`` is passed as the first argument
    of ``qcu.applyWilsonDslashQcu`` and returned afterwards.
    """
    # Local buffer; deliberately not named ``dest`` so that it does not
    # shadow the notebook-level ``dest`` field.
    out = cp.zeros_like(src)
    qcu.applyWilsonDslashQcu(out, src, U, set_ptrs,
                             wilson_dslash_eo_dag_params)
    return out
def dslash(src):
    """Alias for ``dslash_no_dag``: forward ``src`` and return its result."""
    result = dslash_no_dag(src)
    return result
def bistabcg_dslash(src):
    """Alias for ``pdslash_no_dag``: forward ``src`` and return its result."""
    result = pdslash_no_dag(src)
    return result
# Print the norm of dest[EVEN] - kappa * dslash(dest[ODD]) - src[ODD].
# NOTE(review): the commented-out CG cell near the end of this notebook
# rebuilds the even part as x_e = b_e + kappa * D_eo(x_o), which suggests the
# term subtracted here should be src[define._EVEN_] rather than
# src[define._ODD_] -- confirm against the convention used by demo.give.
print(cp.linalg.norm((dest[define._EVEN_]-kappa *
                      dslash(dest[define._ODD_]))-src[define._ODD_]))

# Verify $(\gamma_5 D)^\dag = D^\dag {\gamma_5}^\dag = D^\dag \gamma_5  = \gamma_5 D$

In [None]:
# gamma5 as a 4x4 diagonal matrix diag(1, 1, -1, -1), cast to the dtype of src.
gamma5_diagonal = cp.array([1, 1, -1, -1])
gamma5 = cp.diag(gamma5_diagonal).astype(src.dtype)
# Show the matrix and its transpose.
for matrix in (gamma5, gamma5.T):
    print(matrix)


def gamma5_vec(src):
    """Left-multiply the leading (spin) index of a fermion field by ``gamma5``.

    ``src`` is first converted with ``io.fermion2sctzyx(src, params)``; the
    result has the same index layout as that converted field.

    The contraction is a genuine matrix product over the spin index
    (``out[a, ...] = sum_b gamma5[a, b] * v[b, ...]``).  The previous
    subscript ``"ss,sctzyx->sctzyx"`` read only the diagonal of ``gamma5``,
    which gives the same numbers for the diagonal ``gamma5`` defined in this
    notebook but would silently ignore off-diagonal entries of any other
    spin matrix.
    """
    field = io.fermion2sctzyx(src, params)
    return contract("ab,bctzyx->actzyx", gamma5, field)


def vec_gamma5(src):
    """Right-multiply the leading (spin) index of a fermion field by ``gamma5``.

    ``src`` is first converted with ``io.fermion2sctzyx(src, params)``; the
    result has the same index layout as that converted field.

    The contraction is a genuine vector-matrix product over the spin index
    (``out[b, ...] = sum_a v[a, ...] * gamma5[a, b]``).  The previous
    subscript ``"sctzyx,ss->sctzyx"`` read only the diagonal of ``gamma5``,
    which gives the same numbers for the diagonal ``gamma5`` defined in this
    notebook but would silently ignore off-diagonal entries of any other
    spin matrix.
    """
    field = io.fermion2sctzyx(src, params)
    return contract("actzyx,ab->bctzyx", field, gamma5)


# Use the even-parity part of dest as the test vector.
_src = dest[define._EVEN_]
print(_src.shape)
# Norm of gamma5 * D(v) - D^dag(gamma5 * v) for the single dslash pair
# dslash_no_dag / dslash_dag; this is the identity stated in the heading
# above, so the printed value should be close to zero if it holds.
print(cp.linalg.norm(gamma5_vec(dslash_no_dag(_src))-dslash_dag(gamma5_vec(_src))))
# Same comparison for the pair pdslash_no_dag / pdslash_dag.
print(cp.linalg.norm(gamma5_vec(pdslash_no_dag(_src))-pdslash_dag(gamma5_vec(_src))))

# Clover

In [None]:
# Zero-filled clover buffers with shape (S, C, S, C, T, Z, Y, X / P), using
# the constants from pyqcu.define and the lattice extents stored in params.
clover_even = cp.zeros((define._LAT_S_, define._LAT_C_, define._LAT_S_, define._LAT_C_,
                       params[define._LAT_T_], params[define._LAT_Z_], params[define._LAT_Y_], int(params[define._LAT_X_]/define._LAT_P_),), dtype=src.dtype)
# NOTE(review): clover_odd is allocated here but is not used by the cells
# visible in this notebook.
clover_odd = cp.zeros((define._LAT_S_, define._LAT_C_, define._LAT_S_, define._LAT_C_,
                       params[define._LAT_T_], params[define._LAT_Z_], params[define._LAT_Y_], int(params[define._LAT_X_]/define._LAT_P_),), dtype=src.dtype)
# Parameter set for the even-parity clover dslash: a copy of params with
# set index 5, plan _SET_PLAN2_, parity _EVEN_ and dagger _NO_USE_.
clover_dslash_eo_params = params.copy()
clover_dslash_eo_params[define._SET_INDEX_] = 5
clover_dslash_eo_params[define._SET_PLAN_] = define._SET_PLAN2_
clover_dslash_eo_params[define._PARITY_] = define._EVEN_
clover_dslash_eo_params[define._DAGGER_] = define._NO_USE_
# Presumably initializes the set at index 5 inside set_ptrs -- confirm
# against the qcu.applyInitQcu documentation.
qcu.applyInitQcu(set_ptrs, clover_dslash_eo_params, argv)
# Parameter set for the odd-parity clover dslash: same as above but with
# set index 6 and parity _ODD_.
clover_dslash_oe_params = params.copy()
clover_dslash_oe_params[define._SET_INDEX_] = 6
clover_dslash_oe_params[define._SET_PLAN_] = define._SET_PLAN2_
clover_dslash_oe_params[define._PARITY_] = define._ODD_
clover_dslash_oe_params[define._DAGGER_] = define._NO_USE_
qcu.applyInitQcu(set_ptrs, clover_dslash_oe_params, argv)

In [None]:
# Display the parameter set configured for the even-parity clover dslash.
clover_dslash_eo_params

In [None]:
# Display the shape of the even-parity clover buffer.
clover_even.shape

In [None]:
# Display the contents of the even-parity clover buffer.
clover_even

In [66]:
# Build a random source with the shape and dtype of src[define._EVEN_].
clover_src = cp.zeros_like(src[define._EVEN_])
clover_src = (linalg.initialize_random_vector(clover_src.flatten())).reshape(clover_src.shape)
# Two output buffers so that two code paths can be compared afterwards.
clover_dest = cp.zeros_like(clover_src)
_clover_dest = cp.zeros_like(clover_src)
# Path 1: single call that takes the source and U directly
# (presumably writes its result into _clover_dest -- confirm).
qcu.applyCloverDslashQcu(_clover_dest, clover_src, U, set_ptrs, clover_dslash_eo_params)
# Path 2: first call takes clover_even and U (presumably fills clover_even),
# second call takes clover_src, clover_even and U with clover_dest as its
# first argument -- confirm both against the qcu API.
qcu.applyCloverQcu(clover_even, U, set_ptrs, clover_dslash_eo_params)
qcu.applyDslashQcu(clover_dest, clover_src, clover_even,
                   U, set_ptrs, clover_dslash_eo_params)

multi-gpu wilson dslash total time: (without malloc free memcpy) :0.000414743 sec
make clover total time: (without malloc free memcpy) :0.005527234 sec
 inverse clover total time: (without malloc free memcpy) :0.000476537 sec
 give clover total time: (without malloc free memcpy) :0.000046449 sec
 make clover total time: (without malloc free memcpy) :0.004128930 sec
 inverse clover total time: (without malloc free memcpy) :0.000242353 sec
 multi-gpu wilson dslash total time: (without malloc free memcpy) :0.000368582 sec
clover:0x904a00000
long long clover:38732300288
give clover total time: (without malloc free memcpy) :0.000155360 sec
 

In [67]:
# Display the result buffer of the two-step clover path.
clover_dest

array([[[[[[-8.72796252e-02-8.52132365e-02j,
             7.74365216e-02-1.58794060e-01j,
             2.20583635e-03-2.14014687e-02j,
             2.48817764e-02+7.43237361e-02j],
           [-3.09630595e-02+5.89200854e-03j,
            -2.66518965e-02+5.25765643e-02j,
             7.93559253e-02+1.58306286e-01j,
            -4.11410257e-02+5.75599819e-03j],
           [-6.03435412e-02-1.25136971e-02j,
             1.33348722e-02+5.37771061e-02j,
             3.26906666e-02+3.39860767e-02j,
            -7.74066597e-02-4.04087827e-02j],
           ...,
           [ 8.27339441e-02-3.26872617e-02j,
             2.49487329e-02+1.33576766e-02j,
             7.82020092e-02+5.64777106e-03j,
            -1.06963444e+00-1.19962883e+00j],
           [ 2.68764105e-02+8.16772729e-02j,
             1.24155574e-01-4.80661392e-02j,
            -2.34788619e-02+3.30555141e-02j,
            -4.50811647e-02+4.90423106e-02j],
           [-5.95556907e-02-4.28164750e-03j,
             1.00843348e-02-1.0729

In [None]:
# Element-wise difference between the two result buffers; all zeros means
# the two-step path and the single-call path produced identical output.
clover_dest-_clover_dest

array([[[[[[0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           ...,
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j]],

          [[0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           ...,
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j]],

          [[0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           ...,
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j]],

          ...,

          [[0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
           [0.+0.j, 0.+0.j, 0.

# Original CG. (Skip; don't run this.)

In [None]:
# b_e = src[define._EVEN_].flatten()
# b_o = src[define._ODD_].flatten()
# b__o = cp.zeros_like(b_o)
# tmp = cp.zeros_like(b_o)
# # b__o=b_o+kappa*D_oe(b_e)
# qcu.applyWilsonDslashQcu(tmp, b_e, U, set_ptrs, wilson_dslash_oe_params)
# b__o = b_o+kappa*tmp
# # b__o -> Dslash^dag b__o
# b__o = pdslash_dag(b__o)
# # Dslash(x_o)=b__o
# x_o = cg.slover(b=b__o, matvec=bistabcg_dslash, tol=1e-10, max_iter=1000000)
# # x_e  =b_e+kappa*D_eo(x_o)
# qcu.applyWilsonDslashQcu(tmp, x_o, U, set_ptrs, wilson_dslash_eo_params)
# x_e = b_e+kappa*tmp
# # give _dest
# _dest = cp.zeros_like(dest)
# _dest[define._EVEN_] = x_e.reshape(
#     dest[define._EVEN_].shape)
# _dest[define._ODD_] = x_o.reshape(
#     dest[define._ODD_].shape)
# print(np.linalg.norm(_dest-dest) /
#       np.linalg.norm(dest))

# Finalize pyqcu. (Skip; don't run this.)

In [None]:
# demo.end(set_ptrs=set_ptrs,params=params)