In [1]:
from numpy.polynomial.legendre import leggauss
from numpwd.integrate.angular import ReducedAngularPolynomial, get_x_mesh, get_phi_mesh
from numpwd.integrate.mesh.trns import get_trns_mesh
import numpy as np
import cupy as cp

from sympy import S, sqrt, expand_trig
from pandas import DataFrame, set_option, Series

from numpwd.integrate.numeric import ExpressionMap
from numpwd.qchannels.spin import get_spin_matrix_element, dict_to_data
from numpwd.integrate.analytic import SPHERICAL_BASE_SUBS, ANGLE_BASE_SUBS, integrate
from numpwd.integrate.angular import ReducedAngularPolynomial

set_option("max_colwidth", None)

In [2]:
# Operator structure (sigma1 . l1) * (sigma2 . l2), written out component by
# component so that each l-component can be substituted below.
sig1_k1 = S("sigma11 * l11 + sigma12  * l12 + sigma13 * l13")
sig2_k2 = S("sigma21 * l21 + sigma22  * l22 + sigma23 * l23")

# Components of l1 and l2 expressed through the p_i / p_o components and q;
# q only enters the third component, with the same sign for both vectors.
l1_components = {
    "l11": "+p_i1 - p_o1",
    "l12": "+p_i2 - p_o2",
    "l13": "+p_i3 - p_o3 + q/2",
}
l2_components = {
    "l21": "-p_i1 + p_o1",
    "l22": "-p_i2 + p_o2",
    "l23": "-p_i3 + p_o3 + q/2",
}

kernel = (sig1_k1 * sig2_k2).subs(l1_components).subs(l2_components)

# Table with columns s_o, ms_o, s_i, ms_i and the symbolic matrix element `val`.
pwd = DataFrame(get_spin_matrix_element(kernel))
pwd.head()

Unnamed: 0,s_o,ms_o,s_i,ms_i,val
0,0,0,0,0,p_i1**2 - 2*p_i1*p_o1 + p_i2**2 - 2*p_i2*p_o2 + p_i3**2 - 2*p_i3*p_o3 + p_o1**2 + p_o2**2 + p_o3**2 - q**2/4
1,0,0,1,-1,sqrt(2)*q*(-p_i1 + I*p_i2 + p_o1 - I*p_o2)/2
2,0,0,1,1,sqrt(2)*q*(-p_i1 - I*p_i2 + p_o1 + I*p_o2)/2
3,1,-1,0,0,sqrt(2)*q*(-p_i1 - I*p_i2 + p_o1 + I*p_o2)/2
4,1,1,0,0,sqrt(2)*q*(-p_i1 + I*p_i2 + p_o1 - I*p_o2)/2


In [3]:
def _to_spherical_exponentials(row):
    """Rewrite one row's ``val`` expression in the angular variables.

    Applies ``SPHERICAL_BASE_SUBS`` followed by ``ANGLE_BASE_SUBS``, rewrites
    the result in terms of ``exp``, expands it and finally runs
    ``expand_trig``.  Judging by the recorded output, the result depends on
    ``p_i``, ``p_o``, ``x_i``, ``x_o``, ``phi`` and ``Phi``.
    """
    in_angles = row["val"].subs(SPHERICAL_BASE_SUBS).subs(ANGLE_BASE_SUBS)
    return expand_trig(in_angles.rewrite(S("exp")).expand())


# `pwd` itself is left untouched; only the new frame carries the rewritten
# expressions.
df = pwd.assign(val=pwd.apply(_to_spherical_exponentials, axis=1))
df.head()

Unnamed: 0,s_o,ms_o,s_i,ms_i,val
0,0,0,0,0,p_i**2 - 2*p_i*p_o*x_i*x_o - p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2)*exp(I*phi) - p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2)*exp(-I*phi) + p_o**2 - q**2/4
1,0,0,1,-1,-sqrt(2)*p_i*q*sqrt(1 - x_i**2)*exp(-I*Phi)*exp(-I*phi/2)/2 + sqrt(2)*p_o*q*sqrt(1 - x_o**2)*exp(-I*Phi)*exp(I*phi/2)/2
2,0,0,1,1,-sqrt(2)*p_i*q*sqrt(1 - x_i**2)*exp(I*Phi)*exp(I*phi/2)/2 + sqrt(2)*p_o*q*sqrt(1 - x_o**2)*exp(I*Phi)*exp(-I*phi/2)/2
3,1,-1,0,0,-sqrt(2)*p_i*q*sqrt(1 - x_i**2)*exp(I*Phi)*exp(I*phi/2)/2 + sqrt(2)*p_o*q*sqrt(1 - x_o**2)*exp(I*Phi)*exp(-I*phi/2)/2
4,1,1,0,0,-sqrt(2)*p_i*q*sqrt(1 - x_i**2)*exp(-I*Phi)*exp(-I*phi/2)/2 + sqrt(2)*p_o*q*sqrt(1 - x_o**2)*exp(-I*Phi)*exp(I*phi/2)/2


In [4]:
def integrate_out_big_phi(expr, mla_max=2):
    """Project ``expr`` onto the phases ``exp(-I*mla*Phi)`` and integrate.

    For every integer ``mla`` in ``[-mla_max, mla_max]`` the expression is
    multiplied by ``exp(-I*mla*Phi)`` and handed to ``integrate``.
    NOTE(review): ``integrate`` presumably integrates ``Phi`` over a full
    period (the recorded results carry a factor of 2*pi and no ``Phi``
    dependence) -- confirm against ``numpwd.integrate.analytic``.

    Parameters
    ----------
    expr : sympy expression
        Expression that may depend on ``Phi``.
    mla_max : int, optional
        Largest ``|mla|`` that is projected out.  The default of 2 reproduces
        the previously hard-coded range ``-2 .. 2``.

    Returns
    -------
    pandas.Series
        Integrated expressions indexed by ``mla`` in ascending order.

    Raises
    ------
    ValueError
        If ``mla_max`` is negative.
    """
    if mla_max < 0:
        raise ValueError(f"mla_max must be non-negative, got {mla_max}")

    return Series(
        {
            mla: integrate(expr * S(f"exp(-I*{mla}*Phi)"))
            for mla in range(-mla_max, mla_max + 1)
        }
    )

In [5]:
spin_levels = ["s_o", "ms_o", "s_i", "ms_i"]
all_levels = spin_levels + ["mla"]

# One integrated expression per spin channel and mla value, stacked into a
# single Series whose last index level is mla.
projected = (
    df.set_index(spin_levels)["val"]
    .apply(integrate_out_big_phi)
    .stack()
    .rename_axis(all_levels)
    .rename("val")
)

# Keep only the non-vanishing projections and return them as a sorted frame
# with a single `val` column.
tf = projected[projected != 0].reset_index().set_index(all_levels).sort_index()
tf

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,val
s_o,ms_o,s_i,ms_i,mla,Unnamed: 5_level_1
0,0,0,0,0,2*pi*p_i**2 - 4*pi*p_i*p_o*x_i*x_o - 2*pi*p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2)*exp(I*phi) - 2*pi*p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2)*exp(-I*phi) + 2*pi*p_o**2 - pi*q**2/2
0,0,1,-1,-1,-sqrt(2)*pi*p_i*q*sqrt(1 - x_i**2)*exp(-I*phi/2) + sqrt(2)*pi*p_o*q*sqrt(1 - x_o**2)*exp(I*phi/2)
0,0,1,1,1,-sqrt(2)*pi*p_i*q*sqrt(1 - x_i**2)*exp(I*phi/2) + sqrt(2)*pi*p_o*q*sqrt(1 - x_o**2)*exp(-I*phi/2)
1,-1,0,0,1,-sqrt(2)*pi*p_i*q*sqrt(1 - x_i**2)*exp(I*phi/2) + sqrt(2)*pi*p_o*q*sqrt(1 - x_o**2)*exp(-I*phi/2)
1,-1,1,-1,0,-2*pi*p_i**2*x_i**2 + 4*pi*p_i*p_o*x_i*x_o - 2*pi*p_o**2*x_o**2 + pi*q**2/2
1,-1,1,0,1,2*sqrt(2)*pi*p_i**2*x_i*sqrt(1 - x_i**2)*exp(I*phi/2) - 2*sqrt(2)*pi*p_i*p_o*x_i*sqrt(1 - x_o**2)*exp(-I*phi/2) - 2*sqrt(2)*pi*p_i*p_o*x_o*sqrt(1 - x_i**2)*exp(I*phi/2) + 2*sqrt(2)*pi*p_o**2*x_o*sqrt(1 - x_o**2)*exp(-I*phi/2)
1,-1,1,1,2,2*pi*p_i**2*x_i**2*exp(I*phi) - 2*pi*p_i**2*exp(I*phi) + 4*pi*p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2) + 2*pi*p_o**2*x_o**2*exp(-I*phi) - 2*pi*p_o**2*exp(-I*phi)
1,0,1,-1,-1,2*sqrt(2)*pi*p_i**2*x_i*sqrt(1 - x_i**2)*exp(-I*phi/2) - 2*sqrt(2)*pi*p_i*p_o*x_i*sqrt(1 - x_o**2)*exp(I*phi/2) - 2*sqrt(2)*pi*p_i*p_o*x_o*sqrt(1 - x_i**2)*exp(-I*phi/2) + 2*sqrt(2)*pi*p_o**2*x_o*sqrt(1 - x_o**2)*exp(I*phi/2)
1,0,1,0,0,4*pi*p_i**2*x_i**2 - 2*pi*p_i**2 - 4*pi*p_i*p_o*x_i*x_o + 2*pi*p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2)*exp(I*phi) + 2*pi*p_i*p_o*sqrt(1 - x_i**2)*sqrt(1 - x_o**2)*exp(-I*phi) + 4*pi*p_o**2*x_o**2 - 2*pi*p_o**2 - pi*q**2/2
1,0,1,1,1,-2*sqrt(2)*pi*p_i**2*x_i*sqrt(1 - x_i**2)*exp(I*phi/2) + 2*sqrt(2)*pi*p_i*p_o*x_i*sqrt(1 - x_o**2)*exp(-I*phi/2) + 2*sqrt(2)*pi*p_i*p_o*x_o*sqrt(1 - x_i**2)*exp(I*phi/2) - 2*sqrt(2)*pi*p_o**2*x_o*sqrt(1 - x_o**2)*exp(-I*phi/2)


In [6]:
# Mesh sizes consumed by the mesh-construction cell below.
NPHI = 20  # argument of get_phi_mesh
NX = 30  # order passed to leggauss, i.e. number of Gauss-Legendre nodes in x
NP1 = 40  # first argument of get_trns_mesh
NP2 = 20  # second argument of get_trns_mesh
NQ = 2  # number of evenly spaced q values on [0, 1]

In [7]:
# Each mesh helper returns a pair that is unpacked as (nodes, weights).
phi, wphi = get_phi_mesh(NPHI)
# Gauss-Legendre sample points and weights on [-1, 1].
x, wx = leggauss(NX)
# NOTE(review): the meaning of the two get_trns_mesh arguments is not visible
# here -- confirm against numpwd.integrate.mesh.trns.
p, wp = get_trns_mesh(NP1, NP2)
# NQ evenly spaced values from 0 to 1 (both end points included).
q = np.linspace(0, 1, NQ)

In [53]:
# Angular integrator built on the host (NumPy) x/phi mesh and weights.
# lmax=4 is hard-coded here and repeated for the GPU instance further down.
poly = ReducedAngularPolynomial(x, phi, lmax=4, wx=wx, wphi=wphi)

In [8]:
# Use the second row of `tf` as the benchmark case: its expression and the
# mla value stored in the last index level.
benchmark_row = tf.iloc[1]
expr = benchmark_row.val
mla = benchmark_row.name[-1]

# Argument order of the compiled expression; the call below supplies the
# meshes in exactly this order.
op_args = ("p_o", "p_i", "q", "x_o", "x_i", "phi")
op = ExpressionMap(expr, op_args)
mat = op(p, p, q, x, x, phi)

In [None]:
# Host evaluation of the expression on the full mesh; kept for the GPU
# comparison further down.
tensor_cpu = op(p, p, q, x, x, phi)
# Time the same evaluation on the CPU.
%timeit op(p, p, q, x, x, phi)
# Size of the evaluated tensor in GiB.
tensor_cpu.nbytes / 1024 ** 3

In [56]:
%%timeit
# CPU reference timing: evaluate the expression on the mesh, then run the
# angular integration for the selected mla.
# Names assigned inside a %%timeit cell (`mat`, `res`) are not stored in the
# notebook namespace afterwards.
mat = op(p, p, q, x, x, phi)
res = poly.integrate(mat, mla, max_chunk_size=10)

35.8 s ± 214 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
# CuPy copies of the mesh nodes ...
phi_gpu, x_gpu, p_gpu, q_gpu = (cp.array(nodes) for nodes in (phi, x, p, q))

# ... and of the matching integration weights.
wphi_gpu, wx_gpu, wp_gpu = (cp.array(weights) for weights in (wphi, wx, wp))

In [10]:
# Same call as for the CPU tensor, but fed with the CuPy mesh arrays.
# NOTE(review): assumes ExpressionMap dispatches on the array type so that the
# result is a CuPy array -- confirm against numpwd.integrate.numeric.
tensor_gpu = op(p_gpu, p_gpu, q_gpu, x_gpu, x_gpu, phi_gpu)

In [11]:
# GPU counterpart of `poly`: constructed from the host x/phi nodes together
# with the CuPy weight arrays, then patched so that the nodes and the matrix
# are CuPy arrays as well.
# NOTE(review): this relies on ReducedAngularPolynomial accepting CuPy weights
# in its constructor and on `x`, `phi` and `matrix` being plain, writable
# attributes that `integrate` reads -- confirm against the class.
poly_gpu = ReducedAngularPolynomial(x, phi, lmax=4, wx=wx_gpu, wphi=wphi_gpu)
poly_gpu.x = x_gpu
poly_gpu.phi = phi_gpu
poly_gpu.matrix = cp.array(poly_gpu.matrix)

In [12]:
%%timeit
tensor_gpu = op(p_gpu, p_gpu, q_gpu, x_gpu, x_gpu, phi_gpu)
poly_gpu.integrate(tensor_gpu, mla, max_chunk_size=1)

1.28 s ± 463 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
tensor_gpu = op(p_gpu, p_gpu, q_gpu, x_gpu, x_gpu, phi_gpu)
%timeit op(p_gpu, p_gpu, q_gpu, x_gpu, x_gpu, phi_gpu)
tensor_gpu.nbytes / 1024 ** 3

In [None]:
# Mean absolute element-wise deviation between the GPU result (copied back to
# the host) and the CPU result.
host_copy = cp.asnumpy(tensor_gpu)
np.mean(np.abs(host_copy - tensor_cpu))

In [None]:
%%timeit
(
    tensor_cpu
    * tensor_cpu
    * wx.reshape(1, 1, 1, NX, 1, 1)
    * wx.reshape(1, 1, 1, 1, NX, 1)
    * wphi.reshape(1, 1, 1, 1, 1, NPHI)
)

In [None]:
%%timeit
(
    tensor_gpu
    * tensor_gpu
    * wx_gpu.reshape(1, 1, 1, NX, 1, 1)
    * wx_gpu.reshape(1, 1, 1, 1, NX, 1)
    * wphi_gpu.reshape(1, 1, 1, 1, 1, NPHI)
)