In [3]:
import numpy as np
from scipy.linalg import expm, kron, svd
from scipy.optimize import least_squares

# Single-qubit operators as 2x2 complex arrays: Pauli X, Y, Z and the identity.
X = np.array(((0, 1), (1, 0)), dtype=np.complex128)
Y = np.array(((0, -1j), (1j, 0)), dtype=np.complex128)
Z = np.diag([1, -1]).astype(np.complex128)
I = np.identity(2, dtype=np.complex128)

def B_gate(alpha=np.pi / 4.0):
    """
    Return the 4x4 two-qubit gate exp(-i * alpha * (XX + 1/2 YY)).

    Parameters
    ----------
    alpha : float, optional
        Angle multiplying the generator XX + 1/2 YY. The default, pi/4, gives
        the B gate exp(-i * pi/4 * (XX + 1/2 YY)), so a bare `B_gate()` call
        returns the same matrix as before.

    Returns
    -------
    numpy.ndarray
        4x4 complex array produced by `scipy.linalg.expm`.
    """
    # Generator assembled from the module-level Pauli matrices X and Y.
    H = np.kron(X, X) + 0.5 * np.kron(Y, Y)
    return expm(-1j * alpha * H)

def u3_from_angles(theta, phi, lam):
    """
    Build a 2x2 single-qubit gate from three angles (U3-style, as in Qiskit's U3).

    The returned matrix is

        [[cos(theta/2),               -exp(i*lam) * sin(theta/2)],
         [exp(i*phi) * sin(theta/2),   exp(i*(phi+lam)) * cos(theta/2)]]

    i.e. the top-left entry is kept real, so the global phase is fixed by this
    convention rather than being a free parameter.

    Parameters
    ----------
    theta, phi, lam : float
        Rotation angle and the two phase angles, in radians.

    Returns
    -------
    numpy.ndarray
        2x2 complex array.
    """
    half_angle = 0.5 * theta
    cos_half, sin_half = np.cos(half_angle), np.sin(half_angle)
    phase_phi, phase_lam = np.exp(1j * phi), np.exp(1j * lam)

    upper_row = [cos_half, -phase_lam * sin_half]
    lower_row = [phase_phi * sin_half, phase_phi * phase_lam * cos_half]
    return np.array([upper_row, lower_row], dtype=complex)

def two_qubit_compose(params, B):
    """
    Build (L0⊗L1) B (M0⊗M1) B (R0⊗R1) from a vector of 18 single-qubit angles.

    Parameters
    ----------
    params : array-like, 18 elements
        Six consecutive (theta, phi, lam) triples, in this order:
            [theta_r0, phi_r0, lam_r0, theta_r1, phi_r1, lam_r1,   # R0, R1
             theta_m0, phi_m0, lam_m0, theta_m1, phi_m1, lam_m1,   # M0, M1
             theta_l0, phi_l0, lam_l0, theta_l1, phi_l1, lam_l1]   # L0, L1
        Each triple is turned into a 2x2 gate by `u3_from_angles`.
    B : numpy.ndarray
        4x4 two-qubit gate, used twice in the product.

    Returns
    -------
    numpy.ndarray
        The 4x4 product L @ B @ M @ B @ R, where R = R0⊗R1 is the rightmost
        factor, M = M0⊗M1 sits between the two B gates and L = L0⊗L1 is the
        leftmost factor.
    """
    # Accept lists/tuples as well as arrays (a plain list has no `.size`).
    params = np.asarray(params)
    assert params.size == 18

    # One U3 gate per consecutive angle triple: R0, R1, M0, M1, L0, L1.
    R0, R1, M0, M1, L0, L1 = (
        u3_from_angles(theta, phi, lam) for theta, phi, lam in params.reshape(6, 3)
    )

    # numpy.kron (already used by B_gate) replaces scipy.linalg.kron, which is
    # deprecated in favour of the numpy function.
    R = np.kron(R0, R1)
    M = np.kron(M0, M1)
    L = np.kron(L0, L1)

    return L @ B @ M @ B @ R

def residual(params, U_target, B):
    """
    Real-valued residual vector between the composed circuit and the target.

    The complex difference two_qubit_compose(params, B) - U_target is split
    into its real parts followed by its imaginary parts, each flattened in
    row-major order (16 complex entries -> 32 reals for a 4x4 target).
    """
    mismatch = two_qubit_compose(params, B) - U_target
    real_part = np.real(mismatch).ravel()
    imag_part = np.imag(mismatch).ravel()
    return np.concatenate((real_part, imag_part))

def decompose_with_two_B(U_target, n_restarts=8, tol=1e-9, max_nfev=2000):
    """
    Numerically fit U_target as (L0⊗L1) B (M0⊗M1) B (R0⊗R1).

    B is the fixed gate returned by `B_gate()`; the six single-qubit gates are
    parameterized by `u3_from_angles`. The 18 angles are found with
    `scipy.optimize.least_squares` on `residual`, restarted from several random
    initial points (drawn from the global `np.random` state) to reduce the risk
    of ending in a poor local minimum. The restart with the smallest residual
    norm is kept.

    The fit minimizes the entrywise difference to U_target, so it targets
    U_target itself rather than U_target up to a global phase. U_target is not
    checked for unitarity.

    Parameters
    ----------
    U_target : array-like, shape (4, 4)
        Target two-qubit matrix.
    n_restarts : int, optional
        Number of random initializations; must be at least 1.
    tol : float, optional
        Passed to `least_squares` as `xtol`, `ftol` and `gtol`.
    max_nfev : int, optional
        Maximum number of residual evaluations per restart.

    Returns
    -------
    dict
        'params'            : best 18-element angle vector
        'L0','L1','M0','M1','R0','R1' : the fitted 2x2 gates
        'B'                 : the 4x4 B gate used
        'U_recon'           : reconstructed 4x4 matrix
        'frob_err'          : Frobenius norm of U_recon - U_target
        'trace_fidelity'    : |Tr(U_target^† U_recon)| / 4
        'optimizer_result'  : the `least_squares` result of the best restart
        'final_cost_norm'   : 2-norm of the residual vector at the best point

    Raises
    ------
    ValueError
        If U_target is not 4x4 or n_restarts is smaller than 1.
    """
    U_target = np.asarray(U_target)
    if U_target.shape != (4, 4):
        raise ValueError("U_target must be 4x4.")
    # Without at least one restart there is no optimizer result to unpack below.
    if n_restarts < 1:
        raise ValueError("n_restarts must be at least 1.")

    B = B_gate()
    best = None
    best_cost = np.inf
    for _ in range(n_restarts):
        # Random initial angles in [-pi, pi).
        init = np.random.uniform(low=-np.pi, high=np.pi, size=18)
        res = least_squares(residual, init, args=(U_target, B),
                            xtol=tol, ftol=tol, gtol=tol,
                            max_nfev=max_nfev, verbose=0)
        # res.fun is the residual vector at res.x, so no re-evaluation is needed.
        cost = np.linalg.norm(res.fun)
        if best is None or cost < best_cost:
            best_cost = cost
            best = res

    # Rebuild the individual gates from the best angle vector
    # (triples in the order R0, R1, M0, M1, L0, L1).
    params = best.x
    R0, R1, M0, M1, L0, L1 = (
        u3_from_angles(theta, phi, lam) for theta, phi, lam in params.reshape(6, 3)
    )

    # numpy.kron replaces scipy.linalg.kron, which is deprecated in its favour.
    R = np.kron(R0, R1)
    M = np.kron(M0, M1)
    L = np.kron(L0, L1)
    U_recon = L @ B @ M @ B @ R

    # Frobenius error is sensitive to global phase; the trace overlap is not.
    frob_err = np.linalg.norm(U_recon - U_target, ord='fro')
    fid = abs(np.trace(U_target.conj().T @ U_recon)) / 4.0

    return {
        'params': params,
        'L0': L0, 'L1': L1,
        'M0': M0, 'M1': M1,
        'R0': R0, 'R1': R1,
        'B': B,
        'U_recon': U_recon,
        'frob_err': frob_err,
        'trace_fidelity': fid,
        'optimizer_result': best,
        'final_cost_norm': best_cost
    }

# Demo: fit one random 4x4 unitary with the fixed-B circuit and print the outcome.
if __name__ == "__main__":
    # QR-factorize a random complex Gaussian matrix; Q is the unitary factor.
    Xrand = np.random.randn(4, 4) + 1j * np.random.randn(4, 4)
    Q, R = np.linalg.qr(Xrand)
    # Unit-modulus phases of R's diagonal; each column of Q is multiplied by
    # the reciprocal of the matching phase (Q itself is already unitary).
    R_diag = np.diag(R)
    lam = R_diag / np.abs(R_diag)
    U = Q @ np.diag(1 / lam)

    result = decompose_with_two_B(U, n_restarts=12)

    # Scalar quality measures first ...
    for caption, key in (
        ("Frobenius error:", 'frob_err'),
        ("Trace fidelity (|Tr(U^† U_recon)|/4):", 'trace_fidelity'),
        ("Final optimizer cost norm:", 'final_cost_norm'),
    ):
        print(caption, result[key])

    # ... then the six fitted single-qubit gates.
    for gate_name in ("R0", "R1", "M0", "M1", "L0", "L1"):
        print(f"{gate_name}:\n", result[gate_name])


Frobenius error: 7.184708641343273e-16
Trace fidelity (|Tr(U^† U_recon)|/4): 1.0
Final optimizer cost norm: 7.184708641343273e-16
R0:
 [[0.73770821+0.j         0.09073197+0.66899499j]
 [0.57641779+0.35146709j 0.29591775-0.67575594j]]
R1:
 [[ 0.36334192+0.j          0.56987501+0.73703808j]
 [ 0.81517432+0.45108034j -0.05529106-0.35911036j]]
M0:
 [[ 0.95313461+0.j          0.05740457-0.29705072j]
 [ 0.27378439+0.12874985j -0.56189548+0.76989548j]]
M1:
 [[ 0.49907294+0.j         -0.36496848+0.78595433j]
 [-0.83017451+0.24846827j -0.07158035+0.493913j  ]]
L0:
 [[ 0.87552372+0.j          0.44818177+0.18053067j]
 [ 0.47798106+0.07065637j -0.75554814-0.44236726j]]
L1:
 [[ 0.76701063+0.j          0.43054807+0.47573422j]
 [-0.37485769-0.520746j   -0.16086088+0.74995273j]]


In [6]:
import numpy as np
from scipy.linalg import expm, kron
from scipy.optimize import least_squares

# Operators available to this cell: Pauli X, Pauli Y and the 2x2 identity (complex dtype).
X = np.asarray([[0, 1], [1, 0]]).astype(complex)
Y = np.asarray([[0, -1j], [1j, 0]], dtype=complex)
I = np.identity(2).astype(complex)

def B_gate(alpha=np.pi / 4.0):
    """
    Return 4x4 B(alpha) = exp(-i * alpha * (XX + 0.5 YY)).

    Parameters
    ----------
    alpha : float, optional
        Angle multiplying the generator XX + 0.5 YY. Defaults to pi/4.

        The default matters in this notebook: this definition replaces the
        zero-argument `B_gate` from the earlier cell, whose
        `decompose_with_two_B` calls `B_gate()` with no argument. With the
        default, that call keeps returning exp(-i * pi/4 * (XX + 0.5 YY))
        instead of raising a TypeError after this cell has run.

    Returns
    -------
    numpy.ndarray
        4x4 complex array produced by `scipy.linalg.expm`.
    """
    # Generator assembled from the module-level Pauli matrices X and Y.
    H = np.kron(X, X) + 0.5 * np.kron(Y, Y)
    return expm(-1j * alpha * H)

def u3_from_angles(theta, phi, lam):
    """
    U3-style single-qubit gate built from three angles (radians).

    Entries, row by row:
        cos(theta/2),              -exp(i*lam) * sin(theta/2)
        exp(i*phi) * sin(theta/2),  exp(i*phi) * exp(i*lam) * cos(theta/2)

    The top-left entry is real, which is how this parameterization fixes the
    global phase. Returns a 2x2 complex array.
    """
    half = theta / 2.0
    cos_half = np.cos(half)
    sin_half = np.sin(half)
    e_phi = np.exp(phi * 1j)
    e_lam = np.exp(lam * 1j)
    entries = (
        (cos_half, -e_lam * sin_half),
        (e_phi * sin_half, e_phi * e_lam * cos_half),
    )
    return np.array(entries, dtype=complex)

def two_qubit_compose_with_alpha(params):
    """
    Build (L0⊗L1) B(alpha) (M0⊗M1) B(alpha) (R0⊗R1) from a 19-element vector.

    Parameters
    ----------
    params : array-like, 19 elements
        - params[0:18]: six (theta, phi, lam) triples for `u3_from_angles`,
          in the order R0, R1, M0, M1, L0, L1
        - params[18]: alpha, the angle passed to `B_gate`

    Returns
    -------
    (numpy.ndarray, float)
        The 4x4 product L @ B @ M @ B @ R and the alpha value that was used.
    """
    # Accept lists/tuples as well as arrays (a plain list has no `.size`).
    params = np.asarray(params)
    assert params.size == 19

    # One U3 gate per consecutive angle triple: R0, R1, M0, M1, L0, L1.
    R0, R1, M0, M1, L0, L1 = (
        u3_from_angles(theta, phi, lam) for theta, phi, lam in params[:18].reshape(6, 3)
    )
    alpha = params[18]

    # numpy.kron (already used by B_gate) replaces scipy.linalg.kron, which is
    # deprecated in favour of the numpy function.
    R = np.kron(R0, R1)
    M = np.kron(M0, M1)
    L = np.kron(L0, L1)
    B = B_gate(alpha)
    return L @ B @ M @ B @ R, alpha

def residual_with_phase_penalty(params, U_target, phase_weight, alpha_norm=np.pi / 4.0):
    """
    Residual vector for the variable-alpha fit, with a penalty on alpha appended.

    The first entries are the real parts followed by the imaginary parts of
    U_try - U_target (32 reals for a 4x4 target), where U_try comes from
    `two_qubit_compose_with_alpha(params)`. The last entry is

        penalty = sqrt(phase_weight) * (alpha / alpha_norm)

    so that its square, as summed by a least-squares solver, equals
    phase_weight * (alpha / alpha_norm)**2.

    Parameters
    ----------
    params : array-like, 19 elements
        18 single-qubit angles followed by alpha.
    U_target : numpy.ndarray
        4x4 target matrix.
    phase_weight : float
        Non-negative weight of the alpha penalty; larger values favour
        smaller alpha.
    alpha_norm : float, optional
        Scale that alpha is divided by inside the penalty. Defaults to pi/4,
        the value previously hard-coded here.

    Returns
    -------
    numpy.ndarray
        1-D real array: mismatch residuals followed by the single penalty term.

    Raises
    ------
    ValueError
        If phase_weight is negative (its square root would be NaN).
    """
    if phase_weight < 0:
        raise ValueError("phase_weight must be non-negative.")

    U_try, alpha = two_qubit_compose_with_alpha(params)
    diff = U_try - U_target
    # Penalty grows linearly with alpha, measured in units of alpha_norm.
    pen = np.sqrt(phase_weight) * (alpha / alpha_norm)
    return np.concatenate([diff.real.ravel(), diff.imag.ravel(), np.array([pen])])

def decompose_with_two_B_variable_alpha(U_target,
                                       n_restarts=10,
                                       phase_weight=1.0,
                                       alpha_bounds=(0.0, np.pi/2),
                                       tol=1e-9,
                                       max_nfev=3000):
    """
    Fit U_target as (L0⊗L1) B(alpha) (M0⊗M1) B(alpha) (R0⊗R1) with alpha free.

    The 18 single-qubit angles and alpha are optimized together with
    `scipy.optimize.least_squares` on `residual_with_phase_penalty`, from
    several random starting points drawn from the global `np.random` state.
    Angles are bounded to [-pi, pi] and alpha to `alpha_bounds`. Among the
    restarts, the one with the smallest mismatch norm *excluding* the penalty
    term is returned.

    Parameters
    ----------
    U_target : array-like, shape (4, 4)
        Target two-qubit matrix (not checked for unitarity).
    n_restarts : int, optional
        Number of random initializations; must be at least 1.
    phase_weight : float, optional
        Larger values give a stronger preference for small alpha.
    alpha_bounds : tuple (min_alpha, max_alpha), optional
        Allowed range for alpha, e.g. to respect hardware limits.
    tol : float, optional
        Passed to `least_squares` as `xtol`, `ftol` and `gtol`.
    max_nfev : int, optional
        Maximum number of residual evaluations per restart.

    Returns
    -------
    dict
        'params'            : best 19-element vector (18 angles + alpha)
        'L0','L1','M0','M1','R0','R1' : the fitted 2x2 gates
        'alpha'             : fitted alpha
        'B'                 : 4x4 gate B_gate(alpha)
        'U_recon'           : reconstructed 4x4 matrix
        'frob_err'          : Frobenius norm of U_recon - U_target
        'trace_fidelity'    : |Tr(U_target^† U_recon)| / 4
        'optimizer_result'  : the `least_squares` result of the best restart
        'final_err_norm_without_penalty' : mismatch norm without the penalty
        'phase_weight'      : the phase_weight that was used

    Raises
    ------
    ValueError
        If U_target is not 4x4 or n_restarts is smaller than 1.
    """
    U_target = np.asarray(U_target)
    if U_target.shape != (4, 4):
        raise ValueError("U_target must be 4x4.")
    # Without at least one restart there is no optimizer result to unpack below.
    if n_restarts < 1:
        raise ValueError("n_restarts must be at least 1.")

    alpha_lo, alpha_hi = alpha_bounds
    # Box constraints: 18 angles in [-pi, pi], then alpha in its own range.
    lb = np.concatenate([np.full(18, -np.pi), [alpha_lo]])
    ub = np.concatenate([np.full(18, np.pi), [alpha_hi]])

    best = None
    best_cost = np.inf
    for _ in range(n_restarts):
        # Random starting point inside the bounds.
        init_angles = np.random.uniform(-np.pi, np.pi, size=18)
        init_alpha = np.random.uniform(alpha_lo, alpha_hi)
        init = np.concatenate([init_angles, [init_alpha]])

        res = least_squares(residual_with_phase_penalty, init,
                            args=(U_target, phase_weight),
                            bounds=(lb, ub),
                            xtol=tol, ftol=tol, gtol=tol,
                            max_nfev=max_nfev, verbose=0)

        # res.fun is the residual vector at res.x; its last entry is the
        # penalty, which is left out when ranking restarts by mismatch.
        err_norm = np.linalg.norm(res.fun[:-1])
        if best is None or err_norm < best_cost:
            best_cost = err_norm
            best = res

    # Rebuild the individual gates from the best parameter vector
    # (triples in the order R0, R1, M0, M1, L0, L1, then alpha).
    params = best.x
    R0, R1, M0, M1, L0, L1 = (
        u3_from_angles(theta, phi, lam) for theta, phi, lam in params[:18].reshape(6, 3)
    )
    alpha = params[18]

    # numpy.kron replaces scipy.linalg.kron, which is deprecated in its favour.
    R = np.kron(R0, R1)
    M = np.kron(M0, M1)
    L = np.kron(L0, L1)
    B = B_gate(alpha)
    U_recon = L @ B @ M @ B @ R

    # Frobenius error is sensitive to global phase; the trace overlap is not.
    frob_err = np.linalg.norm(U_recon - U_target, ord='fro')
    trace_fid = abs(np.trace(U_target.conj().T @ U_recon)) / 4.0

    return {
        'params': params,
        'L0': L0, 'L1': L1,
        'M0': M0, 'M1': M1,
        'R0': R0, 'R1': R1,
        'alpha': alpha,
        'B': B,
        'U_recon': U_recon,
        'frob_err': frob_err,
        'trace_fidelity': trace_fid,
        'optimizer_result': best,
        'final_err_norm_without_penalty': best_cost,
        'phase_weight': phase_weight
    }

# -------------------------
# Demo: one random unitary, fitted with a weak and then a strong alpha penalty
# -------------------------
if __name__ == "__main__":
    # QR-factorize a random complex Gaussian matrix; Q is the unitary factor.
    Xrand = np.random.randn(4, 4) + 1j * np.random.randn(4, 4)
    Q, R = np.linalg.qr(Xrand)
    # Each column of Q is multiplied by the reciprocal of the unit-modulus
    # phase of the matching diagonal entry of R.
    R_diag = np.diag(R)
    lam = R_diag / np.abs(R_diag)
    U = Q @ np.diag(1 / lam)

    # (printed caption, result-dict key) pairs reported for each run.
    report_fields = (
        ("alpha:", 'alpha'),
        ("trace fidelity:", 'trace_fidelity'),
        ("frob_err:", 'frob_err'),
    )

    # Small weight: the fit is dominated by the mismatch to U.
    print("=== low penalty (favor fidelity) ===")
    res1 = decompose_with_two_B_variable_alpha(
        U, n_restarts=8, phase_weight=0.001, alpha_bounds=(0.0, np.pi/2)
    )
    for caption, key in report_fields:
        print(caption, res1[key])

    # Larger weight: smaller alpha is traded against reconstruction accuracy.
    print("\n=== stronger penalty (favor smaller alpha) ===")
    res2 = decompose_with_two_B_variable_alpha(
        U, n_restarts=8, phase_weight=1, alpha_bounds=(0.0, np.pi/2)
    )
    for caption, key in report_fields:
        print(caption, res2[key])


=== low penalty (favor fidelity) ===
alpha: 0.5028331915801256
trace fidelity: 0.9999999931537803
frob_err: 0.0002340325447622344

=== stronger penalty (favor smaller alpha) ===
alpha: 0.3955441280440749
trace fidelity: 0.9960978363638039
frob_err: 0.17669792018855687
