In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp

## Solving the problem
$$
\begin{array}{ll}
\text{minimize} & q(z) + h(x) \\
\text{subject to} & x = Az
\end{array}
$$


### Special case
$$
\begin{array}{ll}
\text{minimize} & g^Tz  \\
\text{subject to} & \|Az\|_{\max} \leq \beta 
\end{array}
$$
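where $g \in \mathcal{R}(A^T)$, so that the objective is bounded below on the feasible set. This is the general problem with $q(z) = g^Tz$ and $h$ the indicator function of $\{x : \|x\|_{\max} \leq \beta\}$.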

In [2]:
def prox_lin(x, g, gamma):
    """Proximal operator of the linear function ``gamma * <g, x>``.

    Minimising ``gamma * <g, u> + 0.5 * ||u - x||^2`` over ``u`` gives a
    plain shift of ``x`` by ``gamma * g``.

    Args:
        x: Point at which the prox is evaluated.
        g: Coefficient vector of the linear function.
        gamma: Step size scaling the linear function.

    Returns:
        ``x - gamma * g``.
    """
    shift = gamma * g
    return x - shift



def prox_l1(x, rho):
    """Proximal operator of ``rho * ||x||_1`` (element-wise soft-thresholding).

    Each entry has its magnitude reduced by ``rho`` and is clamped at zero,
    while keeping its original sign.

    Args:
        x: Input array (or scalar).
        rho: Threshold.

    Returns:
        ``sign(x) * max(|x| - rho, 0)`` evaluated element-wise.
    """
    magnitude = np.abs(x)
    shrunk = np.maximum(magnitude - rho, 0)
    return np.sign(x) * shrunk




def pdhg_method_A(prox_h_conj, prox_q, A, lamb_max=None, max_iter=100, tol=1e-4):
    """Primal-dual hybrid gradient iteration for ``minimize q(z) + h(Az)``.

    Each iteration performs a dual prox step at the extrapolated primal point
    ``z_bar``, a primal prox step, and then the extrapolation
    ``z_bar = 2 * z_new - z``. Both step sizes are fixed to ``1 / lamb_max``.

    Args:
        prox_h_conj: Callable ``(y, rho) -> y_new``; called with the dual
            point and the dual step size.
        prox_q: Callable ``(z, gamma) -> z_new``; called with the primal
            point and the primal step size.
        A: Matrix of the linear map. Only ``A.shape``, ``A @ v``, ``A.T @ v``
            and scalar multiplication are used.
        lamb_max: Value whose reciprocal is used as step size. If ``None`` it
            is computed as ``np.linalg.norm(A, ord=2)``.
        max_iter: Maximum number of iterations. With ``max_iter <= 0`` the
            zero initial point is returned unchanged.
        tol: Stop once ``||z_new - z|| / (||z|| + 1e-10) < tol``; the index of
            the stopping iteration is printed as ``t=<index>``.

    Returns:
        Tuple ``(z, y)`` with the last primal and dual iterates.
    """
    if lamb_max is None:
        lamb_max = np.linalg.norm(A, ord=2)

    rho, gamma = [1.0 / lamb_max] * 2

    # Initialise before the loop so the iterates exist even when the loop body
    # never runs (max_iter <= 0); previously this raised UnboundLocalError.
    z = np.zeros(A.shape[1])
    y = np.zeros(A.shape[0])
    z_bar = z.copy()

    for t in range(max_iter):
        # `rho * A @ z_bar` is evaluated left to right as `(rho * A) @ z_bar`;
        # kept in this form so the floating-point results are unchanged.
        y = prox_h_conj(y + rho * A @ z_bar, rho)
        z_new = prox_q(z - gamma * A.T @ y, gamma)
        z_bar = 2 * z_new - z

        # Relative change of the primal iterate; 1e-10 guards against z == 0.
        if np.linalg.norm(z_new - z) / (np.linalg.norm(z) + 1e-10) < tol:
            z = z_new
            print(f"{t=}")
            break
        z = z_new

    return z, y



def pdhg_adapt_method_A(prox_h_conj, prox_q, A, lamb_max=None, max_iter=100, tol=1e-4, ada_freq=10, theta=0.9):
    """PDHG iteration for ``minimize q(z) + h(Az)`` with adaptive step balancing.

    Same iteration as the fixed-step variant, except that the dual and primal
    step sizes are ``rho = eta * w_k`` and ``gamma = eta / w_k`` with
    ``eta = 1 / lamb_max``, so their product stays ``eta ** 2`` while the
    weight ``w_k`` shifts the balance between them.

    Args:
        prox_h_conj: Callable ``(y, rho) -> y_new``; called with the dual
            point and the dual step size.
        prox_q: Callable ``(z, gamma) -> z_new``; called with the primal
            point and the primal step size.
        A: Matrix of the linear map. Only ``A.shape``, ``A @ v``, ``A.T @ v``
            and scalar multiplication are used.
        lamb_max: Value whose reciprocal is the base step ``eta``. If ``None``
            it is computed as ``np.linalg.norm(A, ord=2)``.
        max_iter: Maximum number of iterations. With ``max_iter <= 0`` the
            zero initial point is returned unchanged.
        tol: Stop once ``||z_new - z|| / (||z|| + 1e-10) < tol``; the index of
            the stopping iteration is printed as ``t=<index>``.
        ada_freq: The weight is updated on iterations with
            ``t % ada_freq == 0`` (this includes ``t == 0``).
        theta: Exponent given to the previous weight in the geometric
            averaging; the new movement ratio gets exponent ``1 - theta``.

    Returns:
        Tuple ``(z, y)`` with the last primal and dual iterates.
    """
    if lamb_max is None:
        lamb_max = np.linalg.norm(A, ord=2)

    eta = 1.0 / lamb_max
    w_k = 1.0
    rho, gamma = eta * w_k, eta / w_k

    # Initialise before the loop so the iterates exist even when the loop body
    # never runs (max_iter <= 0); previously this raised UnboundLocalError.
    z = np.zeros(A.shape[1])
    y = np.zeros(A.shape[0])
    z_bar = z.copy()

    for t in range(max_iter):
        # Dual step at the extrapolated primal point; record how far y moved.
        y_new = prox_h_conj(y + rho * A @ z_bar, rho)
        delta_y = np.sqrt(np.square(y_new - y).sum())
        y = y_new

        # Primal step, extrapolation, and how far z moved.
        z_new = prox_q(z - gamma * A.T @ y, gamma)
        z_bar = 2 * z_new - z
        z_step = z_new - z
        delta_z = np.sqrt(np.square(z_step).sum())

        # Relative change of the primal iterate; 1e-10 guards against z == 0.
        if np.linalg.norm(z_step) / (np.linalg.norm(z) + 1e-10) < tol:
            z = z_new
            print(f"{t=}")
            break
        z = z_new

        # Periodically re-balance the steps: geometric averaging of the old
        # weight with the ratio of dual to primal movement. Skipped when the
        # dual iterate did not move, which would drive the weight to zero.
        if t % ada_freq == 0 and delta_y > 0:
            w_k = w_k**theta * (delta_y / (delta_z + 1e-10))**(1 - theta)
            rho, gamma = eta * w_k, eta / w_k

    return z, y



def cvxpy_A(g, A, beta):
    """Reference solve of ``minimize g^T z  s.t.  ||A z||_inf <= beta`` with cvxpy.

    Args:
        g: Linear objective coefficients.
        A: Constraint matrix; the variable has ``A.shape[1]`` entries.
        beta: Bound on the infinity norm of ``A @ z``.

    Returns:
        Tuple ``(z_value, dual_value)``: the variable's value after the solve
        and the ``dual_value`` cvxpy reports for the norm constraint.
    """
    z_var = cp.Variable(A.shape[1])
    ball_constraint = cp.norm(A @ z_var, "inf") <= beta
    problem = cp.Problem(cp.Minimize(g.T @ z_var), [ball_constraint])
    problem.solve()
    return z_var.value, ball_constraint.dual_value


In [3]:
# Compare pdhg_method_A against the cvxpy reference on random instances of
#   minimize g^T z  subject to  ||A z||_inf <= beta.
# Fixed seed so the random instances are the same on every run.
np.random.seed(0)
# Two shapes: A with more columns than rows (50 x 100) and more rows than
# columns (100 x 50); five random instances of each.
for (m, n) in [(50, 100), (100, 50)]:
    for _ in range(5):
        A = np.random.randn(m, n)
        # g = A^T u for a random u, so g lies in the range of A^T.
        g = A.T @ np.random.randn(m)
        beta = 1
        # Reference primal solution and constraint dual value from cvxpy.
        z0, y0 = cvxpy_A(g, A, beta) 

        # Prox operators handed to the solver: prox_lin with the current g for
        # the objective, and prox_l1 with threshold rho * beta for the dual
        # step. Both lambdas read g / beta from the enclosing scope and are
        # used within the same loop iteration.
        prox_q = lambda x, gamma: prox_lin(x, g, gamma)
        prox_h_conj = lambda y, rho: prox_l1(y, rho * beta)

        z1, y1 = pdhg_method_A(prox_h_conj, prox_q, A, max_iter=20000, tol=1e-8)
        # Objective values must agree to about 1e-3 (absolute and relative).
        assert np.allclose(z0.T @ g, z1.T @ g, atol=1e-3, rtol=1e-3)
        print(z0.T @ g, z1.T @ g)
        # Both solutions must be feasible up to a 1e-2 slack.
        assert np.linalg.norm(A @ z1, np.inf) <= beta + 1e-2 and np.linalg.norm(A @ z0, np.inf) <= beta + 1e-2

t=633
-46.01906170928369 -46.01906190350042
t=811
-40.62663189839801 -40.62662887191583
t=699
-37.27648039933609 -37.27649630029944
t=739
-37.63276209554086 -37.63276323717219
t=752
-38.23857644116115 -38.2385747083143
-43.956465293535246 -43.9566950870936
-53.06017444202124 -53.05993406803106
-43.4909407282683 -43.49047921271461
-50.358566899510286 -50.36004397604967
-52.2328014966546 -52.232794692293986
