In [None]:
from gptopt.optim.fast_pdhg import *
from gptopt.optim.attn_utils import *
from gptopt.optim.least_squares import *
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch

from gptopt.utils import set_seed
# Seed once, up front, via the project's set_seed helper so that the randomized
# experiments below start from a fixed state (presumably covers torch and
# NumPy -- confirm in gptopt.utils).
set_seed(42)

# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Solve $(A_2^\top A_2) Y + Y (A_1^\top A_1) = - (G_1^\top A_1 + A_2^\top G_2)$

In [3]:
# Compare the four Y solvers (kron, CG, LSQR, CVXPY) on instances generated
# with G_in_range=True.  The generator settings never change between trials,
# so they are assigned once instead of on every iteration.
std1 = 1
std2 = 0.1
rank_ratio = 0.5

for (m, n) in [(30, 60), (60, 30), (60, 60)]:
    print(f"{m}x{n}")
    for _ in range(5):
        (A, B, G1, G2,
         A_np, B_np, G1_np, G2_np, lamb_max) = gaussian_data(
            m, n, std1=std1, std2=std2, rank_ratio=rank_ratio, G_in_range=True
        )

        # Every solver gets the same operands (note A1 <- B and A2 <- A);
        # only the method and its stopping options differ.
        problem = dict(A1=B, A2=A, G1=G1, G2=G2)

        Y1, res1 = Y_dual_feasible(method="kron", **problem)
        Y2, res2 = Y_dual_feasible(method="cg", tol=1e-10, maxit=1000, **problem)
        Y3, res3 = Y_dual_feasible(method="lsqr", tol=1e-10, maxit=1000, **problem)
        Y4, res4 = cvxpy_Y_sylvester_solve(**problem)

        print(f"Kron {res1}, PCG {res2}, LSQR {res3}, CVXPY {res4}")

30x60
PCG breakdown at iter 53
Kron {'res': 3.828778257803715e-05, 'iter': 1}, PCG {'res': 9.515234494093819e-08, 'iter': 53}, LSQR {'res': 1.7871048440632308e-07, 'iter': 46}, CVXPY {'res': np.float64(1.5334848759719625e-09), 'iter': 1}
PCG breakdown at iter 50
Kron {'res': 2.8582385997055806e-05, 'iter': 1}, PCG {'res': 7.7006751612756e-08, 'iter': 50}, LSQR {'res': 1.6357265806172404e-07, 'iter': 42}, CVXPY {'res': np.float64(1.4957379239737243e-09), 'iter': 1}
PCG breakdown at iter 47
Kron {'res': 9.696723251072354e-06, 'iter': 1}, PCG {'res': 8.842053598951027e-08, 'iter': 47}, LSQR {'res': 1.8409528731442913e-07, 'iter': 41}, CVXPY {'res': np.float64(1.5876683954099374e-09), 'iter': 1}
PCG breakdown at iter 48
Kron {'res': 2.7507328413564358e-05, 'iter': 1}, PCG {'res': 9.049894796994295e-08, 'iter': 48}, LSQR {'res': 1.767509546423147e-07, 'iter': 43}, CVXPY {'res': np.float64(1.6143633871192281e-09), 'iter': 1}
PCG breakdown at iter 51
Kron {'res': 3.998503556061123e-05, 'iter'

In [4]:
# Same four-way solver comparison as for the in-range instances, but on square
# problems generated with G_in_range=False.
# Generator settings are fixed across trials, so set them once.
std1 = 1
std2 = 0.1
rank_ratio = 0.5

for (m, n) in [(40, 40), (60, 60)]:
    print(f"{m}x{n}")
    for _ in range(5):
        (A, B, G1, G2,
         A_np, B_np, G1_np, G2_np, lamb_max) = gaussian_data(
            m, n, std1=std1, std2=std2, rank_ratio=rank_ratio, G_in_range=False
        )

        # Identical operands for every solver (A1 <- B, A2 <- A).
        problem = dict(A1=B, A2=A, G1=G1, G2=G2)

        Y1, res1 = Y_dual_feasible(method="kron", **problem)
        Y2, res2 = Y_dual_feasible(method="cg", tol=1e-10, maxit=1000, **problem)
        Y3, res3 = Y_dual_feasible(method="lsqr", tol=1e-10, maxit=1000, **problem)
        Y4, res4 = cvxpy_Y_sylvester_solve(**problem)

        print(f"Kron {res1}, PCG {res2}, LSQR {res3}, CVXPY {res4}")

40x40
PCG breakdown at iter 247
Kron {'res': 0.11548649719401304, 'iter': 1}, PCG {'res': 0.11548649719401304, 'iter': 247}, LSQR {'res': 0.11548649719401304, 'iter': 1000}, CVXPY {'res': np.float64(0.11548650306872876), 'iter': 1}
PCG breakdown at iter 245
Kron {'res': 0.10541598561353993, 'iter': 1}, PCG {'res': 0.10541597922187573, 'iter': 245}, LSQR {'res': 0.10541598561353993, 'iter': 1000}, CVXPY {'res': np.float64(0.10541598134323737), 'iter': 1}
PCG breakdown at iter 251
Kron {'res': 0.1103084846735251, 'iter': 1}, PCG {'res': 0.1103084846735251, 'iter': 251}, LSQR {'res': 0.1103084846735251, 'iter': 1000}, CVXPY {'res': np.float64(0.11030847986039378), 'iter': 1}
PCG breakdown at iter 219
Kron {'res': 0.11475754530022186, 'iter': 1}, PCG {'res': 0.11475754530022186, 'iter': 219}, LSQR {'res': 0.11475754530022186, 'iter': 1000}, CVXPY {'res': np.float64(0.11475754591525149), 'iter': 1}
PCG breakdown at iter 208
Kron {'res': 0.1116960151673029, 'iter': 1}, PCG {'res': 0.11169601

In [5]:
# Precision check: solve the same random instance with the "kron" method in
# float64 and with the iterative "cg" / "lsqr" methods in float32.


def as_float32(tensors):
    """Return a new dict with every tensor in ``tensors`` cast to float32."""
    return {key: value.to(torch.float32) for key, value in tensors.items()}


for (m, n) in [(30, 60), (60, 30), (60, 60)]:
    print(f"{m}x{n}")
    for _ in range(5):
        # Draw order (A, B, Y0) is kept fixed so the random stream is consumed
        # in the same sequence on every run.
        A_np = np.random.randn(m, n)
        B_np = np.random.randn(m, n)
        Y0_np = np.random.randn(n, n)
        # Right-hand-side factors are built from the known matrix Y0.
        G1_np = B_np @ Y0_np.T
        G2_np = A_np @ Y0_np

        A, B, G1, G2 = (
            torch.from_numpy(array).to(torch.float64)
            for array in (A_np, B_np, G1_np, G2_np)
        )

        # Operands in double precision (A1 <- B, A2 <- A); each float32 solve
        # below receives its own freshly cast copies.
        operands = {"A1": B, "A2": A, "G1": G1, "G2": G2}

        Y1, res1 = Y_dual_feasible(**operands, method="kron")
        Y2, res2 = Y_dual_feasible(**as_float32(operands),
                                   method="cg", tol=1e-6, maxit=1000)
        Y3, res3 = Y_dual_feasible(**as_float32(operands),
                                   method="lsqr", tol=1e-6, maxit=1000)

        print("double:", res1, "PCG (float32):", res2, "LSQR (float32):", res3)

30x60
k=46
double: {'res': 7.316691856439262e-14, 'iter': 1} PCG (float32): {'res': 1.4735666853744296e-07, 'iter': 46} LSQR (float32): {'res': 9.049372842637705e-08, 'iter': 45}
k=52
double: {'res': 1.4528861595617728e-14, 'iter': 1} PCG (float32): {'res': 1.8809915593662733e-07, 'iter': 52} LSQR (float32): {'res': 8.482273780145464e-08, 'iter': 47}
k=40
double: {'res': 7.485621103165854e-14, 'iter': 1} PCG (float32): {'res': 1.4262472222949097e-07, 'iter': 40} LSQR (float32): {'res': 7.89690871392536e-08, 'iter': 41}
k=48
double: {'res': 1.219726957449007e-13, 'iter': 1} PCG (float32): {'res': 1.467488263462123e-07, 'iter': 48} LSQR (float32): {'res': 7.589791288214521e-08, 'iter': 46}
k=55
double: {'res': 9.887529359830461e-14, 'iter': 1} PCG (float32): {'res': 1.919315360130509e-07, 'iter': 55} LSQR (float32): {'res': 8.817418522615675e-08, 'iter': 48}
60x30
k=26
double: {'res': 3.963565753259053e-17, 'iter': 1} PCG (float32): {'res': 8.46069171360399e-08, 'iter': 26} LSQR (float32

In [6]:
# Benchmark: CG on the Y system with and without diagonal scaling.
sizes = [(30, 60), (60, 30), (60, 60)]
trials = 3
# Synchronize around each timed region when CUDA is available so that pending
# GPU work is not attributed to the wrong interval; a no-op on CPU-only hosts.
sync = torch.cuda.synchronize if torch.cuda.is_available() else (lambda: None)


def timed_cg_solve(A1, A2, G1, G2, diag_scaling):
    """Run one ``method="cg"`` call of ``Y_dual_feasible`` and time it.

    Args:
        A1, A2, G1, G2: operands forwarded unchanged to ``Y_dual_feasible``.
        diag_scaling: forwarded as the ``diag_scaling`` keyword.

    Returns:
        Tuple ``(res, elapsed_ms)``: the second value returned by
        ``Y_dual_feasible`` and the elapsed time of the call in milliseconds.
    """
    sync()
    # perf_counter is monotonic and high-resolution; time.time() can jump with
    # system clock adjustments and is too coarse for millisecond-scale runs.
    start = time.perf_counter()
    _, res = Y_dual_feasible(A1=A1, A2=A2, G1=G1, G2=G2,
                             method="cg", diag_scaling=diag_scaling,
                             tol=1e-10, verbose=False)
    sync()
    elapsed_ms = (time.perf_counter() - start) * 1e3
    return res, elapsed_ms


for (m, n) in sizes:
    print(f"\nSize {m}x{n}")
    for t in range(1, trials + 1):
        # Structured instance so a dual-feasible Y exists
        A, B, G1, G2, *_ = gaussian_data(m, n, std1=1.0, std2=0.1, rank_ratio=0.5, G_in_range=True)

        # Both runs select CG explicitly via method="cg"; the only difference
        # between them is the diag_scaling flag.
        res_dp, ms_dp = timed_cg_solve(B, A, G1, G2, diag_scaling=True)
        res_ns, ms_ns = timed_cg_solve(B, A, G1, G2, diag_scaling=False)

        print(f"trial {t}: diag res={res_dp}, time={ms_dp:.1f}ms | "
              f"no-scale res={res_ns}, time={ms_ns:.1f}ms")


Size 30x60
trial 1: diag res={'res': 7.544865587955668e-08, 'iter': 48}, time=16.8ms | no-scale res={'res': 48.40919472659386, 'iter': 1000}, time=270.9ms
trial 2: diag res={'res': 8.615465568755743e-08, 'iter': 52}, time=14.4ms | no-scale res={'res': 48.512828866866144, 'iter': 1000}, time=268.9ms
trial 3: diag res={'res': 1.0614596072304302e-07, 'iter': 56}, time=15.8ms | no-scale res={'res': 49.0146490758429, 'iter': 1000}, time=268.2ms

Size 60x30
trial 1: diag res={'res': 6.959396264037831e-08, 'iter': 28}, time=8.1ms | no-scale res={'res': 1.7476333448714715e-08, 'iter': 38}, time=10.5ms
trial 2: diag res={'res': 8.789793605460769e-08, 'iter': 30}, time=8.6ms | no-scale res={'res': 1.827796149498115e-08, 'iter': 42}, time=11.6ms
trial 3: diag res={'res': 7.414106948746771e-08, 'iter': 29}, time=8.3ms | no-scale res={'res': 1.710683635947765e-08, 'iter': 40}, time=11.1ms

Size 60x60
trial 1: diag res={'res': 2.1173108583647777e-07, 'iter': 251}, time=67.7ms | no-scale res={'res':

# Solve $\mathcal{A}^*\mathcal{A}(Z) = -\beta \mathcal{A}^*(\mathbf{sign}(Y^0))$, where  $\mathcal{A}(Z) = Z_1^\top A_1 + A_2^\top Z_2$

In [7]:
# Compare the Z solvers (kron, CG, LSQR, CVXPY) on small square instances.
beta = 0.1
# Generator settings are identical for every trial, so assign them once.
std1 = 1
std2 = 0.1
rank_ratio = 0.5

for (m, n) in [(10, 10), (30, 30), (40, 40)]:
    print(f"{m}x{n}")
    for _ in range(5):
        (A, B, G1, G2,
         A_np, B_np, G1_np, G2_np, lamb_max) = gaussian_data(
            m, n, std1=std1, std2=std2, rank_ratio=rank_ratio, G_in_range=True
        )

        # Y0 is obtained from the LSQR Y solve and then handed to every Z solve;
        # the residual info of this step is not reported here.
        Y0, _ = Y_dual_feasible(A1=B, A2=A, G1=G1, G2=G2, method="lsqr")

        # Arguments shared by all Z solves (A1 <- B, A2 <- A).
        z_problem = dict(A1=B, A2=A, Y0=Y0, beta=beta)

        Z1_1, Z2_1, res1 = Z_sylvester_solve(method="kron", tol=1e-10, **z_problem)
        Z1_2, Z2_2, res2 = Z_sylvester_solve(method="cg", tol=1e-10, maxit=100,
                                             lambda_reg0=1e-8, **z_problem)
        Z1_3, Z2_3, res3 = Z_sylvester_solve(method="lsqr", tol=1e-10, maxit=100,
                                             lambda_reg0=0, **z_problem)
        Z1_4, Z2_4, res4 = cvxpy_Z_sylvester_solve(**z_problem)

        print(f"Kron {res1}, PCG {res2}, LSQR {res3}, CVXPY {res4}")

10x10
PCG breakdown at iter 63
Kron {'res': 2.349188912333509e-05, 'iter': 1}, PCG {'res': 1.0385433665316371e-06, 'iter': 63}, LSQR {'res': 4.18114697339042e-07, 'iter': 68}, CVXPY 1.4272079370105087e-14
PCG breakdown at iter 60
Kron {'res': 4.271110614259117e-06, 'iter': 1}, PCG {'res': 1.2194287704959116e-07, 'iter': 60}, LSQR {'res': 4.536133174421126e-07, 'iter': 66}, CVXPY 1.4367697188856887e-15
PCG breakdown at iter 65
Kron {'res': 6.100146864713231e-06, 'iter': 1}, PCG {'res': 6.150493446757035e-07, 'iter': 65}, LSQR {'res': 4.6552855444168624e-07, 'iter': 74}, CVXPY 4.173309704604708e-15
PCG breakdown at iter 50
Kron {'res': 5.842345182905879e-07, 'iter': 1}, PCG {'res': 1.0601490617982356e-07, 'iter': 50}, LSQR {'res': 3.256655103068981e-07, 'iter': 52}, CVXPY 5.054593665167723e-16
PCG breakdown at iter 59
Kron {'res': 2.3973186003783637e-06, 'iter': 1}, PCG {'res': 1.2844272772106813e-07, 'iter': 59}, LSQR {'res': 3.373298335631669e-07, 'iter': 64}, CVXPY 1.222487746008403e-

In [8]:
# Large-scale (500x500) Z solves with the iterative methods only, once with
# G_in_range=True and once with G_in_range=False.
beta = 0.1
# Generator settings do not vary across trials, so they are set once up front.
std1 = 1
std2 = 0.1
rank_ratio = 1

for (m, n, G_in_range) in [(500, 500, True), (500, 500, False)]:
    print(f"{m}x{n}, {G_in_range=}")
    for _ in range(5):
        A, B, G1, G2, A_np, B_np, G1_np, G2_np, lamb_max = gaussian_data(
            m, n, std1=std1, std2=std2, rank_ratio=rank_ratio, G_in_range=G_in_range
        )

        # Y0 is passed to both Z solves below; its residual info is kept so the
        # quality of this first stage appears in the report line.
        Y0, y_res = Y_dual_feasible(A1=B, A2=A, G1=G1, G2=G2, method="lsqr")

        Z1_2, Z2_2, res2 = Z_sylvester_solve(A1=B, A2=A, Y0=Y0, beta=beta, method="cg", tol=1e-10, maxit=100, lambda_reg0=1e-8)
        Z1_3, Z2_3, res3 = Z_sylvester_solve(A1=B, A2=A, Y0=Y0, beta=beta, method="lsqr", tol=1e-10, maxit=100, lambda_reg0=0)

        # The space after "Y res" keeps the label from running into the dict
        # repr, matching the "PCG {...}" and "LSQR {...}" fields.
        print(f"PCG {res2}, LSQR {res3}, Y res {y_res}")

500x500, G_in_range=True
PCG {'res': 0.00044874938669454875, 'iter': 100}, LSQR {'res': 0.00045136634852050665, 'iter': 100}, Y res{'res': 9.143254306240594e-08, 'iter': 1000}
PCG {'res': 0.0004958924768346206, 'iter': 100}, LSQR {'res': 0.0004990041570575793, 'iter': 100}, Y res{'res': 8.030641110331488e-08, 'iter': 1000}
PCG {'res': 0.0005508587830323976, 'iter': 100}, LSQR {'res': 0.0005534461391383631, 'iter': 100}, Y res{'res': 1.890193182042304e-07, 'iter': 1000}
PCG {'res': 0.000481925313062815, 'iter': 100}, LSQR {'res': 0.000484312672165828, 'iter': 100}, Y res{'res': 1.418570057153821e-07, 'iter': 1000}
PCG {'res': 0.00048458311508584517, 'iter': 100}, LSQR {'res': 0.0004862718822991401, 'iter': 100}, Y res{'res': 9.697277933259341e-08, 'iter': 1000}
500x500, G_in_range=False
PCG {'res': 0.022113537196106575, 'iter': 100}, LSQR {'res': 0.022124115801562774, 'iter': 100}, Y res{'res': 0.0316621587808363, 'iter': 1000}
PCG {'res': 0.01926609186077865, 'iter': 100}, LSQR {'res':