<a href="https://colab.research.google.com/github/dnguyend/ManNullRange/blob/master/tests/stiefel_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Fetch the ManNullRange sources; the cells below import `ManNullRange.symbolic`
# and `ManNullRange.manifolds` (presumably resolved from the working directory
# the clone lands in -- TODO confirm outside Colab).
!git clone https://github.com/dnguyend/ManNullRange.git

Cloning into 'ManNullRange'...
remote: Enumerating objects: 125, done.[K
remote: Counting objects: 100% (125/125), done.[K
remote: Compressing objects: 100% (70/70), done.[K
remote: Total 125 (delta 83), reused 90 (delta 54), pack-reused 0[K
Receiving objects: 100% (125/125), 121.34 KiB | 10.11 MiB/s, done.
Resolving deltas: 100% (83/83), done.


We define the operators. Note that J_adjoint is defined in terms of the index-raising operator rather than being written out explicitly.

In [4]:
from collections import OrderedDict
from IPython.display import display, Math
from sympy import symbols, Integer
from ManNullRange.symbolic import SymMat as sm
from ManNullRange.symbolic.SymMat import (
    matrices, t, scalars, mat_spfy, xtrace, trace, stiefels, DDR,
    latex_map, mat_latex, simplify_stiefel_tangent)


def pprint(expr):
    """Render ``expr`` as typeset math in the notebook output.

    The expression is converted with ``mat_latex``, a few identifier
    spellings are rewritten through ``latex_map`` (``fYY`` -> ``f_{YY}``,
    ``fY`` -> ``f_Y``, ``al`` -> ``\\alpha``) and the result is shown with
    IPython's ``Math`` display object.
    """
    # Ordered mapping: 'fYY' is listed before its prefix 'fY'
    # (presumably so the longer name is rewritten first -- TODO confirm
    # how latex_map applies the entries).
    renames = OrderedDict()
    renames['fYY'] = r'f_{YY}'
    renames['fY'] = 'f_Y'
    renames['al'] = r'\alpha'
    latex_src = latex_map(mat_latex(expr), renames)
    display(Math(latex_src))


# --- Symbols --------------------------------------------------------------
# eta: generic matrix symbol; Y: the manifold point (declared through
# `stiefels`, presumably so the simplifier knows t(Y)*Y is the identity --
# TODO confirm); a: symbol created with `sym_symb` (presumably symmetric).
eta = matrices('eta')
Y = stiefels('Y')
a = sm.sym_symb('a')
al0, al1 = scalars('al0 al1')
# Register the two metric parameters as symmetric so transposes of them
# simplify away.
sm.g_symms.update((al0, al1))


# --- Operators ------------------------------------------------------------
def J(Y, eta):
    """Constraint operator ``t(Y)*eta + t(eta)*Y``, simplified."""
    constraint = t(Y) * eta + t(eta) * Y
    return mat_spfy(constraint).doit()


def J_adj(Y, a):
    """Adjoint of ``J`` obtained from the trace pairing.

    Builds ``trace(J(Y, dY) * a)`` for a fresh non-commutative symbol ``dY``
    and lets ``xtrace`` extract the factor paired with ``dY``.
    """
    direction = symbols('dY', commutative=False)
    pairing = trace(mat_spfy(J(Y, direction) * a))
    return xtrace(pairing, direction)


def g(Y, eta):
    """Metric operator ``al0*eta + (al1 - al0)*Y*t(Y)*eta`` (not simplified)."""
    ambient_part = al0*eta
    range_part = (al1-al0)*Y*t(Y)*eta
    return ambient_part + range_part


def g_inv(Y, eta):
    """Simplified ``(1/al0)*eta + (1/al1 - 1/al0)*Y*t(Y)*eta``.

    This inverts ``g`` whenever ``t(Y)*Y`` is the identity.
    """
    inv_al0 = 1/al0
    inv_al1 = 1/al1
    return mat_spfy(inv_al0*eta + (inv_al1-inv_al0)*Y*t(Y)*eta)


Deriving the gradient

In [5]:
    
# Compose J, g_inv and J_adj on the symbol `a` and display the result; the
# first pprint call only renders the LaTeX caption for it.
J_giv_J_adj = J(Y, g_inv(Y, J_adj(Y, a)))
pprint("\\text{This is }Jg^{-1}J^T")
pprint(J_giv_J_adj)
    
def proj(Y, omg):
    """Return ``omg - g_inv(Y, J_adj(Y, (al1/4) * J(Y, omg)))``.

    Each intermediate expression is simplified with ``mat_spfy``.  The
    factor ``al1/4`` presumably plays the role of the inverse of
    ``J g^{-1} J^T`` (the operator displayed at the top of this cell) --
    TODO confirm against that output.
    """
    constraint_value = mat_spfy(J(Y, omg))
    quarter_al1 = al1/Integer(4)
    correction = mat_spfy(
        g_inv(Y, mat_spfy(J_adj(Y, quarter_al1*constraint_value))))
    return omg - correction

def r_gradient(Y, omg):
    """Return the simplified ``proj(Y, g_inv(Y, omg))``.

    ``omg`` is first mapped through ``g_inv`` and the result is then passed
    to ``proj``; this is the expression the notebook displays as the
    Riemannian gradient.
    """
    raised = mat_spfy(g_inv(Y, omg))
    return mat_spfy(proj(Y, raised))
# Show proj and r_gradient evaluated on the generic matrix symbol eta.
print("This is the projection:")  
pprint(proj(Y, eta))
print("This is the Riemanian gradient")
pprint(r_gradient(Y, eta))


<IPython.core.display.Math object>

<IPython.core.display.Math object>

This is the projection:


<IPython.core.display.Math object>

This is the Riemanian gradient


<IPython.core.display.Math object>

And now finish with the Hessian:

In [6]:

# Two more matrix symbols: xi and phi.
xi, phi = matrices('xi phi')
# Scalar trace(DDR(g(Y, eta), Y, phi) * t(xi)); DDR(expr, Y, dir) is
# presumably the directional derivative of expr in Y along dir -- TODO confirm.
trilinear = mat_spfy(trace(DDR(g(Y, eta), Y, phi) * t(xi)))
# Extract from that scalar the factor paired with phi (same xtrace device
# as in J_adj).
xcross = xtrace(trilinear, phi)
# Half of: derivative of g(Y, eta) along xi, plus derivative of g(Y, xi)
# along eta, minus the cross term above.
K = (Integer(1)/Integer(2))*(DDR(g(Y, eta), Y, xi) +
                              DDR(g(Y, xi), Y, eta) - xcross)

def d_proj(Y, xi, omg):
    """Apply ``DDR(., Y, xi)`` to the projection and evaluate it at ``omg``.

    ``proj`` is first expanded on a placeholder symbol so that ``DDR`` only
    sees the dependence on ``Y`` coming from ``proj`` itself; ``omg`` is
    substituted for the placeholder afterwards.
    """
    placeholder = matrices('e')
    projected = mat_spfy(proj(Y, placeholder))
    return DDR(projected, Y, xi).xreplace({placeholder: omg})

# DDR of the projection along xi, evaluated at eta.
dp_xi_eta = d_proj(Y, xi, eta)
# proj(g_inv(K)), simplified by simplify_stiefel_tangent with (xi, eta)
# passed as the tangent symbols.
prK = simplify_stiefel_tangent(proj(Y, g_inv(Y, K)), Y, (xi, eta))
# Christoffel function: the projected K term minus the derivative of the
# projection.
Gamma = mat_spfy(
    simplify_stiefel_tangent(prK - dp_xi_eta, Y, (xi, eta)))
print("This is the Christoffel function:")
pprint(Gamma)
# fY and fYY are plain matrix symbols; pprint renders them as f_Y and f_{YY}.
fY, fYY = matrices('fY fYY')
# Bilinear form: trace(t(eta)*fYY*xi - Gamma*t(fY)).
rhess02 = trace(mat_spfy(t(eta)*fYY*xi-Gamma * t(fY)))
print("This is the Riemannian Hessian bilinear form")
pprint(rhess02)
# Extract the factor paired with eta to get the operator form; as the
# printed label says, no projection has been applied yet.
rhess11_bf_gr = xtrace(rhess02, eta)
print("This is the Riemannian Hessian Vector Product: (before projection)")
pprint(rhess11_bf_gr)

This is the Christoffel function:


<IPython.core.display.Math object>

This is the Riemannian Hessian bilinear form


<IPython.core.display.Math object>

This is the Riemannian Hessian Vector Product: (before projection)


<IPython.core.display.Math object>

# For the full test, run the modules test_real_stiefel.py and test_complex_stiefel.py
For now, we just demonstrate a trust-region optimization problem.

In [9]:

import sys
# Fetch pymanopt from source.
# NOTE(review): this clones the default branch without pinning a revision,
# while the cells below rely on `pymanopt.solvers.TrustRegions` and
# `pymanopt.function.Callable`; consider pinning a commit that provides them.
!git clone https://github.com/pymanopt/pymanopt.git

# Make the checkout importable.
# NOTE(review): hard-coded absolute path; it presumably matches the Colab
# working directory -- adjust when running elsewhere.
sys.path.append("/content/pymanopt")

In [15]:
import numpy as np
import pymanopt
from numpy import trace
from numpy.random import (randint)
from ManNullRange.manifolds.RealStiefel import RealStiefel
from ManNullRange.manifolds.tools import (crandn, hsym)
from ManNullRange.tests.test_tools import check_zero, random_orthogonal, make_sym_pos

In [16]:
from pymanopt import Problem
from pymanopt.solvers import TrustRegions
from pymanopt.function import Callable

n = 1000
d = 50

# Problem: minimise Tr(A X B X^T) over X on the real Stiefel manifold.
# NOTE(review): no random seed is set, so the numbers differ between runs.

# A = OO diag(D) OO^T with D drawn from {1.02, 1.04, ..., 1.18}
# (symmetric provided random_orthogonal returns an orthogonal matrix).
D = randint(1, 10, n) * 0.02 + 1
OO = random_orthogonal(n)
A = OO @ np.diag(D) @ OO.T
# B is diagonal with entries drawn from {0.2, 0.4, ..., 1.8}.
B = np.diag(randint(1, 10, d) * .2)

# Metric parameters of the manifold used for the main run.
alpha = np.array([1, .6])
print(alpha)
man = RealStiefel(n, d, alpha)


@Callable
def cost(X):
    """Objective Tr(A X B X^T)."""
    return trace(A @ X @ B @ X.T)


@Callable
def egrad(X):
    """Euclidean gradient of the objective, 2 A X B."""
    return 2*A @ X @ B


@Callable
def ehess(X, H):
    """Euclidean Hessian of the objective applied to H, 2 A H B."""
    return 2*A @ H @ B


# One shared starting point so that the runs below are comparable.
XInit = man.rand()

# Main run with the metric chosen above; report the final cost.
prob = Problem(
    man, cost, egrad=egrad, ehess=ehess)
solver = TrustRegions(maxtime=100000, maxiter=100)
opt = solver.solve(prob, x=XInit, Delta_bar=250)
print(cost(opt))

# Same problem and starting point under two other metric parameter choices.
for cmp_alpha in (np.array([1, 1]), np.array([1, .5])):
    man1 = RealStiefel(n, d, alpha=cmp_alpha)
    prob = Problem(
        man1, cost, egrad=egrad, ehess=ehess)
    solver = TrustRegions(maxtime=100000, maxiter=100)
    opt = solver.solve(prob, x=XInit, Delta_bar=250)


[1.  0.6]
Optimizing...
                                            f: +5.652233e+01   |grad|: 8.202959e-01
REJ TR-   k:     1     num_inner:     0     f: +5.652233e+01   |grad|: 8.202959e-01   exceeded trust region
acc       k:     2     num_inner:     0     f: +5.413918e+01   |grad|: 6.500723e-01   exceeded trust region
acc       k:     3     num_inner:     1     f: +5.306831e+01   |grad|: 4.518646e-01   exceeded trust region
acc       k:     4     num_inner:     3     f: +5.267024e+01   |grad|: 2.315953e-01   exceeded trust region
acc       k:     5     num_inner:     7     f: +5.255150e+01   |grad|: 1.319260e-01   negative curvature
acc TR-   k:     6     num_inner:     6     f: +5.251750e+01   |grad|: 1.227519e-01   exceeded trust region
acc TR+   k:     7     num_inner:     3     f: +5.243845e+01   |grad|: 4.478239e-02   exceeded trust region
acc       k:     8     num_inner:    22     f: +5.243292e+01   |grad|: 2.798892e-02   negative curvature
acc TR-   k:     9     num_inner: 

In [17]:
# Random baseline: evaluate the cost at points drawn with man.rand() and keep
# the smallest value seen (presumably for comparison with the optimizer's
# result above).
min_val = 1e190  # starting sentinel; replaced by any smaller sampled cost
for i in range(100):
    min_val = min(min_val, cost(man.rand()))
    # Progress line every 1000 draws; with 100 draws it fires only at i == 0.
    if not i % 1000:
        print('i=%d min=%f' % (i, min_val))
print(min_val)


i=0 min=56.525687
56.484467734195505


Complex Stiefel problem: Run a few tests with different alpha

In [30]:
from ManNullRange.manifolds.ComplexStiefel import ComplexStiefel
from ManNullRange.manifolds.tools import (crandn, hsym, rtrace)
from ManNullRange.tests.test_tools import check_zero, random_orthogonal, make_sym_pos

from pymanopt import Problem
from pymanopt.solvers import TrustRegions
from pymanopt.function import Callable

n = 1000
d = 50

# Problem: minimise rtrace(A X B X^H) over X on the complex Stiefel manifold
# (rtrace is presumably the real part of the trace -- TODO confirm).
# NOTE(review): no random seed is set, so the numbers differ between runs.

# A = OO diag(D) OO^H with D drawn from {1.02, 1.04, ..., 1.18}.
D = randint(1, 10, n) * 0.02 + 1
OO = random_orthogonal(n)
A = OO @ np.diag(D) @ OO.T.conj()
# B is diagonal with entries drawn from {0.2, 0.4, ..., 1.8}.
B = np.diag(randint(1, 10, d) * .2)

# Metric parameters of the manifold used for the main run.
alpha = np.array([1, .6])
print(alpha)
man = ComplexStiefel(n, d, alpha)


@Callable
def cost(X):
    """Objective rtrace(A X B X^H)."""
    return rtrace(A @ X @ B @ X.T.conj())


@Callable
def egrad(X):
    """Euclidean gradient of the objective, 2 A X B."""
    return 2*A @ X @ B


@Callable
def ehess(X, H):
    """Euclidean Hessian of the objective applied to H, 2 A H B."""
    return 2*A @ H @ B


# One shared starting point so that the runs below are comparable.
XInit = man.rand()

# Main run with the metric chosen above; report the final cost.
prob = Problem(
    man, cost, egrad=egrad, ehess=ehess)
solver = TrustRegions(maxtime=100000, maxiter=100)
opt = solver.solve(prob, x=XInit, Delta_bar=250)
print(cost(opt))

# Same problem and starting point under two other metric parameter choices.
for cmp_alpha in (np.array([1, 1]), np.array([1, .5])):
    man1 = ComplexStiefel(n, d, alpha=cmp_alpha)
    prob = Problem(
        man1, cost, egrad=egrad, ehess=ehess)
    solver = TrustRegions(maxtime=100000, maxiter=100)
    opt = solver.solve(prob, x=XInit, Delta_bar=250)


[1.  0.6]
Optimizing...
                                            f: +4.976565e+01   |grad|: 7.378670e-01
REJ TR-   k:     1     num_inner:     0     f: +4.976565e+01   |grad|: 7.378670e-01   exceeded trust region
acc       k:     2     num_inner:     0     f: +4.791940e+01   |grad|: 6.335381e-01   exceeded trust region
acc       k:     3     num_inner:     1     f: +4.691978e+01   |grad|: 4.403963e-01   exceeded trust region
acc       k:     4     num_inner:     3     f: +4.655066e+01   |grad|: 2.174963e-01   exceeded trust region
acc TR-   k:     5     num_inner:     7     f: +4.645393e+01   |grad|: 1.738556e-01   negative curvature
acc TR+   k:     6     num_inner:     2     f: +4.633279e+01   |grad|: 6.440190e-02   exceeded trust region
acc       k:     7     num_inner:    15     f: +4.631876e+01   |grad|: 3.094653e-02   exceeded trust region
acc TR-   k:     8     num_inner:    11     f: +4.631488e+01   |grad|: 2.398875e-02   exceeded trust region
acc TR+   k:     9     num_inne

In [31]:
# Random baseline: evaluate the cost at points drawn with man.rand() and keep
# the smallest value seen (presumably for comparison with the optimizer's
# result above).
min_val = 1e190  # starting sentinel; replaced by any smaller sampled cost
for i in range(1000):
    min_val = min(min_val, cost(man.rand()))
    # Progress line every 1000 draws; with 1000 draws it fires only at i == 0.
    if not i % 1000:
        print('i=%d min=%f' % (i, min_val))
print(min_val)


i=0 min=49.769125
49.731752192422434


In [52]:
from pymanopt import Problem
from pymanopt.solvers import TrustRegions
from pymanopt.function import Callable
n = 50
d = 10

# Problem: minimise
#     cf * rtrace(B X X^H B^2 X X^H B) + rtrace(X^H A X)
# over X on the complex Stiefel manifold.
# NOTE(review): no random seed is set, so the numbers differ between runs.

# B is n x n diagonal: d random integers in 1..9 followed by n-d zeros.
B = np.diag(
    np.concatenate([randint(1, 10, d), np.zeros(n-d)]))
# A = OO diag(D) OO^H with D drawn from {0.21, 0.41, ..., 1.81}.
D = randint(1, 10, n) * 0.2+.01
OO = random_orthogonal(n)
A = OO @ np.diag(D) @ OO.T.conj()

# Metric parameters of the manifold used for the main run.
alpha = np.array([1., 2.])

print('alpha=%s' % str(alpha))
man = ComplexStiefel(n, d, alpha)
cf = 10       # weight of the quartic term
B2 = B @ B    # B is diagonal, so B2 is its entrywise square


@Callable
def cost(X):
    """Quartic term weighted by cf plus the quadratic term rtrace(X^H A X)."""
    return cf * rtrace(
        B @ X @ X.T.conj() @ B2 @ X @ X.T.conj() @ B) +\
        rtrace(X.T.conj() @ A @ X)


@Callable
def egrad(X):
    """Euclidean gradient: 4 cf B2 X X^H B2 X + 2 A X."""
    return cf*4*B2 @ X @ X.T.conj() @ B2 @ X + 2*A @ X


@Callable
def ehess(X, H):
    """Derivative of ``egrad`` at X in direction H (product rule, three terms)."""
    return 4*cf*B2 @ H @ X.T.conj() @ B2 @ X +\
        4*cf*B2 @ X @ H.T.conj() @ B2 @ X +\
        4*cf*B2 @ X @ X.T.conj() @ B2 @ H + 2*A @ H


# One shared starting point so that the runs below are comparable.
XInit = man.rand()

# Main run with the metric chosen above; report the final cost.
# NOTE(review): this run uses Delta_bar=2500 while the comparison runs use
# 250 -- confirm the difference is intended.
prob = Problem(
    man, cost, egrad=egrad, ehess=ehess)
solver = TrustRegions(maxtime=100000, maxiter=200)
opt = solver.solve(prob, x=XInit, Delta_bar=2500)
print(cost(opt))

# Same problem and starting point under two other metric parameter choices.
for cmp_alpha in (np.array([1, 1]), np.array([1, .5])):
    man1 = ComplexStiefel(n, d, alpha=cmp_alpha)
    prob = Problem(
        man1, cost, egrad=egrad, ehess=ehess)
    solver = TrustRegions(maxtime=100000, maxiter=200)
    print('alpha=%s' % str(man1.alpha))
    opt = solver.solve(prob, x=XInit, Delta_bar=250)


alpha=[1. 2.]
Optimizing...
                                            f: +6.860890e+03   |grad|: 2.309833e+04
REJ TR-   k:     1     num_inner:     4     f: +6.860890e+03   |grad|: 2.309833e+04   negative curvature
REJ TR-   k:     2     num_inner:     4     f: +6.860890e+03   |grad|: 2.309833e+04   negative curvature
REJ TR-   k:     3     num_inner:     4     f: +6.860890e+03   |grad|: 2.309833e+04   negative curvature
REJ TR-   k:     4     num_inner:     4     f: +6.860890e+03   |grad|: 2.309833e+04   negative curvature
acc TR+   k:     5     num_inner:     3     f: +8.883401e+02   |grad|: 6.093799e+03   exceeded trust region
acc       k:     6     num_inner:     2     f: +1.733489e+02   |grad|: 1.700735e+03   reached target residual-kappa (linear)
acc       k:     7     num_inner:     2     f: +4.572483e+01   |grad|: 4.978847e+02   reached target residual-kappa (linear)
acc       k:     8     num_inner:     2     f: +1.859649e+01   |grad|: 1.480872e+02   reached target residual-