<a href="https://colab.research.google.com/github/dnguyend/rayleigh_newton/blob/master/colab/SimpleRQI_RChebyshev.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A simple example of Rayleigh quotient iteration and Rayleigh-Chebyshev iteration with a nontrivial Hessian of the retraction

Set up the main functions
$$L(X, \lambda) = A(X) - \lambda X = \begin{bmatrix}5 + 2x_0x_1\\
 -1 + x_0^2\end{bmatrix} - \lambda\begin{bmatrix}x_0\\x_1\end{bmatrix}
$$
with the constraint $X^TX = 1$. We show that the term $L_X r_{\eta\eta}(X, .)\eta^{[2]}$ is required for the Chebyshev step to have cubic convergence.

We consider the solution at $X = \begin{bmatrix}1\\0\end{bmatrix}$. The circle is parametrized by

$$X = \begin{bmatrix}\frac{4-t^2}{4+t^2}\\ \frac{4t}{4+t^2}\end{bmatrix}
    = \begin{bmatrix} 1 -\frac{1}{2}t^2\\ t\end{bmatrix}+ O(t^3)$$

In [1]:
import sympy as sp

# Coefficients of A(X). The two commented-out lines would make them symbolic;
# here they are fixed to exact SymPy integers instead.
# a1 = sp.symbols('a1')
# c000, c001, c011, c111 = sp.symbols('c000 c001 c011 c111')
a0 = sp.Integer(5)  # constant term of the first component of A(X)

c001 = sp.Integer(1)  # coefficient of the quadratic terms x0*x1 and x0**2 in A(X)
a1 = - c001  # constant term of the second component of A(X)

def L(x, lbd):
  """Return L(x, lbd) = A(x) - lbd*x as a 2x1 column matrix.

  A(x) has components a0 + 2*c001*x[0]*x[1] and -c001 + c001*x[0]**2.
  """
  first, second = x[0], x[1]
  a_top = a0 + 2*c001*first*second
  a_bottom = -c001 + c001*first*first
  return sp.Matrix([a_top - lbd*first, a_bottom - lbd*second])

def Lx(x, lbd):
  """Return the 2x2 Jacobian of L(x, lbd) with respect to x.

  The Jacobian of A is [[2*c001*x[1], 2*c001*x[0]], [2*c001*x[0], 0]];
  lbd is subtracted from both diagonal entries.
  """
  two = sp.Integer(2)
  off_diag = two*c001*x[0]
  return sp.Matrix([
    [two*c001*x[1] - lbd, off_diag],
    [off_diag, sp.Integer(0) - lbd],
  ])

def Llbd(x, lbd):
  """Return -x, the derivative of A(x) - lbd*x with respect to lbd.

  lbd itself is not used.
  """
  negated = -x
  return negated

def Lxx(x, lbd, eta):
  """Return the second derivative of L in x applied twice to eta (2x1 column).

  The components are 4*c001*eta[0]*eta[1] and 2*c001*eta[0]**2.
  Neither x nor lbd is used in the computation.
  """
  mixed = 2*c001*eta[0]*eta[1]
  top = mixed + mixed  # the two symmetric mixed-partial contributions
  bottom = 2*c001*eta[0]*eta[0]
  return sp.Matrix([top, bottom])

def R(x):
  """Return x[0]*A_0(x) + x[1]*A_1(x), simplified and then expanded.

  A_0 = a0 + 2*c001*x[0]*x[1] and A_1 = a1 + c001*x[0]**2.
  """
  a_top = a0 + 2*c001*x[0]*x[1]
  a_bottom = a1 + c001*x[0]*x[0]
  pairing = x[0]*a_top + x[1]*a_bottom
  return sp.expand(sp.simplify(pairing))

def DR(x, eta):
  """Return the directional derivative of R at x in the direction eta.

  The result is written as a sum of monomials (no factored products).
  """
  # Derivative of x0*(a0 + 2*c001*x0*x1) in the direction eta.
  from_first = a0*eta[0] + 4*c001*x[0]*eta[0]*x[1] + 2*c001*x[0]*x[0]*eta[1]
  # Derivative of x1*(a1 + c001*x0**2) in the direction eta.
  from_second = 2*c001*x[0]*eta[0]*x[1] + a1*eta[1] + c001*x[0]*x[0]*eta[1]
  return from_first + from_second

def retr(x, eta):
  """Return (x + eta) divided by sqrt(1 + eta[0]**2 + eta[1]**2).

  Note: the divisor equals the norm of x + eta only when x has unit norm
  and eta is orthogonal to x; no check of either condition is made.
  """
  scale = sp.sqrt(1 + eta[0]*eta[0] + eta[1]*eta[1])
  return (x + eta)/scale

def DC(x, omg):
  """Return x[0]*omg[0] + x[1]*omg[1], the pairing of the first two entries."""
  first_term = x[0]*omg[0]
  second_term = x[1]*omg[1]
  return first_term + second_term

def SchurProj(x, omg):
  """Return omg minus the multiple of zeta that cancels its DC(x, .) value.

  zeta is -Lx(x, R(x))^{-1} x. By linearity of DC in its second argument,
  the returned vector r satisfies DC(x, r) = 0.
  """
  jac = Lx(x, R(x))
  zeta = -jac.inv()*x
  weight = DC(x, omg)
  return omg - zeta/DC(x, zeta)*weight

def NewtonInc(x):
  """Return the Newton increment at x.

  With lbd = R(x), the raw step -Lx(x, lbd)^{-1} L(x, lbd) is passed
  through SchurProj.
  """
  lbd = R(x)
  raw_step = -Lx(x, lbd).inv()*L(x, lbd)
  return SchurProj(x, raw_step)

def ChebyshevInc(x, f):
  """Return the Chebyshev increment at x.

  x: 2x1 point at which the step is computed.
  f: scalar coefficient multiplying the term (Lx*x)*|nst|**2 in G, where nst
     is the Newton increment.

  The increment is nst + SchurProj(x, T) with T = -1/2 * Lx^{-1} * G and
  G = Lxx(nst, nst) - 2*nst*DR(x, nst) + f*(Lx*x)*|nst|**2.
  """
  nst = NewtonInc(x)
  lbd = R(x)
  llx = Lx(x, lbd)
  dr = DR(x, nst)
  nst_sq = nst[0]*nst[0] + nst[1]*nst[1]
  G = Lxx(x, lbd, nst) - 2*nst*dr + f*(llx*x)*nst_sq
  # Use an exact rational: the Python expression -1/2 evaluates to the float
  # -0.5, which would turn the exact symbolic result into floating point.
  T = sp.Rational(-1, 2)*llx.inv()*G
  return nst + SchurProj(x, T)



Some symbolic variables:
- $xx$ is a point on the circle and $v$ is a vector in $R^2$.

Test the partial derivative $L_x$.

In [2]:
# u is the perturbation parameter, (v0, v1) and (x0, x1) are the entries of v
# and xx, and e is passed below in the lbd argument of L and Lx.
u, v0, v1, x0, x1, e = sp.symbols('u v0 v1 x0 x1 e')
xx = sp.Matrix([[x0, x1]]).T
v = sp.Matrix([[v0, v1]]).T
# Closed-form Jacobian applied to v ...
ex2 = Lx(xx, e)*v
# ... compared with d/du L(xx + u*v, e) evaluated at u = 0.
ex1 = sp.diff(sp.expand(L(xx+u*v, e)), u).subs(u, 0)
# The difference is displayed; the recorded output is the zero vector.
display(sp.expand(ex1-ex2))
# display(ex2)


Matrix([
[0],
[0]])

Test the Hessian $L_{xx}$, and test that the Schur projection returns a vector in the tangent space.

In [3]:
# R(xx) should agree with xx^T L(xx, 0).
display(sp.expand(xx.T*L(xx, e).subs(e,0))[0] - R(xx))

# Check Lxx against the second-order Taylor remainder of L:
# 2*(L(xx + u*v) - L(xx) - u*Lx(xx)*v)/u**2 evaluated at u = 0.
lxx_taylor = (L(xx+u*v, e) -L(xx, e) - Lx(xx, e)*v*u)/u/u
lxx_taylor = sp.expand(2*sp.simplify(lxx_taylor)).subs(u, 0)
display(sp.expand(lxx_taylor - Lxx(xx, e, v)))

# Check Lxx against the difference quotient of Lx applied to v.
ex3 = sp.simplify((Lx(xx +u*v, e) - Lx(xx, e))/u)
display(sp.expand(sp.expand(ex3*v).subs(u, 0) - Lxx(xx, e, v)))

# Evaluate the Schur projection of v at the circle point with t = 3 and
# check that its pairing with that point vanishes.
t = sp.symbols('t')
x = sp.Matrix([[(sp.Integer(4)-t*t)/(sp.Integer(4)+t*t), sp.Integer(4)*t/(sp.Integer(4)+t*t)]]).T

x2 = x.subs(t, 3)
exp1 = SchurProj(xx, v)

exp2 = exp1.subs(x0, x2[0]).subs(x1, x2[1])
display(sp.simplify(x2[0]*exp2[0] + x2[1]*exp2[1]))

0

Matrix([
[0],
[0]])

Matrix([
[0],
[0]])

0

Test derivative of the Rayleigh quotient

In [4]:
# Difference quotient of R along v, simplified and evaluated at u = 0,
# minus the closed form DR(xx, v); the recorded output is 0.
display(sp.simplify((R(xx + u*v) - R(xx))/u).subs(u, 0) - DR(xx, v))


0

Show some expressions for $L_x$ and $L_{xx}$

In [5]:

# The symbol u is passed here as the lbd argument, so it is what appears
# on the diagonal of the displayed Lx.
display(Lx(xx, u))
display(Lxx(xx, u, v))

Matrix([
[-u + 2*x1, 2*x0],
[     2*x0,   -u]])

Matrix([
[4*v0*v1],
[2*v0**2]])

Evaluate the Newton increment at
$$x = \begin{bmatrix}\frac{4-t^2}{4+t^2}\\
 \frac{4t}{4+t^2}\end{bmatrix}$$
where $t$ is the symbolic variable.

In [6]:
t = sp.symbols('t')
# Point parametrized by t; its two entries satisfy x[0]**2 + x[1]**2 = 1.
x = sp.Matrix([[(sp.Integer(4)-t*t)/(sp.Integer(4)+t*t), sp.Integer(4)*t/(sp.Integer(4)+t*t)]]).T

# Newton increment at the generic symbolic point xx ...
nt = NewtonInc(xx)
nt = sp.simplify(nt)
# ... then specialised to the parametrized point by substituting x0 and x1.
ntx = nt.subs(x0, x[0]).subs(x1, x[1])
ntx = sp.simplify(ntx)

In [7]:
# nt is the increment in terms of x0, x1; ntx is the same increment in terms of t.
display(nt)
display(ntx)

Matrix([
[x1*(-x0**3 + 2*x0*x1**2 + x0 + 5*x1)/(3*x0**4*x1 + 5*x0**3 + 3*x0**2*x1**3 + 3*x0**2*x1 + 5*x0*x1**2 - 3*x1**3)],
[ x0*(x0**3 - 2*x0*x1**2 - x0 - 5*x1)/(3*x0**4*x1 + 5*x0**3 + 3*x0**2*x1**3 + 3*x0**2*x1 + 5*x0*x1**2 - 3*x1**3)]])

Matrix([
[               t**2*(-80*t**4 + 192*t**3 - 640*t**2 - 768*t - 1280)/(5*t**8 - 24*t**7 + 40*t**6 + 288*t**5 + 1152*t**3 - 640*t**2 - 1536*t - 1280)],
[4*t*(-5*t**6 + 12*t**5 - 20*t**4 - 96*t**3 + 80*t**2 + 192*t + 320)/(5*t**8 - 24*t**7 + 40*t**6 + 288*t**5 + 1152*t**3 - 640*t**2 - 1536*t - 1280)]])

Show $(x+\eta)/|x+\eta| - v_1$ is of order $t^2$, where $v_1=\begin{bmatrix}1\\0\end{bmatrix}$ and $\eta$ is the Newton increment

In [8]:
# Solution point (the v_1 of the text). It gets its own name so that the
# symbol v1 created with sp.symbols earlier in the notebook is not rebound
# to a Matrix.
v_sol = sp.Matrix([[sp.Integer(1), sp.Integer(0)]]).T

# x plus the Newton increment, then normalised to unit length.
Nadd = sp.simplify(x+ntx)
display(Nadd)
NewtonStep = Nadd/sp.sqrt(Nadd.dot(Nadd))
# Series in t about 0 of each entry of the normalised step.
NewtonTaylor = sp.Matrix([sp.series(NewtonStep[0], t, 0), sp.series(NewtonStep[1], t, 0)])
display(NewtonTaylor)
display(NewtonTaylor - v_sol)


Matrix([
[(-5*t**8 + 24*t**7 - 80*t**6 - 288*t**5 - 480*t**4 + 1152*t**3 - 1280*t**2 - 1536*t - 1280)/(5*t**8 - 24*t**7 + 40*t**6 + 288*t**5 + 1152*t**3 - 640*t**2 - 1536*t - 1280)],
[                                                             48*t**2*(-t**4 + 24*t**2 - 16)/(5*t**8 - 24*t**7 + 40*t**6 + 288*t**5 + 1152*t**3 - 640*t**2 - 1536*t - 1280)]])

Matrix([
[                          1 - 9*t**4/50 + 54*t**5/125 + O(t**6)],
[3*t**2/5 - 18*t**3/25 - 159*t**4/250 + 2529*t**5/1250 + O(t**6)]])

Matrix([
[                             -9*t**4/50 + 54*t**5/125 + O(t**6)],
[3*t**2/5 - 18*t**3/25 - 159*t**4/250 + 2529*t**5/1250 + O(t**6)]])

Now do the Chebyshev step

In [9]:
# R, the Jacobian Lx and its inverse, all evaluated at the parametrized point x.
lbdx = R(x)
llxx = sp.simplify(Lx(x, lbdx))
# NOTE(review): llxxinv is not referenced by any later cell visible here - confirm it is needed.
llxxinv = sp.simplify(llxx.inv())
# llxinv = llxx.inv()


In [10]:
# The vector Lx(x, R(x))*x and the series in t about 0 of its two entries.
retractHessTerm = sp.simplify(llxx*x)
retractHessTaylor = sp.Matrix([sp.series(retractHessTerm[0], t, 0),
  sp.series(retractHessTerm[1], t, 0)])
retractHessTaylor

Matrix([
[-5 + 2*t + 5*t**2 + 3*t**3/2 - 5*t**4/2 - 25*t**5/8 + O(t**6)],
[ 2 - 5*t - 4*t**2 + 15*t**3/4 + 5*t**4 - 25*t**5/16 + O(t**6)]])

In [11]:
# f is a free symbol; it multiplies the (llx*x) term of G in the Chebyshev step.
f = sp.symbols('f')
# ntx is simplified again here (it was already passed through sp.simplify when first built).
ntx = sp.simplify(ntx)

In [12]:
# Inline version of the steps in ChebyshevInc, evaluated at the parametrized
# point x with sp.simplify applied to the intermediate results.
lbdt = sp.simplify(R(x))
llx = sp.simplify(Lx(x, lbdt))
jr = sp.simplify(DR(x, ntx))

# G = Lxx applied to ntx twice, minus 2*ntx*DR(x, ntx), plus f*(llx*x)*|ntx|**2.
G = Lxx(x, lbdt, ntx) +\
      - 2*ntx*jr + f*(llx*x)*(ntx[0]*ntx[0] + ntx[1]*ntx[1])
G = sp.simplify(G)

# Exact rational factor -1/2 (built from sp.Integer, so no floats are introduced).
T = -sp.Integer(1)/sp.Integer(2)*sp.simplify(llx.inv()*G)
# Chebyshev increment: Newton increment plus the projected correction T.
ct = sp.simplify(ntx + SchurProj(x, T))
ct




Matrix([
[                                          t**2*(-1600*f*t**15 + 7680*f*t**14 + 35584*f*t**13 - 307200*f*t**12 + 1170432*f*t**11 - 122880*f*t**10 - 3067904*f*t**9 + 9830400*f*t**8 + 12271616*f*t**7 - 1966080*f*t**6 - 74907648*f*t**5 - 78643200*f*t**4 - 36438016*f*t**3 + 31457280*f*t**2 + 26214400*f*t - 2000*t**16 + 27200*t**15 - 107520*t**14 - 325376*t**13 + 4019200*t**12 - 13888512*t**11 + 6963200*t**10 + 43405312*t**9 - 85647360*t**8 - 173621248*t**7 + 111411200*t**6 + 888864768*t**5 + 1028915200*t**4 + 333185024*t**3 - 440401920*t**2 - 445644800*t - 131072000)/(125*t**20 - 1800*t**19 + 10640*t**18 - 6624*t**17 - 201360*t**16 + 896256*t**15 + 212480*t**14 - 7934976*t**13 + 13928960*t**12 + 25362432*t**11 + 101449728*t**9 - 222863360*t**8 - 507838464*t**7 - 54394880*t**6 + 917766144*t**5 + 824770560*t**4 - 108527616*t**3 - 697303040*t**2 - 471859200*t - 131072000)],
[4*t*(-100*f*t**17 + 480*f*t**16 + 2624*f*t**15 - 21120*f*t**14 + 64256*f*t**13 + 69120*f*t**12 - 484352*f*t**1

# Power series expansion of ct

In [13]:
# x plus the Chebyshev increment, and the same vector scaled to unit length.
xc = x + ct
# NOTE(review): ctnew is not referenced by any later cell visible here - confirm it is needed.
ctnew = xc / sp.sqrt(xc.dot(xc))

In [14]:
# Series in t about 0 of the two entries of xc.
taylor = sp.Matrix([sp.series(xc[0], t, 0), sp.series(xc[1], t, 0)])

In [15]:
# Normalise the truncated series (not the exact xc) and expand the result in t again.
xChev = taylor / sp.sqrt(taylor.dot(taylor))
TaylorChev = sp.Matrix([sp.series(xChev[0], t, 0), sp.series(xChev[1], t, 0)])
TaylorChev


Matrix([
[                                            1 + t**4*(-f**2/50 - f/25 - 1/50) + t**5*(12*f**2/125 - 19*f/125 - 31/125) + O(t**6)],
[t**2*(f/5 + 1/5) + t**3*(31/25 - 12*f/25) + t**4*(59*f/250 - 577/250) + t**5*(f*(f/5 + 1/5)/5 + 499*f/625 + 4341/2500) + O(t**6)]])

In [16]:
from sympy import print_latex
# Print LaTeX source for the Newton increment, the Newton step series and the Lx*x series.
print_latex(ntx)
print_latex(NewtonTaylor)
print_latex(retractHessTaylor)

\left[\begin{matrix}\frac{t^{2} \left(- 80 t^{4} + 192 t^{3} - 640 t^{2} - 768 t - 1280\right)}{5 t^{8} - 24 t^{7} + 40 t^{6} + 288 t^{5} + 1152 t^{3} - 640 t^{2} - 1536 t - 1280}\\\frac{4 t \left(- 5 t^{6} + 12 t^{5} - 20 t^{4} - 96 t^{3} + 80 t^{2} + 192 t + 320\right)}{5 t^{8} - 24 t^{7} + 40 t^{6} + 288 t^{5} + 1152 t^{3} - 640 t^{2} - 1536 t - 1280}\end{matrix}\right]
\left[\begin{matrix}1 - \frac{9 t^{4}}{50} + \frac{54 t^{5}}{125} + O\left(t^{6}\right)\\\frac{3 t^{2}}{5} - \frac{18 t^{3}}{25} - \frac{159 t^{4}}{250} + \frac{2529 t^{5}}{1250} + O\left(t^{6}\right)\end{matrix}\right]
\left[\begin{matrix}-5 + 2 t + 5 t^{2} + \frac{3 t^{3}}{2} - \frac{5 t^{4}}{2} - \frac{25 t^{5}}{8} + O\left(t^{6}\right)\\2 - 5 t - 4 t^{2} + \frac{15 t^{3}}{4} + 5 t^{4} - \frac{25 t^{5}}{16} + O\left(t^{6}\right)\end{matrix}\right]


In [17]:
def makeTaylor(vec, n=6):
  """Return a column matrix with the series in t about t = 0 of every entry of vec.

  vec: a SymPy Matrix or a sequence of expressions; every entry is expanded,
       not only the first two.
  n:   truncation order handed to sp.series; 6 is the sp.series default, so
       calling makeTaylor(vec) gives the same expansion order as before.
  """
  return sp.Matrix([sp.series(entry, t, 0, n) for entry in vec])
makeTaylor(ct)

Matrix([
[                              t**2 + t**3*(-f/5 - 1/5) + t**4*(12*f/25 - 31/25) + t**5*(1079/500 - 193*f/500) + O(t**6)],
[-t + t**2*(f/5 + 1/5) + t**3*(149/100 - 12*f/25) + t**4*(42*f/125 - 276/125) + t**5*(324*f/625 + 22539/10000) + O(t**6)]])

In [18]:
# Print LaTeX source for the series of the Chebyshev increment and of the normalised Chebyshev point.
print_latex(makeTaylor(ct))
print_latex(TaylorChev)

\left[\begin{matrix}t^{2} + t^{3} \left(- \frac{f}{5} - \frac{1}{5}\right) + t^{4} \left(\frac{12 f}{25} - \frac{31}{25}\right) + t^{5} \left(\frac{1079}{500} - \frac{193 f}{500}\right) + O\left(t^{6}\right)\\- t + t^{2} \left(\frac{f}{5} + \frac{1}{5}\right) + t^{3} \left(\frac{149}{100} - \frac{12 f}{25}\right) + t^{4} \left(\frac{42 f}{125} - \frac{276}{125}\right) + t^{5} \left(\frac{324 f}{625} + \frac{22539}{10000}\right) + O\left(t^{6}\right)\end{matrix}\right]
\left[\begin{matrix}1 + t^{4} \left(- \frac{f^{2}}{50} - \frac{f}{25} - \frac{1}{50}\right) + t^{5} \left(\frac{12 f^{2}}{125} - \frac{19 f}{125} - \frac{31}{125}\right) + O\left(t^{6}\right)\\t^{2} \left(\frac{f}{5} + \frac{1}{5}\right) + t^{3} \left(\frac{31}{25} - \frac{12 f}{25}\right) + t^{4} \left(\frac{59 f}{250} - \frac{577}{250}\right) + t^{5} \left(\frac{f \left(\frac{f}{5} + \frac{1}{5}\right)}{5} + \frac{499 f}{625} + \frac{4341}{2500}\right) + O\left(t^{6}\right)\end{matrix}\right]
