In [None]:
from julia.api import Julia
jl = Julia(compiled_modules=False)

import julia; julia.install()
from julia import Main

import numpy     as np
import panel     as pn; pn.extension()
import holoviews as hv; hv.extension( "bokeh", logo=False)

from panel.interact import interact

def raster(img):
    """Wrap `img` in a gray-scale `hv.Image` with both axes hidden, a 200px frame width and equal aspect."""
    image = hv.Image(img)
    return image.opts(cmap="gray", xaxis=None, yaxis=None, frame_width=200, aspect='equal')

In [2]:
%load_ext julia.magic

Initializing Julia interpreter. This may take some time...


In [3]:
%%julia
using LinearAlgebra, NearestNeighbors, LaTeXStrings, Latexify;

<div style="height:2cm;">
<div style="float:center;width:100%;text-align:center;"><strong style="height:100px;color:darkred;font-size:40px;">Eigenvalues Using the Arnoldi Algorithm</strong>
</div></div>

# 1. The Arnoldi Algorithm

## 1.1 Krylov Subspaces $\mathbf{\mathcal{K}_k}$

A brief [reminder about Krylov Spaces:](KrylovMethods.ipynb)

<div style="background-color:#F2F5A9;color:black;">

**Definition:**
Given matrix $A$ and a vector $v$, the $k^{th}$ Krylov matrix $\mathcal{K}_k(v)$ is given by<br>
$\qquad \mathcal{K}_k(v) = \left( \; v \; A v \; A^2 v \; \dots A^{k-1} v \; \right)$

$\qquad$ the column space $\mathscr{C}\left(  \mathcal{K}_k(v) \right)$ is the $k^{th}$ Krylov subspace.
</div>

We will write $\mathcal{K}_k$ for the $k^{th}$ Krylov subspace $\mathscr{C}\left( \mathcal{K}_k(v) \right)$.

<div style="background-color:#F2F5A9;color:black;">

**Theorem:** Given $A \in \mathbb{R}^{n \times n}$ and a vector $v \in \mathbb{R}^n$.
Let $x \in \mathcal{K}_m(v)$ for some $0 < m < n$. Then
* $x = \mathcal{K}_m z$ for some $z \in \mathbb{R}^m$, i.e., $x$ is a linear combination of the columns of $\mathcal{K}_m$.
* $x \in \mathcal{K}_{m+1}$
* $A x \in \mathcal{K}_{m+1}$.
</div>

## 1.2 Orthonormal Basis for  $\mathbf{\mathcal{K}_k}$

The discussion of the [Power Method](IterativeMethods_python.ipynb) to compute dominant eigenpairs for $A x = \lambda x$<br>
showed that the columns of $\mathcal{K}_k$ increasingly become parallel to a dominant eigenvector,<br>
resulting in a large condition number.

To combat this, we may choose to use an **orthonormal basis** for $\mathcal{K}_k$.

## 1.3 Derivation of the Algorithm

If we start with a unit length vector $q_1$, the Krylov matrix is $\mathcal{K}_k = \left( q_1 \; A q_1 \; \dots A^{k-1} q_1 \right)$. Let $q_1, q_2, \dots$ denote the orthonormal basis vectors, and let us apply $A$ to each of the vectors $q_i$.

Since $q_1, q_2, \dots q_m \in \mathcal{K}_m$, we know that $A q_i \in \mathcal{K}_{m+1}.$ We therefore have<br>
$\qquad\begin{align}
A q_1 &= h_{1 1} q_1 + h_{2 1} q_2\\
A q_2 &= h_{1 2} q_1 + h_{2 2} q_2 + h_{3 2} q_3\\
\dots & \\
A q_k &=  h_{1 k} q_1 + h_{2 k} q_2 + h_{3 k} q_3 + \dots h_{k+1 k} q_{k+1}\\
\end{align}$<br>
for some constants $h_{i j}$.

We can rewrite these equations in matrix form:<br>
$\qquad
(\xi) \Leftrightarrow A Q_k = Q_{k+1} \begin{pmatrix} H_k \\ \text{row} ({k+1}) \end{pmatrix}
$

where we have set $Q_k = \left( q_1\; q_2 \dots q_k \right),$ and
$H_k = \begin{pmatrix} h_{1 1} & h_{1 2} & \dots  & h_{1 k-1} & h_{1 k} \\
                       h_{2 1} & h_{2 2} & \dots  & h_{2 k-1} & h_{2 k} \\
                               & h_{3 2} & \dots  & h_{3 k-1} & h_{3 k} \\
                               &         & \ddots & \vdots & \vdots \\
                               &         &        & h_{k k-1} & h_{k k}\\
\end{pmatrix}$<br>

with the final $\;\;\text{row} ({k+1}) = \left( 0\; \dots\;0\; h_{k+1 k} \right).$

Taking dot products of each equation with $q_i$, i.e., multiplying from the left with $Q_k^t$, we obtain<br>
$\qquad (\xi) \Rightarrow Q^t_k A Q_k = \left( I_k \; 0 \right) \begin{pmatrix} H_k \\ \text{row} ({k+1}) \end{pmatrix} = H_k
$

**Remarks:**
* The matrix $H_k$ has a special shape: it is in **upper Hessenberg** form<br>
(non zero entries on the first subdiagonal and above, all zero below the first subdiagonal)
* It has a simple geometric interpretation: $H_k = Q^t_k A Q_k$ is the **orthogonal projection** of $A$ onto the Krylov space<br> for the basis $q_1, q_2 \dots q_k$.
* For a matrix of size $n \times n$, the matrix $Q_k$ has size $n \times k$ and the matrix $H_k$ has size $k \times k$.<br>
Thus for $\mathbf{k \ll n}$, the $H_k$ matrix is **much smaller** than $A$.

## 1.4 The Arnoldi Algorithm

Given $q_1, q_2, \dots q_m$, we obtain $v_m = A q_m$<br>
Since $v_m =  h_{1 m} q_1 + h_{2 m} q_2 + h_{3 m} q_3 + \dots h_{m+1 m} q_{m+1}$,<br>
* $h_{i m} = q_i \cdot v_m,\;\; i =1,2,\dots m$
* $w = v_m - \sum_{i=1}^m h_{i m} q_i$
* $h_{m+1 m} = \Vert w \Vert$
* $q_{m+1} = \frac{1}{h_{m+1 m}} w$

In [4]:
%%julia
"""
    arnoldi_algorithm(A, v, k, tol=1e-6)

Run at most `k` steps of the Arnoldi iteration for the square matrix `A`, starting
from the vector `v`.

Each step multiplies the newest basis vector by `A` and orthogonalizes the result
against all previous basis vectors (modified Gram-Schmidt). The iteration stops early
when the norm of the remainder drops below the absolute tolerance `tol`.

# Returns
`(Q, H)` where, with `m ≤ k` the number of steps performed, `Q` holds the first `m`
basis vectors as columns and `H` is the leading `m × m` block of the coefficient
matrix (upper Hessenberg).

# Throws
- `ArgumentError` if `v` has zero norm (it cannot be normalized).
"""
function arnoldi_algorithm(A, v, k, tol=1e-6)
    nv = norm(v)
    iszero(nv) && throw(ArgumentError("starting vector v must be nonzero"))

    # Store the basis in a floating-point type wide enough for both A and v.
    # Using eltype(A) alone fails (InexactError) for integer A or for real A
    # combined with a complex v.
    T = float(promote_type(eltype(A), eltype(v)))
    n = size(A, 1)
    Q = zeros(T, n, k+1)
    H = zeros(T, k+1, k)

    Q[:, 1] = v / nv

    m = 0
    while m < k
        m += 1
        w = A * Q[:, m]          # Compute the next Krylov vector
        for i in 1:m
            qi = view(Q, :, i)   # column view: avoids copying the basis vector
            H[i, m] = dot(qi, w)
            w      -= H[i, m] * qi
        end
        H[m+1, m] = norm(w)

        # A (numerically) zero remainder means A*q_m already lies in the span of
        # q_1..q_m: we have reached the maximal dimension of the Krylov spaces.
        if abs(H[m+1, m]) < tol break end

        Q[:, m+1] = w / H[m+1, m]
    end

    return Q[:, 1:m], H[1:m, 1:m]
end

# Example usage: run Arnoldi on a small 4×4 matrix and inspect the result.
A = [2. 6 5 9;
     1 2 1 -7;
     0 1 2 4;
     1 1 1 8
]

# Starting vector for the Krylov sequence (normalized inside arnoldi_algorithm).
v = [1., 1, 1, 1]

# Number of Arnoldi steps requested; here it equals the size of A.
m = 4

Q, H = arnoldi_algorithm(A, v, m)
#@show round.(Q, digits=3)
# The columns of Q should be orthonormal, i.e. QᵗQ is the identity.
@show Q'Q ≈ I
println("H =")
# Last expression of the cell: the rounded Hessenberg matrix is the cell's displayed value.
round.(H, digits=3)

Q' * Q ≈ I = true
H =

array([[ 9.25 , -1.877,  2.884, -0.919],
       [ 8.955, -0.591,  7.462, -3.439],
       [ 0.   ,  2.222,  5.185, -2.662],
       [ 0.   ,  0.   ,  1.945,  0.157]])

In [5]:
 %%julia
# Three Arnoldi steps on the global A and v, then check the identity H₃ = Q₃ᵗ A Q₃.
Q3,H3=arnoldi_algorithm(A, v, 3)
@show Q3'A*Q3 ≈ H3;


(Q3' * A) * Q3 ≈ H3 = true


## 1.5 Relationship between $\mathbf{H_k}$ and $\mathbf{QR}$

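Let $A = Q R$, where $Q = Q_n$ is the full orthogonal matrix built by the Arnoldi algorithm, so that $R = Q^t A$ (in general this $R$ is not triangular).<br>
Since $H_k = Q^t_k A Q_k$ and $Q^t_k Q = \left( I_k \; 0 \right)$, we have $H_k = Q^t_k Q R Q_k = R_k Q_k,$<br>
$\qquad$ where $R_k = Q^t_k A$ is the submatrix consisting of the first $k$ rows of $R$.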

In [6]:
%%julia
# Form R₃ = Q₃ᵗ A explicitly and confirm that R₃ Q₃ reproduces H₃.
Q3, H3 = arnoldi_algorithm(A, v, 3)
R3     = Q3'*A
@show R3*Q3 ≈ H3;

R3 * Q3 ≈ H3 = true

## 1.6 Symmetric Matrix: the Lanczos Algorithm

In [7]:
%%julia
# Build two N×N test matrices from the same diagonal matrix Λ:
# a general one (A) and a symmetric one (Aₕ), then run 50 Arnoldi steps on each.
N = 1000
X = randn(N,N)
Λ = diagm(randn(N))
# (X \ Λ) * X = X⁻¹ Λ X: a similarity transform of Λ, not symmetric in general.
A = X \ Λ * X

Xₕ, = qr(X) # keep only the Q factor: a random orthogonal matrix via QR on a random matrix

# Xₕᵗ Λ Xₕ with orthogonal Xₕ and diagonal Λ is symmetric (up to roundoff).
Aₕ  = Xₕ' * Λ * Xₕ
b   = randn(N)

Q,  H  = arnoldi_algorithm(A, b, 50)
Qs, Hs = arnoldi_algorithm(Aₕ, b, 50)
# Last expression of the cell: the rounded H for the symmetric matrix is displayed.
round.(Hs, digits=2)

array([[-0.06,  1.02,  0.  , ...,  0.  , -0.  , -0.  ],
       [ 1.02, -0.28,  1.44, ...,  0.  , -0.  , -0.  ],
       [ 0.  ,  1.44, -0.3 , ...,  0.  , -0.  , -0.  ],
       ...,
       [ 0.  ,  0.  ,  0.  , ..., -0.14,  1.33,  0.  ],
       [ 0.  ,  0.  ,  0.  , ...,  1.33, -0.06,  1.37],
       [ 0.  ,  0.  ,  0.  , ...,  0.  ,  1.37, -0.15]])

In [8]:
# Show log10|H| for both runs side by side as gray-scale images;
# the 1e-10 offset keeps log10 finite where an entry of H is exactly zero.
pn.Row( raster(np.log10( np.abs(Main.H )+1e-10) ).opts( title="H for asymmetric A" ),
        raster(np.log10( np.abs(Main.Hs)+1e-10) ).opts( title="H for symmetric A") )

When $A$ is symmetric, then $H_k = Q^t_k A Q_k$ is symmetric.<br>
$\qquad$ Since $H_k$ is upper Hessenberg in general, $H_k$ will be tridiagonal for symmetric matrices!

Therefore $h_{i i+1} = h_{i+1 i}$ and $h_{i j} = 0 \text{ for all } j>i+1.$ The Arnoldi algorithm considerably simplifies
to the Lanczos algorithm:

The update<br> $\qquad A q_k =  h_{1 k} q_1 + h_{2 k} q_2 + h_{3 k} q_3 + \dots h_{k+1 k} q_{k+1}$ in the Arnoldi algorithm reduces to the last three terms<br>$\qquad A q_k = h_{k-1 k} q_{k-1} +  h_{k k} q_k + h_{k+1 k} q_{k+1}$.

In [9]:
%%julia

"""
    lanczos_algorithm(A, v, k, tol=1e-6)

Run at most `k` steps of the Lanczos three-term recurrence for the square matrix `A`,
starting from the vector `v`.

Each step orthogonalizes `A * q_m` only against `q_m` and `q_{m-1}`.
NOTE(review): this is only valid when `A` is symmetric; symmetry is not checked here.
The iteration stops early when the norm of the remainder drops below the absolute
tolerance `tol`.

# Returns
`(Q, T)` where, with `m ≤ k` the number of steps performed, `Q` holds the first `m`
basis vectors as columns and `T` is the `m × m` `SymTridiagonal` matrix built from the
diagonal coefficients `alpha[1:m]` and the off-diagonal coefficients `beta[1:m-1]`.

# Throws
- `ArgumentError` if `v` has zero norm (it cannot be normalized).
"""
function lanczos_algorithm(A, v, k, tol=1e-6)
    nv = norm(v)
    iszero(nv) && throw(ArgumentError("starting vector v must be nonzero"))

    # Floating-point storage wide enough for both A and v; eltype(A) alone fails
    # (InexactError) for integer A or for real A combined with a complex v.
    T     = float(promote_type(eltype(A), eltype(v)))
    n     = size(A, 1)
    Q     = zeros(T, n, k+1)
    alpha = zeros(T, k)   # diagonal entries
    beta  = zeros(T, k)   # offdiagonal entries

    Q[:, 1] = v / nv

    m = 0
    while m < k
        m += 1
        w  = A * Q[:, m]          # Compute the next Krylov vector

        alpha[m] = dot(Q[:, m], w)

        # Three-term recurrence: only the two most recent basis vectors are removed.
        w -= alpha[m] * Q[:, m]
        if m > 1
            w -= beta[m-1] * Q[:, m-1]
        end
        beta[m] = norm(w)

        if beta[m] < tol break end  # we have reached the maximal dimension of the Krylov spaces

        Q[:, m+1] = w / beta[m]
    end

    # Truncate the coefficients to the m steps actually performed, so that T has as
    # many rows/columns as Q has columns even after an early break.
    return Q[:, 1:m], SymTridiagonal(alpha[1:m], beta[1:m-1])
end;

In [10]:
%%julia
# Run both algorithms on the symmetric matrix Aₕ with the same start vector,
# the same number of steps and the same (tighter) tolerance 1e-12.
Qs, Hs = arnoldi_algorithm(Aₕ, b, 50, 1e-12)
Qₗ, Tₗ  = lanczos_algorithm(Aₕ,  b, 50, 1e-12)

# Compare the projected matrices and the distance between the two bases.
@show Hs ≈ Tₗ
@show norm( Qs-Qₗ );


Hs ≈ Tₗ = true
norm(Qs - Qₗ) = 7.994013107184859e-10

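For a symmetric matrix, the Arnoldi algorithm is equivalent to the Lanczos algorithm: both produce the same tridiagonal matrix and the same basis up to roundoff, but each Lanczos step orthogonalizes against only the two most recent basis vectors.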

# 2. Ritz Eigenvalues

We have $H_k = Q^t_k A Q_k$.

When $k = n$, the size of the matrix $A$ (i.e., when $Q_n$ is square)<br>
$\qquad$ the resulting matrix $H_n$ has the same eigenvalues as $A:$<br>
$\qquad \det\left(Q^t_n A Q_n - \lambda I\right) = \det\left( Q^t_n ( A - \lambda I ) Q_n \right) = \det(A - \lambda I)\;\;$ since $Q_n^{-1}= Q_n^t$.

For $k < n$, the matrix $H_k$ has fewer eigenvalues, called the **Ritz eigenvalues**.<br>
$\qquad$ Could they be related to the eigenvalues of $A$?

We have reason to suspect such a relationship: the Krylov vectors converge to a leading eigenvector<br>
$\qquad$ [see the power method: section 3 in IterativeMethods_python.ipynb](IterativeMethods_python.ipynb)<br>
$\qquad$ Further, when $k = n$, we know that $H_n$ has the same eigenvalues as $A$, with corresponding eigenvectors $Q^t_n x$ and $x$ respectively:<br>
$\qquad\qquad A x = \lambda x \Leftrightarrow Q_n H_n Q^t_n x = \lambda x \Leftrightarrow H_n \left( Q^t_n x \right) = \lambda  \left( Q^t_n x \right) .$

Let's investigate!

## 2.1 Construct an Example

In [11]:
%%julia

"""
    ritz_eigenvalues(A, b, n)

Return the eigenvalues of the matrix `H` produced by `arnoldi_algorithm(A, b, n)`
(called with its default tolerance).
"""
function ritz_eigenvalues(A, b, n)
    _, projected = arnoldi_algorithm(A, b, n)
    decomposition = eigen(projected)
    return decomposition.values
end;

In the following, we generate a random matrix $A \in \mathbb{C}^{200 \times 200}$<br>
and a random starting vector to construct a Krylov matrix,<br>
and we compute the eigenvalues for $\mathcal{K}_3$ as a quick check.

In [12]:
%%julia
# Build a 200×200 matrix with a prescribed complex spectrum λ and a start vector b.
N = 200
X = randn(N,N)                  # a real matrix
λ = randn(N) + 1im*randn(N)     # a set of complex eigenvalues
A = X \ diagm(λ) * X            # a matrix with these eigenvalues: (X \ Λ) * X = X⁻¹ Λ X is similar to Λ
b = randn(N) .+ 0im             # random real values stored as complex numbers; start vector of the Krylov matrix
println( L"Ritz eigenvalues for K_3(b)" )
# Last expression of the cell: the rounded Ritz eigenvalues after 3 Arnoldi steps.
round.(ritz_eigenvalues(A,b,3), digits=3)


$Ritz eigenvalues for K_3(b)$

array([-1.748-0.778j,  0.157-0.946j,  0.613+0.768j])

#### **Compute the Eigenvalues of $\mathbf{H_k}$ for all Choices of $\mathbf k$**

____
Next, we compute the Ritz eigenvalues and plot them (in red),<br>
together with the exact eigenvalues for the original matrix (in black).

The following display shows the two sets of eigenvalues for successive Krylov spaces $\mathcal{K}_k$ for $k = 1, 2, \dots n$.

In [13]:
# Exact eigenvalues of A (black); built once and overlaid on every frame.
h_exact = hv.Scatter( (np.real(Main.λ), np.imag(Main.λ)), "Re(λ)", "Im(λ)", label="exact" )\
            .opts(size=4, color='black', width=400, height=500, xlim=(-3.5,3.5),ylim=(-3.5,3.5), tools=['hover'])

# Alternative slider-based UI, kept for reference:
#def plot_ritz_eigenvals(n):
#    e = Main.ritz_eigenvalues(Main.A, Main.b, n)
#    return (h_exact*hv.Scatter( (np.real(e), np.imag(e)), label="Ritz" ).opts(size=8, title=f"size {n}")).opts(legend_position="top")
#
#interact( plot_ritz_eigenvals, n=(1,Main.N) )

# The player value is the Krylov dimension k. It runs over 1..N: a Krylov space of an
# N×N matrix has dimension at most N, so N+1 is not a meaningful frame.
# value=1 makes the initial frame explicit instead of relying on the widget default.
player = pn.widgets.Player(name='Playback', start=1, end=Main.N, value=1, interval=300)

@pn.depends(value=player.param.value)
def play_ritz_eigenvalues(value):
    """Overlay the Ritz eigenvalues for Krylov dimension `value` on the exact eigenvalues."""
    e = Main.ritz_eigenvalues(Main.A, Main.b, value)
    return (h_exact*hv.Scatter( (np.real(e), np.imag(e)), label="Ritz" ).opts(size=8, title=f"size {value}")).opts(legend_position="top")

pn.Column( player, play_ritz_eigenvalues, width=500)

We observe the **usual behaviour** of the algorithm: the eigenvalues of largest magnitude $\vert \lambda \vert$ converge first.<br>
When $k = n$, the eigenvalues are identical, and we have obtained all of them.

The algorithm is of great interest since it allows us to stop short of $n$: we frequently are interested in only a number of the largest eigenvalues!

### Convergence

To observe the convergence, we compute the distance of the 6 largest (in magnitude) Ritz eigenvalues to the nearest actual eigenvalue for each $k = 6, 7, \dots 56$.

In [14]:
%%julia

# experiment with distance of Ritz eigenvalue to actual eigenvalues
# ------------------------------------------------------------------
# KDTree of actual eigenvalues
ev = eigen(A).values

# One column per eigenvalue: row 1 holds the real part, row 2 the imaginary part.
# real/imag work for real and for complex eigenvalue vectors alike (imag of a real
# vector is all zeros), so no branch on the value of the first eigenvalue is needed.
data   = [real(ev) imag(ev) ]'

kdtree = KDTree(data);

# Use the KD Tree to compute the shortest distance of a given point to points in the tree.
#k    = 1
#point = rand(2)
#idxs, dists = knn(kdtree, point, k, true)
#@show point
#@show idxs
#@show data[:,idxs]'
#@show dists

<PyCall.jlwrap KDTree{StaticArraysCore.SVector{2, Float64}, Euclidean, Float64}
  Number of points: 200
  Dimensions: 2
  Metric: Euclidean(0.0)
  Reordered: true>

In [15]:
%%julia
"""
    dist_from_nearest_eval(re, k, kdtree)

Take the `k` entries of `re` (Ritz eigenvalues) with the largest absolute value and
return, in that order, the distance of each one to its nearest neighbour in `kdtree`
(the tree of actual eigenvalues, queried with the point `[real, imag]`).
"""
function dist_from_nearest_eval(re, k, kdtree)
    largest = sort(re, by = abs, rev = true)[1:k]
    err = []
    for z in largest
        _, nearest = knn(kdtree, [real(z), imag(z)], 1, true)
        push!(err, nearest[1])
    end
    return err
end;

In [16]:
%%julia
# compute the distance of the 6 largest-magnitude Ritz eigenvalues to the nearest actual eigenvalue
N_eigs=6
# errors[j] collects the N_eigs distances obtained for the j-th value of k
errors = []
# `+` binds tighter than `:`, so k runs over N_eigs, N_eigs+1, …, 50+N_eigs.
# Starting at N_eigs lets a full k-step run supply the N_eigs Ritz values that are sorted and kept.
for k in N_eigs:50+N_eigs
   local d = dist_from_nearest_eval( ritz_eigenvalues(A,b,k), N_eigs, kdtree)
   push!(errors, d)
end

In [17]:
# One curve per tracked Ritz eigenvalue: curve j follows entry j of every error
# vector in Main.errors, drawn on a logarithmic y axis.
h = hv.Overlay([
    hv.Curve([step[j] for step in Main.errors], "Iteration Number", "distance", label=f"{j}")
      .opts(show_grid=True, logy=True)
    for j in range(Main.N_eigs)
]).opts(legend_position="top", width=500, height=450, title="Error versus Iteration Number")
h

Let us compare the convergence curve to the errors of the power method to compute a dominant eigenvalue:

In [18]:
%%julia
"""
    power_method(A, x, n)

Perform `n` power iterations `x ← A*x / ‖A*x‖`, starting from the normalized `x`.

Returns a vector with one entry per iteration: the absolute change of the estimate
`x'A*x` between the previous and the new iterate.
"""
function power_method(A, x, n)
    errs = []
    x = x / norm(x)
    for _ in 1:n
        prev = x
        y    = A * prev
        x    = y / norm(y)
        change = abs(x'A*x - prev'A*prev)   # change in estimated eigenvalue
        push!(errs, change)
    end
    return errs
end
# Record 50 power iterations on the global A and b; `errs` is read from Python as Main.errs.
errs = power_method( A, b, 50);

In [19]:
# Overlay the Ritz convergence curves (h) with the power-method curve, i.e. the change
# in the dominant eigenvalue estimate between consecutive power iterations.
(h*hv.Curve( Main.errs, label="Power Method" )).opts( width=600, legend_position='right', title='Comparison with Power Method Error')

As we see, the Ritz eigenvalue estimates converge faster!