#### TMA4205 Numerical Linear Algebra
# Project Part 2
## Introduction

In this part we want to find a continuous low-rank approximation to a continuous matrix-valued function.

In [None]:
%load_ext autoreload
%autoreload 2

# imports and useful functions
from IPython.display import clear_output
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats

# Ask the inline backend to emit figures in PDF and SVG format.
set_matplotlib_formats("pdf", "svg")

# Colour-blind friendly line cycle: every colour is paired with a line style,
# so curves stay distinguishable without relying on colour alone.
from cycler import cycler

colour_cycle = cycler(
    color=["#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00", "#CC79A7", "#F0E442"],
)
dash_cycle = cycler(
    linestyle=["-", "--", "-.", ":", "-", "--", "-."],
)
line_cycler = colour_cycle + dash_cycle

# Install the cycle as the default for all axes and draw grid/ticks below the data.
plt.rc("axes", prop_cycle=line_cycler, axisbelow=True)

In [None]:
"""First define some useful functions for plotting and testing"""
from linalg.plotting import *
from linalg.helpers import get_function_timings, truncated_svd, get_equidistant_indexes

## Exercise 1:
We implement the Lanczos bidiagonalization method, with and without re-orthogonalization.

In [None]:
from linalg.bidiagonalization import lanczos_bidiag, lanczos_bidiag_reorth, make_bidiagonal, get_bidiagonal_approx

Make 3 randomly generated matrices $A_n \in \mathbb{R}^{n\times n}$, $n \in \{32, 64, 128\}$.
Then we show their singular values.

In [None]:
# Build one random square test matrix per size; np.random.rand draws from
# [0, 1), so the affine map "* 2 - 1" yields entries in [-1, 1).
n_list = [32, 64, 128]
A_list = [np.random.rand(n, n) * 2 - 1 for n in n_list]

# Plot the singular values of each matrix (one panel per size, shared y-axis).
fig, axs = plt.subplots(ncols=len(n_list), sharey=True, constrained_layout=True, figsize=(3 * len(n_list) + 1, 4))
fig.suptitle("Singular values of $A$")
for A, ax, n in zip(A_list, axs, n_list):
    # Only the singular values are needed, so skip computing U and V.
    svs = np.linalg.svd(A, compute_uv=False)
    ax.plot(svs, ".")
    # Raw strings: "\s" is not a valid escape sequence in a regular literal.
    ax.set_ylabel(r"$\sigma$")
    ax.set_xlabel(r"$\sigma$ number")
    ax.set_title(f"$n={n}$")

Now we approximate the matrix $A_n$ with different approximation methods.
The truncated SVD gives the best approximation of rank at most $k$ (in the Frobenius norm).
For all $k \leq n$ we compare this best approximation with the Lanczos bidiagonalization method, with and without re-orthogonalization.

We also measure the orthogonality error with the method from [5]:


\begin{equation}
    \eta(U) :=  ||I -U^T U||_F
\end{equation}

For each $k$ we also show this error for bidiagonalization with and without reorthogonalization.

In [None]:
from linalg.helpers import get_best_approx

def _fro(M):
    """Return the Frobenius norm of the matrix M."""
    return np.linalg.norm(M, ord="fro")


def _orth_defect(M, k):
    """Return ||I_k - M^T M||_F, the orthogonality error of the columns of M."""
    return _fro(np.eye(k) - M.T @ M)


panel_size = (1 + 3 * len(n_list), 4)

# First figure: approximation error per rank k, one panel per matrix size.
fig, axs = plt.subplots(ncols=len(n_list), tight_layout=True, figsize=panel_size)
fig.suptitle("Error approximation methods of rank $k$ ")
axs[0].set_ylabel("$||A - A_k||_F$")

# Second figure: orthogonality error of the Lanczos bases per rank k.
orth_fig, orth_axs = plt.subplots(ncols=len(n_list), sharey=True, tight_layout=True, figsize=panel_size)
orth_fig.suptitle("Orthogonalization error for bidiagonalization of rank $k$")
orth_axs[0].set_ylabel("$||I - Q^{T}Q||_F$")

for A, n, ax, orth_ax in zip(A_list, n_list, axs, orth_axs):
    k_list = np.arange(1, n + 1)
    best_approx_error = np.zeros(n)
    bidiagonal_error = np.zeros(n)
    bidiagonal_reorth_error = np.zeros(n)

    # columns: [P, Q, P_reorth, Q_reorth]
    reorth_error = np.zeros((n, 4))

    for i in range(n):
        k = i + 1
        # A fresh random start vector per rank, shared by both Lanczos variants.
        b = np.random.rand(n)

        # plain Lanczos bidiagonalization
        P, Q, alpha, beta = lanczos_bidiag(A, k, b)
        B = make_bidiagonal(alpha, beta)
        bidiagonal_error[i] = _fro(A - P @ B @ Q.T)
        reorth_error[i, :2] = _orth_defect(P, k), _orth_defect(Q, k)

        # Lanczos bidiagonalization with re-orthogonalization
        P, Q, alpha, beta = lanczos_bidiag_reorth(A, k, b)
        B = make_bidiagonal(alpha, beta)
        bidiagonal_reorth_error[i] = _fro(A - P @ B @ Q.T)
        reorth_error[i, 2:] = _orth_defect(P, k), _orth_defect(Q, k)

        # reference: error of get_best_approx for the same k
        A_k = get_best_approx(A, k)
        best_approx_error[i] = _fro(A - A_k)

    # approximation error curves
    error_curves = (
        (best_approx_error, "best approximation"),
        (bidiagonal_error, "bidiagonalization"),
        (bidiagonal_reorth_error, "bidiag. with reorth."),
    )
    for curve, curve_label in error_curves:
        ax.plot(k_list, curve, label=curve_label)
    ax.set_title(f"$n={n}$")
    ax.set_xlabel("$k$")
    ax.legend()

    # orthogonality error curves (log scale on the y-axis)
    orth_labels = ("P", "Q", "P with reorth.", "Q with reorth.")
    for col, curve_label in enumerate(orth_labels):
        orth_ax.semilogy(k_list, reorth_error[:, col], label=curve_label)
    orth_ax.set_title(f"$n={n}$")
    orth_ax.set_xlabel("$k$")
    orth_ax.legend()
plt.show()

From the figures above we see that bidiagonalization without re-orthogonalization does not give good approximations for large $k$.
The reason is that numerical instability causes the columns of $Q$ and $P$ to lose their orthonormality.

## Exercise 2
In the following code block the integration method of order 2 is implemented.
A method of order 1 is also used to obtain an estimate for the local truncation error.
The step size is then determined based on this local truncation error.

In [None]:
from linalg.integrate import *

To make sure that the resulting matrices are in fact orthogonal, we take steps using the Cayley map.
Furthermore, all inputs to the Cayley map are given in the form $B = [F, -U] [U, F]^T$,
where $U^T U = I$ and $F^T U = 0$.
We can exploit this input form to compute the Cayley map more efficiently:
...

In [None]:
from linalg.cayley_map import cayley_map_simple, cayley_map_plus, cayley_map_efficient

We now compare the performance of the different ways to compute the Cayley map.
The test cases are constructed by first generating matrices $A$ and $G$.
$A$ and $G$ are random matrices with elements drawn from the uniform distribution on $[0,1]$.
Then we compute the QR factorization of $A$ and take its $Q$ factor as the matrix $U$.
Finally, $F$ is computed as $F := (I - U U^T) G$.

In [None]:
from test.test_caylay import get_FUCDB

# Problem sizes m = 2^4, ..., 2^11; each case is generated with k = int(sqrt(m)).
m_list = 2 ** np.arange(4, 12)
sample_FUCDB = [get_FUCDB(size, k=int(np.sqrt(size))) for size in m_list]

# Each Cayley map variant gets its own argument tuples, taken from the same cases.
sample_FU = [(f, u) for f, u, _, _, _ in sample_FUCDB]
sample_CD = [(c, d) for _, _, c, d, _ in sample_FUCDB]
sample_B = [(b_mat,) for _, _, _, _, b_mat in sample_FUCDB]

# Time every variant on all cases.
time_simple = get_function_timings(cayley_map_simple, sample_B, number=10)
time_efficient = get_function_timings(cayley_map_efficient, sample_CD, number=10)
time_plus = get_function_timings(cayley_map_plus, sample_FU, number=10)

# Log-log plot (base 2 on both axes) of run time against problem size.
fig, ax = plt.subplots()
fig.suptitle("Performance of different methods of caylay map computation")
timing_curves = (
    (time_simple, "simple"),
    (time_efficient, "efficient"),
    (time_plus, "efficient modified"),
)
for timings, name in timing_curves:
    ax.loglog(m_list, timings, label=name, base=2)

ax.set_xlabel("$m$")
ax.set_ylabel("Time [ms]")
ax.legend()
plt.show()

We see that the Cayley map implementation that inverts the smallest matrix is the most efficient implementation for our case.

We now implement a space discretization of the heat equation as a matrix ODE $\dot A(t) = BA(t)$, with solution
$A(t) = \exp(tB)A(0)$.

In [None]:
from test.case_matrix_ode import generate_heat_equation

We now test our ...

In [None]:
def _fro(M):
    """Return the Frobenius norm of the matrix M."""
    return np.linalg.norm(M, ord="fro")


# Experiment parameters: problem size, final time and the ranks to test.
m = 20
t_f = 1
k_list = [5, 15]

# One panel per rank k, with shared axes so the panels are directly comparable.
fig, axs = plt.subplots(
    ncols=len(k_list),
    sharex=True,
    sharey=True,
    constrained_layout=True,
    figsize=(1 + 3 * len(k_list), 4),
)
fig.suptitle("Error for different low rank approximations")
axs[0].set_ylabel("Frobenious norm")

for ax, k in zip(axs, k_list):
    print(f"Running k={k}")
    # generate case and start conditions
    A_0, A, A_dot = generate_heat_equation(n=m, m=m, k=k)
    Y_0 = truncated_svd(A_0, k)

    # integrate, then keep only the time points selected by get_equidistant_indexes
    Y, T = matrix_ode_simple(0, t_f, Y_0=Y_0, X=A_dot, TOL=1e-3, verbose=True)
    t_ind = get_equidistant_indexes(T, 0, t_f)
    T = [T[idx] for idx in t_ind]
    Y = [Y[idx] for idx in t_ind]

    # X: result of get_best_approx, Y: integrated factorization, A: exact solution
    XA_diff = [_fro(get_best_approx(A(s), k) - A(s)) for s in T]
    YA_diff = [_fro(multiply_factorized(*factors) - A(s)) for s, factors in zip(T, Y)]
    YX_diff = [_fro(multiply_factorized(*factors) - get_best_approx(A(s), k)) for s, factors in zip(T, Y)]

    ax.set_title(f"$k={k}$")
    for curve, curve_label in ((XA_diff, "||X - A||"), (YA_diff, "||Y - A||"), (YX_diff, "||Y - X||")):
        ax.plot(T, curve, label=curve_label)
    ax.set_xlabel("$t$")
    ax.legend()
    # drop the verbose integrator output of this run
    clear_output()
plt.show()

We see that the best approximation of rank $k$ approximates $A$ perfectly, since $A$ itself has rank $k$.
$Y$ does not approximate $A$ as well as the best approximation does.

## Exercise 3
We now implement the test problem 3

In [None]:
from test.case_matrix_ode import generate_first_example

We now test our  solution on ...

In [None]:
# Experiment parameters: final time, perturbation sizes epsilon, ranks k and
# the length m of the random start vector handed to the bidiagonalization.
# NOTE(review): m presumably has to match the size of A(t) returned by
# generate_first_example -- confirm.
t_f = 1
eps_list = 10. ** np.array([-1, -2, -3, -4, -5])
k_list = [10, 20]
m = 100
# One row of panels per epsilon, one column per rank k.
fig, axs = plt.subplots(nrows=len(eps_list), ncols=len(k_list), sharex=True, squeeze=False, constrained_layout=True,
                        figsize=(1 + 3 * len(k_list), 1 + 3 * len(eps_list)))

fig.suptitle("Error for different low rank approximations")
for i, eps in enumerate(eps_list):
    axs[i, 0].set_ylabel("Frobenious norm")
    for j, k in enumerate(k_list):
        axs[-1, j].set_xlabel("$t$")
        # generate case and start conditions
        print(f"k: {k}, epsilon: {eps}")
        A_0, A, A_dot = generate_first_example(eps=eps)
        Y_0 = truncated_svd(A_0, k)

        # integrate
        Y, T = matrix_ode_simple(0, t_f, Y_0=Y_0, X=A_dot, TOL=1e-0, verbose=True)

        # keep only the time points selected by get_equidistant_indexes
        # (idx instead of i, so the row index of the outer loop is not shadowed)
        t_ind = get_equidistant_indexes(T, 0, t_f)
        T = [T[idx] for idx in t_ind]
        Y = [Y[idx] for idx in t_ind]

        # I know this is not the most efficient way but it is easy to read
        # X: result of get_best_approx, Y: integrated factorization,
        # W: Lanczos bidiagonal approximation with start vector b, A: exact solution
        b = np.random.rand(m)
        XA_diff = [np.linalg.norm(get_best_approx(A(t), k) - A(t), ord="fro") for t in T]
        YA_diff = [np.linalg.norm(multiply_factorized(*y) - A(t), ord="fro") for t, y in zip(T, Y)]
        WA_diff = [np.linalg.norm(get_bidiagonal_approx(A(t), k=k, b=b) - A(t), ord="fro") for t in T]
        YX_diff = [np.linalg.norm(multiply_factorized(*y) - get_best_approx(A(t), k), ord="fro") for t, y in zip(T, Y)]
        ax = axs[i, j]
        # Raw f-string: "\e" is not a valid escape sequence in a regular literal.
        ax.set_title(rf"$k=${k}, $\epsilon =$ {eps}")
        ax.plot(T, XA_diff, label="||X - A||")
        ax.plot(T, YA_diff, label="||Y - A||")
        ax.plot(T, WA_diff, label="||W - A||")
        ax.plot(T, YX_diff, label="||Y - X||")
        ax.legend()
        # drop the verbose integrator output of this run
        clear_output()
plt.show()

We see something...

## Exercise 4
We now implement the test problem 4

In [None]:
from test.case_matrix_ode import generate_second_example

We now test our  solution on ...

In [None]:

# Experiment parameters: final time, perturbation sizes epsilon, ranks k and
# the length m of the random start vector handed to the bidiagonalization.
# NOTE(review): m presumably has to match the size of A(t) returned by
# generate_second_example -- confirm.
t_f = 10
eps_list = [1e-1]
k_list = [5, 20]
m = 100
# Figure with the singular values of A(t) and Y(t) over time.
fig_sigma, axs_sigma = plt.subplots(nrows=len(eps_list), ncols=len(k_list), sharex=True, squeeze=False,
                                    constrained_layout=True,
                                    figsize=(1 + 3 * len(k_list), 1 + 3 * len(eps_list)))
fig_sigma.suptitle("Singular values over time")

# Figure with the approximation errors over time.
fig, axs = plt.subplots(nrows=len(eps_list), ncols=len(k_list), sharex=True, squeeze=False, constrained_layout=True,
                        figsize=(1 + 3 * len(k_list), 1 + 3 * len(eps_list)))
fig.suptitle("Error for different low rank approximations")
for i, eps in enumerate(eps_list):
    axs[i, 0].set_ylabel("Frobenious norm")
    for j, k in enumerate(k_list):
        axs[-1, j].set_xlabel("$t$")
        axs_sigma[-1, j].set_xlabel("$t$")
        # generate case and start conditions
        print(f"k: {k}, epsilon: {eps}")
        A_0, A, A_dot = generate_second_example(eps=eps)
        Y_0 = truncated_svd(A_0, k)
        # integrate
        Y, T = matrix_ode_simple(0, t_f, Y_0=Y_0, X=A_dot, TOL=1e-5, h_0=1e-7, verbose=True)

        # store a subset instead
        # (idx instead of i, so the row index of the outer loop is not shadowed)
        t_ind = get_equidistant_indexes(T, 0, t_f)
        T = [T[idx] for idx in t_ind]
        Y = [Y[idx] for idx in t_ind]

        # I know this is not the most efficient way but it is easy to read
        # X: result of get_best_approx, Y: integrated factorization,
        # W: Lanczos bidiagonal approximation with start vector b, A: exact solution
        b = np.random.rand(m)
        XA_diff = [np.linalg.norm(get_best_approx(A(t), k) - A(t), ord="fro") for t in T]
        YA_diff = [np.linalg.norm(multiply_factorized(*y) - A(t), ord="fro") for t, y in zip(T, Y)]
        WA_diff = [np.linalg.norm(get_bidiagonal_approx(A(t), k=k, b=b) - A(t), ord="fro") for t in T]
        YX_diff = [np.linalg.norm(multiply_factorized(*y) - get_best_approx(A(t), k), ord="fro") for t, y in zip(T, Y)]
        A_norm = np.array([np.linalg.norm(A(t), ord="fro") for t in T])
        ax = axs[i, j]
        # Raw f-string: "\e" is not a valid escape sequence in a regular literal.
        ax.set_title(rf"$k=${k}, $\epsilon =$ {eps}")
        # NOTE: every error is divided by ||A(t)||_F, so the curves are relative errors.
        ax.plot(T, XA_diff / A_norm, label="||X - A||")
        ax.plot(T, YA_diff / A_norm, label="||Y - A||")
        ax.plot(T, WA_diff / A_norm, label="||W - A||")
        ax.plot(T, YX_diff / A_norm, label="||Y - X||")
        ax.legend()

        # Stacked SVDs: one row of singular values per stored time point.
        sing_values = np.linalg.svd([A(t) for t in T], compute_uv=False)
        sing_values_y = np.linalg.svd([S for U, S, V in Y], compute_uv=False)
        ax_sigma = axs_sigma[i, j]
        ax_sigma.set_title(rf"$k=${k}, $\epsilon =$ {eps}")
        # Leading k singular values of A(t) as lines; those of Y(t) as dots at
        # every second stored time point.
        lines_A = ax_sigma.plot(T, sing_values[:, :k], "k-", lw=0.5, label="A(t)")
        lines_Y = ax_sigma.plot(T[::2], sing_values_y[::2, :k], "k.", lw=0.5, label="Y(t)")
        # The labels were never displayed before; show one legend entry per
        # family instead of one per singular value.
        ax_sigma.legend(handles=[lines_A[0], lines_Y[0]])

        # drop the verbose integrator output of this run
        clear_output()
plt.show()


Plot the singular values. Here we can use only $S$, since $Y$ is $S$ transformed by unitary matrices.


In [None]:
# Snapshot of the second example: leading singular values of A(t) at one time.
eps, t = 0.1, 0.8
A_0, A, A_dot = generate_second_example(eps=eps)

# Only the singular values are needed; keep the first 20 of them.
leading_sigmas = np.linalg.svd(A(t), full_matrices=False, compute_uv=False)[:20]
plt.plot(leading_sigmas)
plt.show()

insert


insert