# MD2SL - Master in Data Science and Statistical Learning

**Numerical Calculus and Linear Algebra**

Exercises 02: Linear systems: LU decomposition and partial pivoting

Deadline: 01/05/2024

In [None]:
# Importing packages
import numpy as np
import math
from scipy import *
import scipy.linalg as la
import matplotlib.pyplot as plt

# Exercise 1
Let $n\in\mathbb{N}$ and $\widehat{\mathbf{x}} = \pmatrix{1.1 \\ \vdots \\ 1.1}\in\mathbb{R}^n$.

1. Consider the matrix $A = (a_{ij})\in\mathbb{R}^{n\times n}$, so that
\begin{equation}
  a_{ij} = \cos(j\theta),\ \text{with}\ \theta = \frac{(2i+1)\pi}{2n}
\end{equation}

  and the linear system $A\cdot\mathbf{x} = \mathbf{b}$, with $\mathbf{b} = A\cdot\widehat{\mathbf{x}}$.

  For $n=5,10,15,20,25,30$, solve the system using the functions `solveLU(A,b)` if possible, otherwise use `solveLUP(A,b)`.
  Print the following values, depending on $n$
  -  conditioning of the matrix
  -  euclidean norm of the relative error
  -  euclidean norm of the residual error

  and comment the results.


Exercise 01 - Solution

In [None]:
# Matrix A

def get_matrix_ex1(n):
    """Build the n-by-n matrix of Exercise 1.

    With 0-based indices, entry (i, j) is cos(j * theta_i), where
    theta_i = (2*i + 1) * pi / (2*n).

    Parameters
    ----------
    n : int
        Order of the matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape (n, n).
    """
    A = np.zeros((n, n))
    half_step = math.pi/(2*n)
    for row in range(n):
        angle = (2*row+1)*half_step
        # Fill one row at a time; column j holds cos(j * angle).
        A[row, :] = [math.cos(col*angle) for col in range(n)]
    return A

Fattorizzazione LU

In [None]:
# Fattorizzazione LU
def gaussLU(A):
    """LU factorization of a square matrix by Gaussian elimination, no pivoting.

    The factors are returned packed in a single array: the strictly lower
    triangle holds the multipliers (the entries of L below its unit diagonal),
    the upper triangle, diagonal included, holds U.

    Parameters
    ----------
    A : numpy.ndarray
        Square matrix to factorize. It is not modified.

    Returns
    -------
    numpy.ndarray
        Packed LU factors, same shape as A, with a floating-point dtype.

    Raises
    ------
    ValueError
        If one of the first n-1 pivots is exactly zero, so the elimination
        cannot proceed without row exchanges.
    """
    n = A.shape[0]
    LU = A.copy()
    # An integer array would silently truncate the multipliers when they are
    # written back in place, so integer input is promoted to float first.
    if not np.issubdtype(LU.dtype, np.inexact):
        LU = LU.astype(np.float64)
    for k in range(n-1):
        if LU[k,k] == 0:
            raise ValueError('A has no lu decomposition.')
        # Multipliers of column k, stored where the eliminated entries were.
        LU[k+1:,k]     = LU[k+1:,k]/LU[k,k]
        # Rank-one update of the trailing submatrix.
        LU[k+1:,k+1:]  = LU[k+1:,k+1:]- np.outer(LU[k+1:,k],LU[k,k+1:])
    return LU


def solveLU(LU, b):
  """Solve L U x = b from packed LU factors.

  LU holds L in its strictly lower triangle (unit diagonal implied) and U in
  its upper triangle, diagonal included.

  Parameters
  ----------
  LU : numpy.ndarray
      Square array with the packed factors.
  b : numpy.ndarray
      Right-hand side whose first dimension equals the order of LU.

  Returns
  -------
  x : numpy.ndarray
      Computed solution.
  rnorm : float
      2-norm of the residual b - L U x.
  """
  n = LU.shape[0]
  assert n == LU.shape[1] # the factor array must be square
  assert b.shape[0] == n  # b must have one entry per row of LU
  # Forward substitution with L, then back substitution with U.
  y = lowerTriangularUnitDiagonal(LU, b)
  x = upperTriangular(LU, y)
  # Rebuild the explicit factors only to measure the residual.
  L = np.eye(n) + np.tril(LU, -1)
  U = np.triu(LU)
  rnorm = np.linalg.norm(b - L @ U @ x, 2)
  return x, rnorm

def lowerTriangularUnitDiagonal(A,b):
  """Forward substitution for a unit lower triangular system.

  Only the entries of A strictly below the diagonal are read; the diagonal is
  taken to be 1, so A may be a packed LU array.

  Parameters
  ----------
  A : numpy.ndarray
      Square coefficient array.
  b : array_like
      Vector of constant terms. It is not modified.

  Returns
  -------
  numpy.ndarray
      Solution, with the same shape as b and a floating-point dtype.
  """
  assert A.shape[0] == A.shape[1] # the coefficient array must be square
  x = np.array(b, copy=True)
  # The solution is built in place; an integer right-hand side would truncate
  # every update, so it is promoted to float first.
  if not np.issubdtype(x.dtype, np.inexact):
    x = x.astype(np.float64)
  n = A.shape[1]
  for j in range(n):
    # x[j] is final here: eliminate its contribution from the rows below.
    for i in range(j+1, n):
      x[i] = x[i]-A[i,j]*x[j]
  return x

def upperTriangular(A,b):
  """Back substitution for an upper triangular system.

  Only the upper triangle of A, diagonal included, is read, so A may be a
  packed LU array.

  Parameters
  ----------
  A : numpy.ndarray
      Upper triangular coefficient matrix.
  b : array_like
      Vector of constant terms. It is not modified.

  Returns
  -------
  numpy.ndarray
      Solution, with the same shape as b and a floating-point dtype.
  """
  x = np.array(b, copy=True)
  # The solution is built in place; an integer right-hand side would truncate
  # every division, so it is promoted to float first.
  if not np.issubdtype(x.dtype, np.inexact):
    x = x.astype(np.float64)
  for j in range(A.shape[1]-1, -1, -1): # columns n-1, n-2, ..., 0
    x[j] = x[j]/A[j,j]
    # x[j] is final here: eliminate its contribution from the rows above.
    for i in range(j):
      x[i] = x[i] - A[i,j] * x[j]
  return x


Fattorizzazione LUP

In [None]:
def gaussLUP(A):
  """LU factorization with partial (row) pivoting.

  At step k the row with the largest absolute entry in column k, among rows
  k..n-1, is swapped into position k before eliminating.

  Parameters
  ----------
  A : numpy.ndarray
      Square matrix to factorize. It is not modified.

  Returns
  -------
  LU : numpy.ndarray
      Packed factors of the row-permuted matrix: multipliers in the strictly
      lower triangle, U in the upper triangle. Floating-point dtype.
  piv : numpy.ndarray
      Row permutation as an index array; A[piv] is the matrix that LU
      factorizes.

  Raises
  ------
  ValueError
      If column k has no nonzero entry on or below the diagonal for some
      k < n-1. The last pivot is not checked.
  """
  LU = A.copy()
  # An integer array would silently truncate the multipliers when they are
  # written back in place, so integer input is promoted to float first.
  if not np.issubdtype(LU.dtype, np.inexact):
    LU = LU.astype(np.float64)
  n = LU.shape[0]
  piv = np.arange(0,n)
  for k in range(n-1):
    # Pivot search restricted to rows k..n-1; "+ k" maps back to a global row.
    r_idx = np.argmax(abs(LU[k:,k])) + k
    if LU[r_idx,k]==0:
        raise ValueError('Singular matrix.')
    # Swap rows k and r_idx in both the permutation and the working array.
    piv[[k,r_idx]] = piv[[r_idx,k]]
    LU[[k,r_idx]] = LU[[r_idx,k]]
    # Multipliers of column k, then rank-one update of the trailing block.
    LU[k+1:,k]     = LU[k+1:,k]/LU[k,k]
    LU[k+1:,k+1:] = LU[k+1:,k+1:]- np.outer(LU[k+1:,k],LU[k,k+1:])
  return LU, piv

def solveLUP(LU,b,P):
  """Solve a system from pivoted LU factors.

  Parameters
  ----------
  LU : numpy.ndarray
      Packed factors, as returned by gaussLUP.
  b : numpy.ndarray
      Right-hand side of the original (unpermuted) system.
  P : numpy.ndarray
      Row permutation as an index array, as returned by gaussLUP.

  Returns
  -------
  x : numpy.ndarray
      Computed solution.
  rnorm : float
      Residual norm reported by solveLU for the permuted system.
  """
  # Indexing with an index array builds a new array, so b itself is untouched.
  permuted_rhs = b[P]
  return solveLU(LU, permuted_rhs)


Funzioni dei residui

In [None]:
#print(f'Absolute error    : norm(x-A\\b)         =  {np.linalg.norm(x-x_hat):.2e}')
#print(f'Relative error    : norm(x-A\\b)/norm(x) =  {np.linalg.norm(x-x_hat)/np.linalg.norm(x):.2e}')
#print(f'Residual          : norm(b-A*x)         =   {rnorm}')
#print(f'Relative residual : norm(b-A*x)/norm(b) =   {rnorm/np.linalg.norm(b)}')

def AbsoluteError(x, xex):
  """Return the norm of x - xex (numpy's default norm)."""
  delta = x - xex
  return np.linalg.norm(delta)

def RelativeError(x, xex):
  """Return norm(x - xex) / norm(xex), with xex the reference solution."""
  error_norm = np.linalg.norm(x - xex)
  reference_norm = np.linalg.norm(xex)
  return error_norm / reference_norm

# Superfluous: solveLU already returns the residual norm.
def Residual(A, x, b):
  """Return the norm of the residual b - A x.

  Parameters
  ----------
  A : numpy.ndarray
      Coefficient matrix.
  x : numpy.ndarray
      Computed solution.
  b : numpy.ndarray
      Right-hand side.
  """
  # The residual is evaluated at the computed solution x, not at b.
  return np.linalg.norm(b - A@x)

def RelativeResidual(rnorm, b):
  """Scale a residual norm by the norm of the right-hand side b."""
  rhs_norm = np.linalg.norm(b)
  return rnorm / rhs_norm

Main

In [None]:
# Table header. Columns: size, condition number, then residual, relative
# residual and relative error, each for plain LU and for LU with pivoting.
rule = '--------------------------------------------------------------------------------------------------------------------'
print(rule)
print('n\t  cd\t\t  res\t\t   res-pivot\t rel-res\t  rel-res-piv\t  err\t\t   err-pivot')
print(rule)

for n in (5, 10, 15, 20, 25, 30):
    A = get_matrix_ex1(n)
    x_ex = np.full((n,1), 1.1) # exact solution of the system
    b = A @ x_ex # known vector

    # Factorize with both strategies; neither call modifies A.
    LU = gaussLU(A)
    LU_piv, P = gaussLUP(A)

    # Solve with each factorization.
    x, rnorm = solveLU(LU, b)
    x_piv, rnorm_piv = solveLUP(LU_piv, b, P)

    # Error measures without pivoting ...
    err_r = RelativeError(x, x_ex)
    rel_res = RelativeResidual(rnorm, b)
    # ... and with partial pivoting.
    err_r_piv = RelativeError(x_piv, x_ex)
    rel_res_piv = RelativeResidual(rnorm_piv, b)

    print(f'{n}\t  {np.linalg.cond(A):.2e}\t  {rnorm:.2e}\t   {rnorm_piv:.2e}\t {rel_res:.3}\t  {rel_res_piv:.3}\t  {err_r:.2e}\t   {err_r_piv:.2e}')

--------------------------------------------------------------------------------------------------------------------
n	  cd		  res		   res-pivot	 rel-res	  rel-res-piv	  err		   err-pivot
--------------------------------------------------------------------------------------------------------------------
5	  1.41e+00	  1.84e-15	   6.59e-16	 4.31e-16	  1.55e-16	  3.72e-16	   2.39e-16
10	  1.41e+00	  1.18e-12	   2.36e-15	 1.45e-13	  2.89e-16	  2.04e-13	   3.83e-16
15	  1.41e+00	  7.91e-10	   4.45e-15	 6.56e-11	  3.69e-16	  5.21e-11	   4.33e-16
20	  1.41e+00	  1.60e-07	   2.71e-14	 1.01e-08	  1.7e-15	  7.11e-09	   1.16e-15
25	  1.41e+00	  1.81e-04	   1.21e-14	 9.12e-06	  6.1e-16	  9.43e-06	   7.22e-16
30	  1.41e+00	  2.78e-02	   3.28e-14	 0.00117	  1.38e-15	  1.04e-03	   1.02e-15


- What happens to the condition number of $A$? What does it mean?

La matrice $\mathtt{A}$ rimane ben condizionata al crescere delle sue dimensioni, in quanto in questo caso il suo numero di condizionamento $k(\mathtt{A})$ non varia rispetto al valore iniziale prossimo a 1.

Posta $\hat{\mathbf{x}}$ la soluzione di ground-truth, ne segue che l'errore residuo relativo $\frac{\|\mathtt{A}\mathbf{x} - \mathbf{b}\|}{\|\mathbf{b}\|}$ è una buona stima dell'errore relativo $\frac{\|\hat{\mathbf{x}} - \mathbf{x}\|}{\|\hat{\mathbf{x}}\|}$ in quanto
\begin{equation}
\frac{1}{k(\mathtt{A})} \frac{\|\mathbf{r}\|}{\|\mathbf{b}\|} \le \frac{\|\Delta \mathbf{x}\|}{\|\hat{\mathbf{x}}\|} \le k(\mathtt{A}) \frac{\|\mathbf{r}\|}{\|\mathbf{b}\|}
\end{equation}
con $\mathbf{r} = \mathtt{A}\mathbf{x} - \mathbf{b}$ e $\Delta \mathbf{x} = \hat{\mathbf{x}} - \mathbf{x}$

  
- How do the naive and pivot residuals behave?

Il metodo con pivoting è migliore perché è possibile scegliere il divisore in modo da evitare per quanto possibile divisioni per valori piccoli nel processo di fattorizzazione.

  
- How do the naive and pivot relative errors behave?

Stesso pattern osservato sui residui, in virtù del basso numero di condizionamento: la soluzione ottenuta con pivoting è più accurata della fattorizzazione LU semplice


# Exercise 2
Let $n\in\mathbb{N}$ and $\widehat{\mathbf{x}} = \pmatrix{1.1 \\ \vdots \\ 1.1}\in\mathbb{R}^n$.

1. Consider the matrix $A = (a_{ij})\in\mathbb{R}^{n\times n}$, so that
\begin{equation}
  a_{ij} = (i+1)^{j}
\end{equation}

  and the linear system $A\cdot\mathbf{x} = \mathbf{b}$, with $\mathbf{b} = A\cdot\widehat{\mathbf{x}}$.

  For $n=1,\dots,10$, solve the system using the functions `solveLU(A,b)` if possible, otherwise use `solveLUP(A,b)`.
  Print the following values, depending on $n$
  -  conditioning of the matrix
  -  euclidean norm of the relative error
  -  euclidean norm of the residual error

  and comment the results.


Exercise 02 - Solution

Segue il codice che istanzia la matrice in funzione della dimensione n, e la stampa a video degli errori: oltre alla norma euclidea dell'errore relativo ($\|\mathbf{x}-\hat{\mathbf{x}}\|/\|\hat{\mathbf{x}}\|$, con $\hat{\mathbf{x}}$ soluzione di ground truth), e dell'errore residuo $\|\mathbf{b}-\mathtt{A}\mathbf{x}\|$, si riporta anche il valore del residuo relativo $\|\mathbf{b}-\mathtt{A}\mathbf{x}\|/\|\mathbf{b}\|$ come ausilio alla comprensione dei dati tabulati.

In [None]:
def get_matrix_ex2(n):
    """Build the n-by-n matrix of Exercise 2: a_ij = (i+1)**j, 0-based i and j.

    Parameters
    ----------
    n : int
        Order of the matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape (n, n).
    """
    A = np.zeros((n, n))
    bases = np.arange(1, n+1) # base of row i is i+1
    # First column is (i+1)**0 = 1; the slice keeps n = 0 valid.
    A[:, :1] = 1
    # Each further column is the previous one multiplied by the row base.
    for col in range(1, n):
        A[:, col] = bases*A[:, col-1]
    return A

In [None]:
# Table header. Columns: size, condition number, then residual, relative
# residual and relative error, each for plain LU and for LU with pivoting.
rule = '--------------------------------------------------------------------------------------------------------------------'
print(rule)
print('n\t  cd\t\t  res\t\t   res-pivot\t rel-res\t  rel-res-piv\t  err\t\t   err-pivot')
print(rule)

for n in range(1, 11): # n = 1, 2, ..., 10
    A = get_matrix_ex2(n)
    x_hat = np.full((n,1), 1.1) # exact solution of the system
    b = A @ x_hat # known vector

    # Factorize with both strategies; neither call modifies A.
    LU = gaussLU(A)
    LU_piv, P = gaussLUP(A)

    # Solve with each factorization.
    x, rnorm = solveLU(LU, b)
    x_piv, rnorm_piv = solveLUP(LU_piv, b, P)

    # Error measures without pivoting ...
    err_r = RelativeError(x, x_hat)
    rel_res = RelativeResidual(rnorm, b)
    # ... and with partial pivoting.
    err_r_piv = RelativeError(x_piv, x_hat)
    rel_res_piv = RelativeResidual(rnorm_piv, b)

    print(f'{n}\t  {np.linalg.cond(A):.2e}\t  {rnorm:.2e}\t   {rnorm_piv:.2e}\t {rel_res:.1e}\t  {rel_res_piv:.1e}\t  {err_r:.2e}\t   {err_r_piv:.2e}')

--------------------------------------------------------------------------------------------------------------------
n	  cd		  res		   res-pivot	 rel-res	  rel-res-piv	  err		   err-pivot
--------------------------------------------------------------------------------------------------------------------
1	  1.00e+00	  0.00e+00	   0.00e+00	 0.0e+00	  0.0e+00	  0.00e+00	   0.00e+00
2	  6.85e+00	  0.00e+00	   0.00e+00	 0.0e+00	  0.0e+00	  0.00e+00	   0.00e+00
3	  7.09e+01	  0.00e+00	   0.00e+00	 0.0e+00	  0.0e+00	  8.72e-16	   8.72e-16
4	  1.17e+03	  0.00e+00	   1.59e-14	 0.0e+00	  1.5e-16	  6.98e-15	   9.55e-15
5	  2.62e+04	  0.00e+00	   2.93e-14	 0.0e+00	  3.1e-17	  5.21e-14	   2.44e-13
6	  7.31e+05	  1.83e-12	   9.45e-13	 1.6e-16	  8.4e-17	  3.65e-12	   1.48e-11
7	  2.45e+07	  2.29e-13	   3.37e-11	 1.4e-18	  2.0e-16	  2.23e-11	   2.14e-10
8	  9.52e+08	  4.69e-10	   5.01e-10	 1.6e-16	  1.7e-16	  1.95e-09	   3.95e-09
9	  4.23e+10	  3.75e-09	   7.18e-09	 6.5e-17	  1.2e-16	  1.24e-07	   7.

- What happens to the condition number of $A$? What does it mean?

La matrice diventa sempre più malcondizionata al crescere di n, per cui l'errore residuo relativo $\frac{\|\mathtt{A}\mathbf{x} - \mathbf{b}\|}{\|\mathbf{b}\|}$ non è un buon indicatore della bontà della stima $\mathbf{x}$ rispetto al ground truth $\hat{\mathbf{x}}$ (ossia $\frac{\|\hat{\mathbf{x}} - \mathbf{x}\|}{\|\hat{\mathbf{x}}\|}$)

- How do the naive and pivot residuals behave?

La LU mostra residui il più delle volte più bassi della LUP.

- How do the naive and pivot relative errors behave?

Il metodo senza pivoting produce errori relativi inferiori, talvolta anche di 1 ordine di grandezza: volendo azzardare una spiegazione del fenomeno, l'ipotesi per questo fatto è che la struttura della matrice A, a valori crescenti esponenzialmente spostandoci verso l'angolo inferiore a destra, influisca negativamente negli errori di approssimazione fatti permutando le righe alla ricerca del pivoting più grande da usare nel processo di riduzione della matrice.

# Exercise 3
Let $A\in\mathbb{R}^{n\times n}$ be a tridiagonal matrix
\begin{equation}
  A = \pmatrix{d_0     & a_0    &        &         & \huge 0 \\
               c_1     & \ddots & \ddots &         &         \\
                       & \ddots & \ddots & \ddots  &         \\
                       &        & \ddots & \ddots  & a_{n-2} \\
               \huge 0 &        &        & c_{n-1} & d_{n-1} \\}
\end{equation}

for which its LU decomposition exists. Hence, $A = L\cdot U$ with
\begin{equation}
L = \pmatrix{1       &        &         & \huge0 \\
             l_1     & \ddots &         &        \\
                     & \ddots & \ddots  &        \\
             \huge 0 &        & l_{n-1} & 1},
\quad
U = \pmatrix{u_0       &  a_0   &         & \huge0 \\
                     & \ddots & \ddots  &        \\
                     &        & \ddots  & a_{n-2}\\
             \huge 0 &        &         & u_{n-1}}.
\end{equation}


Example given for $n=4$.
\begin{equation}
\pmatrix{d_0 & a_0 & 0   & 0   \\
         c_1 & d_1 & a_1 & 0   \\
         0   & c_2 & d_2 & a_2 \\
         0   & 0   & c_3 & d_3} = \pmatrix{1   & 0   & 0   & 0 \\
                                           l_1 & 1   & 0   & 0 \\
                                           0   & l_2 & 1   & 0 \\
                                           0   & 0   & l_3 & 1 }\pmatrix{u_0   & a_0 & 0   & 0   \\
 0     & u_1 & a_1 & 0   \\
 0     & 0   & u_2 & a_2 \\
 0     & 0   & 0   & u_3  }
\end{equation}


In particular,
\begin{array}{clll}
          & c_1 = l_1u_0        &  c_2 = l_2u_1       & c_3 = l_3u_2\\
d_0 = u_0 & d_1 = l_1a_0 + u_1  &  d_2 = l_2a_1 + u_2 & d_3 = l_3a_2 + u_3.\\
\end{array}



1. Write a python function `thomas(c,d,a)` which takes in input the diagonals   of a tridiagonal square matrix $A\in\mathbb{R}^{n\times n}$
  - $c:$  lower diagonal
  - $d:$  main diagonal
  - $a:$  upper diagonal
  
  and returns the diagonals of the lu decomposition $A = L\cdot U$
  - $l:$  lower diagonal of $L$
  - $u:$  main diagonal of $U$

  by implementing the following *Thomas algorithm*.

  **Input:** $c$, $d$, $a$
  1. $u_0$ = $d_0$
  2. for $i=1,\dots,n-1$
    - $l_i$ = $c_i / u_{i-1}$
    - $u_i$ = $d_i - l_ia_{i-1}$
  **Output:** $l$, $u$

2. Test the function `thomas(c, d, a)` on the following tridiagonal matrices.

\begin{equation}
\begin{split}
(a)\ \pmatrix{1 & 4 & 0 & 0\\
             3 & 4 & 1 & 0\\
             0 & 2 & 3 & 4\\
             0 & 0 & 1 & 3}
             \\
             \\
(b)\ \pmatrix{2  & 1  & 0  & 0\\
             -1 & 2  & 1  & 0\\
             0  & -1 & 2  & 1\\
             0  & 0  & -1 & 2
            }
            \\
            \\
(c)\ \pmatrix{ 2 &  1 &  0 &  0 &  0 & 0\\
              -1 &  4 &  1 &  0 &  0 & 0\\
               0 & -1 &  4 &  1 &  0 & 0\\
               0 &  0 & -1 &  4 &  1 & 0\\
               0 &  0 &  0 & -1 &  4 & 1\\
               0 &  0 &  0 &  0 & -1 & 2}
\end{split}
\end{equation}
  - reconstruct the $L$ and $U$ matrices of their LU decomposition;
  - compute the reconstruction error as $\texttt{norm}(A - L\cdot U)$.

Exercise 3.1 - Solution

In [None]:
def thomas(c, d, a):
    """LU factorization of a tridiagonal matrix (Thomas algorithm).

    Parameters
    ----------
    c : sequence
        Lower diagonal, n-1 entries; c[i-1] is the entry in row i, column i-1.
    d : sequence
        Main diagonal, n entries.
    a : sequence
        Upper diagonal, n-1 entries.

    Returns
    -------
    l : numpy.ndarray
        Lower diagonal of L, n-1 entries (L has a unit main diagonal).
    u : numpy.ndarray
        Main diagonal of U, n entries (the upper diagonal of U is a).

    Raises
    ------
    ValueError
        If a pivot that has to be divided by is exactly zero, i.e. the
        factorization without pivoting does not exist.
    """
    n = len(d)
    u = np.ones(n)
    l = np.ones(n-1)
    u[0] = d[0]
    for i in range(1,n):
        iprv = i-1
        # Same guard and message as gaussLU; without it a zero pivot would
        # propagate inf/nan through the remaining entries.
        if u[iprv] == 0:
            raise ValueError('A has no lu decomposition.')
        l[iprv] = c[iprv]/u[iprv]
        u[i] = d[i]-l[iprv]*a[iprv]
    return l, u


Exercise 3.1.a - Solution

In [None]:
# Matrix (a) of the exercise statement; its first row is 1 4 0 0.
A = np.array([[1,4,0,0], [3,4,1,0], [0,2,3,4], [0,0,1,3]])

d = np.diag(A) # main diagonal of A
c = np.diag(A, k=-1) # lower diagonal of A
a = np.diag(A, k=1) # upper diagonal of A

l, u = thomas(c,d,a)

# L has a unit main diagonal and l below it; U has u on the main diagonal
# and the upper diagonal of A above it.
n = A.shape[0]
L = np.diag(np.ones(n),0) + np.diag(l,-1) # assemble
U = np.diag(u,0) + np.diag(a,1) # assemble

error = np.linalg.norm(A-L@U)

print(f'Reconstruction error: {error}')

Reconstruction error: 0.0


Exercise 3.1.b - Solution

In [None]:
# Matrix (b) of the exercise statement; its first row is 2 1 0 0.
A = np.array([[2,1,0,0], [-1,2,1,0], [0,-1,2,1], [0,0,-1,2]])


d = np.diag(A) # main diagonal of A
c = np.diag(A, k=-1) # lower diagonal of A
a = np.diag(A, k=1) # upper diagonal of A

l, u = thomas(c,d,a)

# L has a unit main diagonal and l below it; U has u on the main diagonal
# and the upper diagonal of A above it.
n = A.shape[0]
L = np.diag(np.ones(n),0) + np.diag(l,-1) # assemble
U = np.diag(u,0) + np.diag(a,1) # assemble

error = np.linalg.norm(A-L@U)

print(f'Reconstruction error: {error}')

Reconstruction error: 2.220446049250313e-16


Exercise 3.1.c - Solution

In [None]:
# Matrix (c) of the exercise statement, assembled from its three diagonals.
dA = [2, 4, 4, 4, 4, 2]
uA = np.ones(5)
lA = -np.ones(5)
A = np.diag(lA,-1) + np.diag(dA,0) + np.diag(uA,1)
n = len(A)

# Read the three diagonals back from the assembled matrix.
c = A.diagonal(-1) # lower diagonal of A
d = A.diagonal() # main diagonal of A
a = A.diagonal(1) # upper diagonal of A

l, u = thomas(c,d,a)

# L: unit main diagonal with l below it; U: u on the main diagonal, a above it.
L = np.eye(n) + np.diag(l,-1)
U = np.diag(u) + np.diag(a,1)

error = np.linalg.norm(A - L @ U)

print(f'Reconstruction error: {error}')

Reconstruction error: 6.280369834735101e-16
