# Transcript from Lecture, October 21, 2021


In [1]:
import sys

########################################
# Change the string in the line below! #
########################################
sys.path.append("/Users/gilbert/Documents/CS111-2021-fall/Python") 

import os
import time
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import linalg as spla
import scipy.sparse
import scipy.sparse.linalg
from scipy import integrate
import networkx as nx
import cs111

##########################################################
# If this import for matplotlib doesn't work, try saying #
#   conda install -c conda-forge ipympl                  #
# at a shell prompt on your computer                     #
##########################################################
import matplotlib
%matplotlib ipympl

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d




# Print floating-point array entries with 4 digits of precision.
np.set_printoptions(precision=4)

# Vector dot products and perpendicular (orthogonal) vectors

In [2]:
# Two example vectors with four entries each, stored as 1-D integer arrays.
x, y = np.array([3, 1, 4, -3]), np.array([1, 2, -1, 1])

print(f'x: {x}')
print(f'y: {y}')

x: [ 3  1  4 -3]
y: [ 1  2 -1  1]


In [3]:
# Dot product of x and y, written with the function np.dot.
np.dot(x,y)

-2

In [4]:
# The same dot product, written with the array method .dot.
x.dot(y)

-2

In [5]:
# The same dot product in matrix notation. Transposing a 1-D array leaves it
# unchanged, so the .T is only there to mirror the mathematical x^T y.
x.T @ y

-2

In [6]:
# For two 1-D arrays the @ operator is already the dot product.
x @ y

-2

In [7]:
# Dot product of x with itself: the sum of the squares of its entries.
x.T @ x

35

In [8]:
# The 2-norm (Euclidean length) of x.
npla.norm(x,2)

5.916079783099616

In [9]:
# Squaring the 2-norm gives the same value as x.T @ x (as a float).
npla.norm(x,2) ** 2

35.0

<b>Two vectors x and y are *orthogonal* (perpendicular) if x.T @ y = 0.</b>

In [10]:
# Display x again before pairing it with a new y.
x

array([ 3,  1,  4, -3])

In [11]:
# Redefine y (second entry is now 4 instead of 2) so that x.T @ y becomes 0.
y = np.array([1, 4, -1 ,1])
y

array([ 1,  4, -1,  1])

In [12]:
# The dot product is zero, so x and this y are orthogonal.
x.T @ y

0

In [13]:
# The same orthogonality check, written with the .dot method.
x.dot(y)

0

# Orthogonal matrices

<b>The square matrix A is *orthogonal* if A.T @ A = I, the identity matrix.</b>

The inverse of an orthogonal matrix is its transpose.

Therefore, the transpose of an orthogonal matrix is also orthogonal.

In [14]:
# Build a 5-by-5 matrix with the course helper cs111.random_orthog
# (presumably a random orthogonal matrix; the cells below check orthogonality).
Q = cs111.random_orthog(5)
Q

array([[-0.1318, -0.0603,  0.1377,  0.168 , -0.9653],
       [-0.4285, -0.4479,  0.6719, -0.3889,  0.1146],
       [ 0.0199, -0.0388, -0.4708, -0.8543, -0.2162],
       [ 0.2809,  0.7492,  0.5175, -0.2967, -0.0629],
       [-0.8484,  0.4827, -0.2004,  0.0521,  0.0662]])

In [15]:
# For an orthogonal Q this is the identity matrix, up to rounding error.
Q.T @ Q

array([[ 1.0000e+00, -3.5621e-17,  3.1938e-18,  2.2611e-17, -1.2806e-16],
       [-3.5621e-17,  1.0000e+00, -1.2555e-16, -2.7800e-17,  2.5981e-17],
       [ 3.1938e-18, -1.2555e-16,  1.0000e+00, -1.7606e-16,  5.6584e-17],
       [ 2.2611e-17, -2.7800e-17, -1.7606e-16,  1.0000e+00,  1.0450e-16],
       [-1.2806e-16,  2.5981e-17,  5.6584e-17,  1.0450e-16,  1.0000e+00]])

In [16]:
# Measure how far Q.T @ Q is from the 5-by-5 identity, in the matrix 2-norm.
I = np.eye(5)
npla.norm(Q.T @ Q - I, 2)

4.860336650511679e-16

<b>Every column of an orthogonal matrix is a unit vector (a vector of length one).</b>

In [17]:
# Take the column of Q with index 2 and compute its Euclidean length.
v = Q[:, 2]
npla.norm(v, 2)

1.0

<b>Any two different columns of an orthogonal matrix are perpendicular.</b>

In [18]:
# Take the column of Q with index 1 and form its dot product with v
# (the column with index 2, extracted in the previous cell).
w = Q[:,1]
v.T @ w

-1.6653345369377348e-16

<b>The identity matrix is orthogonal.</b>

In [19]:
# The 5-by-5 identity matrix.
I = np.eye(5)
I

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [20]:
# I.T @ I is again the identity, so I meets the definition of an orthogonal matrix.
I.T @ I

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

<b>Permutation matrices are orthogonal.</b>

In [21]:
# A 5-by-5 permutation matrix: every row and every column holds exactly one 1.
P = np.array([
    [0, 0, 1, 0, 0],
    [1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 0, 1],
    [0, 0, 0, 1, 0],
])
P

array([[0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0]])

In [22]:
# The transpose of the permutation matrix P.
P.T

array([[0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0]])

In [23]:
# P @ P.T is the identity, so P.T is the inverse of P.
P @ P.T

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]])

# An orthogonal matrix doesn't change the length of a vector

In [24]:
# A fresh 5-by-5 matrix from the course helper cs111.random_orthog.
Q = cs111.random_orthog(5)
Q

array([[-0.4333,  0.5729, -0.06  , -0.6124,  0.3246],
       [-0.3281, -0.0774,  0.2859,  0.5193,  0.7314],
       [ 0.3694,  0.6295, -0.5091,  0.4403,  0.1188],
       [ 0.6571, -0.2655, -0.0641, -0.4013,  0.5767],
       [ 0.3692,  0.4461,  0.8071,  0.0163, -0.1143]])

In [25]:
# Draw a vector of 5 random entries and report its Euclidean length.
v = np.random.random(size=5)
print(f'v: {v} norm(v): {npla.norm(v, 2)}')

v: [0.024  0.302  0.1023 0.5889 0.6101] norm(v): 0.9062581426645598


In [26]:
# Multiply v by Q; the length of the result should match norm(v).
w = Q @ v
print(f'w: {w} norm(w): {npla.norm(w, 2)}')

w: [-0.0061  0.7501  0.4787  0.0445  0.166 ] norm(w): 0.90625814266456


<b>Therefore, the 2-norm and 2-condition number of an orthogonal matrix are both 1.</b>

In [27]:
# Another 5-by-5 matrix from the course helper cs111.random_orthog.
Q = cs111.random_orthog(5)
Q

array([[-6.8900e-01,  5.6960e-04,  5.2960e-02,  7.1808e-01, -8.2626e-02],
       [ 6.1519e-01, -2.2365e-01, -3.7504e-01,  5.8352e-01, -3.0062e-01],
       [ 2.5805e-01,  8.9345e-01,  1.2024e-01,  2.6401e-01,  2.2582e-01],
       [-1.1437e-01, -6.1882e-02, -6.8721e-01,  2.3191e-02,  7.1435e-01],
       [ 2.5913e-01, -3.8457e-01,  6.0813e-01,  2.7133e-01,  5.8439e-01]])

In [28]:
# Confirm orthogonality: Q.T @ Q should be the identity up to rounding error.
Q.T @ Q

array([[ 1.0000e+00, -5.5756e-17,  6.4418e-18,  1.1844e-16,  3.6138e-18],
       [-5.5756e-17,  1.0000e+00,  8.5063e-17, -1.9757e-17,  1.0588e-16],
       [ 6.4418e-18,  8.5063e-17,  1.0000e+00,  2.4923e-16,  4.5388e-17],
       [ 1.1844e-16, -1.9757e-17,  2.4923e-16,  1.0000e+00, -2.1472e-16],
       [ 3.6138e-18,  1.0588e-16,  4.5388e-17, -2.1472e-16,  1.0000e+00]])

In [29]:
# The matrix 2-norm of Q.
npla.norm(Q, 2)

1.0000000000000002

In [30]:
# The condition number of Q in the 2-norm.
npla.cond(Q, 2)

1.0000000000000004

In [31]:
# Only the 2-norm of an orthogonal matrix is guaranteed to be 1, not the other norms!
# The 1-norm (the largest absolute column sum) can be larger, as it is here.
npla.norm(Q, 1)

1.9357411196466003

# Solving a linear system with an orthogonal matrix

In [32]:
# The coefficient matrix for the linear system: a 5-by-5 matrix from cs111.random_orthog.
Q = cs111.random_orthog(5)
Q

array([[-3.3364e-01,  2.4122e-01,  2.2682e-01, -2.7007e-01,  8.4031e-01],
       [-5.1772e-01, -4.8411e-01,  6.4336e-01, -9.8660e-02, -2.7196e-01],
       [-5.3261e-01,  3.4827e-01, -6.8566e-02,  7.6692e-01, -4.6458e-02],
       [-2.4671e-01,  6.8138e-01,  3.9407e-02, -5.0551e-01, -4.6665e-01],
       [-5.2546e-01, -3.4911e-01, -7.2691e-01, -2.7133e-01,  5.8566e-04]])

In [33]:
# Manufacture a right-hand side b for which the solution of Q x = b is known:
# the vector of all ones.
x_exact = np.ones(5)
b = Q @ x_exact
print(f'x_exact: {x_exact}')
print(f'b:       {b}')

x_exact: [1. 1. 1. 1. 1.]
b:       [ 0.7046 -0.7291  0.4676 -0.4981 -1.8722]


In [34]:
# b = Q @ x_exact, so the two lengths should agree
# (for a 1-D array, npla.norm defaults to the 2-norm).
npla.norm(x_exact) - npla.norm(b)

0.0

In [35]:
# Solve Q x = b with one matrix-vector product:
# the inverse of an orthogonal matrix is its transpose.
x = Q.T @ b
x

array([1., 1., 1., 1., 1.])

In [36]:
# Compare the length of the computed solution with that of the exact solution.
npla.norm(x_exact) - npla.norm(x)

-4.440892098500626e-16

In [37]:
# Error: difference between the known solution and the computed one.
error = x_exact - x
# Residual: how far Q @ x is from the right-hand side b.
residual = b - Q @ x

# Scale both by the size of the quantity they are measured against.
rel_residual_norm = npla.norm(residual, 2) / npla.norm(b, 2)
rel_error_norm = npla.norm(error, 2) / npla.norm(x_exact, 2)

print(f"relative residual norm: {rel_residual_norm}")
print(f"relative error norm:    {rel_error_norm}")


relative residual norm: 4.1540741810552243e-16
relative error norm:    3.6485651666928703e-16


<b>Why not exactly equal? Wait for the floating-point arithmetic lecture!</b>


# Singular value decomposition: SVD

The **singular value decomposition** of an arbitrary $m$-by-$n$ matrix $A$ is

$$A = USV^T,$$

where $U$ is an $m$-by-$m$ orthogonal matrix ($U$'s columns $u_0$, $u_1$, $\ldots$, $u_{m-1}$ all have length 1 and are perpendicular to each other, so that $U^TU=I$); and $V$ is an $n$-by-$n$ orthogonal matrix ($V^TV=I$, where now the $v_i$ are columns of $V$ and thus _rows_ of $V^T$); and $S$ is an $m$-by-$n$ diagonal matrix, whose diagonal elements are $\sigma_0 \ge \sigma_1 \ge \ldots \ge \sigma_{\min(m,n)-1}\ge 0$. The $\sigma_i$'s are called the _singular values_ of $A$.

In [38]:
# A small example: build a 2-by-2 matrix A from a hand-picked SVD

# Orthogonal factors with simple rational entries (5-12-13 and 3-4-5 triangles)
V = np.array([[12, 5], [-5, 12]]) / 13
U = np.array([[4, -3], [3, 4]]) / 5

# Singular values, largest first, and the diagonal matrix that holds them
sigma = [5, 1]
S = np.diag(sigma)

# Multiply the three factors together
A = U @ S @ V.T

# Show each piece, separated by blank lines
print('V:', V, sep='\n ', end='\n\n')
print('U:', U, sep='\n ', end='\n\n')
print('sigma:', sigma, end='\n\n')
print('S:', S, sep='\n ', end='\n\n')
print('A:', A, sep='\n ')

V:
 [[ 0.9231  0.3846]
 [-0.3846  0.9231]]

U:
 [[ 0.8 -0.6]
 [ 0.6  0.8]]

sigma: [5, 1]

S:
 [[5 0]
 [0 1]]

A:
 [[ 3.4615 -2.0923]
 [ 3.0769 -0.4154]]


In [39]:
# Check that U is orthogonal: U @ U.T should be the identity up to rounding.
U @ U.T

array([[ 1.0000e+00, -2.6645e-17],
       [-2.6645e-17,  1.0000e+00]])

In [40]:
# Check that V is orthogonal: V @ V.T should be the identity up to rounding.
V @ V.T

array([[1.0000e+00, 2.6277e-18],
       [2.6277e-18, 1.0000e+00]])

In [41]:
# Since A = U @ S @ V.T and V.T @ V = I, multiplying A by V on the right gives U @ S.
A @ V

array([[ 4. , -0.6],
       [ 3. ,  0.8]])

In [42]:
# U @ S scales column i of U by sigma[i]; it should equal A @ V.
U @ S

array([[ 4. , -0.6],
       [ 3. ,  0.8]])

<b>SVD of a random 8-by-5 matrix</b>

In [43]:
# An 8-by-5 matrix with entries drawn uniformly at random from [0, 1).
A = np.random.rand(8,5)
A

array([[0.7049, 0.6859, 0.9308, 0.4466, 0.2988],
       [0.4622, 0.7432, 0.2746, 0.856 , 0.1049],
       [0.4704, 0.8586, 0.2772, 0.972 , 0.242 ],
       [0.568 , 0.4868, 0.4398, 0.7512, 0.885 ],
       [0.8545, 0.0575, 0.9277, 0.0608, 0.3459],
       [0.1266, 0.0078, 0.8828, 0.9928, 0.9801],
       [0.4576, 0.6528, 0.0456, 0.8258, 0.6001],
       [0.8746, 0.204 , 0.9731, 0.6723, 0.6738]])

The scipy routine spla.svd(A) returns the three elements of the SVD, but not exactly in the form $A=USV^T$. Instead, spla.svd() returns the matrix $U$, then a vector of the $\min(m, n)$ singular values, then the matrix $V^T$ (not $V$).

In [None]:
spla.svd?

<b>Use spla.svd() to compute the SVD and then form the actual matrices $U$, $S$, and $V$.</b>

In [44]:
print(f'shape of A: {A.shape}', end='\n\n')

# spla.svd returns U, the vector of singular values, and V transposed
U, sigma, Vt = spla.svd(A)

print(f'shape of U: {U.shape}')
print(f'sigma: {sigma}')
print(f'shape of Vt: {Vt.shape}')

shape of A: (8, 5)

shape of U: (8, 8)
sigma: [3.6524 1.3681 1.0468 0.5127 0.2728]
shape of Vt: (5, 5)


In [45]:
# Expand the vector of singular values into the diagonal matrix S of
# A = U @ S @ V.T. S has the same shape as A; spla.diagsvd places sigma on
# the main diagonal and fills the rest with zeros.
S = spla.diagsvd(sigma, *A.shape)

# spla.svd returned V transposed; transpose it back to get V itself.
V = Vt.T

print('\nU:\n', U)
print('\nS:\n', S)
print('\nV:\n', V)


U:
 [[-0.3694 -0.1335  0.4401 -0.2874  0.684  -0.0514  0.15    0.2767]
 [-0.3008  0.3895  0.2374 -0.2978 -0.3429 -0.671   0.0516 -0.2056]
 [-0.3461  0.4585  0.1726 -0.2269 -0.1258  0.6605 -0.3461 -0.1297]
 [-0.3832  0.0675 -0.2167  0.5297  0.3247 -0.2713 -0.5706 -0.128 ]
 [-0.2745 -0.5503  0.3495  0.19   -0.1678  0.1328  0.1455 -0.6297]
 [-0.3893 -0.2002 -0.7339 -0.4562  0.0702  0.0314  0.1574 -0.176 ]
 [-0.314   0.3923 -0.1079  0.4926  0.0168  0.1359  0.6857  0.0654]
 [-0.4254 -0.3446  0.0305  0.0989 -0.5099  0.0116 -0.1152  0.6451]]

S:
 [[3.6524 0.     0.     0.     0.    ]
 [0.     1.3681 0.     0.     0.    ]
 [0.     0.     1.0468 0.     0.    ]
 [0.     0.     0.     0.5127 0.    ]
 [0.     0.     0.     0.     0.2728]
 [0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.    ]]

V:
 [[-0.4324 -0.2028  0.536   0.5275 -0.4543]
 [-0.348   0.568   0.45   -0.0121  0.5946]
 [-0.4702 -0.6325  0.1226 -0.5598  0.2247]
 [-0.5463  0.4

In [46]:
# Verify the three defining properties of the SVD. Each norm should be on the
# order of rounding error. (With no second argument, npla.norm of a matrix is
# the Frobenius norm.)
print('norm of difference between U.T @ U     and I:',
      npla.norm(U.T @ U - np.identity(A.shape[0])))
print('norm of difference between V.T @ V     and I:',
      npla.norm(V.T @ V - np.identity(A.shape[1])))
print('norm of difference between U @ S @ V.T and A:',
      npla.norm(U @ S @ V.T - A))

norm of difference between U.T @ U     and I: 2.3033550112261085e-15
norm of difference between V.T @ V     and I: 1.3724469393924201e-15
norm of difference between U @ S @ V.T and A: 3.3163122064917368e-15


In [47]:
# Plot the singular values against their index, with a logarithmic y-axis.
fig0, ax0 = plt.subplots()
ax0.plot(sigma, '.')
ax0.set_yscale('log')
ax0.set_title('Singular values on log scale')
ax0.set_xlabel('index')
ax0.set_ylabel('singular value')
# print() returns None, so the cell shows a blank line and no Out[] value.
print()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


