# Transcript from lecture 6, January 31, 2023


In [1]:
import os
import sys

###########################################################################
# Tell Python where the course's "Python" directory (the one holding the  #
# cs111 module) lives on your computer. Either set the CS111_PYTHON_DIR   #
# environment variable, or change the default string in the line below!   #
###########################################################################
sys.path.append(
    os.environ.get("CS111_PYTHON_DIR", "/Users/gilbert/Documents/CS111-2023-winter/Python")
)

import time
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import linalg as spla
import scipy.sparse
import scipy.sparse.linalg
from scipy import integrate
import networkx as nx
import cs111

##########################################################
# If this import for matplotlib doesn't work, try saying #
#   conda install -c conda-forge ipympl                  #
# at a shell prompt on your computer                     #
##########################################################
import matplotlib
%matplotlib ipympl

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d




np.set_printoptions(precision = 4)

# How do we measure a vector?

## 2-norm of a vector (usually what we mean by just "norm")

In [2]:
v = np.array([3,1,4,1])
v

array([3, 1, 4, 1])

In [3]:
npla.norm(v)

5.196152422706632

In [4]:
v = np.random.random(10)
v

array([0.2193, 0.4278, 0.4038, 0.5769, 0.0731, 0.7723, 0.5373, 0.0264,
       0.6315, 0.0333])

In [5]:
npla.norm(v)

1.420578987965332

In [6]:
# Multiplying the vector by a constant c multiplies the norm by |c|
# (for a positive constant such as 2, the norm is simply doubled)

npla.norm(2 * v)

2.841157975930664

## Error and residual for Ax=b, relative residual norm

In [7]:
A = np.random.random((5,5))
A

array([[0.8669, 0.6571, 0.8257, 0.4616, 0.0156],
       [0.5542, 0.3993, 0.4998, 0.3752, 0.4391],
       [0.08  , 0.819 , 0.1834, 0.6962, 0.1927],
       [0.5588, 0.0655, 0.186 , 0.3699, 0.6209],
       [0.4263, 0.014 , 0.4808, 0.4472, 0.2524]])

In [8]:
# make a b for which the exact answer is x_exact = all ones
n = A.shape[0]
x_exact = np.ones(n)
print("x_exact:", x_exact)
print()

b = A @ x_exact
print("b:", b)

x_exact: [1. 1. 1. 1. 1.]

b: [2.8268 2.2677 1.9712 1.801  1.6208]


In [9]:
x, r = cs111.LUsolve(A,b)
print("x:", x)

x: [1. 1. 1. 1. 1.]


In [10]:
error = x - x_exact
print("error:", error)

error: [ 0.0000e+00 -3.3307e-16  4.4409e-16 -3.3307e-16  4.4409e-16]


In [11]:
npla.norm(error)

7.850462293418876e-16

In [12]:
# If we don't know the exact answer, we don't know the error!

b = np.random.random(n)
print("b:", b)
print()

x, rel_res = cs111.LUsolve(A,b)
print("x:", x)

b: [0.7666 0.4497 0.6469 0.3156 0.4935]

x: [ 0.1993  0.0773  0.1998  0.8269 -0.2316]


In [13]:
# Even without the exact answer, the residual b - A@x can always be computed
# from what we do have: A, b, and the computed solution x.
residual = b - A @ x
residual_norm = npla.norm(residual)
relative_residual_norm = residual_norm / npla.norm(b)

# end="\n\n" leaves one blank line after each reported quantity
print("residual:", residual, end="\n\n")
print("residual norm:", residual_norm, end="\n\n")
print("relative residual norm:", relative_residual_norm, end="\n\n")


residual: [ 0.0000e+00  0.0000e+00 -1.1102e-16  0.0000e+00 -1.1102e-16]

residual norm: 1.5700924586837752e-16

relative residual norm: 1.260538933686803e-16



In [14]:
rel_res

1.260538933686803e-16

In [15]:
npla.norm(b)

1.2455723633157407

In [16]:
# relative residual norm is immune to scaling.
# suppose the coefficients of the equations are in light-years, and we want to change to kilometers

km_per_lightyear = 9.461 * 10**12
AA = km_per_lightyear * A
bb = km_per_lightyear * b


In [17]:
print('A:\n', A)
print()
print('AA:\n', AA)

A:
 [[0.8669 0.6571 0.8257 0.4616 0.0156]
 [0.5542 0.3993 0.4998 0.3752 0.4391]
 [0.08   0.819  0.1834 0.6962 0.1927]
 [0.5588 0.0655 0.186  0.3699 0.6209]
 [0.4263 0.014  0.4808 0.4472 0.2524]]

AA:
 [[8.2015e+12 6.2167e+12 7.8123e+12 4.3667e+12 1.4714e+11]
 [5.2434e+12 3.7780e+12 4.7290e+12 3.5494e+12 4.1547e+12]
 [7.5691e+11 7.7484e+12 1.7348e+12 6.5865e+12 1.8230e+12]
 [5.2867e+12 6.1930e+11 1.7598e+12 3.4992e+12 5.8739e+12]
 [4.0335e+12 1.3281e+11 4.5487e+12 4.2314e+12 2.3884e+12]]


In [18]:
xx, rr = cs111.LUsolve(AA, bb)
print("x:", x)
print()
print("xx:", xx)

x: [ 0.1993  0.0773  0.1998  0.8269 -0.2316]

xx: [ 0.1993  0.0773  0.1998  0.8269 -0.2316]


In [19]:
# The residual (and its norm) change when A and b are rescaled,
# but the relative residual norm does not.
old_residual = b - A @ x
new_residual = bb - AA @ xx
old_residual_norm = npla.norm(old_residual)
new_residual_norm = npla.norm(new_residual)

# end="\n\n" leaves one blank line after each old/new pair
print("old residual:", old_residual)
print("new residual:", new_residual, end="\n\n")

print("old residual norm:", old_residual_norm)
print("new residual norm:", new_residual_norm, end="\n\n")

print("old relative residual norm:", old_residual_norm / npla.norm(b))
print("new relative residual norm:", new_residual_norm / npla.norm(bb), end="\n\n")


old residual: [ 0.0000e+00  0.0000e+00 -1.1102e-16  0.0000e+00 -1.1102e-16]
new residual: [ 0.001   0.     -0.001   0.0015 -0.001 ]

old residual norm: 1.5700924586837752e-16
new residual norm: 0.002237585788552656

old relative residual norm: 1.260538933686803e-16
new relative residual norm: 1.8987758045373244e-16



In [20]:
npla.norm(xx - x)

1.384235421332246e-15

# Other vector norms

In [None]:
npla.norm?

In [21]:
v = np.array([3, -1, 4, 0, -5])
npla.norm(v)

7.14142842854285

In [22]:
# Show the same vector measured in several different norms.
# ord = 0 is not an actual norm! It counts the number of nonzeros.
norm_orders = [
    ("2-norm:", 2),
    ("1-norm:", 1),
    ("inf-norm:", np.inf),
    ("10-norm:", 10),
    ("0-norm:", 0),
]

print("v:", v)
for label, order in norm_orders:
    print()  # blank line between consecutive results
    print(label, npla.norm(v, order))

v: [ 3 -1  4  0 -5]

2-norm: 7.14142842854285

1-norm: 13.0

inf-norm: 5.0

10-norm: 5.054008189891657

0-norm: 4.0


# How do we measure a matrix?  

## Norm of a matrix

The norm of A, written ||A||, is the maximum over nonzero vectors v of norm(A@v) / norm(v)

Each different vector norm (1-norm, 2-norm, inf-norm, etc.) gives a different matrix norm.

In [23]:
A = np.array([[2,-1,1],[1,0,1],[3,-1,4]])
A

array([[ 2, -1,  1],
       [ 1,  0,  1],
       [ 3, -1,  4]])

In [24]:
npla.norm(A, 2)

5.722926953325028

In [25]:
npla.norm(A, 1)

6.0

**The 1-norm of a matrix turns out to be the largest column sum (summing absolute values).**

In [26]:
npla.norm(A, np.inf)

8.0

**The infinity norm of a matrix turns out to be the largest row sum (summing absolute values).**

## Condition number of a matrix

The condition number of A is the ratio (max norm(A@v)/norm(v)) / (min norm(A@v)/norm(v)), where both the max and the min are taken over nonzero vectors v.

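The condition number is always greater than or equal to 1. If the rank of A is less than the number of columns, the min will be 0 and the condition number is infinite. (In floating-point arithmetic, the computed condition number of a singular matrix is usually a huge finite number, on the order of 1e16, rather than exactly infinity; see the singular-matrix example below.)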

If A is square and nonsingular, the condition number is equal to ||A|| times ||A inverse||.

There is a different condition number for each norm: 1-norm condition, 2-norm condition, etc.

In [27]:
A = np.eye(5)
A

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [28]:
npla.cond(A, 2)

1.0

In [29]:
A = np.random.random((5,5))
A

array([[0.5454, 0.6728, 0.1683, 0.3746, 0.5042],
       [0.0117, 0.5132, 0.6499, 0.1639, 0.751 ],
       [0.7017, 0.1478, 0.4978, 0.9885, 0.2789],
       [0.2902, 0.87  , 0.9071, 0.8389, 0.181 ],
       [0.6083, 0.8403, 0.5703, 0.8069, 0.8857]])

In [30]:
npla.cond(A, 2)

31.63657837783012

In [31]:
npla.cond(A, 1)

50.38518493767685

In [32]:
npla.cond(A, np.inf)

49.979939234465014

In [33]:
# A singular matrix

A = np.array([[1, 4, 7], [2, 5, 8], [3, 6, 9]])
A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [34]:
npla.matrix_rank(A)

2

In [35]:
npla.cond(A,2)

2.0896350540350372e+16

In [36]:
A = np.diag([1,2,3,4,5,6,7,8])
A

array([[1, 0, 0, 0, 0, 0, 0, 0],
       [0, 2, 0, 0, 0, 0, 0, 0],
       [0, 0, 3, 0, 0, 0, 0, 0],
       [0, 0, 0, 4, 0, 0, 0, 0],
       [0, 0, 0, 0, 5, 0, 0, 0],
       [0, 0, 0, 0, 0, 6, 0, 0],
       [0, 0, 0, 0, 0, 0, 7, 0],
       [0, 0, 0, 0, 0, 0, 0, 8]])

In [37]:
npla.cond(A, 2)

8.0

## Scaling a matrix changes its norm but not its condition number

In [38]:
A = np.random.random((5,5))
A

array([[0.3214, 0.9221, 0.7692, 0.4058, 0.9547],
       [0.7688, 0.8088, 0.0226, 0.7941, 0.6597],
       [0.471 , 0.8402, 0.9426, 0.9259, 0.4444],
       [0.1784, 0.9218, 0.5213, 0.2868, 0.5152],
       [0.9525, 0.1588, 0.4676, 0.217 , 0.1261]])

In [39]:
npla.norm(A,2) 

2.980441996460057

In [40]:
npla.norm(10*A, 2)

29.804419964600566

In [41]:
npla.cond(A, 2)

12.643134105086201

In [42]:
npla.cond(10*A, 2)

12.643134105086203

## Rank of a matrix (for review, not run in class)

In [43]:
A = np.array([[1, 4, 7], [2, 5, 8], [3, 6, 9]])
A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [44]:
A[:,1]


array([4, 5, 6])

In [45]:
(A[:,0] + A[:,2]) / 2

array([4., 5., 6.])

In [46]:
# A has a null vector v:
v = np.array([-1/2, 1, -1/2])
v

array([-0.5,  1. , -0.5])

In [47]:
A @ v

array([0., 0., 0.])

In [48]:
npla.matrix_rank(A)

2

**A square matrix is singular (has a nonzero null vector) iff its rank is smaller than its dimension.**

In [49]:
# LU factorization with partial pivoting doesn't work if the matrix is singular.
# The failure is the point of this cell, so catch the error and display it;
# that way the cells below still execute under "Restart Kernel -> Run All".
try:
    L, U, p = cs111.LUfactor(A)
except AssertionError as err:
    print(f"{type(err).__name__}: {err}")

AssertionError: can't find nonzero pivot, matrix is singular

In [50]:
A.T

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [51]:
# The number of linearly independent rows is always 
# equal to the number of linearly independent columns.
# That is, the rank of A and A.T are the same

npla.matrix_rank(A.T)

2

In [52]:
# most (square) matrices have rank equal to their dimension
A = np.random.random((3,3))
A

array([[0.5573, 0.9289, 0.7066],
       [0.5012, 0.7082, 0.813 ],
       [0.2428, 0.8631, 0.6866]])

In [53]:
npla.matrix_rank(A)

3

In [54]:
# A matrix whose columns are all multiples of the first column [1, 2, 3]
A = np.array([[1, 2, 3], [2, 4, 6], [3, 6, 9]])
A

array([[1, 2, 3],
       [2, 4, 6],
       [3, 6, 9]])

In [55]:
npla.matrix_rank(A)

1

In [56]:
# The rank of a non-square matrix can't be more than its smaller dimension
A = np.random.random((3,7))
A

array([[0.1861, 0.0018, 0.7173, 0.1206, 0.6535, 0.9959, 0.7751],
       [0.5084, 0.1326, 0.9045, 0.6394, 0.7272, 0.5337, 0.42  ],
       [0.0924, 0.1987, 0.0682, 0.6303, 0.7711, 0.7174, 0.76  ]])

In [57]:
npla.matrix_rank(A)

3

In [58]:
A.T

array([[0.1861, 0.5084, 0.0924],
       [0.0018, 0.1326, 0.1987],
       [0.7173, 0.9045, 0.0682],
       [0.1206, 0.6394, 0.6303],
       [0.6535, 0.7272, 0.7711],
       [0.9959, 0.5337, 0.7174],
       [0.7751, 0.42  , 0.76  ]])

In [59]:
npla.matrix_rank(A.T)

3