# Transcript from Lecture, November 4, 2021

In [None]:
import sys

########################################
# Change the string in the line below! #
########################################
sys.path.append("/Users/gilbert/Documents/CS111-2021-fall/Python") 

import os
import time
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import linalg as spla
import scipy.sparse
import scipy.sparse.linalg
from scipy import integrate
import networkx as nx
import json
import cs111

##########################################################
# If this import for matplotlib doesn't work, try saying #
#   conda install -c conda-forge ipympl                  #
# at a shell prompt on your computer                     #
##########################################################
import matplotlib
%matplotlib ipympl

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d




np.set_printoptions(precision = 4)

# Data fitting by least squares

## The surveyor problem

In [None]:
# Surveyor system A @ x ~ b: rows 0-2 pick out x0, x1, x2 individually,
# rows 3-5 form the pairwise differences between them.
A = np.array([
    [ 1,  0, 0],   # x0
    [ 0,  1, 0],   # x1
    [ 0,  0, 1],   # x2
    [-1,  1, 0],   # x1 - x0
    [-1,  0, 1],   # x2 - x0
    [ 0, -1, 1],   # x2 - x1
])
# One measured value per row of A.
b = np.array([1237, 1941, 2417, 711, 1177, 475])

print('A:', A, sep='\n')
print()
print('b:', b)

In [None]:
# A is 6-by-3 (more equations than unknowns). npla.solve only accepts a square
# coefficient matrix, so this call raises LinAlgError. The failure is the point of
# the demonstration: catch it and show the message so that "Restart Kernel -> Run All"
# can continue on to the least squares solution.
try:
    x = npla.solve(A, b)
except npla.LinAlgError as err:
    print('npla.solve failed:', err)

In [None]:
x, resid, rank, sv = npla.lstsq(A, b, rcond = None)
x

In [None]:
npla.lstsq?


In [None]:
x = npla.lstsq(A, b, rcond = None)[0]
x

In [None]:
b - A@x

In [None]:
relres = npla.norm(b - A@x)/npla.norm(b)
print('relres =', relres)

In [None]:
first_surveyor_x = np.array([1237,1941,2417])
relres = npla.norm(b - A @ first_surveyor_x)/npla.norm(b)
print("first surveyor's relres =", relres)

## Fitting a linear-regression line to data

<b>Warning:</b> This data originated from a real source (SDSS), but it has been messed around with and should be thought of as only a toy example.

In [None]:
u = np.array([
24.41442,
25.67274,
26.02992,
23.00638,
24.86972,
21.96248,
21.87139,
24.93143,
22.90206,
22.749,
23.84269,
21.74025,
23.87719,
19.22808,
21.62093,
25.31738,
24.78468,
26.16757,
24.25187,
23.59213,
22.67077,
25.01685,
25.43069,
24.23741,
22.36275,
24.56208,
21.57241,
28.41815,
25.05081,
20.96467,
23.56018,
23.45984,
22.8903,
22.97415,
20.78101,
22.46072,
24.60077,
24.78432,
23.35197,
24.09856,
25.7874,
22.36821,
24.50825,
22.06679,
16.63657,
24.38522,
26.19138,
21.29271])

z = np.array([
21.37779,
21.2569,
19.88106,
20.76563,
23.18144,
19.76097,
18.34194,
21.97318,
18.26459,
20.63926,
21.69268,
18.20393,
20.19502,
15.4874,
21.864,
22.357,
22.36432,
20.08167,
20.08758,
20.00934,
19.9816,
21.85483,
19.38139,
17.90937,
16.79848,
22.54093,
18.07866,
27.90404,
21.32371,
17.1744,
18.08388,
22.01883,
19.4599,
22.5115,
16.82848,
21.34581,
20.29847,
22.91343,
19.40759,
21.99136,
22.56684,
19.13174,
22.60053,
20.24307,
13.87426,
19.74682,
21.74273,
17.74383])/100

In [None]:
u.shape

In [None]:
z.shape

In [None]:
%matplotlib inline
plt.figure()
plt.plot(u,z,'.')
plt.xlabel('magnitude u_i')
plt.ylabel('redshift z_i')
plt.title('Hubble diagram: magnitude and red shift of astronomical objects')

In [None]:
# Design matrix for fitting the straight line z = x[0] + x[1]*u by least squares:
# one row per observation, column 0 all ones (multiplies the intercept x[0]),
# column 1 the magnitudes u (multiplies the slope x[1]).
m = len(u)
A = np.ones((m,2))
A[:,1] = u
A.shape

In [None]:
A

In [None]:
x = npla.lstsq(A, z, rcond = None)[0]
x

In [None]:
np.linspace(15,30,num=4)

In [None]:
%matplotlib inline
plt.figure()
plt.plot(u,z,'.')
lineu = np.linspace(15,30,num=4)
linez = x[0] + x[1]*lineu
plt.plot(lineu,linez)
plt.xlabel('magnitude u')
plt.ylabel('red shift z')
plt.legend(('observations', 'linear fit'))
plt.title('Hubble diagram: magnitude and red shift of astronomical objects')

In [None]:
x

## SVD and least squares

In [None]:
# The surveyors again

# Rows 0-2 measure each unknown directly; rows 3-5 measure differences of two unknowns.
A = np.array([
    [ 1,  0, 0],
    [ 0,  1, 0],
    [ 0,  0, 1],
    [-1,  1, 0],
    [-1,  0, 1],
    [ 0, -1, 1],
])
b = np.array([1237, 1941, 2417, 711, 1177, 475])

print('A:')
print(A)
print('\nb:', b)

In [None]:
# Full SVD of A: sigma is the 1-D array of singular values, and Vt is V transposed.
U, sigma, Vt = npla.svd(A)

print('singular values:', sigma)
print()

# S has the same shape as A, with the singular values on its leading diagonal
# and zeros everywhere else, so that A = U @ S @ V.T.
n_sv = len(sigma)
S = np.zeros(A.shape)
S[:n_sv, :n_sv] = np.diag(sigma)

V = Vt.T

# Print the three factors, separated by blank lines.
print('U:', U, '', 'S:', S, '', 'V:', V, sep='\n')

In [None]:
print('A:'); print(A)
print()
print('U @ S @ V.T:'); print(U @ S @ V.T)


In [None]:
npla.norm(A - U@S@V.T)

The least squares problem is to find the $x$ that minimizes the 2-norm $||Ax-b||_2$. We can use SVD to solve this, because multiplication by an orthogonal matrix doesn't change the 2-norm of a vector.

We want to minimize 

$$||Ax-b|| = ||USV^Tx-b||.$$

Multiplying by the orthogonal matrix $U^T$, this is the same as minimizing 

$$||U^TUSV^Tx - U^Tb|| = ||SV^Tx-U^Tb||.$$

Define $y=V^Tx$. Then if we can somehow find the $y$ that minimizes

$$||Sy - U^Tb||,$$

we can compute $x = Vy$ and we'll be done.

In [None]:
print('b:', b)
print('U.T @ b:', U.T@b)

In [None]:
print('S:'); print(S)
print()
print('sigma:', sigma)

The bottom part of $S$ is zero. Therefore the bottom part of $Sy$ will be zero no matter what $y$ is, and the best we can do to minimize $||Sy - U^Tb||$ is to make the top part of $Sy$ match the top part of $U^Tb$.

That's easy because the top part of $S$ is diagonal.

In [None]:
print('first 3 elements of U.T @ b:', (U.T @ b)[:3])

In [None]:
# Solve the diagonal part of S y = U^T b: component i of y is (U^T b)_i / sigma_i.
# The slice [:3] keeps the entries of U.T @ b that line up with the 3 singular values
# in sigma; the remaining entries cannot be matched by any y.
# The elementwise division needs every singular value to be nonzero.
y = (U.T @ b)[:3] / sigma

print('y:', y)

In [None]:
x = V@y

print('x:', x)

In [None]:
print('npla.lstsq x:', npla.lstsq(A, b, rcond=None)[0])

In [None]:
npla.norm(A@x - b)

In [None]:
npla.norm(S@y - U.T@b)

In [None]:
print('S@y:', S@y)
print()
print('U.T@b:', U.T@b)
print()
print('S@y - U.T@b:', S@y - U.T@b)

In [None]:
npla.norm((U.T@b)[3:])

# Eigenvalues and eigenvectors

If $w$ is a nonzero vector and $\lambda$ is a number and $Aw = \lambda w$, we say $w$ is an *eigenvector* of $A$ with *eigenvalue* $\lambda$. Notice that in this case any nonzero multiple of $w$ is also an eigenvector.

Every square matrix has at least one eigenvalue/eigenvector pair (possibly complex), and an $n$-by-$n$ matrix has at most $n$ linearly independent eigenvectors.

In [None]:
spla.eig?

In [None]:
A = np.eye(3)
A

In [None]:
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
A = np.diag([1,2,3])
A

In [None]:
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
# An eigenvalue can be zero (but an eigenvector can't be the zero vector).
# Build the 3-by-3 diagonal matrix with a zero on its diagonal explicitly, rather than
# editing whatever A currently holds, so the cell gives the same matrix regardless of
# which cells were run before it.
A = np.diag([1, 0, 3])
A

In [None]:
# A now has a 0 on its diagonal, so lam should contain a zero eigenvalue,
# while every column of W (the eigenvectors) is still a nonzero vector.
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
A = np.array([[0,1,0,0], [0,0,1,0], [0,0,0,1], [1,0,0,0]])
A

In [None]:
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
A = np.random.rand(4,4)
A

In [None]:
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
i = 2
val = lam[i]
val

In [None]:
vec = W[:,i]
vec

In [None]:
npla.norm(vec)

In [None]:
val * vec

In [None]:
A @ vec

The eigenvalues of $A$ and $A^T$ are the same, though the eigenvectors aren't necessarily the same.

In [None]:
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
lam, W = spla.eig(A.T)
print('lam:', lam)
print('W:')
print(W)

# Eigenvalues and eigenvectors of symmetric matrices

If $A$ is an $n$-by-$n$ symmetric matrix,
- All the eigenvalues of $A$ are real (no imaginary part)
- $A$ has $n$ linearly independent eigenvectors
- The eigenvectors can be chosen to be orthogonal to each other

Thus, $AW = WS$ holds where $W$ is an orthogonal matrix ($W^TW=I$) and $S$ is a square diagonal matrix. We can therefore write the eigenvalue equation as a matrix factorization:

$$A = WSW^T$$

We will write $S$ = diag($\lambda_0, \lambda_1, \ldots, \lambda_{n-1}$) with
$$\lambda_0 \le \lambda_1 \le \cdots \le \lambda_{n-1}.$$

(Unfortunately the standard convention is to order eigenvalues in increasing order and singular values in decreasing order. Yuck.)

We will write $w_i$ to mean column $i$ of $W$, so for all $0\le i < n$,
$$Aw_i = \lambda_i w_i$$


In [None]:
# Random symmetric matrix
A = np.random.randn(4,4)
A = A + A.T
A

In [None]:
lam, W = spla.eig(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
# Better! Use spla.eigh(A) not spla.eig(A) when A is symmetric
lam, W = spla.eigh(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
W.T @ W

In [None]:
S = np.diag(lam)
S

In [None]:
W @ S @ W.T

In [None]:
A

# Symmetric positive definite (SPD) and positive semidefinite (SPSD) matrices

A symmetric matrix $A$ is *positive definite* if all its eigenvalues are positive, 
so $0 < \lambda_0 \le \lambda_1 \le \cdots \le \lambda_{n-1}$.
<br>A symmetric matrix $A$ is positive definite if and only if $x^TAx>0$ for all nonzero vectors $x$.

A symmetric matrix $A$ is *positive semidefinite* if all its eigenvalues are nonnegative,
so $0 \le \lambda_0 \le \lambda_1 \le \cdots \le \lambda_{n-1}$.
<br>A symmetric matrix $A$ is positive semidefinite if and only if $x^TAx\ge 0$ for all nonzero vectors $x$.


In [None]:
# One way to create an SPD matrix...
A = np.random.randn(4,4)
A = A.T @ A
A

In [None]:
lam, W = spla.eigh(A)
print('lam:', lam)
print('W:')
print(W)

In [None]:
# Now make it semidefinite by shifting the eigenvalues by lambda_0:
# subtracting lam[0] * I lowers every eigenvalue of A by lam[0], so the smallest
# one (eigh returns them in ascending order) becomes zero.
# The identity is sized from A itself instead of repeating the literal 4.
B = A - lam[0] * np.eye(A.shape[0])
B

In [None]:
npla.matrix_rank(B)

In [None]:
B @ W[:,0]

In [None]:
lam, W = spla.eigh(A)
print('lam for A:', lam)
print('W for A:')
print(W)

In [None]:
lam, W = spla.eigh(B)
print('lam for B:', lam)
print('W for B:')
print(W)