# Least Squares Method

### We'll start off by importing what we need

**NOTE:** You can import the `cs111` module. See the website for the latest version.

In [None]:
import numpy as np
import numpy.linalg as npla

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
#%matplotlib tk

In [None]:
# Can we use npla.solve() on LSQ problems?

A = np.array([[1,0,0], [0,1,0], [0,0,1], [-1,1,0], [-1,0,1], [0,-1,1]])
b = np.array([1237, 1941, 2417, 711, 1177, 475])

# npla.solve() only accepts a square coefficient matrix. A has 6 rows and
# 3 columns, so the call raises LinAlgError. The error is caught and printed
# so that the demonstration is visible without stopping a top-to-bottom run.
try:
    x = npla.solve(A, b)
    print("x :", x)
except npla.LinAlgError as err:
    print("npla.solve() failed:", err)

# ANSWER IS: NO! npla.solve() rejects the non-square (overdetermined) system.
# Least-squares problems need npla.lstsq() instead.

# LSQ: Line (1st order) fit

### Simple example

In [None]:
# Nine observations y, taken at the integer times t = 0, 1, ..., 8.
y = np.array([2.9, 2.7, 4.8, 5.3, 7.1, 7.6, 7.7, 8.0, 9.4])
t = np.arange(9)

# Sanity check: t and y must hold the same number of samples.
print(t.size, y.size)
print('t:',t)
print('y:',y)

In [None]:
%matplotlib inline
plt.figure()
plt.plot(t, y, '.')

### Another example

In [None]:
# Fourteen observations y, taken at the integer times t = 0, 1, ..., 13.
y = np.array([2.9, 2.7, 4.8, 5.3, 7.1, 7.6, 7.7, 7.6, 9.4, 9.0, 9.6, 10.0, 10.2, 9.7])
t = np.arange(14)

# Sanity check: t and y must hold the same number of samples.
print(t.size, y.size)
print('t:',t)
print('y:',y)

In [None]:
%matplotlib inline
plt.figure()
plt.plot(t, y, '*')

## We want to create matrix A to reflect this 1st-order set of equations
### i.e. the set of equations $y_{n} = x_{0} + x_{1} t_{n}$, one for each data point $(t_{n}, y_{n})$

In [None]:
# Design matrix for the line y(t) = x[0] + x[1]*t, one row per observation:
#   column 0 is all ones (it multiplies x[0]),
#   column 1 holds the sample times t (it multiplies x[1]).
m = len(y)
A = np.column_stack((np.ones(m), t))
print(A)

## We have A and we have y, so let's solve for x in Ax = y
**Since this is an OVERDETERMINED system, we cannot solve using the typical npla.solve() solver**

***Instead, we have to use the npla.lstsq() method***

In [None]:
# npla.lstsq() returns a tuple whose first entry is the least-squares
# solution vector; that is the only entry needed here.
lstsq_result = npla.lstsq(A, y, rcond=None)
x = lstsq_result[0]
print(x)

# x[0] and x[1] are the coefficients of the fitted straight line,
# i.e. y(t) = x[0] + x[1]*t
print("x0: ",x[0],'\nx1: ',x[1])

In [None]:
# Can we duplicate the lstsq() result using a QR factorization?
#
# With A = Q R, the least-squares solution satisfies R x = Q^T y.
# That small square system is solved directly with npla.solve() rather than
# by forming inv(R) explicitly: it gives the same x without computing an
# inverse, which is the numerically preferred way to apply R^{-1}.

Q, R = npla.qr(A)
x_qr = npla.solve(R, Q.T @ y)
print(x_qr)

**How far away is EACH data point (i.e. in y) from the line determined by LSQ method?**

***Best determined with a residual calculation:***

The **residual vector** will show the difference of EACH point from the "ideal" line and
the **relative residual value** acts as an "average" of sorts of all these point differences.

BOTH the residual vector AND the relative residual value are useful metrics.

In [None]:
# A @ x is the fitted model evaluated at every sample time.
# The residual r = y - A @ x is what remains of each observation after
# subtracting the model's prediction.
r = y - np.matmul(A, x)

# Size of the residual relative to the size of the data.
relative_residual = npla.norm(r) / npla.norm(y)

print('The residual vector r:\n', r, '\n')
print('The relative residual norm:\n', relative_residual)

## Notes on the `numpy` function `linspace()`

### If we want to plot the "ideal" line that was determined by the LSQ method, then we need to create an x-axis of evenly spaced numbers. The function `linspace()` can do this:

In [None]:
# np.linspace(start, stop, num) returns `num` evenly spaced numbers over the
# interval from start to stop.
# Here: 10 evenly spaced points between 0 and 13.

np.linspace(0, 13, 10)

In [None]:
# Same call with named settings: N evenly spaced points between START and END.

START, END, N = 0, 13, 10

np.linspace(START, END, N)

In [None]:
%matplotlib inline
plt.figure()
plt.plot(t, y, '.')

polyt = np.linspace(0, 13, num = 100)
polyy = x[0] + x[1]*polyt 

plt.plot(polyt, polyy)
plt.xlabel('time')
plt.ylabel('height')
plt.legend(('observations', 'linear fit'))

# LSQ: Parabolic (2nd order) fit

In [None]:
# 21 observations y, taken every half time unit: t = 0.0, 0.5, ..., 10.0.
t = np.arange(21) / 2
y = np.array([
    2.9, 2.7, 4.8, 5.3, 7.1, 7.6, 7.7,
    7.6, 9.4, 9.0, 9.6, 10.0, 10.2, 9.7,
    8.3, 8.4, 9.0, 8.3, 6.6, 6.7, 4.1,
])
print('t:',t)
print('y:',y)

%matplotlib inline
plt.figure()
plt.plot(t,y,'.')

## We want to create matrix A to reflect this 2nd-order set of equations
### i.e. the set of equations $y_{n} = x_{0} + x_{1} t_{n} + x_{2} t_{n}^{2}$, one for each data point $(t_{n}, y_{n})$

*Step 1: Set up matrix A:*

In [None]:
# Design matrix for the parabola y(t) = x[0] + x[1]*t + x[2]*t**2,
# one row per observation and one column per coefficient: [1, t, t**2].
m = len(y)
A = np.column_stack((np.ones(m), t, t**2))

# What if you wanted to fit a cubic curve instead?
# Then A needs a fourth column holding t**3 (making it m x 4).

print(A)

*Step 2: find x by using the npla.lstsq() function.*

*Step 3: calculate the relative residual norm.*

In [None]:
# Step 2: least-squares solution; the first entry of the tuple returned by
# npla.lstsq() is the coefficient vector x.
lstsq_result = npla.lstsq(A, y, rcond=None)
x = lstsq_result[0]
print(x)

# Step 3: residual of the fit and its size relative to the data.
r = y - np.matmul(A, x)
relative_residual = npla.norm(r) / npla.norm(y)
print('\nThe residual vector r:\n', r, '\n')
print('The relative residual norm:\n', relative_residual)

*Step 4: Plot the data points and overlay a plot of the model:*

In [None]:
%matplotlib inline
plt.figure()
plt.plot(t,y,'.')

polyt = np.linspace(0,10,num=100)
polyy = x[0] + x[1]*polyt + x[2]*polyt**2

# What-if you wanted to fit this to a cubic curve???
#polyy = x[0] + x[1]*polyt + x[2]*polyt**2 + x[3]*polyt**3

plt.plot(polyt,polyy)
plt.xlabel('time')
plt.ylabel('height')
plt.legend(('observations', 'parabola fit'))