# Data Fitting with Least Squares

In [69]:
import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
import random
import matplotlib.pyplot as pt
%matplotlib inline

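Suppose we want to solve the least-squares problem $\min_x \|Ax - b\|_2$. To study it, we build an $M \times N$ matrix $A = U \Sigma V^T$ with a prescribed (deficient) rank from random orthogonal factors $U$, $V$ and chosen singular values, together with a random right-hand side $b$: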

In [128]:
# Seed both random generators used in this cell so that
# "Restart Kernel -> Run All" rebuilds exactly the same U, V, sigma and b.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

# Matrix shape: A will be M x N with only N-1 nonzero singular values,
# i.e. it is rank-deficient by construction.
M = 10
N = 4
rankA = N-1

# Generating the orthogonal matrix U (Q factor of a random M x M matrix)
X = np.random.rand(M,M)
U,R = sla.qr(X)

# Generating the orthogonal matrix V (Q factor of a random N x N matrix).
# R is reused as a throwaway name; only the Q factors are needed.
Y = np.random.rand(N,N)
V,R = sla.qr(Y)
Vt = V.T

# Generating the diagonal matrix Sigma:
# rankA distinct integers drawn from 1..8, stored in descending order.
singval = sorted(random.sample(range(1, 9), rankA))
sigmavec = singval[::-1]
sigma = np.zeros((M,N))
# Place the singular values on the leading diagonal entries; the
# remaining diagonal entries stay zero, which fixes the rank at rankA.
diag_idx = np.arange(rankA)
sigma[diag_idx, diag_idx] = sigmavec

# Random right-hand side of the least-squares problem
b = np.random.rand(M)

In [129]:
# Assemble A from its SVD factors, multiplying left to right: (U * Sigma) * V^T
A = np.matmul(np.matmul(U, sigma), Vt)

# Report the shape of A, then its 2-norm condition number
for reported in (A.shape, la.cond(A)):
    print(reported)

(10, 4)
2.84210776193e+16


In [131]:
# Shapes of the full SVD factors, in the order U, Sigma, V
for factor in (U, sigma, V):
    print(factor.shape)

# Keep only the first N columns of U (the columns that multiply
# the N diagonal positions of Sigma)
UR = U[:, 0:N]
print(UR.shape)

# Nonzero singular values, largest first
print(sigmavec)

(10, 10)
(10, 4)
(4, 4)
(10, 4)
[8, 6, 3]


## Using normal equations (solution is unique only if $A$ has full column rank)

In [137]:
# Normal equations: (A^T A) x = A^T b
# NOTE(review): A^T A is invertible only when A has full column rank.
# If A is rank-deficient the system is singular up to roundoff and the
# solve returns just one of many minimizers — presumably intentional
# here as a contrast to the SVD solution; confirm.
gram_matrix = A.T @ A
normal_rhs = A.T @ b
xu = la.solve(gram_matrix, normal_rhs)

In [138]:
# 2-norm of the residual A x - b for the normal-equations solution
resid_normal_eq = A @ xu - b
la.norm(resid_normal_eq, ord=2)

0.83375806371858807

In [139]:
# 2-norm of the normal-equations solution itself
la.norm(xu, ord=2)

0.77674518213602828

## Using SVD

In [132]:
# Project b onto the leading left singular vectors
ub = UR.T @ b

# Accumulate x = sum_i v_i * (u_i^T b) / sigma_i over the nonzero singular
# values only; zip stops at the end of sigmavec, so directions with a zero
# singular value contribute nothing.
x = np.zeros(N)
for v_col, ub_i, s in zip(V.T, ub, sigmavec):
    x = x + v_col * ub_i / s
print(x)

[ 0.09604311  0.11533489  0.00223527 -0.02462985]


In [133]:
# Least-squares solution from NumPy's SVD-based solver.
# rcond=None is passed explicitly: it selects the cutoff
# (machine epsilon * max(M, N)) for treating small singular values as zero,
# and avoids the FutureWarning that NumPy 1.14+ emits when rcond is omitted.
# Returns: solution, sum of squared residuals (empty when A is
# rank-deficient), effective rank of A, and the singular values of A.
coeffs, residual, rank, sval = la.lstsq(A, b, rcond=None)
print(coeffs)

[ 0.09604311  0.11533489  0.00223527 -0.02462985]


In [135]:
# 2-norm of the residual A x - b for the lstsq solution
resid_lstsq = A @ coeffs - b
la.norm(resid_lstsq, ord=2)

0.83375806371858796

In [140]:
# 2-norm of the lstsq solution itself
la.norm(coeffs, ord=2)

0.1521119376287009