# Linear least-squares regression

- This notebook follows the course notes `least-squares.pdf`

## Polynomial model

- We now use a polynomial model to fit the data

In [None]:
# setup python
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
from cvxpy import *

In [None]:
# plot properties
# render all text through LaTeX so the figures can be dropped into the tex notes
plt.rc('text', usetex=True)
# 'normal' is not a font family matplotlib recognises (the generic families are
# serif, sans-serif, cursive, fantasy and monospace), and passing it here used to
# overwrite the serif choice made just before; keep serif in the one dict instead
font = {'family' : 'serif',
        'weight' : 'normal',
        'size'   : 16}
plt.rc('font', **font)

In [None]:
# true model: the cubic x^3 - 6x^2 + 11x - 6, specified by its coefficients
# (highest power first); it factors as (x-1)(x-2)(x-3), so its roots are 1, 2, 3
p = np.poly1d((1, -6, 11, -6))

In [None]:
# synthetic data set: sample the true polynomial on an even grid over [0.5, 3.5]
# and perturb the samples with Gaussian noise of standard deviation 0.4
n_points = 20
x = np.linspace(0.5, 3.5, num=n_points)
y_true = p(x)

# fixed seed so every run draws the same noise
np.random.seed(0)
y = y_true + 0.4 * np.random.randn(n_points)

In [None]:
# plot the noisy samples together with the underlying true function
plt.figure()
plt.plot(x, y, 'o', label='$y(x_i)$')
# raw string: '\m' is not a valid escape sequence, so the plain literal only
# worked by accident and triggers an invalid-escape warning on recent Pythons;
# the resulting label text is unchanged
plt.plot(x, y_true, label=r'$f_\mathrm{true}(x)$')
plt.xlabel('$x$: independent variable')
plt.ylabel('$y$: response variable')
# leave room for the large axis labels
plt.gcf().subplots_adjust(bottom=0.15, left=0.15)
# trailing semicolon keeps the Legend repr out of the cell output
plt.legend();

In [None]:
# define function that uses cvxpy to solve the least-squares problem using a polynomial model

def cvxpy_poly_fit(x,y,degree):
    """Fit a polynomial of the given degree to the points (x, y) by least squares.

    Builds the Vandermonde matrix A of ``x`` and uses cvxpy to solve

        minimize  || A c - y ||_2^2

    over the coefficient vector c.

    Parameters
    ----------
    x : array_like
        Sample locations, passed to ``np.vander`` (which expects a 1-D array).
    y : array_like
        Observed responses, one per entry of ``x``.
    degree : int
        Degree of the polynomial; ``degree + 1`` coefficients are fitted.

    Returns
    -------
    numpy.ndarray
        The optimal coefficients, highest power first (the default column
        order of ``np.vander``), so they can be handed to ``np.poly1d``.

    Raises
    ------
    RuntimeError
        If the solver finishes without producing a solution.
    """
    # construct data matrix: column j holds x**(degree - j)
    A = np.vander(x, degree + 1)
    b = y
    p_cvx = Variable(degree + 1)
    # set up optimization problem; use `@` for the matrix-vector product,
    # since `*` as matrix multiplication is deprecated in CVXPY >= 1.1
    objective = Minimize(sum_squares(A @ p_cvx - b))
    constraints = []
    # solve the problem
    prob = Problem(objective, constraints)
    prob.solve()
    # a failed solve leaves the variable without a value; fail loudly here
    # rather than returning an array wrapping None
    if p_cvx.value is None:
        raise RuntimeError(
            "cvxpy did not return a solution (status: %s)" % prob.status)
    # return the polynomial coefficients
    return np.array(p_cvx.value)
    

In [None]:
# least-squares fits of degree 1, 2 and 3, each solved with cvxpy
c1, c2, c3 = (cvxpy_poly_fit(x, y, deg) for deg in (1, 2, 3))

# wrap every coefficient vector in a callable polynomial
p1, p2, p3 = (np.poly1d(coef.flatten()) for coef in (c1, c2, c3))

In [None]:
# evaluate the three fitted polynomials at the sample points (used by the plots below)
y1, y2, y3 = (fitted(x) for fitted in (p1, p2, p3))

In [None]:
# data with the degree-1 (linear) fit
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
ax.plot(x, y1, label='linear')
ax.set_xlabel('$x$: independent variable')
ax.set_ylabel('$y$: response variable')
ax.legend(loc='lower right', fontsize=10)
# leave room for the large axis labels
fig.subplots_adjust(bottom=0.15, left=0.15)

In [None]:
# data with the linear and the degree-2 (quadratic) fits
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
ax.plot(x, y1, label='linear')
ax.plot(x, y2, label='degree-2')
ax.set_xlabel('$x$: independent variable')
ax.set_ylabel('$y$: response variable')
ax.legend(loc='lower right', fontsize=10)
# leave room for the large axis labels
fig.subplots_adjust(bottom=0.15, left=0.15)

In [None]:
# data with the linear, quadratic and degree-3 (cubic) fits
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
ax.plot(x, y1, label='linear')
ax.plot(x, y2, label='degree-2')
ax.plot(x, y3, label='degree-3')
ax.set_xlabel('$x$: independent variable')
ax.set_ylabel('$y$: response variable')
ax.legend(loc='lower right', fontsize=10)
# leave room for the large axis labels
fig.subplots_adjust(bottom=0.15, left=0.15)

In [None]:
# data, all three fits, and the true underlying function in one figure
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
ax.plot(x, y1, label='linear')
ax.plot(x, y2, label='degree-2')
ax.plot(x, y3, label='degree-3')
ax.plot(x, y_true, label='true')
ax.set_xlabel('$x$: independent variable')
ax.set_ylabel('$y$: response variable')
ax.legend(loc='lower right', fontsize=10)
# leave room for the large axis labels
fig.subplots_adjust(bottom=0.15, left=0.15)