$\newcommand{\vect}[2]{[#1, #2]^T}$

## Q2.2 ##

### a) ###

Gradients, derived from the previous problem set:

$ \nabla f_1 = \vect{2}{3} $ 

$ \nabla f_2 = \vect{2x - y}{2y - x} $

$ \nabla f_3 = \vect{\cos(y - 5) - (y - 5)\cos(x - 5)}{(5 - x)\sin(y - 5) - \sin(x - 5)} $ 

Hessians:

1. $\nabla^2f_1 = 
    \begin{bmatrix} 
      0 & 0 \\
      0 & 0
    \end{bmatrix}
   $
2. $\nabla^2f_2 = 
    \begin{bmatrix} 
      2 & -1 \\
      -1 & 2
    \end{bmatrix}
   $
3. $\nabla^2f_3 = 
    \begin{bmatrix} 
      (y - 5)\sin(x - 5) & -\sin(y - 5) - \cos(x - 5) \\
      -\sin(y - 5) - \cos(x - 5) & (5 - x)\cos(y - 5)
    \end{bmatrix}
   $

### b) ### 

Some setup code.

In [13]:
import numpy as np
import plotly.offline as py
import plotly.graph_objs as go
py.offline.init_notebook_mode(connected=True)

# Shared plotting helper used by every cell below.
def plot_f(f, filename='default-plot', title='default-plot',
           plot_contour=True,
           vecs_to_plot=None):
    """
    Plot one or more functions of (x, y) on a fixed 50 x 50 grid spanning
    [-2, 3.5] on both axes.

    @param f: A callable taking the (x, y) mesh-grid arrays and returning the
              z values. For surface plots a list of such callables may be
              given; each one is drawn as its own surface.
    @param filename: Forwarded to ``py.iplot`` as ``filename``.
    @param title: Title shown above the figure.
    @param plot_contour: If True, creates a contour plot of the given function
                         (``f`` must then be a single callable, not a list).
                         Else creates a surface plot.
    @param vecs_to_plot: A list of 2D vectors to plot.
                         Each element should be a ((x0, y0), (x1, y1)) tuple.
    """
    grid_size = 50
    x_1d = np.linspace(-2, 3.5, grid_size)
    y_1d = np.linspace(-2, 3.5, grid_size)
    x, y = np.meshgrid(x_1d, y_1d)

    def _line_shape(segment):
        # Translate one ((x0, y0), (x1, y1)) pair into a plotly line shape.
        start, end = segment[0], segment[1]
        return {
            'type': 'line',
            'x0': start[0],
            'y0': start[1],
            'x1': end[0],
            'y1': end[1],
            'opacity': 0.7,
            'line': {
                'color': 'white',
                'width': 2.5,
            }
        }

    segments = [] if vecs_to_plot is None else vecs_to_plot
    shapes_list = [_line_shape(segment) for segment in segments]

    layout = go.Layout(
        title=title,
        margin=go.layout.Margin(l=10, r=10, b=25, t=50),
        shapes=shapes_list
    )

    if plot_contour:
        # A contour plot shows exactly one function.
        assert not isinstance(f, list)
        traces = [go.Contour(z=f(x, y), x=x_1d, y=y_1d)]
    else:
        surfaces = f if isinstance(f, list) else [f]
        traces = [go.Surface(z=surface(x, y), x=x_1d, y=y_1d)
                  for surface in surfaces]

    py.iplot(go.Figure(data=traces, layout=layout), filename=filename)

#### Plots for $f_1$ ####

Note that the plots look identical no matter which point (x, y) the expansion is taken at, i.e. the approximations of the function are exactly the same. This is because the function is linear, so it is described exactly by its first-order Taylor expansion (the constant term plus the linear term); the Hessian, and hence the quadratic term, is zero.

Plots for (x, y) = (1, 0):

In [18]:
def f1(x, y):
    """Evaluate the linear function f1(x, y) = 2x + 3y + 1."""
    x_term = 2 * x
    y_term = 3 * y
    return x_term + y_term + 1

def f1_tangent_0(x, y):
    """Linear approximation of f1; it is the same expression, 2x + 3y + 1."""
    slope_x, slope_y = 2, 3
    intercept = 1
    return slope_x * x + slope_y * y + intercept

# Cheating a bit here: the Hessian of f1 is zero, so the second-order
# approximation has no quadratic term and is the same expression as f1.
def f1_quadratic_0(x, y):
    """Quadratic approximation of f1; evaluates 2x + 3y + 1."""
    linear_part = 2 * x + 3 * y
    return linear_part + 1

# Setup: both arrows start at (1, 0). The gradient of f1 is the constant
# vector (2, 3); (-3, 2) is perpendicular to it, i.e. it points along the
# level line. Both are shrunk by `scaling` so they fit inside the plot.
scaling = 0.3
orig = [1, 0]
f1_grad = np.array([2, 3]) * scaling
f1_tang = np.array([-3, 2]) * scaling
f1_plot_vecs = [
    [orig, orig + f1_grad],
    [orig, orig + f1_tang],
]
func_str = 'f1'

# Plot: contour with the two arrows, then f1 against each approximation.
plot_f(f1, filename='2.2-' + func_str, title=func_str,
       vecs_to_plot=f1_plot_vecs)
plot_f([f1, f1_tangent_0], filename='2.2-' + func_str,
       title=func_str + '-tangent', plot_contour=False)
plot_f([f1, f1_quadratic_0], filename='2.2-' + func_str,
       title=func_str + '-quadratic', plot_contour=False)

#### Plots for $f_2$ ####

Setup code:

In [47]:
def f2(x, y):
    """Evaluate f2(x, y) = x^2 + y^2 - xy - 5 (scalars or arrays)."""
    return x ** 2 + y ** 2 - x * y - 5


def f2_grad_func(x, y):
    """Return the gradient of f2 at (x, y) as ``[df/dx, df/dy]``."""
    return np.array([2 * x - y, 2 * y - x])


def f2_hessian_func(x, y):
    """Return the 2 x 2 Hessian of f2.

    f2 is quadratic, so the Hessian is constant; ``x`` and ``y`` are accepted
    only so the signature matches the other Hessian helpers.
    """
    top_left = 2
    top_right = -1
    bot_left = -1
    bot_right = 2
    return np.array([
        [top_left, top_right],
        [bot_left, bot_right]
    ])


def f2_tangent(x0, y0):
    """Build the first-order Taylor approximation of f2 around (x0, y0).

    Returns a function ``_tangent(x, y)`` that evaluates
    ``f2(x0, y0) + grad . (x - x0, y - y0)``. ``x`` and ``y`` may be scalars
    or arrays of any mutually broadcastable shape; the result has the
    broadcast shape.
    """
    grad = f2_grad_func(x0, y0)
    offset = f2(x0, y0)

    def _tangent(x, y):
        # Subtract the expansion point per coordinate. Stacking x and y and
        # subtracting a (2, 1, 1) origin only broadcasts correctly for 2-D
        # grids and silently mixes x with y0 for scalar inputs.
        dx = np.asarray(x) - x0
        dy = np.asarray(y) - y0
        return grad[0] * dx + grad[1] * dy + offset

    return _tangent


def f2_quadratic(x0, y0):
    """Build the second-order Taylor approximation of f2 around (x0, y0).

    Returns a function ``_quadratic(x, y)`` that adds the curvature term
    ``0.5 * d^T H d`` (``d`` is the offset from the expansion point, ``H`` the
    Hessian there) to the first-order approximation. Accepts the same inputs
    as the function returned by ``f2_tangent``.
    """
    hessian = f2_hessian_func(x0, y0)
    tangent = f2_tangent(x0, y0)

    def _quadratic(x, y):
        dx = np.asarray(x) - x0
        dy = np.asarray(y) - y0
        # d^T H d written out element-wise so it works for any input shape.
        h_dot_x = hessian[0, 0] * dx + hessian[0, 1] * dy
        h_dot_y = hessian[1, 0] * dx + hessian[1, 1] * dy
        vals = dx * h_dot_x + dy * h_dot_y
        return 0.5 * vals + tangent(x, y)

    return _quadratic

Plots for (x, y) = (1, 0):

In [48]:
# Expansion point; the arrows on the contour plot start here rather than at
# whatever the global `orig` happens to hold from an earlier cell.
x0, y0 = 1, 0
f2_orig = [x0, y0]
f2_grad = scaling * f2_grad_func(x0, y0)
# Rotating the gradient by 90 degrees gives the level-curve direction.
f2_tang = np.array([-f2_grad[1], f2_grad[0]])
f2_plot_vecs = [[f2_orig, f2_orig + f2_grad], [f2_orig, f2_orig + f2_tang]]

func_str = 'f2-1-0'
plot_f(f2, '2.2-' + func_str, title=func_str, vecs_to_plot=f2_plot_vecs)
plot_f([f2, f2_tangent(x0, y0)], '2.2-' + func_str, title=func_str + '-tangent', plot_contour=False)
plot_f([f2, f2_quadratic(x0, y0)], '2.2-' + func_str, title=func_str + '-quadratic', plot_contour=False)

(50, 50)


Plots for (x, y) = (-0.7, 2):

In [49]:
# Expansion point; the gradient and tangent arrows must start here, not at
# the (1, 0) stored in the global `orig` by the f1 cell.
x0, y0 = -0.7, 2
f2_orig = [x0, y0]
f2_grad = scaling * f2_grad_func(x0, y0)
# Rotating the gradient by 90 degrees gives the level-curve direction.
f2_tang = np.array([-f2_grad[1], f2_grad[0]])
f2_plot_vecs = [[f2_orig, f2_orig + f2_grad], [f2_orig, f2_orig + f2_tang]]

# Label encodes the expansion point, following the other cells' pattern.
func_str = 'f2--0.7-2'
plot_f(f2, '2.2-' + func_str, title=func_str, vecs_to_plot=f2_plot_vecs)
plot_f([f2, f2_tangent(x0, y0)], '2.2-' + func_str, title=func_str + '-tangent', plot_contour=False)
plot_f([f2, f2_quadratic(x0, y0)], '2.2-' + func_str, title=func_str + '-quadratic', plot_contour=False)

(50, 50)


Plots for (x, y) = (2.5, -1):

In [50]:
# Expansion point; the gradient and tangent arrows must start here, not at
# the (1, 0) stored in the global `orig` by the f1 cell.
x0, y0 = 2.5, -1
f2_orig = [x0, y0]
f2_grad = scaling * f2_grad_func(x0, y0)
# Rotating the gradient by 90 degrees gives the level-curve direction.
f2_tang = np.array([-f2_grad[1], f2_grad[0]])
f2_plot_vecs = [[f2_orig, f2_orig + f2_grad], [f2_orig, f2_orig + f2_tang]]

func_str = 'f2-2.5--1'
plot_f(f2, '2.2-' + func_str, title=func_str, vecs_to_plot=f2_plot_vecs)
plot_f([f2, f2_tangent(x0, y0)], '2.2-' + func_str, title=func_str + '-tangent', plot_contour=False)
plot_f([f2, f2_quadratic(x0, y0)], '2.2-' + func_str, title=func_str + '-quadratic', plot_contour=False)

(50, 50)


#### Plots for $f_3$ ####

Setup code:

In [55]:
def f3(x, y):
    """Evaluate f3(x, y) = (x - 5) cos(y - 5) - (y - 5) sin(x - 5)."""
    return (x - 5) * np.cos(y - 5) - (y - 5) * np.sin(x - 5)


def f3_grad_func(x, y):
    """Return the gradient of f3 at (x, y) as ``[df/dx, df/dy]``."""
    x_grad = np.cos(y - 5) - (y - 5) * np.cos(x - 5)
    y_grad = (5 - x) * np.sin(y - 5) - np.sin(x - 5)
    return np.array([x_grad, y_grad])


def f3_hessian_func(x, y):
    """Return the 2 x 2 Hessian of f3 at (x, y).

    The two off-diagonal entries are the same mixed partial derivative.
    """
    top_left = (y - 5) * np.sin(x - 5)
    top_right = -np.sin(y - 5) - np.cos(x - 5)
    bot_left = -np.sin(y - 5) - np.cos(x - 5)
    bot_right = (5 - x) * np.cos(y - 5)
    return np.array([
        [top_left, top_right],
        [bot_left, bot_right]
    ])


def f3_tangent(x0, y0):
    """Build the first-order Taylor approximation of f3 around (x0, y0).

    Returns a function ``_tangent(x, y)`` that evaluates
    ``f3(x0, y0) + grad . (x - x0, y - y0)``. ``x`` and ``y`` may be scalars
    or arrays of any mutually broadcastable shape; the result has the
    broadcast shape.
    """
    grad = f3_grad_func(x0, y0)
    offset = f3(x0, y0)

    def _tangent(x, y):
        # Subtract the expansion point per coordinate. Stacking x and y and
        # subtracting a (2, 1, 1) origin only broadcasts correctly for 2-D
        # grids and silently mixes x with y0 for scalar inputs.
        dx = np.asarray(x) - x0
        dy = np.asarray(y) - y0
        return grad[0] * dx + grad[1] * dy + offset

    return _tangent


def f3_quadratic(x0, y0):
    """Build the second-order Taylor approximation of f3 around (x0, y0).

    Returns a function ``_quadratic(x, y)`` that adds the curvature term
    ``0.5 * d^T H d`` (``d`` is the offset from the expansion point, ``H`` the
    Hessian there) to the first-order approximation. Accepts the same inputs
    as the function returned by ``f3_tangent``.
    """
    hessian = f3_hessian_func(x0, y0)
    tangent = f3_tangent(x0, y0)

    def _quadratic(x, y):
        dx = np.asarray(x) - x0
        dy = np.asarray(y) - y0
        # d^T H d written out element-wise so it works for any input shape.
        h_dot_x = hessian[0, 0] * dx + hessian[0, 1] * dy
        h_dot_y = hessian[1, 0] * dx + hessian[1, 1] * dy
        vals = dx * h_dot_x + dy * h_dot_y
        return 0.5 * vals + tangent(x, y)

    return _quadratic

Plots for (x, y) = (1, 0):

In [56]:
# Expansion point; the arrows on the contour plot start here rather than at
# whatever the global `orig` happens to hold from an earlier cell.
x0, y0 = 1, 0
f3_orig = [x0, y0]
f3_grad = scaling * f3_grad_func(x0, y0)
# Rotating the gradient by 90 degrees gives the level-curve direction.
f3_tang = np.array([-f3_grad[1], f3_grad[0]])
f3_plot_vecs = [[f3_orig, f3_orig + f3_grad], [f3_orig, f3_orig + f3_tang]]

func_str = 'f3-1-0'
plot_f(f3, '2.2-' + func_str, title=func_str, vecs_to_plot=f3_plot_vecs)
plot_f([f3, f3_tangent(x0, y0)], '2.2-' + func_str, title=func_str + '-tangent', plot_contour=False)
plot_f([f3, f3_quadratic(x0, y0)], '2.2-' + func_str, title=func_str + '-quadratic', plot_contour=False)

(50, 50)


Plots for (x, y) = (-0.7, 2):

In [57]:
# Expansion point; the gradient and tangent arrows must start here, not at
# the (1, 0) stored in the global `orig` by the f1 cell.
x0, y0 = -0.7, 2
f3_orig = [x0, y0]
f3_grad = scaling * f3_grad_func(x0, y0)
# Rotating the gradient by 90 degrees gives the level-curve direction.
f3_tang = np.array([-f3_grad[1], f3_grad[0]])
f3_plot_vecs = [[f3_orig, f3_orig + f3_grad], [f3_orig, f3_orig + f3_tang]]

func_str = 'f3--0.7-2'
plot_f(f3, '2.2-' + func_str, title=func_str, vecs_to_plot=f3_plot_vecs)
plot_f([f3, f3_tangent(x0, y0)], '2.2-' + func_str, title=func_str + '-tangent', plot_contour=False)
plot_f([f3, f3_quadratic(x0, y0)], '2.2-' + func_str, title=func_str + '-quadratic', plot_contour=False)

(50, 50)


Plots for (x, y) = (2.5, -1):

In [59]:
# Expansion point; the gradient and tangent arrows must start here, not at
# the (1, 0) stored in the global `orig` by the f1 cell.
x0, y0 = 2.5, -1
f3_orig = [x0, y0]
f3_grad = scaling * f3_grad_func(x0, y0)
# Rotating the gradient by 90 degrees gives the level-curve direction.
f3_tang = np.array([-f3_grad[1], f3_grad[0]])
f3_plot_vecs = [[f3_orig, f3_orig + f3_grad], [f3_orig, f3_orig + f3_tang]]

func_str = 'f3-2.5--1'
plot_f(f3, '2.2-' + func_str, title=func_str, vecs_to_plot=f3_plot_vecs)
plot_f([f3, f3_tangent(x0, y0)], '2.2-' + func_str, title=func_str + '-tangent', plot_contour=False)
plot_f([f3, f3_quadratic(x0, y0)], '2.2-' + func_str, title=func_str + '-quadratic', plot_contour=False)

(50, 50)


### d) ###

$f_1$ is linear and can be perfectly approximated with just a first-order Taylor approximation.

$f_2$ is quadratic and can be perfectly approximated with a second-order Taylor approximation. The first-order tangent is not very good on its own.

$f_3$ is more complicated and is not approximated well, even with the quadratic term, once we move far enough away from the approximation center. For example, the approximation at $(x, y) = (2.5, -1)$ is very inaccurate. The reason is that the change in convexity is substantial, and it is not captured because the approximation has no third-order term; as seen in the figure above, the quadratic surface actually "lifts away" from the surface of the true function (although locally it fits well enough). 

A similar reason, i.e. the lack of a quadratic term in the linear approximation, is why the linear approximations cannot keep up with the functions $f_2$ and $f_3$.