# Continuous Optimisation HW1

In [None]:
# Imports
import numpy as np
import scipy
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
from tqdm import tqdm
from scipy.io import loadmat


data = loadmat('data.mat')
print(data.keys())
# loadmat (with its default options) returns MATLAB scalars as 2-D arrays with
# a single element. Extract that element explicitly with .item(): calling
# float() directly on an array with ndim > 0 is deprecated since NumPy 1.25.
sigma = float(data['sigma'].item())

# Other fields referenced from data.mat by later cells:
#   K, P, X0, d, delta_0, delta_bar, n, y

sigmasquared = sigma**2


# Question 1
Implement phi(x, P), bigphi(X, P), f(X, P, y).

In [None]:
# Question 1
#TODO Vectorise all the functions


def h(x: np.ndarray) -> float:
    """
    Gaussian filter

    Evaluates exp(-||x||^2 / sigma^2), reading the module-level
    `sigmasquared`. The input is flattened first, so a single offset given
    as shape (2,) or (1, 2) both yield a scalar.

    :x: np.ndarray holding one offset vector, shape (2,) or (1, 2)
    :returns: float
    """
    offset = np.ravel(x)
    return np.exp(-np.dot(offset, offset)/sigmasquared)  # Always take sigma = 0.1


def phi(x, P):
    """
    Calculate contribution of one 'true' star to the observed image

    Evaluates the Gaussian filter exp(-||p_i - x||^2 / sigma^2) for every
    pixel position p_i (the columns of P) in a single vectorised pass,
    reading the module-level `sigmasquared`. The number of pixels is taken
    from P itself, so no global n is required.

    :x: np.ndarray with the 2 coordinates of the star, e.g. shape (2,)
    :P: np.ndarray[(2, n**2)]
    :returns: np.ndarray[(n**2, 1)]
    """
    # Turn x into a column so it broadcasts against every column of P.
    offsets = np.asarray(P) - np.reshape(x, (-1, 1))
    squared_dist = np.sum(offsets**2, axis=0)
    return np.exp(-squared_dist/sigmasquared).reshape(-1, 1)


def bigphi(X, P):
    """
    Calculate image observed, based on K-star positions X

    Sums the Gaussian contribution exp(-||p_i - x_k||^2 / sigma^2) of every
    star x_k (columns of X) at every pixel position p_i (columns of P),
    reading the module-level `sigmasquared`. K and the number of pixels are
    taken from the shapes of X and P, so no globals K or n are required.

    :X: np.ndarray[(2, K)]
    :P: np.ndarray[(2, n**2)]
    :returns: np.ndarray[(n**2, 1)]
    """
    X = np.asarray(X)
    P = np.asarray(P)
    # offsets[:, i, k] = P[:, i] - X[:, k]; this intermediate array holds
    # 2 * n**2 * K entries.
    offsets = P[:, :, np.newaxis] - X[:, np.newaxis, :]
    squared_dist = np.sum(offsets**2, axis=0)
    # Sum over the star axis; keepdims keeps the (n**2, 1) column shape.
    return np.exp(-squared_dist/sigmasquared).sum(axis=1, keepdims=True)


def f(X, P, y):
    """
    Calculate squared error of estimate bigphi(X)

    Returns ||bigphi(X, P) - y||^2 / (2 * n**2), where n**2 is the number
    of pixel positions (columns of P).

    :X: np.ndarray[(2, K)]
    :P: np.ndarray[(2, n**2)]
    :y: np.ndarray[(n**2, 1)]; a flat (n**2,) vector is also accepted
    :returns: float
    """
    y = np.asarray(y)
    if y.ndim == 1:
        # A flat y would broadcast against the (n**2, 1) image into an
        # (n**2, n**2) matrix and silently inflate the norm.
        y = y[:, np.newaxis]
    residual = bigphi(X, P) - y
    num_pixels = P.shape[1]
    return (1/(2*num_pixels)) * np.linalg.norm(residual)**2

# Question 2

We see that $f$ is not convex. The plot below shows clear local maxima, which could not occur if $f$ were convex (a convex function has no strict local maximum in the interior of its domain).

In [None]:
# Toy problem: one star (K = 1) and four pixel positions (n = 2, n**2 = 4).
K = 1
n = 2
true_positions = np.array([[0], [0]])
positions = np.array([[0.5, 0.5], [-0.5, 0.5], [-0.5, -0.5], [0.5, -0.5]]).T
y = np.array([[0], [0], [1], [0]])
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(projection='3d')

# Evaluate f for a single star placed at each node of a 100x100 grid
# covering [-1, 1] x [-1, 1].
x_vals = np.linspace(-1, 1, 100)
y_vals = np.linspace(-1, 1, 100)
x_vals, y_vals = np.meshgrid(x_vals, y_vals)
grid = np.array([x_vals, y_vals]).reshape((2, 100**2))
z_vals = np.zeros((1, grid.shape[1]))
for i in range(grid.shape[1]):
    z_vals[0, i] = f(grid[:, i].reshape((2, 1)), positions, y)
# Back to the meshgrid layout so the heights line up with x_vals / y_vals.
z_vals = z_vals.reshape(x_vals.shape)
ax.scatter(x_vals, y_vals, z_vals)
plt.show()

# Question 3

In [None]:
#partial derivative of phi_i with respect to x[0], i.e. cell (i, 0) of the jacobian of phi
def d_phi_i_0(x,p_i):
    """
    Evaluate (2/sigma^2) * (p_i[0] - x[0]) * exp(-||p_i - x||^2 / sigma^2),
    reading the module-level `sigmasquared`.
    """
    offset = p_i-x
    gaussian = np.e**((-1/sigmasquared)*np.inner(offset,offset))
    prefactor = 2/sigmasquared
    return prefactor*(p_i[0]-x[0])*gaussian
#partial derivative of phi_i with respect to x[1], i.e. cell (i, 1) of the jacobian of phi
def d_phi_i_1(x,p_i):
    """
    Evaluate (2/sigma^2) * (p_i[1] - x[1]) * exp(-||p_i - x||^2 / sigma^2),
    reading the module-level `sigmasquared`.
    """
    offset = p_i-x
    gaussian = np.e**((-1/sigmasquared)*np.inner(offset,offset))
    prefactor = 2/sigmasquared
    return prefactor*(p_i[1]-x[1])*gaussian

#compute jacobian of small phi
def d_phi(x,P):
    """
    Jacobian of phi with respect to the star position x

    Row i holds the partial derivatives of exp(-||p_i - x||^2 / sigma^2)
    with respect to x[0] and x[1], i.e.
    (2/sigma^2) * (p_i - x) * exp(-||p_i - x||^2 / sigma^2).
    Reads the module-level `sigmasquared`; the number of pixels comes from
    P, so no global n is required.

    :x: np.ndarray with the 2 coordinates of the star, e.g. shape (2,)
    :P: np.ndarray[(2, n**2)]
    :returns: np.ndarray[(n**2, 2)]
    """
    # Turn x into a column so it broadcasts against every column of P.
    offsets = np.asarray(P) - np.reshape(x, (-1, 1))
    gaussian = np.exp(-np.sum(offsets**2, axis=0)/sigmasquared)
    # offsets is (2, n**2); transpose to get one row per pixel.
    return ((2/sigmasquared)*offsets*gaussian).T


#compute jacobian of big phi
def d_big_phi(X,P):
    """
    Jacobian of bigphi with respect to all star coordinates

    Columns are grouped star by star: for star k, the block of columns
    [d*k, d*(k+1)) holds the derivatives with respect to the coordinates
    of X[:, k], where d is the number of rows of X.

    :X: np.ndarray[(2, K)]
    :P: np.ndarray[(2, n**2)]
    :returns: np.ndarray[(n**2, 2*K)]
    """
    dim, num_stars = X.shape
    jacobian = np.zeros((P.shape[1], dim*num_stars))
    for k in range(num_stars):
        # bigphi sums phi over the stars, so the columns belonging to
        # star k are the jacobian of phi at that star's position (column
        # k of X); the slice must span all `dim` columns of the block.
        jacobian[:, dim*k:dim*(k+1)] = d_phi(X[:, k], P)
    return jacobian

#compute jacobian of f
def d_f(X,P):
    """
    Jacobian (row vector) of f with respect to all star coordinates

    Computes (1/n**2) * (bigphi(X, P) - y)^T @ d_big_phi(X, P), where n**2
    is the number of pixel positions (columns of P). The observed image is
    read from the module-level `y`.

    :X: np.ndarray[(2, K)]
    :P: np.ndarray[(2, n**2)]
    :returns: np.ndarray[(1, 2*K)]
    """
    target = np.asarray(y)
    if target.ndim == 1:
        # A flat y would broadcast against the (n**2, 1) image into an
        # (n**2, n**2) matrix; use a column so the residual stays (n**2, 1).
        target = target[:, np.newaxis]
    residual = bigphi(X, P) - target
    return (1/P.shape[1])*residual.T@d_big_phi(X, P)

#gradient of f: the transpose of the jacobian returned by d_f
def grad_f(X,P):
    """Return the transpose of d_f(X, P)."""
    jacobian = d_f(X, P)
    return jacobian.T

In [None]:
#test out calculating f
# loadmat (with its default options) returns MATLAB scalars as 2-D arrays
# with a single element; .item() extracts it explicitly, since int()/float()
# on an array with ndim > 0 is deprecated since NumPy 1.25.
K = int(data['K'].item())
P = data['P']
X0 = data['X0']
d = int(data['d'].item())
delta_0 = data['delta_0']
delta_bar = data['delta_bar']
n = int(data['n'].item())
sigma = float(data['sigma'].item())
# Stack the columns of the image into an (n**2, 1) column vector, the shape
# f documents for y. A flat (n**2,) vector would broadcast against the
# (n**2, 1) output of bigphi into an (n**2, n**2) matrix.
y = data['y'].reshape((-1, 1), order='F')

# Generate a random point and a random unit direction
theta = np.random.uniform(-0.5, 0.5, (d, K))
v = np.random.uniform(-0.5, 0.5, (d, K))
v = v / np.linalg.norm(v)

f(theta,P,y)

In [None]:
#checking gradient is correct numerically
# loadmat (with its default options) returns MATLAB scalars as 2-D arrays
# with a single element; .item() extracts it explicitly, since int()/float()
# on an array with ndim > 0 is deprecated since NumPy 1.25.
K = int(data['K'].item())
P = data['P']
X0 = data['X0']
d = int(data['d'].item())
delta_0 = data['delta_0']
delta_bar = data['delta_bar']
n = int(data['n'].item())
sigma = float(data['sigma'].item())
# Stack the columns of the image into an (n**2, 1) column vector, the shape
# f documents for y. A flat (n**2,) vector would broadcast against the
# (n**2, 1) output of bigphi into an (n**2, n**2) matrix.
y = data['y'].reshape((-1, 1), order='F')

#check that the gradient is correct using
# f(x+tv)= f(x) + t<v,grad_f(x)> + O(t^2)

# Generate a random point and a random unit direction
theta = np.random.uniform(-0.5, 0.5, (d, K))
v = np.random.uniform(-0.5, 0.5, (d, K))
v = v / np.linalg.norm(v)

## Check the gradient
def checkgradient(f,grad_f, theta,v):
    """
    Plot the first-order Taylor remainder of f along the direction v

    For each step size t the error
        |f(theta + t*v) - f(theta) - t*<v, grad_f(theta)>|
    is computed and plotted against t on log-log axes. The expansion
    f(x+tv) = f(x) + t<v,grad_f(x)> + O(t^2) says this error should shrink
    quadratically in t when the gradient is correct.

    Reads the module-level `P` and `y`.

    :f: callable, called as f(X, P, y)
    :grad_f: callable, called as grad_f(X, P); its result may have the shape
             of theta or be that array flattened column by column
    :theta: np.ndarray, point at which the gradient is checked
    :v: np.ndarray, direction with the same shape as theta
    :returns: None (shows the plot)
    """
    #logspace of t values
    t = np.logspace(-8, 0, num=5)
    #initialise error to 0
    error = np.zeros_like(t)
    #pre-calculate f_lambda and f_lambda_grad to use in for loop
    f_lambda = f(theta,P,y)
    f_lambda_grad = np.asarray(grad_f(theta,P))
    # <v, grad f>: flatten both arrays column by column so matching
    # coordinates line up. v.T @ grad only has compatible shapes when theta
    # has a single column.
    slope = np.dot(v.flatten(order='F'), f_lambda_grad.flatten(order='F'))
    #compute the error at each t
    for i in tqdm(range(len(t))):
        error[i] = np.abs(f(theta+(t[i]*v),P,y)-f_lambda-(t[i]*slope))
    #plot the graph of error vs t
    plt.loglog(t,error)
    plt.xlabel('t (log scale)')
    plt.ylabel('Error (log scale)')
    plt.title('Plot of t vs Error')
    plt.grid()
    # plt.show must be called; the bare attribute reference does nothing.
    plt.show()
# Run the check at the random point theta along the normalised direction v.
checkgradient(f,grad_f,theta,v)