### Methods for non-linear least-squares with manifold constraint using tangent space:
- Gauss-Newton
- Levenberg-Marquardt

### Examples for non-linear least-squares on manifolds:
- Rotation in 2D (SO(2))
- Rotation in 3D (SO(3))

In [1]:
import autograd.numpy as np
from autograd import grad as auto_grad, jacobian as auto_jacobian
import scipy

import matplotlib.pyplot as plt
import seaborn

In [3]:
def solve_square_with_qr(A, b):
    """Solve the linear system ``A @ x = b`` for a square, real-valued matrix ``A``.

    ``A`` is factored as ``A = Q @ R``; the system then reduces to the
    upper-triangular problem ``R @ x = Q.T @ b``, which is solved by back
    substitution. ``Q.T`` (not the conjugate transpose) is used, hence the
    restriction to real matrices.

    Args:
        A: Square real matrix of shape (n, n).
        b: Right-hand side of shape (n,) or (n, k).

    Returns:
        The solution ``x`` of ``A @ x = b``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to expose ``scipy.linalg`` as an attribute.
    import scipy.linalg

    Q, R = np.linalg.qr(A)
    x = scipy.linalg.solve_triangular(R, Q.T @ b)
    return x


def solve_symm_with_chol(A, b):
    """Solve ``A @ x = b`` for a symmetric (Hermitian) positive-definite matrix ``A``.

    The matrix is Cholesky-factored once and the factor is reused for the
    two triangular solves.

    Args:
        A: Symmetric/Hermitian positive-definite matrix of shape (n, n).
            Symmetry alone is not enough: the Cholesky factorization fails
            for matrices that are not positive definite.
        b: Right-hand side of shape (n,) or (n, k).

    Returns:
        The solution ``x`` of ``A @ x = b``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to expose ``scipy.linalg`` as an attribute.
    import scipy.linalg

    c, lower = scipy.linalg.cho_factor(A)
    x = scipy.linalg.cho_solve((c, lower), b)
    return x

In [293]:
# Seed the global NumPy RNG.
# NOTE(review): no random draws appear in the cells shown here -- confirm the seed is still needed.
np.random.seed(41)

"""Example for optimization on SO(2) manifold"""

def skew_symmetric_3(x):
    """Return the 3x3 skew-symmetric (cross-product) matrix built from ``x[0..2]``.

    For 3-vectors ``a`` and ``b``, ``skew_symmetric_3(a) @ b`` equals the
    cross product ``a x b``.
    """
    vx, vy, vz = x[0], x[1], x[2]
    rows = [
        [0, -vz, vy],
        [vz, 0, -vx],
        [-vy, vx, 0],
    ]
    return np.array(rows)

# Build the SO(2) test data: a unit point and its image under a 25 degree rotation.
p0 = np.array([[1.],
               [0.]])
angle0 = 25. * np.pi / 180.
R_true = np.array([
    [np.cos(angle0), -np.sin(angle0)],
    [np.sin(angle0),  np.cos(angle0)],
])
p1 = np.dot(R_true, p0)
# Perturb the target's y-coordinate so that an unconstrained 2x2 fit and a
# rotation-constrained fit end up with different answers.
p1[1, 0] += 0.1

n0 = 4
def residuals_fn0(x):
    """Residuals of mapping ``p0`` through the 2x2 matrix stored in ``x``.

    ``x`` is reshaped to (2, 2), applied to the module-level point ``p0`` and
    compared against the module-level target ``p1``. The difference is
    returned as a flat vector.
    """
    transform = np.reshape(x, (2, 2))
    return (np.dot(transform, p0) - p1).ravel()

# Autodiff Jacobian of the SO(2) residuals with respect to the flattened matrix entries.
residuals_jacobian_fn0 = auto_jacobian(residuals_fn0)

# Number of tangent-space parameters used for SO(2) updates (a single rotation angle).
tangent_space_size_so2 = 1

def exp_map_so2(eps):
    """Map the tangent parameter ``eps[0]`` (an angle in radians) to a 2x2 rotation matrix."""
    angle = eps[0]
    c = np.cos(angle)
    s = np.sin(angle)
    return np.array([[c, -s],
                     [s,  c]])

def tangent_update_so2(x, eps):
    """Apply the tangent step ``eps`` to the flattened 2x2 matrix ``x``.

    The step is turned into a rotation via ``exp_map_so2`` and left-multiplied
    onto the current matrix; the result is returned flattened again.
    """
    rotation = exp_map_so2(eps)
    current = np.reshape(x, (2, 2))
    return np.ravel(np.dot(rotation, current))

def residuals_fn0_tangent_so2(x, eps):
    """Residuals of ``residuals_fn0`` after perturbing ``x`` by the tangent step ``eps``.

    The perturbation is a left-multiplication of the 2x2 matrix in ``x`` by
    ``exp_map_so2(eps)``.
    """
    perturbed = np.dot(exp_map_so2(eps), np.reshape(x, (2, 2)))
    return residuals_fn0(perturbed)

# Autodiff Jacobian of the perturbed residuals with respect to argument 1 (``eps``).
# NOTE: this name is rebound by the hand-written function defined right below,
# so the autodiff version is only in effect until that definition runs.
tangent_residuals_jacobian_fn0 = auto_jacobian(residuals_fn0_tangent_so2, 1)
# print(tangent_residuals_jacobian_fn0(R_true.ravel(), np.array([0.])))

def tangent_residuals_jacobian_fn0(x, eps):
    """Hand-derived Jacobian of the SO(2) residuals w.r.t. the tangent step, at ``eps == 0``.

    For the residual ``exp(eps) @ X @ p0 - p1`` the derivative at ``eps = 0``
    is ``G @ X @ p0`` with ``G = [[0, -1], [1, 0]]``. Only valid at the origin
    of the tangent space, which the assertion enforces. Returns a (2, 1) array.
    """
    assert np.all(eps == 0)
    generator = np.array([[0., -1.],
                          [1.,  0.]])
    point = np.dot(np.reshape(x, (2, 2)), p0)
    return np.dot(generator, point)

# print(tangent_residuals_jacobian_fn0(R_true.ravel(), np.array([0.])))


"""Example for optimization on SO(3) manifold"""

# Build the SO(3) test data: a point and its image under a 25 degree rotation
# about the z-axis.
p0_3d = np.array([[0.6],
                  [0.2],
                  [0.3]])
angle0_3d = 25. * np.pi / 180.
R_true_3d = np.array([
    [np.cos(angle0_3d), -np.sin(angle0_3d), 0.],
    [np.sin(angle0_3d),  np.cos(angle0_3d), 0.],
    [0.,                 0.,                1.],
])
p1_3d = np.dot(R_true_3d, p0_3d)
# Perturb the target's y-coordinate so that an unconstrained 3x3 fit and a
# rotation-constrained fit end up with different answers.
p1_3d[1, 0] += 0.05

n0 = 9
def residuals_fn1(x):
    """Residuals of mapping ``p0_3d`` through the 3x3 matrix stored in ``x``.

    ``x`` is reshaped to (3, 3), applied to the module-level point ``p0_3d``
    and compared against the module-level target ``p1_3d``. The difference is
    returned as a flat vector.
    """
    transform = np.reshape(x, (3, 3))
    return (np.dot(transform, p0_3d) - p1_3d).ravel()

# Autodiff Jacobian of the SO(3) residuals with respect to the flattened matrix entries.
residuals_jacobian_fn1 = auto_jacobian(residuals_fn1)

# Number of tangent-space parameters used for SO(3) updates (a 3-vector).
tangent_space_size_so3 = 3

def exp_map_so3(eps):
    """Map a 3-vector ``eps`` to a 3x3 rotation matrix (Rodrigues' formula).

    The rotation angle is ``||eps||``. Below an angle of 1e-3 the
    trigonometric coefficients are replaced by their leading Taylor terms,
    which avoids dividing by a near-zero angle.
    """
    theta = np.linalg.norm(eps)
    omega = skew_symmetric_3(eps)
    omega_sq = np.dot(omega, omega)
    if theta < 1e-3:
        # Small angle: sin(t)/t -> 1 and (1 - cos(t))/t**2 -> 1/2.
        return np.eye(3) + omega + 0.5 * omega_sq
    sin_coeff = np.sin(theta) / theta
    cos_coeff = (1 - np.cos(theta)) / theta**2
    return np.eye(3) + omega * sin_coeff + omega_sq * cos_coeff

def tangent_update_so3(x, eps):
    """Apply the tangent step ``eps`` to the flattened 3x3 matrix ``x``.

    The step is turned into a rotation via ``exp_map_so3`` and left-multiplied
    onto the current matrix; the result is returned flattened again.
    """
    rotation = exp_map_so3(eps)
    current = np.reshape(x, (3, 3))
    return np.ravel(np.dot(rotation, current))

def residuals_fn1_tangent_so3(x, eps):
    """Residuals of ``residuals_fn1`` after perturbing ``x`` by the tangent step ``eps``.

    The perturbation is a left-multiplication of the 3x3 matrix in ``x`` by
    ``exp_map_so3(eps)``.
    """
    perturbed = np.dot(exp_map_so3(eps), np.reshape(x, (3, 3)))
    return residuals_fn1(perturbed)

# Autodiff Jacobian of the perturbed residuals with respect to argument 1 (``eps``).
# NOTE: this name is rebound by the hand-written function defined right below,
# so the autodiff version is only in effect until that definition runs.
tangent_residuals_jacobian_fn1 = auto_jacobian(residuals_fn1_tangent_so3, 1)
#print(tangent_residuals_jacobian_fn1(R_true_3d.ravel(), np.array([0., 0., 0.])))

def tangent_residuals_jacobian_fn1(x, eps):
    """Hand-derived Jacobian of the SO(3) residuals w.r.t. the tangent step, at ``eps == 0``.

    For the residual ``exp(eps) @ X @ p0_3d - p1_3d`` the derivative at
    ``eps = 0`` is ``-skew(X @ p0_3d)``. Only valid at the origin of the
    tangent space, which the assertion enforces. Returns a (3, 3) array.
    """
    assert np.all(eps == 0)
    rotated_point = np.dot(np.reshape(x, (3, 3)), p0_3d).ravel()
    return -skew_symmetric_3(rotated_point)

#print(tangent_residuals_jacobian_fn1(R_true_3d.ravel(), np.array([0., 0., 0.])))


In [294]:
def min_gauss_newton(residuals_fn, residuals_jacobian_fn, x0, obj_tol=1e-12, max_iterations=10**6):
    """Minimize ``sum(residuals_fn(x)**2)`` with undamped Gauss-Newton steps.

    Each iteration takes the full step ``pinv(J.T @ J) @ J.T @ r`` where ``J``
    is the residual Jacobian and ``r`` the residual vector at the current
    point. Iteration stops once the change of the objective between two
    consecutive iterates is at most ``obj_tol`` in magnitude, or after
    ``max_iterations`` iterations. No line search or divergence check is done.

    Args:
        residuals_fn: Callable mapping ``x`` to the residual vector.
        residuals_jacobian_fn: Callable mapping ``x`` to the residual Jacobian.
        x0: Starting point (copied, never modified).
        obj_tol: Threshold on ``|objective change|`` used to stop.
        max_iterations: Upper bound on the number of iterations.

    Returns:
        Tuple ``(x, info)``: the final iterate and a dict with diagnostics.
        ``info["grad"]`` holds the last residual Jacobian and
        ``info["step_dir"]`` the last step; both are ``None`` when no
        iteration was performed.
    """
    x = np.array(x0)
    iteration = 0
    step_size = 1
    dobj = float("inf")
    # Pre-bind so ``info`` can be assembled even when the loop body never
    # runs (e.g. ``max_iterations=0``).
    grad = None
    step_dir = None
    residuals = residuals_fn(x)
    obj_value = np.sum(residuals**2)
    while np.abs(dobj) > obj_tol and iteration < max_iterations:
        grad = residuals_jacobian_fn(x)
        # The pseudo-inverse keeps the step defined when J.T @ J is singular.
        pseudo_inv = np.linalg.pinv(grad.T @ grad)
        step_dir = (pseudo_inv @ grad.T) @ residuals
        prev_obj_value = obj_value
        x = x - step_size * step_dir
        residuals = residuals_fn(x)
        obj_value = np.sum(residuals**2)
        dobj = obj_value - prev_obj_value
        iteration += 1
    info = {
        "obj_value": obj_value,
        "dobj": dobj,
        "obj_tol": obj_tol,
        "iteration": iteration,
        "max_iterations": max_iterations,
        "grad": grad,
        "step_dir": step_dir,
        "step_size": step_size,
    }
    return x, info


In [302]:
def _lm_trial_step(residuals_fn, x, grad, grad_grad, scaling, residuals, damping):
    """Evaluate one damped step from ``x``: return (step, x_next, residuals_next, obj_next)."""
    step = (np.linalg.pinv(grad_grad + damping * scaling) @ grad.T) @ residuals
    x_next = x - step
    residuals_next = residuals_fn(x_next)
    return step, x_next, residuals_next, np.sum(residuals_next**2)


def min_levenberg_marquardt(residuals_fn, residuals_jacobian_fn, x0, lambda0=1., v=2.,
                            obj_tol=1e-12, max_iterations=10**6, max_inner_iterations=100):
    """Minimize ``sum(residuals_fn(x)**2)`` with Levenberg-Marquardt steps.

    The damped normal matrix is ``J.T @ J + lambda * diag(J.T @ J)``. In every
    outer iteration two damping values are tried, ``lambda`` and ``lambda / v``:

    * if the less damped step (``lambda / v``) lowers the objective it is
      taken and ``lambda`` is reduced to ``lambda / v``;
    * otherwise, if the step for ``lambda`` does not raise the objective it is
      taken and ``lambda`` is kept;
    * otherwise ``lambda`` is multiplied by ``v`` and both steps are retried,
      at most ``max_inner_iterations`` times.

    A step that raises the objective is never accepted. If no acceptable step
    is found within ``max_inner_iterations`` attempts, the optimization stops
    at the current point and reports ``dobj == 0``.

    Args:
        residuals_fn: Callable mapping ``x`` to the residual vector.
        residuals_jacobian_fn: Callable mapping ``x`` to the residual Jacobian.
        x0: Starting point (copied, never modified).
        lambda0: Initial damping value.
        v: Factor (> 1) by which the damping is raised or lowered.
        obj_tol: Threshold on ``|objective change|`` used to stop.
        max_iterations: Upper bound on the number of outer iterations.
        max_inner_iterations: Upper bound on damping attempts per outer iteration.

    Returns:
        Tuple ``(x, info)``: the final iterate and a dict with diagnostics.
        ``info["grad"]`` holds the last residual Jacobian and ``info["step"]``
        the last accepted step; they are ``None`` if none was computed/accepted.
    """
    x = np.array(x0)
    lambda_ = lambda0
    iteration = 0
    dobj = float("inf")
    # Pre-bind so ``info`` can be assembled even when the loop body never
    # runs or no step is ever accepted.
    grad = None
    step = None
    residuals = residuals_fn(x)
    obj_value = np.sum(residuals**2)
    while np.abs(dobj) > obj_tol and iteration < max_iterations:
        prev_obj_value = obj_value
        grad = residuals_jacobian_fn(x)
        grad_grad = grad.T @ grad
        scaling = np.diag(np.diag(grad_grad))
        accepted = None
        for _ in range(max_inner_iterations):
            lambda2 = lambda_ / v
            trial1 = _lm_trial_step(residuals_fn, x, grad, grad_grad, scaling, residuals, lambda_)
            trial2 = _lm_trial_step(residuals_fn, x, grad, grad_grad, scaling, residuals, lambda2)
            if trial2[3] - prev_obj_value < 0:
                # Less damping already improves: take it and relax the damping.
                lambda_ = lambda2
                accepted = trial2
                break
            if trial1[3] - prev_obj_value <= 0:
                # Current damping does not make things worse: take it.
                accepted = trial1
                break
            # Neither candidate is acceptable (uphill or NaN): damp harder.
            lambda_ *= v
        iteration += 1
        if accepted is None:
            # Damping budget exhausted without an acceptable step: stay put.
            dobj = 0.0
            break
        step, x, residuals, obj_value = accepted
        dobj = obj_value - prev_obj_value
    info = {
        "obj_value": obj_value,
        "dobj": dobj,
        "obj_tol": obj_tol,
        "iteration": iteration,
        "max_iterations": max_iterations,
        "grad": grad,
        "step": step,
    }
    return x, info


In [303]:
def min_gauss_newton_tangent_space(residuals_fn,
                                   tangent_update_fn,
                                   tangent_jacobian_fn,
                                   tangent_space_size,
                                   x0, obj_tol=1e-12, max_iterations=10**6):
    """Minimize ``sum(residuals_fn(x)**2)`` with Gauss-Newton steps taken in a tangent space.

    The Jacobian is taken with respect to a local perturbation ``eps`` of the
    current point (evaluated at ``eps = 0``), the Gauss-Newton step is solved
    for in those coordinates, and the point is moved with
    ``tangent_update_fn``. Iteration stops once the change of the objective
    between two consecutive iterates is at most ``obj_tol`` in magnitude, or
    after ``max_iterations`` iterations.

    Args:
        residuals_fn: Callable mapping ``x`` to the residual vector.
        tangent_update_fn: Callable ``(x, eps) -> x_new`` applying a tangent step.
        tangent_jacobian_fn: Callable ``(x, eps) -> J``; always called with
            ``eps`` equal to the zero vector.
        tangent_space_size: Number of tangent-space parameters.
        x0: Starting point (copied, never modified).
        obj_tol: Threshold on ``|objective change|`` used to stop.
        max_iterations: Upper bound on the number of iterations.

    Returns:
        Tuple ``(x, info)``: the final iterate and a dict with diagnostics.
        ``info["grad"]`` holds the last tangent Jacobian and
        ``info["step_dir"]`` the last tangent step; both are ``None`` when no
        iteration was performed.
    """
    eps0 = np.zeros((tangent_space_size,))
    x = np.array(x0)
    iteration = 0
    step_size = 1
    dobj = float("inf")
    # Pre-bind so ``info`` can be assembled even when the loop body never
    # runs (e.g. ``max_iterations=0``).
    grad = None
    step_dir = None
    residuals = residuals_fn(x)
    obj_value = np.sum(residuals**2)
    while np.abs(dobj) > obj_tol and iteration < max_iterations:
        grad = tangent_jacobian_fn(x, eps0)
        # The pseudo-inverse keeps the step defined when J.T @ J is singular.
        pseudo_inv = np.linalg.pinv(grad.T @ grad)
        step_dir = (pseudo_inv @ grad.T) @ residuals
        prev_obj_value = obj_value
        x = tangent_update_fn(x, - step_size * step_dir)
        residuals = residuals_fn(x)
        obj_value = np.sum(residuals**2)
        dobj = obj_value - prev_obj_value
        iteration += 1
    info = {
        "obj_value": obj_value,
        "dobj": dobj,
        "obj_tol": obj_tol,
        "iteration": iteration,
        "max_iterations": max_iterations,
        "grad": grad,
        "step_dir": step_dir,
        "step_size": step_size,
    }
    return x, info


In [310]:
def _lm_tangent_trial_step(residuals_fn, tangent_update_fn, x, grad, grad_grad, scaling,
                           residuals, damping):
    """Evaluate one damped tangent step from ``x``: return (step, x_next, residuals_next, obj_next)."""
    step = (np.linalg.pinv(grad_grad + damping * scaling) @ grad.T) @ residuals
    x_next = tangent_update_fn(x, - step)
    residuals_next = residuals_fn(x_next)
    return step, x_next, residuals_next, np.sum(residuals_next**2)


def min_levenberg_marquardt_tangent_space(residuals_fn,
                                          tangent_update_fn,
                                          tangent_jacobian_fn,
                                          tangent_space_size,
                                          x0, lambda0=1., v=2.,
                                          obj_tol=1e-12, max_iterations=10**6,
                                          max_inner_iterations=100):
    """Minimize ``sum(residuals_fn(x)**2)`` with Levenberg-Marquardt steps taken in a tangent space.

    The Jacobian is taken with respect to a local perturbation ``eps`` of the
    current point (evaluated at ``eps = 0``) and the damped normal matrix is
    ``J.T @ J + lambda * diag(J.T @ J)``. In every outer iteration two damping
    values are tried, ``lambda`` and ``lambda / v``:

    * if the less damped step (``lambda / v``) lowers the objective it is
      taken and ``lambda`` is reduced to ``lambda / v``;
    * otherwise, if the step for ``lambda`` does not raise the objective it is
      taken and ``lambda`` is kept;
    * otherwise ``lambda`` is multiplied by ``v`` and both steps are retried,
      at most ``max_inner_iterations`` times.

    A step that raises the objective is never accepted. If no acceptable step
    is found within ``max_inner_iterations`` attempts, the optimization stops
    at the current point and reports ``dobj == 0``.

    Args:
        residuals_fn: Callable mapping ``x`` to the residual vector.
        tangent_update_fn: Callable ``(x, eps) -> x_new`` applying a tangent step.
        tangent_jacobian_fn: Callable ``(x, eps) -> J``; always called with
            ``eps`` equal to the zero vector.
        tangent_space_size: Number of tangent-space parameters.
        x0: Starting point (copied, never modified).
        lambda0: Initial damping value.
        v: Factor (> 1) by which the damping is raised or lowered.
        obj_tol: Threshold on ``|objective change|`` used to stop.
        max_iterations: Upper bound on the number of outer iterations.
        max_inner_iterations: Upper bound on damping attempts per outer iteration.

    Returns:
        Tuple ``(x, info)``: the final iterate and a dict with diagnostics.
        ``info["grad"]`` holds the last tangent Jacobian and ``info["step"]``
        the last accepted tangent step; they are ``None`` if none was
        computed/accepted.
    """
    eps0 = np.zeros((tangent_space_size,))
    x = np.array(x0)
    lambda_ = lambda0
    iteration = 0
    dobj = float("inf")
    # Pre-bind so ``info`` can be assembled even when the loop body never
    # runs or no step is ever accepted.
    grad = None
    step = None
    residuals = residuals_fn(x)
    obj_value = np.sum(residuals**2)
    while np.abs(dobj) > obj_tol and iteration < max_iterations:
        prev_obj_value = obj_value
        grad = tangent_jacobian_fn(x, eps0)
        grad_grad = grad.T @ grad
        scaling = np.diag(np.diag(grad_grad))
        accepted = None
        for _ in range(max_inner_iterations):
            lambda2 = lambda_ / v
            trial1 = _lm_tangent_trial_step(residuals_fn, tangent_update_fn, x, grad, grad_grad,
                                            scaling, residuals, lambda_)
            trial2 = _lm_tangent_trial_step(residuals_fn, tangent_update_fn, x, grad, grad_grad,
                                            scaling, residuals, lambda2)
            if trial2[3] - prev_obj_value < 0:
                # Less damping already improves: take it and relax the damping.
                lambda_ = lambda2
                accepted = trial2
                break
            if trial1[3] - prev_obj_value <= 0:
                # Current damping does not make things worse: take it.
                accepted = trial1
                break
            # Neither candidate is acceptable (uphill or NaN): damp harder.
            lambda_ *= v
        iteration += 1
        if accepted is None:
            # Damping budget exhausted without an acceptable step: stay put.
            dobj = 0.0
            break
        step, x, residuals, obj_value = accepted
        dobj = obj_value - prev_obj_value
    info = {
        "obj_value": obj_value,
        "dobj": dobj,
        "obj_tol": obj_tol,
        "iteration": iteration,
        "max_iterations": max_iterations,
        "grad": grad,
        "step": step,
    }
    return x, info


In [311]:
"""Minimize function #0 with gauss newton without manifold constraint"""

# Start from the identity matrix, flattened to the 4-vector the optimizer works on.
x0 = np.eye(2).ravel()
# x0 = np.zeros((2, 2)).ravel()

max_iterations = 100
# All four matrix entries are free parameters here: nothing keeps x a rotation.
x_min, info = min_gauss_newton(residuals_fn0, residuals_jacobian_fn0, x0, max_iterations=max_iterations)
x_min = np.reshape(x_min, (2, 2))
print("minimum x: {}".format(x_min))
print("objective value: {}".format(info["obj_value"]))

# Recompute the squared error directly from the fitted matrix as a cross-check.
p = np.dot(x_min, p0)
err = np.sum((p - p1)**2)
print("error: {}".format(err))

# A rotation matrix has determinant 1; compare the fit against the ground truth.
print("determinant of R_true: {}".format(np.linalg.det(R_true)))
print("determinant of x_min: {}".format(np.linalg.det(x_min)))

minimum x: [[0.90630779 0.        ]
 [0.52261826 1.        ]]
objective value: 0.0
error: 0.0
determinant of R_true: 0.9999999999999999
determinant of x_min: 0.9063077870366499


In [312]:
"""Minimize function #0 with gauss newton with manifold constraint (using tangent space)"""

# Start from the identity, which is itself a rotation; every update below
# left-multiplies by a rotation, so the iterate stays on SO(2).
x0 = np.eye(2).ravel()

max_iterations = 100
# NOTE(review): residuals_fn4 / residuals_jacobian_fn4 are not defined in the
# cells shown here -- these debug prints look stale.
# print(residuals_fn4(x0).shape)
# print(residuals_jacobian_fn4(x0).shape)
x_min, info = min_gauss_newton_tangent_space(
    residuals_fn0,
    tangent_update_so2,
    tangent_residuals_jacobian_fn0,
    tangent_space_size_so2,
    x0, max_iterations=max_iterations)
x_min = np.reshape(x_min, (2, 2))
print("minimum x: {}".format(x_min))
print("objective value: {}".format(info["obj_value"]))

# Recompute the squared error directly from the fitted matrix as a cross-check.
p = np.dot(x_min, p0)
err = np.sum((p - p1)**2)
print("error: {}".format(err))

# A rotation matrix has determinant 1; compare the fit against the ground truth.
print("determinant of R_true: {}".format(np.linalg.det(R_true)))
print("determinant of x_min: {}".format(np.linalg.det(x_min)))

minimum x: [[ 0.86628965 -0.49954204]
 [ 0.49954204  0.86628965]]
objective value: 0.0021339636446826
error: 0.0021339636446826
determinant of R_true: 0.9999999999999999
determinant of x_min: 1.0000000000000002


In [317]:
"""Minimize function #0 with levenberg marquardt with manifold constraint (using tangent space)"""

# Start from the identity, which is itself a rotation; every update below
# left-multiplies by a rotation, so the iterate stays on SO(2).
x0 = np.eye(2).ravel()

max_iterations = 100
# NOTE(review): residuals_fn4 / residuals_jacobian_fn4 are not defined in the
# cells shown here -- these debug prints look stale.
# print(residuals_fn4(x0).shape)
# print(residuals_jacobian_fn4(x0).shape)
x_min, info = min_levenberg_marquardt_tangent_space(
    residuals_fn0,
    tangent_update_so2,
    tangent_residuals_jacobian_fn0,
    tangent_space_size_so2,
    x0, max_iterations=max_iterations)
x_min = np.reshape(x_min, (2, 2))
print("minimum x: {}".format(x_min))
print("objective value: {}".format(info["obj_value"]))

# Recompute the squared error directly from the fitted matrix as a cross-check.
p = np.dot(x_min, p0)
err = np.sum((p - p1)**2)
print("error: {}".format(err))

# A rotation matrix has determinant 1; compare the fit against the ground truth.
print("determinant of R_true: {}".format(np.linalg.det(R_true)))
print("determinant of x_min: {}".format(np.linalg.det(x_min)))

minimum x: [[ 0.86628968 -0.49954199]
 [ 0.49954199  0.86628968]]
objective value: 0.00213396364468073
error: 0.00213396364468073
determinant of R_true: 0.9999999999999999
determinant of x_min: 1.0


In [318]:
"""Minimize function #1 with gauss newton without manifold constraint"""

# Start from the identity matrix, flattened to the 9-vector the optimizer works on.
x0 = np.eye(3).ravel()
# x0 = np.zeros((3, 3)).ravel()

max_iterations = 100
# All nine matrix entries are free parameters here: nothing keeps x a rotation.
x_min, info = min_gauss_newton(residuals_fn1, residuals_jacobian_fn1, x0, max_iterations=max_iterations)
x_min = np.reshape(x_min, (3, 3))
print("minimum x: {}".format(x_min))
print("objective value: {}".format(info["obj_value"]))

# Recompute the squared error directly from the fitted matrix as a cross-check.
p = np.dot(x_min, p0_3d)
err = np.sum((p - p1_3d)**2)
print("error: {}".format(err))

# A rotation matrix has determinant 1; compare the fit against the ground truth.
print("determinant of R_true_3d: {}".format(np.linalg.det(R_true_3d)))
print("determinant of x_min: {}".format(np.linalg.det(x_min)))

minimum x: [[ 0.82766655 -0.05744448 -0.08616672]
 [ 0.34877451  1.11625817  0.17438725]
 [ 0.          0.          1.        ]]
objective value: 0.0
error: 0.0
determinant of R_true_3d: 0.9999999999999999
determinant of x_min: 0.9439247241115509


In [319]:
"""Minimize function #1 with gauss newton with manifold constraint (using tangent space)"""

# Start from the identity, which is itself a rotation; every update below
# left-multiplies by the output of exp_map_so3.
x0 = np.eye(3).ravel()

max_iterations = 100
# NOTE(review): residuals_fn4 / residuals_jacobian_fn4 are not defined in the
# cells shown here -- these debug prints look stale.
# print(residuals_fn4(x0).shape)
# print(residuals_jacobian_fn4(x0).shape)
x_min, info = min_gauss_newton_tangent_space(
    residuals_fn1,
    tangent_update_so3,
    tangent_residuals_jacobian_fn1,
    tangent_space_size_so3,
    x0, max_iterations=max_iterations)
x_min = np.reshape(x_min, (3, 3))
print("minimum x: {}".format(x_min))
print("objective value: {}".format(info["obj_value"]))

# Recompute the squared error directly from the fitted matrix as a cross-check.
p = np.dot(x_min, p0_3d)
err = np.sum((p - p1_3d)**2)
print("error: {}".format(err))

# A rotation matrix has determinant 1; compare the fit against the ground truth.
print("determinant of R_true_3d: {}".format(np.linalg.det(R_true_3d)))
print("determinant of x_min: {}".format(np.linalg.det(x_min)))

minimum x: [[ 0.91713928 -0.38118256 -0.11642761]
 [ 0.39562361  0.90609388  0.14991946]
 [ 0.04834766 -0.18355854  0.98181911]]
objective value: 0.0010309731255731854
error: 0.0010309731255731854
determinant of R_true_3d: 0.9999999999999999
determinant of x_min: 1.000000000000001


In [321]:
"""Minimize function #1 with levenberg marquardt newton with manifold constraint (using tangent space)"""

# Start from the identity, which is itself a rotation; every update below
# left-multiplies by the output of exp_map_so3.
x0 = np.eye(3).ravel()

max_iterations = 100
# NOTE(review): residuals_fn4 / residuals_jacobian_fn4 are not defined in the
# cells shown here -- these debug prints look stale.
# print(residuals_fn4(x0).shape)
# print(residuals_jacobian_fn4(x0).shape)
x_min, info = min_levenberg_marquardt_tangent_space(
    residuals_fn1,
    tangent_update_so3,
    tangent_residuals_jacobian_fn1,
    tangent_space_size_so3,
    x0, max_iterations=max_iterations)
x_min = np.reshape(x_min, (3, 3))
print("minimum x: {}".format(x_min))
print("objective value: {}".format(info["obj_value"]))

# Recompute the squared error directly from the fitted matrix as a cross-check.
p = np.dot(x_min, p0_3d)
err = np.sum((p - p1_3d)**2)
print("error: {}".format(err))

# A rotation matrix has determinant 1; compare the fit against the ground truth.
print("determinant of R_true_3d: {}".format(np.linalg.det(R_true_3d)))
print("determinant of x_min: {}".format(np.linalg.det(x_min)))

minimum x: [[ 0.93298448 -0.29580328 -0.20503749]
 [ 0.34294071  0.9035157   0.257004  ]
 [ 0.10923197 -0.31009645  0.94440911]]
objective value: 0.0010309731255715919
error: 0.0010309731255715919
determinant of R_true_3d: 0.9999999999999999
determinant of x_min: 1.0000000000000615
