# Minimize L1 loss between closest points

Load data

In [1]:
import numpy as np

loaded = np.load('frames.npz')
rgb_all = loaded['rgb'] / 255.0
depth_all = loaded['depth'] / 65535.0

print(rgb_all.shape)
print(depth_all.shape)

(34, 288, 513, 3)
(34, 288, 513)


Generate point clouds

In [31]:
from plyfile import PlyData, PlyElement

def downsample_point_cloud(points, attrs):

    bmax = np.amax(points, 0)
    bmin = np.amin(points, 0)
    bbox_size = bmax - bmin
    size = np.prod(bbox_size)**(1/3)

    voxel_size = np.fmax((60*bbox_size/size+0.5).astype(np.int32), 1)
    # print(voxel_size)
    pids = -1 * np.ones(voxel_size, dtype=np.int32)
    pdists = np.inf * np.ones(voxel_size, dtype=np.float32)

    for i, p in enumerate(points):
        g = (p-bmin)/(bmax-bmin) * voxel_size
        g = np.fmin(g, voxel_size-0.001)
        gi = g.astype(np.int32)
        gd = np.linalg.norm(g-gi-0.5)
        if gd < pdists[gi[0], gi[1], gi[2]]:
            pids[gi[0], gi[1], gi[2]] = i
            pdists[gi[0], gi[1], gi[2]] = gd

    attrs1 = []
    points1 = []
    for p0, p1, p2 in zip(*np.where(pids >= 0)):
        i = pids[p0, p1, p2]
        attrs1.append(attrs[i])
        points1.append(points[i])
    return np.array(points1), np.array(attrs1)


def write_ply(points, rgbs, file_path):
    n = len(rgbs)
    vertex = np.zeros(n, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'), 
                                ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')])
    vertex['x'] = points[:,0]
    vertex['y'] = points[:,1]
    vertex['z'] = points[:,2]
    vertex['red'] = rgbs[:,0]
    vertex['green'] = rgbs[:,1]
    vertex['blue'] = rgbs[:,2]
    vertex_element = PlyElement.describe(vertex, 'vertex')
    ply_data = PlyData([vertex_element])
    ply_data.write(file_path)


def generate_point_cloud(rgb, depth):
    grad_x = depth[2:,1:-1]-depth[:-2,1:-1]
    grad_y = depth[1:-1,2:]-depth[1:-1,:-2]
    grad = np.hypot(grad_x, grad_y).flatten()
    rgb = rgb[1:-1, 1:-1]
    depth = depth[1:-1, 1:-1]
    h, w = depth.shape
    depth = depth.flatten()
    rgb = rgb.reshape((-1, 3))
    x = np.tile(np.arange(w), h) / (w*h)**0.5
    y = (h-np.repeat(np.arange(h), w)) / (w*h)**0.5
    z = depth
    p = np.array([x*2-1, y*2-1, z*2]).T
    i = np.where((grad < 0.25) & (z > 0.01))
    rgb, p = rgb[i], p[i]
    p, rgb = downsample_point_cloud(p, rgb)
    print(len(p), 'points')
    return p, rgb

pcl1, rgb1 = generate_point_cloud(rgb_all[9], depth_all[9])
write_ply(pcl1, rgb1, "pcl09.ply")
pcl2, rgb2 = generate_point_cloud(rgb_all[10], depth_all[10])
write_ply(pcl2, rgb2, "pcl10.ply")
pcl3, rgb3 = generate_point_cloud(rgb_all[11], depth_all[11])
write_ply(pcl3, rgb3, "pcl11.ply")


9095 points
8375 points
8901 points


Point cloud matching robust to outliers, minimize sum of L1 losses

In [3]:
from scipy.spatial import KDTree
import scipy.optimize

Keep first cloud static

Second cloud: optimize for so3 exponential map + translation

In [4]:
def decode_rt(T):
    phi = T[:3]
    t = T[3:6]
    theta = np.linalg.norm(phi)
    R = np.eye(3)
    if theta != 0.0:
        n = phi / theta
        nnT = np.outer(n, n)
        n_star = np.array([[0.0, -n[2], n[1]], [n[2], 0.0, -n[0]], [-n[1], n[0], 0.0]])
        R = np.cos(theta) * R + \
            (1.0-np.cos(theta)) * nnT + \
            np.sin(theta) * n_star
    assert np.linalg.norm(R@R.T-np.eye(3)) < 1e-12
    return R, t


def minl1_rt(pc1, pc2, T_guess=np.zeros(6)):
    T = np.array(T_guess)

    kdtree_pc1 = KDTree(pc1)

    def fun(T):
        R, t = decode_rt(T)
        Rp = np.dot(pc2, R.T)
        pc2_transformed = Rp + t
        g_pc2_transformed = np.zeros((*Rp.shape, len(T)), dtype=Rp.dtype)
        g_pc2_transformed[:, 0, 1] = Rp[:, 2]
        g_pc2_transformed[:, 0, 2] = -Rp[:, 1]
        g_pc2_transformed[:, 1, 0] = -Rp[:, 2]
        g_pc2_transformed[:, 1, 2] = Rp[:, 0]
        g_pc2_transformed[:, 2, 0] = Rp[:, 1]
        g_pc2_transformed[:, 2, 1] = -Rp[:, 0]
        g_pc2_transformed[:, 0, 3] = 1.0
        g_pc2_transformed[:, 1, 4] = 1.0
        g_pc2_transformed[:, 2, 5] = 1.0
        distances, indices = kdtree_pc1.query(pc2_transformed)
        g_distances = np.einsum('nij,ni->nj', g_pc2_transformed, pc2_transformed-pc1[indices]) / \
                (distances.reshape((len(distances), 1)) + 1e-8)
        cost = np.mean(distances)
        g_cost = np.mean(g_distances, axis=0)
        return cost, g_cost

    # print(scipy.optimize.check_grad(lambda _: fun(_)[0], lambda _: fun(_)[1], T))
    # assert False

    loss0 = fun(T)[0]
    print("original loss:", loss0)

    res = scipy.optimize.minimize(fun, T, jac=True)
    if not res.success:
        print(res.message)
    print('success', res.success, 'nit', res.nit, 'nfev', res.nfev)
    print("loss:", res.fun)
    R, t = decode_rt(res.x)
    pc2_transformed = np.dot(pc2, R.T) + t
    return pc2_transformed, res.x


from time import perf_counter
t0 = perf_counter()
pc2_transformed, T = minl1_rt(pcl1, pcl2)
t1 = perf_counter()
print(t1-t0, "secs")

write_ply(pc2_transformed, rgb2, "minl1_rt.ply")

original loss: 0.05982861957744673
Desired error not necessarily achieved due to precision loss.
success False nit 20 nfev 87
loss: 0.038522199725969083
0.6770351209997898 secs


Optimize for second cloud pose + both cloud z scale

 - Doesn't work, collapsed into a plane

In [81]:
def decode_rtzz(T):
    phi = T[:3]
    t = T[3:6]
    theta = np.linalg.norm(phi)
    R = np.eye(3)
    if theta != 0.0:
        n = phi / theta
        nnT = np.outer(n, n)
        n_star = np.array([[0.0, -n[2], n[1]], [n[2], 0.0, -n[0]], [-n[1], n[0], 0.0]])
        R = np.cos(theta) * R + \
            (1.0-np.cos(theta)) * nnT + \
            np.sin(theta) * n_star
    assert np.linalg.norm(R@R.T-np.eye(3)) < 1e-12
    return R, t, *np.exp(T[6:])

def decode_rtzz_jac(T):
    epsilon = 1e-6
    R, t, z1, z2 = decode_rtzz(T)
    num_rows_R, num_cols_R = R.shape
    num_rows_t, = t.shape
    num_dims_T = len(T)
    jacobian_R = np.zeros((num_rows_R, num_cols_R, num_dims_T))
    jacobian_t = np.zeros((num_rows_t, num_dims_T))
    jacobian_z1 = np.zeros((num_dims_T,))
    jacobian_z2 = np.zeros((num_dims_T,))
    for i in range(num_dims_T):
        delta_T = np.zeros_like(T)
        delta_T[i] = epsilon
        R1, t1, z11, z21 = decode_rtzz(T + delta_T)
        R0, t0, z10, z20 = decode_rtzz(T - delta_T)
        jacobian_R[:, :, i] = (R1 - R0) / (2 * epsilon)
        jacobian_t[:, i] = (t1 - t0) / (2 * epsilon)
        jacobian_z1[i] = (z11-z10) / (2 * epsilon)
        jacobian_z2[i] = (z21-z20) / (2 * epsilon)
    return jacobian_R, jacobian_t, jacobian_z1, jacobian_z2


def minl1_rtzz(pc1, pc2, T_guess=np.zeros(8)):
    T = np.array(T_guess)

    def fun(T):
        R, t, z1, z2 = decode_rtzz(T)
        pc1_scaled = pc1 * [[1, 1, z1]]
        # pc1_scaled = pc1
        kdtree_pc1 = KDTree(pc1_scaled)
        pc2_scaled = pc2 * [[1, 1, z2]]
        Rp = np.dot(pc2_scaled, R.T)
        pc2_transformed = Rp + t
        distances, indices = kdtree_pc1.query(pc2_transformed)
        cost = np.mean(distances)
        return cost

    loss0 = fun(T)
    print("original loss:", loss0)

    res = scipy.optimize.minimize(fun, T)
    if not res.success:
        print(res.message)
    print('success', res.success, 'nit', res.nit, 'nfev', res.nfev)
    print("loss:", res.fun)
    R, t, z1, z2 = decode_rtzz(res.x)
    pc1_transformed = pc1 * [[1,1,z1]]
    pc2_transformed = np.dot(pc2*[[1,1,z2]], R.T) + t
    return pc1_transformed, pc2_transformed, res.x


from time import perf_counter
t0 = perf_counter()
pc1_transformed, pc2_transformed, T = minl1_rtzz(pcl1, pcl2)
t1 = perf_counter()
print(t1-t0, "secs")

write_ply(pc1_transformed, rgb1, "minl1_rtzz_1.ply")
write_ply(pc2_transformed, rgb2, "minl1_rtzz_2.ply")

original loss: 0.05982861957744673
Desired error not necessarily achieved due to precision loss.
success False nit 35 nfev 831
loss: 0.006776329447766397
6.3003387409989955 secs


Camera?

In [57]:
def transform_rtfszsz(T, p1, p2, verbose=False):
    R, t = decode_rt(T[0:6])
    f = np.exp(T[6])
    # f = 4.0
    sz1, dz1 = np.exp(T[7:9])
    sz2, dz2 = np.exp(T[9:11])
    if verbose:
        print('R =', R, sep='\n')
        print('t =', t)
        print('f =', f)
        print('sz =', sz1, sz2)
        print('dz =', dz1, dz2)
    p1, p2 = np.array(p1), np.array(p2)
    p1[:,2] = p1[:,2]*sz1+dz1
    p1[:,:2] *= p1[:,2:]/f
    p2[:,2] = p2[:,2]*sz2+dz2
    p2[:,:2] *= p2[:,2:]/f
    return p1, np.dot(p2, R.T) + t


def regularize_pcl(p):
    p = p - np.mean(p, axis=0)
    cov = p.T @ p
    eigvals = np.linalg.eigvalsh(cov) ** 0.5
    # print(eigvals)
    # print(max(eigvals) / min(eigvals))
    return 1e-6 * max(eigvals) / min(eigvals), np.prod(eigvals)**(-1/3)


def minl1_rtfszsz(pc1, pc2, T_guess=np.zeros(11)):
    T = np.array(T_guess)

    def fun(T, reg=True):
        pc1_transformed, pc2_transformed = transform_rtfszsz(T, pc1, pc2)
        kdtree_pc1 = KDTree(pc1_transformed)
        distances, indices = kdtree_pc1.query(pc2_transformed)
        cost = np.mean(distances)
        reg1, s1 = regularize_pcl(pc1_transformed)
        reg2, s2 = regularize_pcl(pc2_transformed)
        cost *= (s1*s2)**0.5
        if not reg:
            return cost
        return cost + reg1 + reg2

    loss0 = fun(T, False)
    print("original loss:", loss0)

    res = scipy.optimize.minimize(fun, T)
    if not res.success:
        print(res.message)
    print('success', res.success, 'nit', res.nit, 'nfev', res.nfev)
    T = res.x
    print("loss:", fun(T, False))
    pc1_transformed, pc2_transformed = transform_rtfszsz(T, pc1, pc2, True)
    return pc1_transformed, pc2_transformed, T

from time import perf_counter
t0 = perf_counter()
pc1_transformed, pc2_transformed, T = minl1_rtfszsz(pcl1, pcl2)
t1 = perf_counter()
print(t1-t0, "secs")

print(T)
write_ply(pc1_transformed, rgb1, "minl1_rtfszsz_1.ply")
write_ply(pc2_transformed, rgb2, "minl1_rtfszsz_2.ply")

original loss: 0.0011934447948996224
success True nit 133 nfev 1884
loss: 0.0005769190387349132
R =
[[ 0.99997598  0.00358787  0.00592962]
 [-0.00369474  0.99982916  0.01811054]
 [-0.00586363 -0.01813201  0.99981841]]
t = [ 0.03661054 -0.0229958   0.15158072]
f = 0.999675505194089
sz = 0.6574367200384057 0.6704247421865344
dz = 1.5718395766459987 1.4392239135979719
9.748117209004704 secs
[-1.81224135e-02  5.89699104e-03 -3.64153445e-03  3.66105396e-02
 -2.29958036e-02  1.51580716e-01 -3.24547466e-04 -4.19406763e-01
  4.52246638e-01 -3.99843824e-01  3.64104019e-01]
