In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os
import sys
# Put the parent of the current working directory on the import path
# (presumably so the local `burybarrel` package can be imported).
module_path = os.path.abspath("..")
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import json
from pathlib import Path
from typing import List

import dataclass_array as dca
import jax.numpy as jnp
import matplotlib.pyplot as plt
import mitsuba as mi
import numpy as np
from PIL import Image
import pycolmap
import pyrender
import trimesh
import visu3d as v3d

import burybarrel.colmap_util as cutil
from burybarrel.image import render_v3d

In [None]:
# Input locations for this run (absolute paths, so they only resolve on the
# machine where this data lives).
reconstr_path = Path("/scratch/jeyan/barreldata/results/barrelddt1/colmap-out/0")  # passed to pycolmap.Reconstruction below
foundpose_res_path = Path("/scratch/jeyan/foundpose/output_barrelddt1_renders_layer9/inference/estimated-poses.json")  # JSON read into `foundpose_res` later
obj_path = Path("/scratch/jeyan/barreldata/models3d/barrelsingle-scaled.ply")  # mesh read with trimesh later

# Load the reconstruction and print its summary as a quick sanity check.
reconstruction = pycolmap.Reconstruction(reconstr_path)
print(reconstruction.summary())

In [8]:
# Pull the point cloud (positions `p`, colors `c`) and the cameras out of the
# reconstruction through the project helpers in `burybarrel.colmap_util`.
p, c = cutil.get_pc(reconstruction)
pts3d = v3d.Point3d(p=p, rgb=c)
cams = cutil.get_cams_v3d(reconstruction)  # presumably a batch of v3d cameras — verify against the helper
# v3d.make_fig([cams, pts3d])

In [9]:
# Read the pose estimates and keep only the entries whose "hypothesis_id"
# is the string "0".
with open(foundpose_res_path, "rt") as f:
    foundpose_res = [
        estimate for estimate in json.load(f) if estimate["hypothesis_id"] == "0"
    ]

In [None]:
# Display one filtered pose entry (index 9) to inspect its fields.
foundpose_res[9]

In [11]:
def _pose_to_matrix(pose):
    """Assemble a 4x4 homogeneous matrix from a pose entry's "R" and "t" fields."""
    mat = np.eye(4)
    mat[:3, :3] = pose["R"]
    # "t" is flattened first so it fits the 3-element translation column.
    mat[:3, 3] = np.reshape(pose["t"], -1)
    return mat


# One object-to-camera matrix per filtered pose entry, stacked along axis 0.
obj2cams = np.array([_pose_to_matrix(res) for res in foundpose_res])

In [40]:
# Load the object mesh and wrap its vertices as a point cloud colored solid
# red (only the first channel of `rgb` is non-zero).
mesh = trimesh.load(obj_path)
vtxs = np.array(mesh.vertices)
rgb = np.zeros_like(vtxs, dtype=np.uint8)
rgb[:, 0] = 255
vtxs_p3d = v3d.Point3d(p=vtxs, rgb=rgb)

In [13]:
# unscaled 3d
# For every camera, chain the estimated object-to-camera pose with that
# camera's world_from_cam transform to place the model vertices in the
# reconstruction frame, and collect everything for an optional 3D figure.
tofig = []
barrels_trf = []  # NOTE(review): never appended to in this cell
tofig.extend([pts3d, cams])
for i, cam in enumerate(cams):
    cam2world = cam.world_from_cam
    # assumes obj2cams[i] belongs to cams[i] (same ordering) — TODO confirm
    obj2cam = v3d.Transform.from_matrix(obj2cams[i])
    rgb = np.zeros_like(vtxs, dtype=np.uint8)
    rgb[:, 0] = 255
    pts = v3d.Point3d(p=vtxs, rgb=rgb)
    pts = cam2world @ obj2cam @ pts
    tofig.extend([pts])
# v3d.make_fig(*tofig)

In [20]:
# scaled 3d
# Same overlay as the unscaled cell, but with the camera translations and the
# scene points multiplied by a hand-picked scale factor first.
scalefactor = 0.19
# NOTE(review): np.eye(4) * scalefactor also scales the bottom-right entry;
# presumably from_matrix only reads the upper 3x4 part — confirm.
scaleT = v3d.Transform.from_matrix(np.eye(4) * scalefactor)
tofig = []
# Only the translation of each camera pose is scaled.
camscaled = cams.replace(world_from_cam=cams.world_from_cam.replace(t=cams.world_from_cam.t * scalefactor))
sceneptsscaled = pts3d.apply_transform(scaleT)
barrels_trf = []  # transformed model point clouds, one per camera
barrels_T = []  # composed cam2world @ obj2cam transforms, one per camera
tofig.extend([sceneptsscaled, camscaled])
for i, cam in enumerate(camscaled):
    cam2world = cam.world_from_cam
    # assumes obj2cams[i] belongs to camscaled[i] (same ordering) — TODO confirm
    obj2cam = v3d.Transform.from_matrix(obj2cams[i])
    rgb = np.zeros_like(vtxs, dtype=np.uint8)
    rgb[:, 0] = 255
    pts = v3d.Point3d(p=vtxs, rgb=rgb)
    pts = cam2world @ obj2cam @ pts
    barrels_T.append(cam2world @ obj2cam)
    barrels_trf.append(pts)
    tofig.extend([pts])
# v3d.make_fig(*tofig)

In [39]:
# manually finding a truth barrel; i.e. manually scale and select barrel, camera pair
# that visually fits the best
i = 9
truth_T = barrels_T[i]  # composed cam2world @ obj2cam for the chosen camera
# Re-express that single reference pose relative to every scaled camera; the
# added leading axis is presumably there to broadcast over the camera batch.
obj2cams_truth = camscaled.world_from_cam.inv @ truth_T[None, ...]
# v3d.make_fig(camscaled[i], sceneptsscaled, barrels_trf[i])

In [None]:
# Render the scaled scene plus the model placed with the reference pose, as
# seen from camera `i`, to check the fit visually.
i = 2
Image.fromarray(render_v3d(camscaled[i], dca.concat([sceneptsscaled, camscaled[i].world_from_cam @ obj2cams_truth[i] @ vtxs_p3d]), radius=4))

In [68]:
# Persist the reference barrel pose relative to every camera. Each record is
# tagged with the image path/id of the pose entry at the same index.
gt_data_list = [
    {
        "img_path": foundpose_res[i]["img_path"],
        "img_id": foundpose_res[i]["img_id"],
        "R": T.R.tolist(),
        "t": T.t.tolist(),
    }
    for i, T in enumerate(obj2cams_truth)
]
with open("/scratch/jeyan/barreldata/results/barrelddt1/gt_obj2cam.json", "wt") as f:
    json.dump(gt_data_list, f)

In [30]:
# Select camera index 9 for the (currently disabled) render below.
i = 9
# Image.fromarray(render_v3d(camscaled[i], dca.concat([sceneptsscaled, barrels_trf[i]]), radius=2))

In [31]:
# Single-camera version of the unscaled overlay: place the model in the
# reconstruction frame using camera 9 and its estimated pose.
mesh = trimesh.load(obj_path)
vtxs = np.array(mesh.vertices)
stuff = []
stuff.append(pts3d)

i = 9
cam = cams[i]
cam2world = cam.world_from_cam
# assumes obj2cams[i] belongs to cams[i] (same ordering) — TODO confirm
obj2cam = v3d.Transform.from_matrix(obj2cams[i])
rgb = np.zeros_like(vtxs, dtype=np.uint8)
rgb[:, 0] = 255
pts = v3d.Point3d(p=vtxs, rgb=rgb)
pts = cam2world @ obj2cam @ pts
stuff.extend([pts, cam])
# Image.fromarray(render_v3d(cam, dca.concat([pts3d, pts]), radius=2))
# Image.fromarray(render_v3d(cam, dca.concat([pts3d]), radius=2))

In [None]:
# Camera-to-world and object-to-camera matrices as JAX arrays, read as globals
# by the cost function defined below.
camTs = jnp.array(cams.world_from_cam.matrix4x4)
objTs = jnp.array(obj2cams)

def variance_from_scale(scale):
    """Return the summed per-axis variance of the object centers for a scale.

    The translation column of every matrix in the global ``camTs`` is
    multiplied by ``scale``; the homogeneous origin is then pushed through the
    matching global ``objTs`` matrix and the rescaled camera matrix, and the
    variances of the resulting points along each of the three axes are added.
    """
    origin_h = jnp.array([0, 0, 0, 1.0])
    cams_rescaled = camTs.at[:, 0:3, 3].multiply(scale)
    obj_centers = (cams_rescaled @ objTs @ origin_h)[:, :3]
    return jnp.var(obj_centers, axis=0).sum()

from jax import grad, jacfwd, jacrev

# Fixed-step gradient descent on the scalar scale. It stops once the last
# evaluated gradient magnitude is no larger than `eps`, or after `max_steps`
# updates so a non-converging descent cannot hang the kernel.
grad_cost = grad(variance_from_scale)
scaleinit = 1.0
currscale = scaleinit
currgrad = grad_cost(scaleinit)
rate = 0.01
eps = 1e-3
max_steps = 10000
n_steps = 0
while jnp.abs(currgrad) > eps and n_steps < max_steps:
    currgrad = grad_cost(currscale)
    currscale -= rate * currgrad
    n_steps += 1
if jnp.abs(currgrad) > eps:
    # Only reached when the step cap, not the gradient test, ended the loop.
    print(f"gradient descent did not converge within {max_steps} steps")
print(currscale)

In [133]:
def ransac(*data, fit_func=None, loss_func=None, cost_func=None, samp_min=10, inlier_min=10, inlier_thres=0.1, max_iter=1000, seed=None):
    """
    Generic RANSAC loop over one or more parallel data arrays.

    Each iteration draws ``samp_min`` indices without replacement, fits a model
    on that sample, scores every data point with ``loss_func`` and treats the
    points whose loss is below ``inlier_thres`` as inliers. A candidate is kept
    when it has at least ``inlier_min`` inliers and either more inliers than
    the best so far, or the same number with a lower ``cost_func`` value.

    Args:
        *data: arrays indexed along their first axis; index i of every array
            refers to the same sample
        fit_func (data -> model): receives a list with the sampled rows of each array
        loss_func ((model, data) -> array): vectorized loss for individual data points
        cost_func ((model, data) -> scalar): total cost to try to minimize; only
            evaluated on candidates that pass the inlier count checks
        samp_min: number of points drawn per iteration
        inlier_min: minimum inlier count for a candidate to be accepted
        inlier_thres: a point is an inlier when its loss is strictly below this
        max_iter: number of sampling iterations
        seed: forwarded to ``np.random.default_rng``

    Returns:
        (best_model, best_inlier_idxs)

    Raises:
        TypeError: if one of the three callbacks is not callable
        ValueError: if ``samp_min`` exceeds the number of data points, or if no
            candidate reached ``inlier_min`` inliers
    """
    for cb_name, cb in (("fit_func", fit_func), ("loss_func", loss_func), ("cost_func", cost_func)):
        if not callable(cb):
            raise TypeError(f"{cb_name} must be callable, got {cb!r}")
    n_points = len(data[0])
    if samp_min > n_points:
        raise ValueError(f"samp_min={samp_min} exceeds the number of data points ({n_points})")

    rng = np.random.default_rng(seed)
    best_model = None
    best_inlier_idxs = []
    best_error = float("inf")

    for _ in range(max_iter):
        sample_indices = rng.choice(n_points, samp_min, replace=False)
        sample = [singledata[sample_indices] for singledata in data]

        model = fit_func(sample)

        errors = loss_func(model, data)

        inlier_idxs = np.where(errors < inlier_thres)[0]
        n_inliers = len(inlier_idxs)
        n_best = len(best_inlier_idxs)
        # Candidates that cannot replace the current best are dropped before the
        # cost is computed, so cost_func never sees a rejected (possibly empty)
        # inlier set.
        if n_inliers < inlier_min or n_inliers < n_best:
            continue

        inliers = [singledata[inlier_idxs] for singledata in data]
        total_error = cost_func(model, inliers)

        # Here n_inliers >= n_best, so a non-larger set is an exact tie.
        if n_inliers > n_best or total_error < best_error:
            best_model = model
            best_inlier_idxs = inlier_idxs
            best_error = total_error
    if best_model is None:
        raise ValueError("No valid model found after RANSAC")
    return best_model, best_inlier_idxs

# Hard-coded 2-D demo points (each reshaped to a column vector) used to try
# out the ransac implementation above on a line fit.
X = np.array([-0.848,-0.800,-0.704,-0.632,-0.488,-0.472,-0.368,-0.336,-0.280,-0.200,-0.00800,-0.0840,0.0240,0.100,0.124,0.148,0.232,0.236,0.324,0.356,0.368,0.440,0.512,0.548,0.660,0.640,0.712,0.752,0.776,0.880,0.920,0.944,-0.108,-0.168,-0.720,-0.784,-0.224,-0.604,-0.740,-0.0440,0.388,-0.0200,0.752,0.416,-0.0800,-0.348,0.988,0.776,0.680,0.880,-0.816,-0.424,-0.932,0.272,-0.556,-0.568,-0.600,-0.716,-0.796,-0.880,-0.972,-0.916,0.816,0.892,0.956,0.980,0.988,0.992,0.00400]).reshape(-1,1)
y = np.array([-0.917,-0.833,-0.801,-0.665,-0.605,-0.545,-0.509,-0.433,-0.397,-0.281,-0.205,-0.169,-0.0531,-0.0651,0.0349,0.0829,0.0589,0.175,0.179,0.191,0.259,0.287,0.359,0.395,0.483,0.539,0.543,0.603,0.667,0.679,0.751,0.803,-0.265,-0.341,0.111,-0.113,0.547,0.791,0.551,0.347,0.975,0.943,-0.249,-0.769,-0.625,-0.861,-0.749,-0.945,-0.493,0.163,-0.469,0.0669,0.891,0.623,-0.609,-0.677,-0.721,-0.745,-0.885,-0.897,-0.969,-0.949,0.707,0.783,0.859,0.979,0.811,0.891,-0.137]).reshape(-1,1)

class LinearRegressor:
    """Ordinary least-squares linear model with an intercept term.

    ``params`` holds the fitted coefficients with the intercept first; it is
    ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.params = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit the coefficients on ``X`` (n x d) and ``y`` (n or n x k).

        Solved with ``np.linalg.lstsq`` rather than an explicit inverse of
        ``X.T @ X``: that is better conditioned and still returns a
        (minimum-norm) solution when the design matrix is rank deficient, e.g.
        when a random RANSAC sample contains identical x values.

        Returns:
            self, so calls can be chained.
        """
        r, _ = X.shape
        # Prepend a column of ones so the first coefficient is the intercept.
        X = np.hstack([np.ones((r, 1)), X])
        self.params = np.linalg.lstsq(X, y, rcond=None)[0]
        return self

    def predict(self, X: np.ndarray):
        """Evaluate the fitted model on ``X`` (n x d)."""
        r, _ = X.shape
        X = np.hstack([np.ones((r, 1)), X])
        return X @ self.params

def fitfunc(data):
    """RANSAC fit callback: fit a LinearRegressor on the sampled points.

    ``data[0]`` is passed as the inputs and ``data[1]`` as the targets; the
    fitted regressor is returned.
    """
    inputs, targets = data[0], data[1]
    # fit() returns the regressor itself, so it can be returned directly.
    return LinearRegressor().fit(inputs, targets)

def lossfunc(model, data):
    """RANSAC loss callback: absolute residual per point, flattened to 1-D.

    ``data[0]`` is fed to ``model.predict`` and compared with ``data[1]``.
    """
    inputs, targets = data[0], data[1]
    residuals = targets - model.predict(inputs)
    return np.abs(residuals).reshape(-1)

def costfunc(model, data):
    """RANSAC cost callback: square root of the summed squared residuals.

    ``data[0]`` is fed to ``model.predict`` and compared with ``data[1]``.
    """
    inputs, targets = data[0], data[1]
    residuals = targets - model.predict(inputs)
    squared_total = np.sum(residuals ** 2)
    return squared_total ** 0.5

# Plain least-squares fit on all points, as a baseline for the RANSAC fit.
reg = LinearRegressor().fit(X, y)

import matplotlib.pyplot as plt

# plt.scatter(X, y)
# line = np.linspace(np.min(X), np.max(X), num=100).reshape(-1, 1)
# plt.plot(line, reg.predict(line), c="peru")

In [134]:
# Robust line fit on the demo points. The seed is fixed so the random sampling
# inside ransac gives the same model and inliers on every run.
model, inlieridxs = ransac(X, y, fit_func=fitfunc, loss_func=lossfunc, cost_func=costfunc, samp_min=10, inlier_min=10, inlier_thres=0.2, seed=0)
# plt.scatter(X[inlieridxs], y[inlieridxs])
# line = np.linspace(np.min(X), np.max(X), num=100).reshape(-1, 1)
# plt.plot(line, model.predict(line), c="peru")

In [102]:
# The minimizer of this cost has a closed form, but it is not derived here;
# the scale is found by gradient descent on this function instead.
def variance_from_scale(scale, data):
    """Return the summed per-axis variance of the object centers for a scale.

    ``data[0]`` holds the camera matrices and ``data[1]`` the object matrices.
    The translation column of every camera matrix is multiplied by ``scale``;
    the homogeneous origin is then pushed through each object matrix and the
    matching rescaled camera matrix. The result is the sum of the variances
    along the three axes, i.e. the trace of the covariance matrix.
    """
    cam_mats = jnp.array(data[0])
    obj_mats = jnp.array(data[1])
    origin_h = jnp.array([0, 0, 0, 1.0])
    cams_rescaled = cam_mats.at[:, 0:3, 3].multiply(scale)
    obj_centers = (cams_rescaled @ obj_mats @ origin_h)[:, :3]
    return jnp.var(obj_centers, axis=0).sum()

class ScaleCentroidModel():
    """Scale that makes the per-camera object centers agree as well as possible.

    ``data`` is always a pair ``(cam2world, obj2cam)`` of stacked 4x4 matrices
    (n x 4 x 4 each). After ``fit``, ``scale`` is the factor applied to the
    camera translations and ``mean`` is the mean of the predicted centers.
    """

    def __init__(self):
        self.scale = None
        self.mean = None

    def __call__(self, data):
        """Shorthand for ``predict(data)``."""
        return self.predict(data)

    def fit(self, data):
        """Find the scale minimizing the summed per-axis variance of the centers.

        Each predicted center is ``a_i + scale * b_i``, where ``a_i`` is the
        camera rotation applied to the object translation and ``b_i`` is the
        camera translation (times the object's homogeneous coordinate). The
        summed variance is therefore quadratic in ``scale`` and its minimizer
        is ``-sum(da . db) / sum(db . db)``, with ``da``/``db`` the deviations
        of ``a``/``b`` from their means. This replaces an unbounded fixed-step
        gradient descent with the exact solution.

        When all ``b_i`` coincide, the scale does not affect the variance and
        is left at 1.0.

        Returns:
            self, so calls can be chained.
        """
        cam_mats = np.asarray(data[0], dtype=float)
        obj_mats = np.asarray(data[1], dtype=float)
        # Last column of each object matrix = image of the homogeneous origin.
        obj_origin_h = obj_mats[:, :, 3]
        fixed_part = np.einsum("nij,nj->ni", cam_mats[:, :3, :3], obj_origin_h[:, :3])
        scaled_part = cam_mats[:, :3, 3] * obj_origin_h[:, 3:4]
        fixed_dev = fixed_part - fixed_part.mean(axis=0)
        scaled_dev = scaled_part - scaled_part.mean(axis=0)
        denom = float(np.sum(scaled_dev * scaled_dev))
        if denom > 1e-12:
            self.scale = float(-np.sum(fixed_dev * scaled_dev) / denom)
        else:
            self.scale = 1.0
        centroids = self.predict(data)
        self.mean = np.mean(centroids, axis=0)
        return self

    def predict(self, data):
        """Return the n x 3 object centers for ``data`` under the fitted scale."""
        cam2worlds = data[0]
        obj2cams = data[1]
        # Work on a copy so the caller's camera matrices are not modified.
        scaledcamTs = np.copy(cam2worlds)
        scaledcamTs[:, 0:3, 3] *= self.scale
        # The last operand is a jnp vector, so the product is presumably
        # returned as a JAX array rather than a NumPy one.
        centershom = scaledcamTs @ obj2cams @ jnp.array([0, 0, 0, 1.0])
        centers = centershom[:, :3]
        return centers

# data = (cam2world nx4x4, obj2cam nx4x4)
def fitcams(data):
    """RANSAC fit callback: fit a ScaleCentroidModel on the sampled poses."""
    # fit() returns the model itself, so it can be returned directly.
    return ScaleCentroidModel().fit(data)

def camloss(model, data):
    """RANSAC loss callback: distance of each predicted center to ``model.mean``."""
    offsets = model(data) - model.mean
    return np.linalg.norm(offsets, axis=1)

def camcost(model, data):
    """RANSAC cost callback: summed per-axis variance of the predicted centers."""
    per_axis_var = jnp.var(model(data), axis=0)
    return jnp.sum(per_axis_var)

# Robust scale estimate over the cameras; data = (cam2world, obj2cam) stacks.
# The seed is fixed so the random sampling inside ransac gives the same scale
# and inlier set on every run.
model, inlieridxs = ransac(
    cams.world_from_cam.matrix4x4,
    obj2cams,
    fit_func=fitcams,
    loss_func=camloss,
    cost_func=camcost,
    samp_min=5,
    inlier_min=5,
    inlier_thres=0.15,
    max_iter=50,
    seed=0,
)

In [None]:
# Display the fitted scale and the indices of the inlier cameras.
model.scale, inlieridxs

In [None]:
# scaled 3d
# Same overlay as the manual-scale cell, but using the RANSAC-fitted scale and
# only the cameras / poses selected as inliers.
scalefactor = float(model.scale)
# NOTE(review): np.eye(4) * scalefactor also scales the bottom-right entry;
# presumably from_matrix only reads the upper 3x4 part — confirm.
scaleT = v3d.Transform.from_matrix(np.eye(4) * scalefactor)
tofig = []
camsfilt = cams[inlieridxs]
# Only the translation of each inlier camera pose is scaled.
camsfilt = camsfilt.replace(world_from_cam=camsfilt.world_from_cam.replace(t=camsfilt.world_from_cam.t * scalefactor))
sceneptsscaled = pts3d.apply_transform(scaleT)
barrels_trf = []  # transformed model point clouds, one per inlier camera
obj2camsfilt = obj2cams[inlieridxs]  # same index selection as camsfilt, so the pairs stay aligned
tofig.extend([sceneptsscaled, camsfilt])
for i, cam in enumerate(camsfilt):
    cam2world = cam.world_from_cam
    obj2cam = v3d.Transform.from_matrix(obj2camsfilt[i])
    rgb = np.zeros_like(vtxs, dtype=np.uint8)
    rgb[:, 0] = 255
    pts = v3d.Point3d(p=vtxs, rgb=rgb)
    pts = cam2world @ obj2cam @ pts
    barrels_trf.append(pts)
    tofig.extend([pts])
v3d.make_fig(*tofig)

In [None]:
# Number of objects passed to the figure above.
len(tofig)

In [None]:
# Scratch check of the row-wise "distance to the column mean" computation
# (the same norm-of-deviation pattern that camloss uses).
A = np.arange(1, 7).reshape(2, 3)
np.linalg.norm(A - A.mean(axis=0), axis=1)

In [None]:
from sklearn import datasets, linear_model
# NOTE(review): this rebinds the name `ransac`, shadowing the custom ransac()
# function defined earlier in this notebook; re-running the earlier cells that
# call ransac(...) after this one will fail until the function cell is re-run.
ransac = linear_model.RANSACRegressor()