## Acoustic Integration - Speaker Pose Estimation

http://localhost:8888/?token=sloth

In [None]:
# One-time session setup. The guard skips the whole body when the name `mi`
# already exists, so re-running this cell does not redraw the session seeds.
if "mi" not in vars():
    import numpy as np
    from tqdm import trange
    import matplotlib.pyplot as plt

    import drjit as dr
    import mitsuba as mi

    from libs import utils

    plt.style.use('ggplot')
    mi.set_log_level(mi.LogLevel.Warn)
    # NOTE(review): 'cuda_ad_acoustic' is presumably a custom acoustic build of
    # Mitsuba (CUDA + autodiff) — confirm it is available in the environment.
    mi.set_variant('cuda_ad_acoustic')

    # Two random base seeds for this session: later cells pass `sess_seed` as
    # `seed=` and `sess_seed_g` as `seed_grad=` to mi.render. They are printed
    # so that a run can be reproduced by hand.
    sess_seed   = np.random.randint(0, 2**30)
    sess_seed_g = np.random.randint(0, 2**30)
    print(f"session seeds are: sess_seed={sess_seed}; sess_seed_g={sess_seed_g}")

### Scene Construction

In [None]:
# Short alias for the scalar transform type used when placing sensors.
tf = mi.ScalarTransform4f

# One row per microphone: (x, y, z) in the same coordinates as "box_dim".
mic_poses = np.array([
    [2., 3., 3.],
    [23., 3., 2.],
    # [22., 3., 6.],  (alternative microphone position, currently disabled)
    [4., 10., 7.],
])

# Single source of truth for the scene; unpacked as keyword arguments into
# utils.shoebox_scene and utils.plot_hist by the cells below.
config = {
    # geometry
    "box_dim":     [25., 12., 8.],
    "mic_pos":     mic_poses[0],
    "speaker_pos": [12.,  6., 4.],
    "speaker_radius": 1.0,  # previously 0.1

    # wall material
    "absorption": 0.2,
    "scattering": 0.4,

    # histogram layout: one "wav" bin per microphone row
    "wav_bins":  len(mic_poses),
    "time_bins": 50,
    "max_time":  0.5,

    # integrator ("prb_acoustic" is the non-reparameterised alternative)
    "integrator": "prb_reparam_acoustic",
    "max_depth": 8,
    "spp": 2**20,
}

# NOTE: despite its name, `box_dim` holds HALF of config["box_dim"].
box_dim = 0.5 * np.array(config['box_dim'])
# Left edges of the time bins on [0, max_time).
time = np.linspace(
    start=0.,
    stop=config["max_time"],
    num=config["time_bins"],
    endpoint=False,
)

# The depth can alternatively be estimated from the room size:
# config["max_depth"] = utils.estimate_max_depth(config["box_dim"], config["max_time"], 1.2)
print(f"max_depth = {config['max_depth']}")

### Reference Histogram

In [None]:
# Build the shoebox scene description from the shared configuration.
scene_dict = utils.shoebox_scene(**config)

# Optional tweaks kept around for experiments:
# scene_dict["sensor"]["film"]["rfilter"] = {
#     "type": "gaussian",
#     "stddev": 0.3,
# }
# scene_dict["integrator"]["skip_direct"] = True
# scene_dict["integrator"]["reparam_max_depth"] = 8

# Drop the "microphoneA" entry returned by shoebox_scene and register one
# microphone per row of `mic_poses`, each shifted by `box_dim` (the half
# extents of the room).
scene_dict["sensor"].pop("microphoneA")
scene_dict["sensor"].update({
    f"microphone_{idx}": {
        "type": "microphone",
        "to_world": tf.translate(position - box_dim),
    }
    for idx, position in enumerate(mic_poses)
})

# Render the reference and plot slice 0 of its last axis.
scene = mi.load_dict(scene_dict)
img_ref = mi.render(scene, seed=sess_seed)
utils.plot_hist(img_ref[:, :, 0], **config)

### Optimization Setup

In [None]:
# Collect the scene's traversable parameters.
params = mi.traverse(scene)
# display(params)

# The speaker's vertex buffer is the parameter being optimised. Keep an
# unravelled (Point3f) copy of its initial value as the reference geometry
# that apply_transform() translates.
key = 'speaker.vertex_positions'
vertex_pos_ref = dr.unravel(mi.Point3f, params[key])

In [None]:
# Adam optimizer over a single latent variable 's': the translation applied to
# the reference speaker vertices. A zero offset reproduces the reference
# geometry, so the optimisation starts from a non-zero offset.
opt = mi.ad.Adam(lr=0.01)
opt['s'] = mi.Vector3f(-0.9, 0.4, 1.2)

def apply_transform():
    """Clamp the latent offset and write the translated speaker mesh to the scene.

    Works entirely on notebook globals: reads and rewrites ``opt['s']``, reads
    ``vertex_pos_ref`` and ``key``, and updates ``params``. Returns nothing.
    """
    # Keep every component of the offset inside [-3, 3].
    opt['s'] = dr.clamp(opt['s'], -3.0, 3.0)
    # Optional constraints for lower-dimensional experiments:
    #   opt['s'].y = opt['s'].z = 0.0
    #   opt['s'].z = 0.0

    # Translate the reference vertices by the offset currently stored in the
    # optimizer, then flatten them back into the scene parameter.
    shifted_vertices = mi.Transform4f.translate(opt['s']) @ vertex_pos_ref
    params[key] = dr.ravel(shifted_vertices)
    params.update()

In [None]:
# Render the scene with the initial (offset) speaker position, using the same
# seed as the reference render, and plot slice 0 of the last axis.
apply_transform()
img = mi.render(scene, seed=sess_seed)
utils.plot_hist(img[:, :, 0], **config)

### Visualize Gradient

In [None]:
# Render the current estimate (no `params` passed to mi.render here).
apply_transform()
img = mi.render(scene, seed=sess_seed)

# Track gradients on the rendered tensor itself, so that the backward pass
# yields d(loss)/d(img) rather than a gradient w.r.t. the speaker offset.
dr.enable_grad(img)
loss = utils.mse(img, img_ref)
dr.backward(loss)

# Largest absolute entry of the loss gradient w.r.t. the rendered tensor.
grad = mi.TensorXf(dr.grad(img))
print(dr.max(dr.abs(grad)))

utils.plot_hist(grad[:, :, 0], abs=True, **config)

### Main Loop

In [None]:
vals, grads, losses = [], [], []

%matplotlib ipympl
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 3))

ax1.set_title("losses")
ax1.set_xlim(-1, 51)
ax1.set_ylim(-0.1, 2.1)

ax2.set_title("values")
ax2.set_xlim(-1, 51)
ax2.set_ylim(-0.1, 1.1)

ax3.set_title("gradients")
ax3.set_xlim(-1, 51)
ax3.set_ylim(-0.1, 1.1);

In [None]:
# Number of optimisation steps to run in this cell. Setting it to 1 turns the
# cell into a debugging run: the offset and its gradient are displayed and no
# optimizer step is taken.
iters = 150
if iters > 1:
    # Total history length once this cell finishes (earlier runs + this one);
    # used to size the x-axis of all three plots.
    n  = len(vals) + iters

for i in trange(iters):
    apply_transform()
    # Vary both seeds per iteration so successive renders are decorrelated.
    img = mi.render(scene, params, seed=sess_seed+i, seed_grad=sess_seed_g+i)

    l = utils.mse(img, img_ref)
    # The debugging run passes ClearNone to the backward pass; regular runs use
    # the default AD flags.
    dr.backward(l, flags=dr.ADFlag.ClearNone if iters < 2 else dr.ADFlag.Default)

    if iters < 2:
        display(opt['s'])
        display(dr.grad(opt['s']))
        # display(dr.epsilon(mi.Float))
    else:
        # Record the state *before* stepping, so that value, loss and gradient
        # of one history entry belong to the same iterate.
        vals.append(opt['s'].numpy()[0])
        losses.append(l[0])
        grads.append(dr.grad(opt['s']).numpy()[0])

        opt.step()

        ax1.clear()
        ax1.set_title("losses")
        ax1.set_xlim(-n * 0.02, n * 1.02)
        ax1.plot(np.array(losses))

        ax2.clear()
        ax2.set_title("values")
        # Fixed: the left limit used `-0 * 0.02`, which left this panel without
        # the left margin the other two panels have.
        ax2.set_xlim(-n * 0.02, n * 1.02)
        # ax2.set_xlim(-1.1, 1.1)
        ax2.set_ylim(-2.1, 2.1)
        ax2.plot(np.array(vals))
        # 2-D trajectory alternative:
        # ax2.plot(np.array(vals)[:, 0], np.array(vals)[:, 1])
        # ax2.scatter(np.array(vals)[-1, 0], np.array(vals)[-1, 1])

        ax3.clear()
        ax3.set_title("gradients")
        ax3.set_xlim(-n * 0.02, n * 1.02)
        ax3.plot(np.array(grads), label=["x", "y", "z"])
        ax3.legend()

        fig.canvas.draw()

In [None]:
# Freeze the recorded history into arrays: L = losses, V = values, G = gradients.
L, V, G = (np.array(history) for history in (losses, vals, grads))
# Optional persistence of the run (pick the file names matching the experiment):
# np.save("../data/emitter-pos/emitter-pos-1_1-no-gauss.npy", np.stack([V, G]))
# np.save("../data/emitter-pos/emitter-pos-1_1-no-gauss-losses.npy", L)
# np.save("../data/emitter-pos/emitter-pos-1_1-stddev-0_1.npy", np.stack([V, G]))
# np.save("../data/emitter-pos/emitter-pos-1_1-stddev-0_1-losses.npy", L)

In [None]:
# Distance between the reference speaker centre and the estimated one.
#
# Fixed: the reference used config["mic_pos"], i.e. it measured the distance
# from the first microphone to the speaker instead of the estimation error.
#
# NOTE(review): assumes the speaker is placed at `speaker_pos - box_dim / 2`,
# the same shift this notebook applies to the microphones — confirm against
# utils.shoebox_scene.
speaker_pos_ref = np.array(config["speaker_pos"]) - (np.array(config["box_dim"]) / 2.)
# Estimated centre: mean of the reference vertices translated by the offset.
speaker_pos_est = np.mean((vertex_pos_ref + opt['s']).numpy(), axis=0)
np.linalg.norm(speaker_pos_ref - speaker_pos_est)

### Finite Differences

In [None]:
# Compare the forward-mode derivative of the rendered histogram with a
# finite-difference estimate, both taken at offset `s` along `direction`.
s = mi.Vector3f(0.0, 0.0, 0.0)

# Fixed: the forward pass used a tangent along y while the finite difference
# stepped along x. Both now share one direction; change it here to probe
# another axis.
direction = mi.Vector3f(1., 0., 0.)
# Finite-difference step size.
eps = 1e-2

# forward pass
opt['s'] = s
apply_transform()
# Seed the tangent on the optimizer variable after apply_transform() has
# reassigned it.
dr.set_grad(opt['s'], direction)

img  = mi.render(scene, params, seed=sess_seed, seed_grad=sess_seed_g)
grad = dr.forward_to(img)

# finite differences
opt['s'] = s
apply_transform()
# Fixed: the baseline was rendered without `seed=sess_seed`, so the difference
# below mixed two different noise realisations.
img = mi.render(scene, seed=sess_seed)

opt['s'] = s + eps * direction
apply_transform()
# Fixed: divide by the step so the result is a derivative estimate on the same
# scale as `grad`, not a raw difference.
finite_difference = (mi.render(scene, seed=sess_seed) - img) / eps

utils.plot_hist(grad[:, :, 0], **config)
utils.plot_hist(finite_difference[:, :, 0], **config)