## Acoustic Integration - Speaker Pose Estimation

http://localhost:8888/?token=sloth

In [None]:
# One-time notebook setup. Once `mi` exists in the namespace the whole body is
# skipped, so re-running this cell neither re-imports anything nor draws new
# session seeds.
if "mi" not in vars():
    import numpy as np
    from tqdm import trange
    import matplotlib.pyplot as plt

    import drjit as dr
    import mitsuba as mi

    from libs import utils

    plt.style.use('ggplot')
    mi.set_log_level(mi.LogLevel.Warn)
    # Variant used for every scene loaded and rendered in this notebook.
    mi.set_variant('cuda_ad_acoustic')

    # Random per-session seeds: `sess_seed` is passed as `seed` and
    # `sess_seed_g` as `seed_grad` to mi.render(). They are printed so that a
    # session can be reproduced later.
    sess_seed   = np.random.randint(0, 2**30)
    sess_seed_g = np.random.randint(0, 2**30)
    print(f"session seeds are: sess_seed={sess_seed}; sess_seed_g={sess_seed_g}")

### Scene Construction

In [None]:
# Scene and rendering settings. The whole mapping is forwarded as keyword
# arguments to utils.shoebox_scene() when a scene description is built.
config = dict(
    # Geometry
    box_dim=[25., 12., 8.],
    mic_pos=[4., 2., 4.],
    speaker_pos=[21.15, 2., 4.],
    speaker_radius=0.1,

    # Wall material
    absorption=0.2,
    scattering=0.2,

    # Histogram layout
    wav_bins=1,
    time_bins=100,  # alternative used during experiments: 4
    max_time=0.1,

    # Rendering
    integrator="prb_reparam_acoustic",  # alternative: "prb_acoustic"
    max_depth=1,
    spp=2**18,  # alternative for quick runs: 2**4
)

# config["max_depth"] = utils.estimate_max_depth(config["box_dim"], config["max_time"], 1.2)
print("max_depth = {}".format(config["max_depth"]))

In [None]:
def scene_dict_wrapper(filters=None):
    """Build the scene description used throughout this notebook.

    Starts from ``utils.shoebox_scene(**config)``, removes its ``main_box``
    and ``shoebox`` entries, and adds a microphone placed at
    ``config['mic_pos']`` that looks towards ``config['speaker_pos']``.

    Args:
        filters: Standard deviation of a Gaussian reconstruction filter that
            is set on the sensor's film. With ``None`` (the default) the film
            is left as returned by ``utils.shoebox_scene``.

    Returns:
        The modified scene dictionary, ready for ``mi.load_dict``.
    """
    scene_desc = utils.shoebox_scene(**config)
    for entry in ("main_box", "shoebox"):
        del scene_desc[entry]

    sensor = scene_desc["sensor"]
    if filters is not None:
        sensor["film"]["rfilter"] = {"type": "gaussian", "stddev": filters}

    # Positions in `config` are shifted by half the box dimensions before use
    # (presumably because the scene is centred on the origin - confirm against
    # utils.shoebox_scene).
    half_extent = np.array(config['box_dim']) / 2.
    mic_origin = np.array(config['mic_pos']) - half_extent
    speaker_target = np.array(config['speaker_pos']) - half_extent

    sensor["microphoneA"] = {
        "type": "microphone",
        "cos_cutoff": 0.9,
        "to_world": mi.ScalarTransform4f.look_at(
            origin=mic_origin,
            target=speaker_target,
            up=[0, 1, 0]
        ),
    }
    return scene_desc

### Reference Histogram

In [None]:
# Render the reference histogram with the speaker at its configured position.
# No reconstruction filter is used here; pass e.g. filters=0.11 to enable one.
scene_dict = scene_dict_wrapper(filters=None)
scene = mi.load_dict(scene_dict)
img_ref = mi.render(scene, seed=sess_seed)

# Bin edges on the time axis: time_bins + 1 edges spanning [0, max_time].
t = np.linspace(0., config["max_time"], num=config["time_bins"] + 1)

plt.stairs(
    img_ref[:, 0, 0].numpy(),
    edges=t,
    baseline=0.1,
    color='C1',
    fill=True,
)
plt.xticks(np.linspace(0., config["max_time"], num=11))
plt.xlim(0.03, 0.07)
plt.xlabel("time in $s$")
plt.ylabel("energy")
plt.show()
# utils.plot_hist(img_ref[:, :, 0], **config)

### Optimization Setup

In [None]:
# Collect the scene's parameters so the speaker mesh can be modified.
params = mi.traverse(scene)
# display(params)

# Unravel the speaker's flat vertex buffer into 3D points and keep them as the
# reference positions; apply_transform() translates these and writes the
# result back to params[key].
key = 'speaker.vertex_positions'
vertex_pos_ref = dr.unravel(mi.Point3f, params[key])

In [None]:
# Adam optimizer whose single variable 's' is the translation applied to the
# speaker mesh.
opt = mi.ad.Adam(lr=0.01)
opt['s'] = mi.Vector3f(0.09, 0.0, 0.0)


def apply_transform():
    """Move the speaker mesh by the translation currently stored in ``opt['s']``.

    The translation is first clamped to [-3, 3] per component and its y and z
    components are zeroed, so only the x offset takes effect. The translated
    reference vertices are written to ``params[key]`` and the scene parameters
    are updated.
    """
    opt['s'] = dr.clamp(opt['s'], -3.0, 3.0)
    # Restrict the motion to the x axis.
    opt['s'].y = 0.0
    opt['s'].z = 0.0

    shifted = mi.Transform4f.translate(opt['s']) @ vertex_pos_ref
    params[key] = dr.ravel(shifted)
    params.update()

In [None]:
# Render and plot the histogram for the current (initial) speaker offset,
# using the same seed and axes as the reference plot.
apply_transform()
img = mi.render(scene, seed=sess_seed)

plt.stairs(
    img[:, 0, 0].numpy(),
    edges=t,
    baseline=0.1,
    color='C1',
    fill=True,
)
plt.xticks(np.linspace(0., config["max_time"], num=11))
plt.xlim(0.03, 0.07)
plt.xlabel("time in $s$")
plt.ylabel("energy")
plt.show()
# utils.plot_hist(img[:, :, 0], **config)

### Visualize Gradient

In [None]:
# Re-apply the current speaker offset and render the perturbed histogram.
apply_transform()
img = mi.render(scene, seed=sess_seed)

# Enable gradient tracking on the rendered tensor itself, so that
# backpropagating the loss leaves the derivative of the loss with respect to
# each histogram entry in dr.grad(img).
dr.enable_grad(img)
loss = utils.mse(img, img_ref)  # presumably the mean squared error - see libs.utils
dr.backward(loss)

grad = mi.TensorXf(dr.grad(img))
# Largest absolute entry of the image-space gradient.
print(dr.max(dr.abs(grad)))

# Plot the gradient per time bin for the first channel.
plt.stairs(grad[:, 0, 0].numpy(), edges=t, fill=True, color='C1')
plt.show()
# utils.plot_hist(grad[:, :, 0], abs=True, **config)

### Main Loop

In [None]:
%matplotlib ipympl

# History buffers that the optimization loop appends to: the parameter value,
# the loss and the parameter gradient of every recorded iteration.
vals, losses, grads = [], [], []

# Two side-by-side panels that the optimization loop redraws each iteration.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.set_title("losses")
ax1.set_xlim(-1, 51)

ax2.set_title("values")
# The x-range belongs to the "values" panel (ax2); both panels share the same
# initial iteration range.
ax2.set_xlim(-1, 51)
ax2.set_ylim(-1.1, 1.1);

In [None]:
# Restart the optimization from a fresh state: reset the optimizer's state for
# 's', lower the learning rate and set the starting offset along x.
opt.reset('s')
opt.set_learning_rate(0.0001)
opt['s'] = mi.Vector3f([0.088, 0., 0.])

In [None]:
# Number of iterations for this run. With iters < 2 the cell acts as a
# single-step inspection mode: it displays the parameter and its gradient, and
# neither records history nor steps the optimizer.
iters = 50
if iters > 1:
    # Total number of recorded iterations once this run finishes; used for the
    # x-range of both live plots.
    n  = len(vals) + iters

for i in trange(iters):
    apply_transform()
    # Both seeds are offset by the iteration index.
    img = mi.render(scene, params, seed=sess_seed+i, seed_grad=sess_seed_g+i)
    l = utils.mse(img, img_ref)
    # NOTE(review): ClearNone in inspection mode presumably keeps the AD graph
    # intact for further manual inspection - confirm against the Dr.Jit docs.
    dr.backward(l, flags=dr.ADFlag.ClearNone if iters < 2 else dr.ADFlag.Default)
    # dr.set_grad(img, -1.)
    # dr.backward_to(opt['s'])

    if iters < 2:
        display(opt['s'])
        display(dr.grad(opt['s']))
        # display(dr.epsilon(mi.Float))
    else:
        # Record the state *before* the optimizer step so that value, loss and
        # gradient of one entry all belong to the same parameter value.
        vals.append(opt['s'].numpy()[0])
        losses.append(l[0])
        grads.append(dr.grad(opt['s']).numpy()[0])

        opt.step()

        # Redraw both panels from scratch with the full history so far.
        ax1.clear()
        ax1.set_title("losses")
        ax1.set_xlim(-n * 0.02, n * 1.02)
        ax1.plot(np.array(losses))

        ax2.clear()
        ax2.set_title("values")
        ax2.set_xlim(-n * 0.02, n * 1.02)
        ax2.set_ylim(-0.21, 0.2)
        ax2.plot(np.array(vals))

        fig.canvas.draw()

### Filter width heatmap

In [None]:
# Axes of the gradient heatmap: n speaker offsets and n - 1 filter widths (the
# heatmap's first column is rendered without a filter).
n = 21
fs_vals = np.linspace(0.15, 0.625, num=n - 1)
# fs_vals = np.linspace(20, 180, n, endpoint=True)
offsets = np.linspace(-0.06, 0.14, num=n)
fs_vals, offsets

In [None]:
# Gradient heatmap: res[j, i] holds the x component of the loss gradient with
# respect to the speaker offset, for offset offsets[j] (row) and filter
# setting i (column).
res = np.zeros((n, n))

for i in trange(n):
    # config["time_bins"] = int(fs_vals[i])
    # scene_dict = scene_dict_wrapper(filters=0.3)

    config["time_bins"] = 100
    # Column 0 uses no reconstruction filter; column i > 0 uses fs_vals[i-1].
    scene_dict = scene_dict_wrapper(filters=(None if i == 0 else fs_vals[i-1]))

    # Rebind the notebook-level scene/params/opt: apply_transform() reads
    # `params` and `opt` from the global namespace, so it operates on the scene
    # built for this column.
    # NOTE(review): `vertex_pos_ref` still comes from the scene loaded earlier;
    # this assumes the speaker geometry is identical in every scene - confirm.
    scene   = mi.load_dict(scene_dict)
    # Reference is rendered before any offset is applied to this scene.
    img_ref = mi.render(scene, seed=0)
    params  = mi.traverse(scene)
    opt     = mi.ad.Adam(lr=0.01)

    for j in range(n):
        # A fresh variable per offset; the optimizer is never stepped here, it
        # only serves as the container apply_transform() expects.
        opt['s']   = mi.Vector3f(offsets[j], 0.0, 0.0)
        apply_transform()

        img = mi.render(scene, params, seed=0, seed_grad=1)
        l = utils.mse(img, img_ref)

        dr.backward(l)
        res[j, i] = dr.grad(opt['s']).numpy()[0, 0]

In [None]:
# Prepend 0 as the axis value of the unfiltered first heatmap column, so that
# fs_vals has one entry per column. The length guard keeps this cell
# idempotent: re-running it must not keep prepending zeros, which would shift
# the tick labels relative to the n columns.
if len(fs_vals) == n - 1:
    fs_vals = np.insert(fs_vals, 0, 0.)
fs_vals

In [None]:
fig, ax = plt.subplots(1, 1)

# Diverging colormap with symmetric limits, so a zero gradient sits at the
# centre of the colour scale.
aximg = ax.imshow(res, origin='lower', cmap='seismic', interpolation='none', vmin=-0.6, vmax=0.6)
ax.grid(False)

# `k` is only needed by the alternative tick layout kept below.
k = np.insert(np.arange(1, n, dtype=np.int32)[1::3], 0, 0)
# ax.set_xticks(k, np.round(fs_vals, 3)[k])
# Label every 4th column. Three significant digits keep fractional filter
# widths readable (an integer cast would truncate them all to 0) while whole
# numbers such as sampling rates still print without a decimal part.
ax.set_xticks(np.arange(n)[::4], [f"{v:.3g}" for v in fs_vals[::4]])
ax.set_yticks(np.arange(n)[::2], np.round(offsets, 2)[::2])

# Raw string: "\s" is not a valid escape sequence in a normal string literal.
ax.set_xlabel(r"$\sigma$/sampling rate")
ax.set_ylabel("Offset in $m$")

fig.colorbar(aximg, fraction=0.03)
fig.show()

In [None]:
# np.save("../data/emitter-pos/emitter-pos-heatmap-stddev.npy", res)
# np.save("../data/emitter-pos/emitter-pos-heatmap-stddev-ticks.npy", np.stack([fs_vals, offsets]))
# np.save("../data/emitter-pos/emitter-pos-heatmap-fs.npy", res)
# np.save("../data/emitter-pos/emitter-pos-heatmap-fs-ticks.npy", np.stack([fs_vals, offsets]))

### Thesis data

In [None]:
# Histograms exported for the thesis. Note that `config`, `scene_dict` and
# `scene` are notebook-level names and keep the values of the last setup.

# (a) speaker radius 0.1 at x = 21.15, no reconstruction filter.
config.update(speaker_radius=0.1, speaker_pos=[21.15, 2., 4.])
scene_dict = scene_dict_wrapper()
scene = mi.load_dict(scene_dict)
img_a = mi.render(scene, seed=sess_seed).numpy()[:, 0, 0]

# (b) speaker radius 1.0 at x = 22.15, no reconstruction filter.
config.update(speaker_radius=1.0, speaker_pos=[22.15, 2., 4.])
scene_dict = scene_dict_wrapper()
scene = mi.load_dict(scene_dict)
img_b = mi.render(scene, seed=sess_seed).numpy()[:, 0, 0]

# (c) same speaker as (b), with a Gaussian film filter of stddev 0.25.
scene_dict = scene_dict_wrapper(filters=0.25)
scene = mi.load_dict(scene_dict)
img_c = mi.render(scene, seed=sess_seed).numpy()[:, 0, 0]

# np.save('../data/emitter-pos/emitter-pos-histograms.npy', np.stack([img_a, img_b, img_c]))