## Acoustic Integration - Room Size Estimation

http://localhost:8888/?token=sloth

In [None]:
# One-time session setup. The guard skips the whole cell when the name `mi`
# is already bound, so re-executing the cell does not repeat the imports,
# the variant selection or the seed assignment.
if "mi" not in vars():
    from tqdm import trange

    # import torch
    import numpy as np
    import matplotlib.pyplot as plt

    import drjit as dr
    import mitsuba as mi

    # Project-local helpers (scene construction, plotting, acoustic metrics).
    from libs import utils, acoustic_torch

    plt.style.use('ggplot')

    # Select the Mitsuba variant and reduce log output to warnings and above.
    mi.set_variant('cuda_ad_acoustic')
    mi.set_log_level(mi.LogLevel.Warn)

    # NOTE(review): project helper; what the `False` argument toggles is not
    # visible from this file - confirm against libs.utils.
    utils.drjit_turn_off_optimizations(False)

    # Fixed seeds for the session; the trailing comments keep the random
    # alternative. Both are passed to mi.render (seed / seed_grad) further down.
    sess_seed   = 0 #np.random.randint(0, 2**30)
    sess_seed_g = 1 #np.random.randint(0, 2**30)
    print(f"session seeds are: sess_seed={sess_seed}; sess_seed_g={sess_seed_g}")

### Scene Construction

In [None]:
# Absorption coefficients; the number of entries also fixes "wav_bins" below.
absorption = [0.2, 0.4]

config = {
    # Room dimensions and transducer placement.
    "box_dim":     [25., 12., 7. ],
    "mic_pos":     [11.,  6., 3.5],
    "speaker_pos": [14.,  6., 3.5],
    "speaker_radius": 0.5,  # alternative value: 0.1

    # Surface properties; absorption is a list of (1-based index, value) pairs.
    "absorption": list(enumerate(absorption, start=1)),
    "scattering": 0.2,

    # Histogram resolution.
    "wav_bins":  len(absorption),  # x
    "time_bins": 150,              # y
    "max_time":  1.5,

    # Integrator settings; "prb_acoustic" is the alternative integrator name.
    "integrator": "prb_reparam_acoustic",
    "max_depth": 50,
    "spp": 2**18,
}

# Bins per unit of time, and the start time of every histogram bin.
n_time_bins, t_max = config["time_bins"], config["max_time"]
fs = n_time_bins / t_max
time = np.linspace(0., t_max, n_time_bins, endpoint=False)

# "max_depth" could alternatively be derived with
# utils.estimate_max_depth(config["box_dim"], config["max_time"], 1.5).
print(f"max_depth = {config['max_depth']}")

### Sabine's Equation

In [None]:
# Sabine estimate of the reverberation time for every absorption value:
#   T60 = (24 * ln(10) / 343) * V / (absorption * S)
box_dim = np.array(config["box_dim"])
dim_x, dim_y, dim_z = box_dim

# Room volume and total surface area of the six walls.
V  = np.prod(box_dim)
S  = 2 * (dim_x * dim_y + dim_y * dim_z + dim_z * dim_x)

# Sabine constant for the hard-coded speed value 343.
sm = 24 * np.log(10) / 343.

t60 = sm * V / (np.array(absorption) * S)
t60

### Reference Histogram

In [None]:
# Build the scene description from the config (project helper), then adjust
# two integrator settings before loading it.
scene_dict = utils.shoebox_scene(**config, connected_cube=False)
scene_dict["integrator"]["skip_direct"] = False
scene_dict["integrator"]["reparam_max_depth"] = 16
# Optional Gaussian reconstruction filter on the film (disabled).
# scene_dict["sensor"]["film"]["rfilter"] = {
#     "type": "gaussian",
#     "stddev": 0.1 * 343. * config["max_time"] / config["time_bins"],
# }

scene = mi.load_dict(scene_dict)

# Reference render taken with the unscaled geometry; used as the optimisation
# target in the cells below.
img_ref = mi.render(scene)
# utils.plot_hist(img_ref[:, :, 0], **config)

### Optimization Setup

In [None]:
# Differentiable scene parameters (use display(params) to inspect them).
params = mi.traverse(scene)

# Snapshot of the reference vertex positions of every shoebox face, keyed by
# the scene-parameter name, so the scaling is always applied to the original
# geometry. (A single-mesh variant would read "shoebox.vertex_positions".)
faces = ['back', 'front', 'left', 'right', 'top', 'bottom']
vertex_pos_ref = {
    key_g: dr.unravel(mi.Point3f, params[key_g])
    for key_g in (f"shoebox.{f}.vertex_positions" for f in faces)
}

In [None]:
# Adam optimiser holding the single optimised variable 's', the scale factor
# consumed by apply_transform.
opt = mi.ad.Adam(lr=0.01)
# opt['s'] = mi.Vector3f(1.0, 1.0, 1.0)
# Initial value: 1.5 in every component.
opt['s'] = mi.Point3f(1.5)

def apply_transform(params_to_update):
    """Write the scaled shoebox geometry into the scene parameters.

    The optimised variable ``opt['s']`` is first clamped to ``[0.5, 2.0]``.
    Every reference vertex buffer in ``vertex_pos_ref`` is then scaled by it
    and stored under the same key in ``params_to_update``.

    Args:
        params_to_update: Scene-parameter container (as returned by
            ``mi.traverse``). It is modified in place and its ``update()``
            method is called at the end.
    """
    opt['s'] = dr.clamp(opt['s'], 0.5, 2.0)

    # The scale transform is identical for all faces, so build it once
    # instead of once per face.
    scaling = mi.Transform4f.scale(opt['s'])
    for param_key, ref_vertices in vertex_pos_ref.items():
        params_to_update[param_key] = dr.ravel(scaling @ ref_vertices)

    params_to_update.update()

In [None]:
# Acoustic metrics of channel 0 of the reference histogram, computed with the
# project's acoustic_torch helpers. They are only referenced by the
# commented-out alternatives inside loss().
# NOTE(review): the names suggest clarity (C80), definition (D50), centre time
# (TS), energy decay curve (EDC) and reverberation time (T30) - confirm
# against libs.acoustic_torch.
c80_ref = acoustic_torch.C(img_ref[:, :, 0], fs=fs)
d50_ref = acoustic_torch.D(img_ref[:, :, 0], fs=fs)
ts_ref  = acoustic_torch.TS(mi.TensorXf(time), img_ref[:, :, 0])
edc_ref = acoustic_torch.EDC(img_ref[:, :, 0], db=True, norm=True)
t30_ref = acoustic_torch.T(mi.TensorXf(time), edc_ref)

def loss(hist, hist_ref=None):
    """Return the mean squared error between channel 0 of two histograms.

    Args:
        hist: Rendered histogram, indexed as ``hist[:, :, 0]``.
        hist_ref: Reference histogram with the same layout. When omitted
            (``None``) the module-level reference render ``img_ref`` is used,
            so ``loss(hist)`` works instead of failing on ``None[:, :, 0]``.

    Returns:
        The value of ``utils.mse`` for the two channel-0 slices.
    """
    # Alternative objectives based on acoustic metrics (e.g. comparing
    # acoustic_torch.T of the normalised EDC against t30_ref) were tried here;
    # the plain histogram MSE is the one in use.
    if hist_ref is None:
        hist_ref = img_ref
    return utils.mse(hist[:, :, 0], hist_ref[:, :, 0])

In [None]:
# Render the scene with the current (clamped) scale factor and plot channel 0
# of the resulting histogram.
apply_transform(params)
img = mi.render(scene)
utils.plot_hist(img[:, :, 0], **config)

### Visualize Loss/Gradient-Spectrogram

In [None]:
# Gradient of the loss with respect to the rendered histogram itself:
# gradients are enabled on the render output, not on the scene parameters.
apply_transform(params)
hist = mi.render(scene, seed=sess_seed)
dr.enable_grad(hist)

# Pass the reference explicitly; loss() subscripts its second argument, so
# calling it with the default None would raise a TypeError.
l = loss(hist, img_ref)
dr.backward(l, flags=dr.ADFlag.ClearNone)

grad = dr.grad(hist)
utils.plot_hist(grad[:, :, 0], **config)

In [None]:
# Optional diagnostic (off by default): plot d(loss)/d(histogram) over time
# for two different objectives, using a render at scale factor 0.85.
analyze_grads = False
if analyze_grads:
    t = np.linspace(0., config["max_time"], config["time_bins"], endpoint=False)

    opt['s'] = mi.Float(0.85)
    apply_transform(params)
    img_n = mi.render(scene).numpy()

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 8), sharex=True)

    def _mse_objective(x):
        # Mean squared error against the full reference render.
        return utils.mse(x, img_ref)

    def _summed_difference_sq(x):
        # Squared difference of the channel-0 sums.
        return dr.sqr(dr.sum(x[:, :, 0]) - dr.sum(img_ref[:, :, 0]))

    # Top panel: MSE objective; bottom panel: squared difference of sums.
    for ax, objective in ((ax1, _mse_objective), (ax2, _summed_difference_sq)):
        # Start from a fresh tensor each time so gradients do not accumulate.
        img = mi.TensorXf(img_n)
        dr.enable_grad(img)
        l = objective(img)
        dr.backward(l)
        g = dr.grad(img)[:, :, 0].numpy()
        ax.plot(t, g)

    fig.show()

In [None]:
# Optional diagnostic: sweep the scale factor and record loss and gradient of
# opt['s'] for two objectives (column 0: squared difference of channel-0 sums,
# column 1: MSE against the reference render).
if analyze_grads:
    factors = np.linspace(0.5, 2.0, 31, endpoint=True)
    losses  = np.zeros((factors.shape[0], 2))
    grads   = np.zeros((factors.shape[0], 2))

    assert losses.shape == grads.shape

    objectives = (
        lambda x: dr.sqr(dr.sum(x[:, :, 0]) - dr.sum(img_ref[:, :, 0])),
        lambda x: utils.mse(x, img_ref),
    )

    for i in trange(losses.shape[0]):
        for col, objective in enumerate(objectives):
            # Re-assign the variable and re-render for every objective so each
            # backward pass starts from a fresh gradient.
            opt['s'] = mi.Float(factors[i])
            apply_transform(params)
            img = mi.render(scene, params, seed=sess_seed, seed_grad=sess_seed_g)

            value = objective(img)
            dr.backward(value)
            grad_s = dr.grad(opt['s'])

            losses[i, col] = value[0]
            grads[i, col]  = grad_s[0]

    fig, axis = plt.subplots(2, 2, figsize=(12, 8), sharex=True, sharey=False)
    axf = axis.flatten()

    # Top row: losses, bottom row: gradients (one column per objective).
    for col in range(2):
        axf[col].plot(factors, losses[:, col])
    for col in range(2):
        axf[2 + col].plot(factors, grads[:, col])

    # Dotted guides at gradient 0 and at scale factor 1.
    for ax in axf[2:]:
        ax.axhline(0., 0., 1.5, c="C1", linestyle="dotted")
        ax.axvline(1., -1., 1., c="C1", linestyle="dotted")

    axf[0].set_ylabel("Loss")
    axf[2].set_ylabel("Gradient opt['s']")

    fig.show()

### Main Loop

In [None]:
opt.reset('s')
opt['s'] = mi.Float(0.7)

%matplotlib ipympl

vals, losses, grads = [], [], []

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 3))
ax1.set_title("values")
ax1.set_xlim(-1, 51)
ax1.set_ylim(0.4, 2.1)

ax2.set_title("grads")
ax2.set_xlim(-1, 51)
ax2.set_ylim(-1.1, 1.1)

ax3.set_title("losses")
ax3.set_xlim(-1, 51)
ax3.set_ylim(-0.1, 1.1);

In [None]:
# Gradient-descent loop over the scale factor opt['s'].
iters = 105

# Total number of points the plots have to fit. Computed unconditionally so
# that a single-iteration run on a fresh session does not hit an undefined `n`.
n = len(vals) + iters

for i in trange(iters):
    apply_transform(params)
    img = mi.render(scene, params, seed=sess_seed+i, seed_grad=sess_seed_g+i)

    # Alternative objectives that were tried:
    # l = utils.mse(img[:, :, 0], img_ref[:, :, 0])
    # l = dr.sqr(dr.sum(img[:, :, 0]) - dr.sum(img_ref[:, :, 0]))
    l = loss(img, img_ref)
    # A single-iteration run uses ClearNone instead of the default flags.
    dr.backward(l, flags=dr.ADFlag.ClearNone if iters < 2 else dr.ADFlag.Default)

    grad_s = dr.grad(opt['s'])
    if dr.any(dr.isnan(grad_s)):
        # Skip the update for this iteration: zero the gradient, report the
        # iteration index and loss, and record nothing.
        dr.set_grad(opt['s'], 0.)
        print(i, l.numpy())
        continue

    vals.append(opt['s'].numpy()[0])
    grads.append(grad_s.numpy()[0])
    losses.append(l.numpy()[0])

    opt.step()

    ax1.clear()
    ax1.set_title("values")
    ax1.set_xlim(-0.02 * n, 1.02 * n)
    ax1.set_ylim(0.4, 2.1)
    ax1.plot(vals)
    # Guide at scale factor 1 across the full axes width. axhline's xmin/xmax
    # are axes fractions in [0, 1], so they are left at their defaults.
    ax1.axhline(1, c="k", linestyle="dotted")

    ax2.clear()
    ax2.set_title("grads")
    ax2.set_xlim(-0.02 * n, 1.02 * n)
    ax2.plot(grads)

    ax3.clear()
    ax3.set_title("losses")
    ax3.set_xlim(-0.02 * n, 1.02 * n)
    ax3.plot(losses)

    fig.canvas.draw()

In [None]:
# Stack the first 100 recorded values, gradients and losses row-wise
# (row 0: values, row 1: grads, row 2: losses).
V = np.stack([np.array(series)[:100] for series in (vals, grads, losses)])
# To persist the run: np.save("../data/room-size/room-size-mse-s0_7.npy", V)

### Estimated (Forward) Gradients vs. Finite Differences

In [None]:
# Optional diagnostic (off by default): compare the forward-mode gradient of
# the histogram with respect to opt['s'] against a finite-difference estimate.
analyze_grads = False
if analyze_grads:
    s   = 0.85   # scale factor at which the gradient is evaluated
    eps = 1e-4   # finite-difference step

    # forward pass
    opt['s'] = mi.Float(s)
    apply_transform(params)
    dr.set_grad(opt['s'], mi.Float(1.))

    img  = mi.render(scene, params)
    grad = dr.forward_to(img)

    # finite differences
    opt['s'] = mi.Float(s)
    apply_transform(params)
    img = mi.render(scene)

    opt['s'] = mi.Float(s + eps)
    apply_transform(params)
    # Divide by the step so the estimate is on the same scale as `grad`.
    finite_difference = (mi.render(scene) - img) / eps

    utils.plot_hist(grad[:, :, 0], **config)
    utils.plot_hist(finite_difference[:, :, 0], **config)