In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import auraloss
import IPython.display as ipd
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F

from neural_field_synth.data import NSynthDataset
from neural_field_synth.signal import FIRNoiseSynth
from neural_field_synth.models import NeuralFieldSynth, LightningWrapper

<IPython.core.display.Javascript object>

In [3]:
model = NeuralFieldSynth(
    mlp_hidden_size=1024,
    mlp_hidden_layers=3,
    field_hidden_size=256,
    field_hidden_layers=3,
    wave_field_first_omega_0=100,
    wave_field_hidden_omega_0=30,
    noise_field_first_omega_0=100,
    noise_field_hidden_omega_0=30,
    noise_ir_length=128,
)
model = LightningWrapper(
    model, auraloss.freq.MultiResolutionSTFTLoss(), learning_rate=1e-3
)

<IPython.core.display.Javascript object>

In [24]:
ds = NSynthDataset("/import/c4dm-datasets/nsynth/nsynth-train/")
dl = torch.utils.data.DataLoader(
    ds, batch_size=4, num_workers=16, persistent_workers=True, shuffle=True
)

<IPython.core.display.Javascript object>

In [None]:
trainer = pl.Trainer(
    #     overfit_batches=1,
    gpus=1,
    #     strategy=pl.plugins.DDPSpawnPlugin(find_unused_parameters=False),
)
trainer.fit(model, dl)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1,2,3]

  | Name    | Type                    | Params
----------------------------------------------------
0 | model   | NeuralFieldSynth        | 10.9 M
1 | loss_fn | MultiResolutionSTFTLoss | 0     
----------------------------------------------------
10.9 M    Trainable params
0         Non-trainable params
10.9 M    Total params
43.687    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

In [14]:
dl = torch.utils.data.DataLoader(
    ds, batch_size=8, num_workers=16, persistent_workers=True, shuffle=True
)
it = iter(dl)

<IPython.core.display.Javascript object>

In [17]:
batch = next(it)

net = model.model.cuda(0)

target = batch["audio"].float().cuda(0)
instrument = batch["instrument"].float().cuda(0)
pitch = batch["pitch"].float().cuda(0)
velocity = batch["velocity"].float().cuda(0)

time = torch.linspace(-1, 1, target.shape[-1], device=target.device)[
..., None
].expand(-1, target.shape[0])

recon = net(time, pitch, velocity, instrument)


<IPython.core.display.Javascript object>

In [18]:
for i in range(target.shape[0]):
    print("item %d" % i)
    r = recon[:, i].detach().cpu()

    ipd.display(ipd.Audio(target[i, 0].cpu(), rate=model.model.sample_rate))
    ipd.display(ipd.Audio(r - r.mean(), rate=model.model.sample_rate))

item 0


item 1


item 2


item 3


item 4


item 5


item 6


item 7


<IPython.core.display.Javascript object>

In [22]:
batch = next(it)

net = model.model.cuda(0)

target = batch["audio"].float().cuda(0)
instrument = batch["instrument"].float().cuda(0)
pitch = batch["pitch"].float().cuda(0)
velocity = batch["velocity"].float().cuda(0)

time = torch.linspace(-1, 1, target.shape[-1], device=target.device)[..., None].expand(
    -1, target.shape[0]
)

recon = net(time, pitch, velocity, instrument, return_params=True)

<IPython.core.display.Javascript object>

In [23]:
ipd.Audio(recon.noise_signal[:, 0].detach().cpu(), rate=net.sample_rate)

<IPython.core.display.Javascript object>

In [21]:
recon.noise_film_params[0][0, 0][0]

tensor([ 0.3837, -0.5008,  0.3819,  0.0219, -0.2175,  0.0457, -0.1820,  0.5839,
         0.4472, -1.1891, -0.5631, -0.4189, -0.7972, -0.1462,  0.0774,  0.3691,
        -0.5524,  0.1193,  0.4174,  0.0222,  0.5347, -0.0762,  0.4659, -0.3257,
         0.0086,  0.2287, -0.1937, -0.4503, -0.3774, -0.8975, -1.0527, -0.3222,
         0.1882,  0.2633,  0.4832, -0.9545, -0.2145, -0.5877,  0.0448,  0.3486,
        -0.6852,  0.4079,  0.5532,  0.1867, -0.1524,  1.3075, -1.2358,  0.2539,
         0.2442,  0.8578, -0.1686,  0.2303,  0.5771,  0.1911, -0.6420,  0.3586,
        -0.0878,  0.4475,  0.0389, -0.5763,  0.0797, -1.0196, -0.8177, -0.5642,
        -0.3083,  0.3681,  0.5015, -0.1664,  0.1312,  0.1077,  0.3709, -0.3186,
         0.0632,  0.5694, -0.2065,  0.0953,  0.1967,  0.7968,  0.2662, -1.0173,
        -0.2676,  0.5269,  0.4234,  0.4089,  0.6791,  0.2673, -0.4887,  0.4453,
         0.8137, -0.0098,  1.0104,  0.2239, -0.5773, -0.5174,  0.1797,  0.7096,
         0.1923, -0.1660, -0.4045, -1.19

<IPython.core.display.Javascript object>

In [12]:
model.model.cuda(0).noise_field(recon.fir_sample_signal, *recon.noise_film_params)[0:2]

tensor([[[[-8.4461e+00,  8.9247e-01, -5.2777e-01,  ..., -7.6866e-03,
            1.0622e-01,  5.8947e-01],
          [-9.3929e+00, -1.8615e-01, -9.4084e-01,  ..., -3.5498e-01,
            3.7317e-02, -3.6475e-01],
          [-8.8500e+00, -1.6701e-01, -8.7000e-01,  ..., -3.5182e-01,
           -5.5786e-01,  2.3395e-01],
          ...,
          [-7.5211e+00,  3.0866e-01, -4.4366e-01,  ..., -2.1633e-01,
           -3.9521e-01,  1.3597e-01],
          [-1.1120e+01,  2.5626e-01,  5.2810e-01,  ..., -4.7717e-01,
           -2.6336e-01,  1.7428e-01],
          [-8.6235e+00, -4.7517e-01, -5.1764e-01,  ..., -7.5843e-01,
           -5.1659e-01, -1.0067e-01]],

         [[-7.6014e+00,  4.5322e-01, -3.5759e-01,  ..., -5.7691e-01,
           -3.9527e-01,  1.1652e-01],
          [-8.7000e+00,  3.5740e-01,  1.8399e-01,  ..., -4.9295e-02,
           -1.9800e-01,  2.5522e-02],
          [-8.0377e+00, -1.6745e-02,  1.9733e-01,  ...,  6.4530e-02,
           -1.1369e+00, -2.3984e-01],
          ...,
     

<IPython.core.display.Javascript object>

In [13]:
plt.stem(recon.impulse_response[100, 0].detach().cpu())

IndexError: index 100 is out of bounds for dimension 0 with size 65

<IPython.core.display.Javascript object>