In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import auraloss
import IPython.display as ipd
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F

from neural_field_synth.data import NSynthDataset
from neural_field_synth.signal import FIRNoiseSynth
from neural_field_synth.models import NeuralFieldSynth, LightningWrapper

<IPython.core.display.Javascript object>

In [3]:
model = NeuralFieldSynth(
    instrument_embedding_size=8,
    field_hidden_size=512,
    field_hidden_layers=3,
    wave_field_first_omega_0=30,
    wave_field_hidden_omega_0=30,
    noise_field_first_omega_0=30,
    noise_field_hidden_omega_0=30,
    noise_ir_length=128,
    noise_window_length=128,
    noise_hop_length=64,
    #     freeze_siren=True,
)
model = LightningWrapper(
    model,
    auraloss.freq.MultiResolutionSTFTLoss(
        fft_sizes=[64, 128, 256, 512, 1024, 4096],
        hop_sizes=[32, 64, 128, 256, 512, 2048],
        win_lengths=[64, 128, 256, 512, 1024, 4096],
    ),
    #     nn.MSELoss(),
    learning_rate=1e-4,
    log_audio=False,
)

<IPython.core.display.Javascript object>

In [4]:
ds = NSynthDataset("/import/c4dm-datasets/nsynth/nsynth-test/")
dl = torch.utils.data.DataLoader(
    ds, batch_size=4, num_workers=16, persistent_workers=True, shuffle=True
)

<IPython.core.display.Javascript object>

In [5]:
trainer = pl.Trainer(
    #     overfit_batches=1,
    gpus=1,
    #     strategy=pl.plugins.DDPSpawnPlugin(find_unused_parameters=False),
)
trainer.fit(model, dl)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [3]

  | Name    | Type                    | Params
----------------------------------------------------
0 | model   | NeuralFieldSynth        | 1.6 M 
1 | loss_fn | MultiResolutionSTFTLoss | 0     
----------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.527     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


<IPython.core.display.Javascript object>

In [6]:
dl = torch.utils.data.DataLoader(
    ds, batch_size=8, num_workers=16, persistent_workers=True, shuffle=True
)
it = iter(dl)

<IPython.core.display.Javascript object>

In [7]:
batch = next(it)

net = model.model.cuda(0).eval()

target = batch["audio"].float().cuda(0)
instrument = batch["instrument"].float().cuda(0)
pitch = batch["pitch"].float().cuda(0)
velocity = batch["velocity"].float().cuda(0)

time = torch.linspace(-1, 1, target.shape[-1], device=target.device)[
..., None
].expand(-1, target.shape[0])

recon = net(time, pitch, velocity, instrument, return_params=True)


<IPython.core.display.Javascript object>

In [8]:
for i in range(target.shape[0]):
    print("item %d" % i)
    r = recon.output[:, i].detach().cpu()

    print(" -- target")
    ipd.display(ipd.Audio(target[i, 0].cpu(), rate=model.model.sample_rate))
    print(" -- recon")
    ipd.display(ipd.Audio(r - r.mean(), rate=model.model.sample_rate))
    print(" ---- wavetable")
    ipd.display(
        ipd.Audio(
            recon.wavetable_signal.cpu().detach()[:, i, 0], rate=model.model.sample_rate
        )
    )
    print(" ---- noise")
    ipd.display(
        ipd.Audio(recon.noise_signal.cpu().detach()[:, i], rate=model.model.sample_rate)
    )

item 0
 -- target


 -- recon
 ---- wavetable


 ---- noise


item 1
 -- target


 -- recon


 ---- wavetable


 ---- noise


item 2
 -- target


 -- recon


 ---- wavetable


 ---- noise


item 3
 -- target


 -- recon


 ---- wavetable


 ---- noise


item 4
 -- target


 -- recon


 ---- wavetable


 ---- noise


item 5
 -- target


 -- recon


 ---- wavetable


 ---- noise


item 6
 -- target


 -- recon


 ---- wavetable


 ---- noise


item 7
 -- target


 -- recon


 ---- wavetable


 ---- noise


<IPython.core.display.Javascript object>

In [9]:
recon.noise_signal.max()

tensor(9.4399e-06, device='cuda:0', grad_fn=<MaxBackward1>)

<IPython.core.display.Javascript object>

In [10]:
batch = next(it)

net = model.model.cuda(0)

target = batch["audio"].float().cuda(0)
instrument = batch["instrument"].float().cuda(0)
pitch = batch["pitch"].float().cuda(0)
velocity = batch["velocity"].float().cuda(0)

time = torch.linspace(-1, 1, target.shape[-1], device=target.device)[..., None].expand(
    -1, target.shape[0]
)

recon = net(time, pitch, velocity, instrument, return_params=True)

RuntimeError: CUDA out of memory. Tried to allocate 1000.00 MiB (GPU 0; 23.69 GiB total capacity; 21.04 GiB already allocated; 465.75 MiB free; 21.14 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

<IPython.core.display.Javascript object>

In [None]:
ipd.Audio(recon.noise_signal[:, 0].detach().cpu(), rate=net.sample_rate)

In [None]:
recon.noise_film_params[0][0, 0][0]

In [None]:
model.model.cuda(0).noise_field(recon.fir_sample_signal, *recon.noise_film_params)[0:2]

In [None]:
plt.stem(recon.impulse_response[100, 0].detach().cpu())