In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Change the working directory to the parent directory.
# NOTE(review): presumably this is so the `wavenet` package imports from the
# repository root - confirm. This magic is not idempotent: re-running the cell
# without restarting the kernel moves up one more directory each time.
%cd ..

In [None]:
from pathlib import Path
import os

import numpy as np
import torch
import librosa
from torch.nn import functional as F
import matplotlib.pyplot as plt
import IPython.display as ipd

from wavenet import model, train, sample, audio, datasets, utils, viz

# Train WaveNet on MAESTRO

In [None]:
# Model hyperparameters: embedded inputs, a single audio channel, and
# multi-channel sources squashed down to mono.
p = model.HParams(embed_inputs=True, n_audio_chans=1, squash_to_mono=True)

# Echo the resulting hyperparameters as a plain dict so the run is
# self-documenting in the notebook output.
print(dict(p))

In [None]:
# Training hyperparameters: epoch budget, batch size, number of data-loading
# workers and learning rate.
tp = train.HParams(max_epochs=75, batch_size=12, num_workers=8, learning_rate=0.0044)

# Echo the resulting training configuration as a plain dict.
print(dict(tp))

In [None]:
# Seed before building the datasets.
# NOTE(review): assumes utils.seed(p) fixes the RNG state from the model
# hyperparameters - confirm against wavenet.utils.
utils.seed(p)

# Dataset roots. The defaults are the original server locations; set
# MAESTRO_NAS_DIR / MAESTRO_SSD_DIR to run the notebook on another machine
# without editing this cell.
nas_path = Path(os.environ.get('MAESTRO_NAS_DIR', '/srv/datasets/maestro/maestro-v2.0.0'))
ssd_path = Path(os.environ.get('MAESTRO_SSD_DIR', '/srv/datasets-ssd/maestro/maestro-v2.0.0'))

# The 2017 recordings are used for training and the 2018 recordings for
# testing. Tracks are read from the NAS copy, with the matching SSD directory
# passed as the cache directory.
ds_train = datasets.Tracks.from_dir(p, nas_path / '2017', cache_dir=ssd_path / '2017')
ds_test = datasets.Tracks.from_dir(p, nas_path / '2018', cache_dir=ssd_path / '2018')

In [None]:
# Re-seed right before constructing the model.
# NOTE(review): presumably so weight initialisation is repeatable - confirm.
utils.seed(p)

# Build the network from the model hyperparameters.
m = model.Wavenet(p)

# Wire the model, both datasets and the training hyperparameters into a
# trainer. The final positional argument is left as None; its meaning is not
# visible from this notebook.
t = train.Trainer(
    m,
    ds_train,
    ds_test,
    tp,
    None,
)

In [None]:
# Plot a randomly chosen training track and keep the index that was picked.
track_i = viz.plot_random_track(ds_train)

# A dataset item unpacks to the track followed by other values; only the
# track is needed here.
track, *_ = ds_train[track_i]

# Convert the stored track back to a waveform via mu_expand and play it
# inline. The Audio widget is the cell's last expression, so it is displayed.
track_audio = audio.mu_expand(track.squeeze().numpy(), p)
ipd.Audio(track_audio, rate=p.sampling_rate)

In [None]:
# Re-seed immediately before training.
# NOTE(review): assumes utils.seed(p) resets the RNG state so that training is
# repeatable regardless of which earlier cells were run - confirm.
utils.seed(p)
# Run training with the Trainer built earlier (model `m`, train/test datasets,
# hyperparameters `tp`).
t.train()

In [None]:
# Re-seed before sampling.
# NOTE(review): presumably so the generated audio is repeatable - confirm.
utils.seed(p)

# Generate a batch of tracks from the trained model with a nucleus decoder.
# NOTE(review): n_samples is presumably the number of audio samples generated
# per track and batch_size the number of tracks - confirm against
# wavenet.sample.fast.
tracks, logits, g = sample.fast(
    m,
    ds_train.transforms,
    utils.decode_nucleus(),
    n_samples=32000,
    batch_size=10,
)

In [None]:
# Play every generated track inline.
for track in tracks:
    # Normalise with the training dataset's transforms, then mu_expand the
    # result back to a waveform.
    track = audio.mu_expand(ds_train.transforms.normalise(track.numpy()), p)
    # display() is needed because expressions inside a loop are not
    # auto-rendered by the notebook.
    ipd.display(ipd.Audio(track, rate=p.sampling_rate))