In [1]:
''' imports '''

# set auto reload imported modules
%load_ext autoreload
%autoreload 2

# general imports
import os, shutil

# numpy for array handling
import numpy as np

# import pytorch core libs
import torch

# write audio to file
from librosa.output import write_wav


''' sample-rnn components '''
# add sample-rnn libs directory to path
import sys

sys.path.append('../libs/samplernn/')

# import core sample-rnn model (inc. frame-lvl rnn and sample-lvl mlp)
from model import SampleRNN
from model import Predictor
from model import Generator

# wrapper for optimiser
from optim import gradient_clipping

# training criterion
from nn import sequence_nll_loss_bits

# import audio dataset management
from dataset import FolderDataset
from dataset import DataLoader


In [2]:
''' initialise models components '''
# alternative ("new") configuration, kept for reference only
#_frame_sizes = (4, 4, 8, 8)
#_n_rnn = 2
#_dim = 1024
#_learn_h0 = False
#_q_levels = 256 # 8 bit depth
#_weight_norm = True

# active configuration
_frame_sizes, _n_rnn, _dim = (4, 4, 4), 1, 1024
_learn_h0, _weight_norm = True, True
_q_levels = 256 # 8 bit depth

# gather the hyper-parameters once so the constructor call stays compact
_model_config = dict(
    frame_sizes = _frame_sizes,
    n_rnn = _n_rnn,
    dim = _dim,
    learn_h0 = _learn_h0,
    q_levels = _q_levels,
    weight_norm = _weight_norm,
)

# build the core sample-rnn network from the configuration above
model = SampleRNN(**_model_config)

# wrap the network in the predictor used by the optimiser and training cells
predictor = Predictor(model)

  init(chunk)


In [3]:
# wrap the model in a Generator; the sample-generation cell later calls it as
# generator(n_samples, sample_length) and converts the result to a numpy array
generator = Generator(model)

In [4]:
''' push to device '''

# prefer the GPU whenever CUDA is usable, otherwise stay on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# move the network and its predictor wrapper onto the selected device
model, predictor = model.to(device), predictor.to(device)


In [6]:
''' init optimiser '''

# parameters exposed by the predictor are the ones being optimised
params = predictor.parameters()

# plain Adam with a learning rate of 1e-4 ...
_base_optimizer = torch.optim.Adam(params, lr = 1e-4)

# ... passed through the gradient_clipping wrapper imported from optim
# (an unwrapped torch.optim.Adam(params) is the alternative tried previously)
optimizer = gradient_clipping(_base_optimizer)


In [7]:

''' initialise dataset and dataloader '''

# location of the audio used for training: <datasets root>/<dataset name>
_datasets_path = '../data/'
_dataset = 'vox'
_path = os.path.join(_datasets_path, _dataset)

# overlap between consecutive sequences is taken from the model's lookback
_overlap_len = model.lookback

# batching configuration
_seq_len, _batch_size = 2048, 64

# upper bound of the dataset fraction handed to FolderDataset (lower bound is 0)
_train_frac = 1

# dataset over the folder; arguments are positional, in the order
# path, overlap length, quantisation levels, lower fraction, upper fraction
train_dataset = FolderDataset(_path, _overlap_len, _q_levels, 0, _train_frac)

# options forwarded to the project's DataLoader
_loader_options = dict(
    batch_size = _batch_size,
    seq_len = _seq_len,
    overlap_len = _overlap_len,
    shuffle = True,
    num_workers = 4,
    drop_last = True,
)

# loader that the training loop iterates over
train_data_loader = DataLoader(train_dataset, **_loader_options)


In [18]:
''' training loop '''

# number of full passes over the training data
epochs = 5

# perform training over epochs
for _epoch in range(epochs):

    # switch the predictor to training mode before each pass
    predictor.train()

    # iterate over the batches produced by the dataloader
    for (_iteration, data) in enumerate(train_data_loader):

        # clear the gradients left over from the previous iteration
        optimizer.zero_grad()

        # unpack batch: first element holds the inputs, last element the targets
        batch_inputs = data[0].to(device)
        batch_target = data[-1].to(device)

        def closure():
            ''' forward pass, loss and backward pass for the current batch; returns the loss '''

            # data[1] is forwarded as the predictor's reset argument
            batch_output = predictor(batch_inputs, reset = data[1])

            # compare predictor output against the targets
            loss = sequence_nll_loss_bits(batch_output, batch_target)

            # populate gradients so the optimiser can apply the update
            loss.backward()

            return loss

        try:
            # step optimiser with closure
            optimizer.step(closure)

        except Exception as err:
            # stay best-effort (a failing batch is skipped), but report it instead of
            # hiding it; catching Exception rather than using a bare except lets
            # KeyboardInterrupt through, so the cell can still be interrupted
            print('epoch {} iteration {}: batch skipped ({}: {})'.format(
                _epoch, _iteration, type(err).__name__, err))




IsADirectoryError: Caught IsADirectoryError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/librosa/core/audio.py", line 129, in load
    with sf.SoundFile(path) as sf_desc:
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/soundfile.py", line 629, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/soundfile.py", line 1183, in _open
    _error_check(_snd.sf_error(file_ptr),
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/soundfile.py", line 1357, in _error_check
    raise RuntimeError(prefix + _ffi.string(err_str).decode('utf-8', 'replace'))
RuntimeError: Error opening '../data/vox/output': File contains data in an unknown format.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "../libs/samplernn/dataset.py", line 34, in __getitem__
    (seq, _) = load(self.file_names[index], sr = None, mono = True)
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/librosa/core/audio.py", line 162, in load
    y, sr_native = __audioread_load(path, offset, duration, dtype)
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/librosa/core/audio.py", line 186, in __audioread_load
    with audioread.audio_open(path) as input_file:
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/audioread/__init__.py", line 111, in audio_open
    return BackendClass(path)
  File "/home/brendan/.local/share/virtualenvs/eurovision-ai-vARDGvzQ/lib/python3.8/site-packages/audioread/rawread.py", line 62, in __init__
    self._fh = open(filename, 'rb')
IsADirectoryError: [Errno 21] Is a directory: '../data/vox/output'


In [19]:
''' perform sample generation '''

# directory the generated audio is written to
# NOTE(review): this lives inside the dataset folder ('../data/vox'); the traceback
# recorded in this notebook shows the data loader failing on '../data/vox/output',
# so keeping outputs outside the dataset folder is probably safer - confirm
_output_path = '../data/vox/output-2'

# create the output directory up front so writing the first file cannot fail on a missing folder
os.makedirs(_output_path, exist_ok = True)

#_sample_rate = 44000
_sample_rate = 16000
_n_samples = 1

# ten times the sample rate, i.e. 10 seconds of audio per generated sample
_sample_length = int(_sample_rate * 10)

# run the generator and bring the result back to the host as a float numpy array
samples = generator(_n_samples, _sample_length).cpu().float().numpy()

# write one wav file per generated sample
# NOTE(review): write_wav is imported from librosa.output, which librosa 0.8+ no
# longer provides - confirm the pinned librosa version
for i in range(_n_samples):
    write_wav(
        os.path.join(_output_path, 'vox_euro_04_{}.wav'.format(i)),
        samples[i, :], sr = _sample_rate, norm = True)
    

In [15]:
''' save checkpoint '''

# destination of the serialised model weights
_chkpt_path = '../data/vox/output-2/chkpt-03'

# make sure the target directory exists so the save cannot fail on a missing folder
os.makedirs(os.path.dirname(_chkpt_path), exist_ok = True)

# store only the model's state dict (weights), not the whole model object
torch.save(model.state_dict(), _chkpt_path)


In [11]:
''' load checkpoint '''

#_state_dict = torch.load('../data/chkpt')
# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved from a GPU run also loads on a CPU-only machine
# (torch.load unpickles the file - only load checkpoints from a trusted source)
_state_dict = torch.load('../data/vox/output/chkpt-new-05', map_location = device)

# copy the loaded weights into the model; kept as the last expression of the cell
# so the key-matching report is displayed
model.load_state_dict(_state_dict)
    

<All keys matched successfully>