In [1]:
from google.colab import drive
import librosa
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import os

drive.mount('/content/drive', force_remount=False)

DATA_PATH="/content/drive/My Drive/MLFolder/Onlab/Waveform_classical/"

Mounted at /content/drive


In [None]:
full = []

for idx, file in enumerate(tqdm(os.listdir(DATA_PATH), position=0, leave=True)):
    song, sr = librosa.load(os.path.join(DATA_PATH, file), sr=16000)
    for i in song:
        full.append(i)
    if idx == 0:
        fullnp = np.asarray(full)
    else :
        fullnp = np.concatenate((fullnp, np.asarray(full)))
    full = []



In [3]:
print(fullnp.shape)

(397960285,)


In [None]:
np.save("schubert.npy", fullnp)

Parsing the 12 songs with 16 kHz sampling rate takes around 23 minutes. An alternative can be to save it to a binary .npy file, but it takes up 1.48 Gigabytes of space.

In [None]:
scaling_factor = np.max(np.abs(fullnp))

In [4]:
scaled = fullnp / scaling_factor

In [None]:
from keras.utils import to_categorical

def mulaw_encode(samples):
    # Encode to -128..127. Return 0..255.
    return (librosa.mu_compress(samples, quantize=True) + 128).astype('uint8')

def mulaw_decode(samples):
    # Rescale from 0..255 to -128..127. Decode to -1.0..1.0.
    return (librosa.mu_expand(samples.astype('int16') - 128, quantize=True) )

In [None]:
encoded = mulaw_encode(scaled)

In [10]:
from keras.utils import Sequence

SLICE_SIZE = 2048

class MyDatagen(Sequence):
  def __init__(self, list_IDs, batch_size=16, dim=(SLICE_SIZE), shuffle=True, validation=False):
    'Initialization'
    self.dim = dim
    self.batch_size = batch_size
    self.list_IDs = list_IDs
    self.shuffle = shuffle
    self.validation=validation
    self.on_epoch_end()

  def __len__(self):
    return int(np.floor(len(self.list_IDs) / self.batch_size))

  def __getitem__(self, index):
    # Generate indexes of the batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

    # Find list of IDs
    list_IDs_temp = [self.list_IDs[k] for k in indexes]

    # Generate data
    X, y = self.__data_generation(list_IDs_temp)

    return X, y

  def on_epoch_end(self):
      #Updates indexes after each epoch
    self.indexes = np.arange(len(self.list_IDs))
    if self.shuffle == True:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_IDs_temp):
    #Generates data containing batch_size samples
    if self.validation:
      X = np.empty((self.batch_size, self.dim))
      y = np.empty((self.batch_size, 1))

      # Generate data
      for i, ID in enumerate(list_IDs_temp):
        X[i] = to_categorical(encoded[ID:ID+SLICE_SIZE])

        y[i] = to_categorical(encoded[ID+SLICE_SIZE])

      return X, y
    else:
      X = np.empty((self.batch_size, self.dim))
      y = np.empty((self.batch_size, 1))

      # Generate data
      for i, ID in enumerate(list_IDs_temp):
        X[i] = scaled[ID:ID+SLICE_SIZE]

        y[i] = scaled[ID+SLICE_SIZE]

      return X, y

In [23]:
from keras.layers import Conv1D, Flatten, Dense, Input, Activation, Add, Multiply
from keras.models import Model

# This code served as inspiration:
# https://github.com/usernaamee/keras-wavenet
def wavenet_residual_block(filters, kernel_size, dilation_rate):
    def f(input_):
        conv = Conv1D(filters, kernel_size, 
                      dilation_rate=dilation_rate,
                      padding='same')(input_)
        tanh = Activation('tanh')(conv)
        sigmoid = Activation('sigmoid')(conv)
        merged = Multiply()([tanh, sigmoid])
        out = Conv1D(1, 1, padding='same')(merged)
        residual_out = Add()([out, input_])
        skip_out = Activation('relu')(out)
        return residual_out, skip_out
    return f

def wavenet_convolutional_layers(filters, kernel_size, depth):
    def f(input_):
        residual_out = input_
        skip_connections = []
        for i in range(1, depth+1):
            dilation_rate = 2**(i % 9)
            residual_out, skip_out = wavenet_residual_block(
                filters, kernel_size, dilation_rate)(residual_out)
            skip_connections.append(skip_out)
        sum_ = Add()(skip_connections)
        act = Activation('relu')(sum_)
        return act
    return f

def wavenet(input_size):
    input_ = Input(shape=(input_size, 1))
    net = wavenet_convolutional_layers(128, 3, 30)(input_)
    net = Conv1D(2048, 3, padding='same')(net)
    net = Conv1D(256, 3, padding='same')(net)
    net = Flatten()(net)
    net = Dense(1)(net)
    model = Model(input_, net)
    model.compile(loss='MAE', optimizer='adam')
    return model


model = wavenet(SLICE_SIZE)

In [None]:
model.summary()

In [17]:
print(scaled.shape[0]/16)

24872517.8125


In [19]:
print(24872517 - 4872517)

20000000


In [24]:
training_gen=MyDatagen(range(200000))
val_gen=MyDatagen(range(200000, 248725), validation=True)

model.fit(x=training_gen, validation_data=val_gen, epochs=100)

Epoch 1/100
Epoch 2/100
 1150/12500 [=>............................] - ETA: 47:35 - loss: 0.0015

KeyboardInterrupt: ignored

In [81]:
test_gen = MyDatagen(range(20000000, 24872517), validation=True)

model.evaluate(test_gen)

   223/304532 [..............................] - ETA: 7:19:24 - loss: 0.0127

KeyboardInterrupt: ignored

In [72]:
def make_prediction(model, starting_slice, prediction_length, scaling_factor=1.0):
    pattern = np.asarray(starting_slice)
    pattern = np.atleast_3d(pattern)

    res = []

    for i in tqdm(range(prediction_length), position=0, leave=True):
        nextvalue = model.predict(pattern, batch_size=1)
        pattern = np.append(pattern, nextvalue)
        pattern = pattern[1:len(pattern)]
        pattern = np.atleast_3d(pattern)
        res.append(nextvalue)

    return np.asarray(res) * scaling_factor

In [73]:
SR = 16384
SECONDS = 2
PRED_LEN = SR*SECONDS

predicted = make_prediction(model, scaled[30000000:30000000+SLICE_SIZE], PRED_LEN, np.max(fullnp))

100%|██████████| 32768/32768 [22:00<00:00, 24.82it/s]


In [74]:
print(predicted.shape)

(32768, 1, 1)


In [77]:
print(np.min(predicted))

-52.607742


In [75]:
from IPython.lib.display import Audio

Audio(np.squeeze(predicted), rate=2*8192, autoplay=False)

In [None]:
!pip install simpleaudio

In [49]:
import simpleaudio as sa
Itt j
audio = predicted * (2**15 - 1) / np.max(np.abs(predicted))
audio = audio.astype(np.int16)

In [None]:
!pip install wavio

In [57]:
import wavio

wavio.write("myfile.wav", audio, 16000, sampwidth=2)

In [58]:
wavio.write("music1.wav", predicted, 16000, sampwidth=2)

In [None]:
!pip install --upgrade pip setuptools

In [None]:
!sudo apt-get install -y python3-dev libasound2-dev

In [None]:
play_obj = sa.play_buffer(audio, 1, 2, 16000)
play_obj.wait_done()

In [None]:
import gc

gc.collect()