In [None]:
!pip install librosa==0.7.2

Collecting librosa==0.7.2
[?25l  Downloading https://files.pythonhosted.org/packages/77/b5/1817862d64a7c231afd15419d8418ae1f000742cac275e85c74b219cbccb/librosa-0.7.2.tar.gz (1.6MB)
[K     |████████████████████████████████| 1.6MB 2.8MB/s 
Collecting soundfile>=0.9.0
  Downloading https://files.pythonhosted.org/packages/eb/f2/3cbbbf3b96fb9fa91582c438b574cff3f45b29c772f94c400e2c99ef5db9/SoundFile-0.10.3.post1-py2.py3-none-any.whl
Building wheels for collected packages: librosa
  Building wheel for librosa (setup.py) ... [?25l[?25hdone
  Created wheel for librosa: filename=librosa-0.7.2-cp36-none-any.whl size=1612885 sha256=ecf5c8fbf28022b1859330e8b6b8691cd6247f2d82a718123128000f33813441
  Stored in directory: /root/.cache/pip/wheels/4c/6e/d7/bb93911540d2d1e44d690a1561871e5b6af82b69e80938abef
Successfully built librosa
Installing collected packages: soundfile, librosa
  Found existing installation: librosa 0.6.3
    Uninstalling librosa-0.6.3:
      Successfully uninstalled librosa-0.6

In [None]:
import os
import time
import glob
from typing import Tuple

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, \
  Conv2D, Conv2DTranspose, UpSampling2D, \
  LeakyReLU, ZeroPadding2D, Cropping2D, MaxPooling2D, \
  BatchNormalization, Concatenate
from tensorflow.keras.models import Model

import librosa
import librosa.display
import librosa.feature
import librosa.output

import matplotlib
import matplotlib.pyplot as plt

from google.colab import drive
from IPython.display import clear_output

In [None]:
# Mount Google Drive at /gdrive/ (interactive: asks for an authorization code).
# Every data and weights path used below lives under this mount point.
drive.mount('/gdrive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive/


In [None]:
# Sanity check: show the first ten .npy files of the 'pop' genre in the `processed` folder.
!ls '/gdrive/My Drive/bosch/processed/pop' | head -10

00Mb3DuaIH1kjrwOku9CGU.npy
02tvc9CFnTyHuSRlGeNv9w.npy
03mMSLEJCPoGJwQhHpN5y0.npy
04ZTP5KsCypmtCmQg5tH9R.npy
06scTb0zbkxYNgpAB3J9fN.npy
07Oz5StQ7GRoygNLaXs2pd.npy
08bNPGLD8AhKpnnERrAc6G.npy
08tq1XcHwVt5PHXUo87i0D.npy
09PGubKAMryhOWv1LHpCYz.npy
0afhq8XCExXpqazXczTSve.npy


In [None]:
# Sanity check: show the first ten .npy files of the 'pop' genre in the `processed-instr`
# folder; the dataset cell pairs files from both folders by identical file name.
!ls '/gdrive/My Drive/bosch/processed-instr/pop' | head -10

00Mb3DuaIH1kjrwOku9CGU.npy
02tvc9CFnTyHuSRlGeNv9w.npy
03mMSLEJCPoGJwQhHpN5y0.npy
04ZTP5KsCypmtCmQg5tH9R.npy
06scTb0zbkxYNgpAB3J9fN.npy
07Oz5StQ7GRoygNLaXs2pd.npy
08bNPGLD8AhKpnnERrAc6G.npy
08tq1XcHwVt5PHXUo87i0D.npy
09PGubKAMryhOWv1LHpCYz.npy
0afhq8XCExXpqazXczTSve.npy


In [None]:
# Spectrogram geometry: height (first axis), total width (second axis, columns
# kept per track) and number of channels.
H, W, C = 128, 1290, 1
# Each track is cut into this many equal-width chunks along the width axis.
SAMPLES = 15
# Sampling rate; presumably the rate the audio was processed at (TODO confirm).
SR = 22050
BATCH_SIZE = 64

# Reference values to normalize data
min_level_db = -100
ref_level_db = 20

# Locations on the mounted Google Drive.
ROOT_DIR = '/gdrive/My Drive/bosch/'
WEIGHTS_PATH = '/gdrive/My Drive/bosch/devoicing'
GENRES = ['pop', 'blues', 'rockabilly', 'hip-hop']

# The width must divide evenly, otherwise the chunks would differ in size.
assert W % SAMPLES == 0

# Dataset preparation

In [None]:
def normalize(S):
  """Map dB values from [min_level_db, 0] linearly onto [-1, 1], clipping outliers.

  `min_level_db` maps to -1 and 0 dB maps to +1; anything outside that range
  is clipped to the nearest bound.
  """
  unit_range = (S - min_level_db) / -min_level_db
  centered = unit_range * 2. - 1.
  return np.clip(centered, -1, 1)

def denormalize(S):
  """Inverse of `normalize`: map values in [-1, 1] back onto [min_level_db, 0].

  The input is clipped to [-1, 1] first, so out-of-range values saturate.
  """
  bounded = np.clip(S, -1, 1)
  unit_range = (bounded + 1.) / 2.
  return unit_range * -min_level_db + min_level_db

def prep(S: np.ndarray) -> np.ndarray:
  """Convert a power spectrogram into the normalized representation fed to the model.

  Args:
    S: power spectrogram (passed straight to `librosa.power_to_db`).

  Returns:
    The dB-scaled spectrogram, shifted down by `ref_level_db` and squashed
    into [-1, 1] by `normalize`.
  """
  # `np.ndarray` is the array type; `np.array` is only the factory function.
  S_db = librosa.power_to_db(S) - ref_level_db
  return normalize(S_db)

# def deprep(S):
#   S = denormalize(S) + ref_level_db
#   S = librosa.db_to_power(S)
#   wv = GRAD(np.expand_dims(S,0), melspecfunc, maxiter=2000, evaiter=10, tol=1e-8)
#   return np.array(np.squeeze(wv))

In [None]:
# Upper bound on the number of tracks loaded per genre.
MAX_FILES_PER_GENRE = 100

# (full mix path, instrumental path) pairs. Both folders use the same file name
# for a given track, so one directory listing is enough; it is sorted because
# os.listdir() returns entries in arbitrary order and the [:N] slice would
# otherwise pick a different subset from run to run.
filenames = [
  (f'{ROOT_DIR}/processed/{gen}/{fn}', f'{ROOT_DIR}/processed-instr/{gen}/{fn}')
  for gen in GENRES
  for fn in sorted(os.listdir(f'{ROOT_DIR}/processed/{gen}'))[:MAX_FILES_PER_GENRE]
]

# Build the (input, target) chunk pairs track by track so that only one full
# spectrogram pair is held in memory next to the accumulated chunks.
spectrogram_pairs = []
for full_fn, instr_fn in filenames:
  # Keep the first W columns and add a trailing channel axis -> (rows, W, 1).
  s_full = np.expand_dims(np.load(full_fn)[:, :W], axis=2)
  s_instr = np.expand_dims(np.load(instr_fn)[:, :W], axis=2)
  # Cut both spectrograms into SAMPLES aligned chunks along the width axis;
  # np.split raises if a track has fewer than W columns.
  full_chunks = np.split(s_full, SAMPLES, axis=1)
  instr_chunks = np.split(s_instr, SAMPLES, axis=1)
  for chunk_full, chunk_instr in zip(full_chunks, instr_chunks):
    # prep() is applied per chunk (not per track), as in the original pipeline.
    spectrogram_pairs.append((prep(chunk_full), prep(chunk_instr)))

assert spectrogram_pairs[0][0].shape == (H, W // SAMPLES, C)

# The list of pairs becomes one tensor whose axis 1 selects full (0) / instrumental (1).
# NOTE(review): repeat() runs before the train/valid/test split and shuffle()
# reshuffles on every iteration by default, so the splits derived from this
# dataset are not disjoint — confirm whether that is acceptable.
dataset = (
  tf.data.Dataset.from_tensor_slices(spectrogram_pairs)
  .repeat(20)
  .shuffle(100000)
  .batch(BATCH_SIZE, drop_remainder=True)
)

In [None]:
NUM_BATCHES = tf.data.experimental.cardinality(dataset).numpy()

# 80 / 10 / 10 split measured in batches. take()/skip() expect integer counts,
# so the fractional products are truncated explicitly instead of handing
# floats (e.g. 187.5) to TensorFlow.
train_batches = int(0.8 * NUM_BATCHES)
valid_batches = int(0.1 * NUM_BATCHES)

train_ds = dataset.take(train_batches)

# Everything after the training batches is shared by validation and test;
# the test split receives whatever remains after validation.
valid_test_ds = dataset.skip(train_batches)
valid_ds = valid_test_ds.take(valid_batches)
test_ds = valid_test_ds.skip(valid_batches)

# NOTE(review): `dataset` ends in a shuffle that, by default, reshuffles on
# every iteration, so these take/skip windows cover different samples each
# time they are iterated — confirm that the splits are meant to overlap.

print('TOTAL', NUM_BATCHES)
print('VALID', tf.data.experimental.cardinality(valid_ds).numpy())
print('TEST', tf.data.experimental.cardinality(test_ds).numpy())

TOTAL 1875
VALID 187
TEST 188


# Model

In [None]:
# Kernel initializer shared by every convolution in the model.
init = tf.keras.initializers.he_uniform()

def conv2d(layer_input, filters, kernel_size=4, strides=2, padding='same', leaky=True, bnorm=True):
  """Convolution -> optional batch normalization -> activation.

  Args:
    layer_input: input tensor.
    filters: number of output channels of the convolution.
    kernel_size: kernel size passed to `Conv2D`.
    strides: strides passed to `Conv2D`.
    padding: padding mode passed to `Conv2D`.
    leaky: use LeakyReLU(alpha=0.2) when True, plain ReLU otherwise.
    bnorm: insert a BatchNormalization layer before the activation when True.

  Returns:
    The activated output tensor.
  """
  if leaky:
    Activ = LeakyReLU(alpha=0.2)
  else:
    # `ReLU` is not among the names imported from tensorflow.keras.layers,
    # so reference it through the `tf` module to avoid a NameError.
    Activ = tf.keras.layers.ReLU()
  # No bias: the convolution uses the shared `init` initializer for its kernel only.
  d = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding=padding, kernel_initializer=init, use_bias=False)(layer_input)
  if bnorm:
    d = BatchNormalization()(d)
  d = Activ(d)
  return d

def deconv2d(layer_input, layer_res, filters, kernel_size=4, conc=True, scalev=False, bnorm=True, up=True, padding='same', strides=2):
  """Upscaling block: upsample, optionally normalize, LeakyReLU, optional skip concat.

  Args:
    layer_input: tensor to upscale.
    layer_res: skip-connection tensor concatenated to the result when `conc` is True.
    filters: number of output channels of the convolution.
    kernel_size: kernel size of the convolution.
    conc: concatenate `layer_res` onto the output when True.
    scalev: accepted for interface compatibility; not used by this function.
    bnorm: apply BatchNormalization before the activation when True.
    up: when True, use UpSampling2D((1,2)) followed by a stride-1 Conv2D
      (`strides` is ignored on this path); otherwise use a Conv2DTranspose
      with the given `strides`.
    padding: padding mode of the convolution.
    strides: strides of the Conv2DTranspose (only used when `up` is False).

  Returns:
    The upscaled tensor, concatenated with `layer_res` if requested.
  """
  conv_options = dict(kernel_initializer=init, use_bias=False, padding=padding)

  if not up:
    out = Conv2DTranspose(filters, kernel_size, strides=strides, **conv_options)(layer_input)
  else:
    widened = UpSampling2D((1,2))(layer_input)
    out = Conv2D(filters, kernel_size, strides=(1,1), **conv_options)(widened)

  if bnorm:
    out = BatchNormalization()(out)
  out = LeakyReLU(alpha=0.2)(out)

  if not conc:
    return out
  return Concatenate()([out, layer_res])

In [None]:
# U-Net style architecture: a downscaling path, an upscaling path and skip
# connections (concatenations) between layers of matching width.
def build_model(input_shape):
  """Build the generator model 'G' for inputs of shape (height, width, channels).

  The shape of every intermediate tensor is printed while the graph is built.

  Args:
    input_shape: 3-tuple (h, w, c) describing one input spectrogram chunk.

  Returns:
    A Keras `Model` named 'G' mapping the input to a tanh-activated output.
  """
  def show(tag, tensor):
    # Report the static shape of an intermediate tensor.
    print(tag, tensor.shape)

  h, _, _ = input_shape
  inp = Input(shape=input_shape)

  # --- downscaling ---
  # Pad one column on each side so the width-3 'valid' convolution keeps the width.
  g0 = ZeroPadding2D((0,1))(inp)
  show('G0', g0)
  # Full-height kernel with 'valid' padding: collapses the height axis to 1.
  g1 = conv2d(g0, 256, kernel_size=(h,3), strides=1, padding='valid')
  show('G1', g1)
  g1_padded = ZeroPadding2D((0,1))(g1)
  g2 = conv2d(g1_padded, 256, kernel_size=(1,9), strides=(1,2))
  show('G2', g2)
  g3 = conv2d(g2, 256, kernel_size=(1,7), strides=(1,2))
  show('G3', g3)

  # --- upscaling ---
  g4 = deconv2d(g3, g2, 256, kernel_size=(1,7), strides=(1,2))
  show('G4', g4)
  # The skip connection from g1 gets its own padding layer so widths match.
  g1_skip = ZeroPadding2D((0,1))(g1)
  g5 = deconv2d(g4, g1_skip, 256, kernel_size=(1,9), strides=(1,2), bnorm=False)
  show('G5', g5)
  # Full-height transposed convolution restores the original height.
  g6 = Conv2DTranspose(1, kernel_size=(h,1), strides=(1,1), activation='tanh', kernel_initializer=init, padding='valid')(g5)
  show('G6', g6)
  # Remove the two padding columns again.
  g7 = Cropping2D(cropping=((0, 0), (1, 1)))(g6)
  show('G7', g7)

  return Model(inp, g7, name='G')

In [None]:
LOAD_WEIGHTS = True

# Checkpoint file; the same path the (commented-out) training loop saves to.
weights_file = f'{WEIGHTS_PATH}/model.h5'

# Width of one spectrogram chunk fed to the model.
print(W // SAMPLES)
model = build_model((H, W // SAMPLES, C))

# Test for the exact file that gets loaded: checking for "any *.h5" would
# still crash in load_weights() when only differently named files exist.
if LOAD_WEIGHTS and os.path.isfile(weights_file):
  print('LOADING WEIGHTS..')
  model.load_weights(weights_file)

model.summary()

86
G0 (None, 128, 88, 1)
G1 (None, 1, 86, 256)
G2 (None, 1, 44, 256)
G3 (None, 1, 22, 256)
G4 (None, 1, 44, 512)
G5 (None, 1, 88, 512)
G6 (None, 128, 88, 1)
G7 (None, 128, 86, 1)
LOADING WEIGHTS..
Model: "G"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 86, 1)] 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 128, 88, 1)   0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 1, 86, 256)   98304       zero_padding2d[0][0]             
__________________________________________________________________________________________________


# Training

In [None]:
def mse(x, y):
  """Return the mean squared error between `x` and `y`, averaged over all elements."""
  residual = x - y
  return tf.reduce_mean(residual ** 2)

In [None]:
# Training hyper-parameters.
EPOCHS = 10          # number of passes over the training split
LR = 2e-4            # learning rate handed to the optimizer
REPORT_STEP = 100    # report progress every REPORT_STEP batches

In [None]:
# Adam with a first-moment decay (beta_1) of 0.5 instead of the library default.
optimizer = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.5)

In [None]:
# train_loss = []
# valid_loss = []

# for epoch in range(EPOCHS):
#   start_time = time.time()
#   epoch_loss = []  # Use it for collecting training losses

#   # Training
#   for batch_idx, batch in enumerate(train_ds):

#     with tf.GradientTape() as tape:
#       devocal_tensor = model(batch[:, 0, :, :, :])  # Input the full spectrograms
#       loss = mse(devocal_tensor, batch[:, 1, :, :, :])  # Compare output with instrumental spectrograms
#       gradient = tape.gradient(loss, model.trainable_variables)
#       optimizer.apply_gradients(zip(gradient, model.trainable_variables))
    
#     epoch_loss.append((epoch, loss))
#     if (batch_idx + 1) % REPORT_STEP == 0:
#       print(f'[Epoch {epoch}/{EPOCHS}] [Batch {batch_idx+1}/ {NUM_BATCHES}] ', end='')
#       print(f'[Loss {np.mean(epoch_loss[-REPORT_STEP:])}]', end='')
#       print(f' Time/Batch {(time.time()-start_time)/(batch_idx + 1)}')
#       print('WEIGHTS SAVED..')
#       model.save_weights(f'{WEIGHTS_PATH}/model.h5')

#   train_loss.append(np.mean(epoch_loss))

#   # Validation
#   epoch_loss = []  # Reuse the array for collecting validation losses
#   for batch_idx, batch in enumerate(valid_ds):

#     full_spec = batch[:, 0, :, :, :]
#     instr_spec = batch[:, 1, :, :, :]

#     devocal_tensor = model(full_spec)
#     loss = mse(devocal_tensor, instr_spec)
#     epoch_loss.append(loss)

#   clear_output(wait=True)
#   valid_loss.append(np.mean(epoch_loss))
#   for loss in valid_loss:
#     print(loss)