In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import random
from glob import glob
import librosa

import soundfile

from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist

from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, Flatten, Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras import Model, Sequential


In [2]:
# Query whether this TensorFlow build has CUDA support; the value is not
# stored or displayed because it is not the last expression of the cell.
tf.test.is_built_with_cuda()
# An empty GPU device list means training will fall back to the CPU.
print("Training GPU" if tf.config.list_physical_devices('GPU') else "Training CPU")

Training GPU


In [3]:
def load_mel(filepth="data/train.part1/noisy/25/25_88353_25-88353-0017.npy"):
    """Load the array stored in the ``.npy`` file ``filepth`` as float64.

    Args:
        filepth: Path to a ``.npy`` file (presumably a mel spectrogram, going
            by the function name — the loader itself does not check).

    Returns:
        A new ``numpy.ndarray`` with dtype ``float64`` and the stored shape.
    """
    return np.load(filepth).astype(np.float64)


def reconstruct_audio_from_mel(mel_spec, out='rec.flac'):
    """Invert a scaled mel spectrogram to a waveform and write it to ``out``.

    Args:
        mel_spec: Array holding the scaled mel spectrogram. It is transposed
            before being handed to librosa, so it is presumably stored as
            (frames, n_mels) — TODO confirm against the feature extractor.
        out: Destination audio file path passed to ``soundfile.write``.

    Returns:
        None. The reconstructed signal is written to ``out`` at 16 kHz.
    """
    # Analysis parameters; they must match the ones used to produce the mels.
    # The call below always used hop_length=256 (an unused local previously
    # carried a typo'd 2561), so the values are now defined once and reused.
    sr = 16000
    n_fft = 1024
    hop_length = 256
    fmin = 20
    fmax = 8000

    # Undo the stored scaling: exp((m - 1) * 10), then transpose for librosa.
    # NOTE(review): this looks like the inverse of log(mel) / 10 + 1 — confirm.
    mel_power = np.exp((mel_spec - 1) * 10).T
    y_inv = librosa.feature.inverse.mel_to_audio(
        M=mel_power, sr=sr, n_fft=n_fft, hop_length=hop_length, fmin=fmin, fmax=fmax
    )
    soundfile.write(out, y_inv, samplerate=sr)


def show_mel_spectra(img_pth="data/train.part1/clean/31/31_121969_31-121969-0000.npy"):
    """Plot a standardized view of the array stored in ``img_pth``.

    The array is shifted to zero mean and divided by its standard deviation,
    drawn transposed with ``plt.imshow`` on a 20x6 figure, and the mean of the
    standardized array is printed.

    Args:
        img_pth: Path to the ``.npy`` file to visualize.
    """
    plt.figure(figsize=(20, 6))
    raw = np.load(img_pth)
    standardized = (raw - raw.mean()) / raw.std()
    # imshow gets a float64 copy, transposed so the first stored axis runs horizontally.
    plt.imshow(standardized.astype(np.float64).T)
    print(standardized.mean())

# Training

In [4]:
numFeatures = 80 # sliding-window size (original comment); also the first dimension of the model input in build_model
numSegments = 8 # number of Fourier vectors (spectrogram frames) used for the autoregression

In [5]:
class DenoisingDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields (noisy context, clean frame) batches.

    Files are discovered under ``<folder>/noisy/*/*.npy`` and
    ``<folder>/clean/*/*.npy`` and paired positionally after sorting both
    listings, so the two trees are assumed to use matching file names
    (TODO confirm against the dataset layout).

    Only the first ``numSegments + 1`` rows of each file are used: rows
    ``0..numSegments-1`` of the noisy file form the input and row
    ``numSegments`` of the clean file is the target.

    Args:
        data_folders: Dataset root folders, each containing ``noisy`` and
            ``clean`` sub-trees.
        batch_size: Number of file pairs per batch.
        numSegments: Number of leading rows of the noisy file used as input.
        shuffle: When true, the pair list is shuffled once at construction
            and again at the end of every epoch; when false, the sorted
            order is kept.

    Raises:
        ValueError: If a folder holds a different number of noisy and clean
            files, since positional pairing would then be misaligned.
    """

    def __init__(self, data_folders : list,
                 batch_size,
                 numSegments=numSegments,
                 shuffle=True):
        super().__init__()

        self.batch_size = batch_size
        self.numSegments = numSegments
        self.shuffle = shuffle

        self.file_paths = []
        for data_folder in data_folders:
            # glob() returns paths in arbitrary order; sort both listings so
            # the i-th noisy file is paired with the i-th clean file.
            noisy_paths = sorted(glob(f"{data_folder}/noisy/*/*.npy"))
            clean_paths = sorted(glob(f"{data_folder}/clean/*/*.npy"))
            if len(noisy_paths) != len(clean_paths):
                raise ValueError(
                    f"{data_folder}: found {len(noisy_paths)} noisy files but "
                    f"{len(clean_paths)} clean files; cannot pair them"
                )
            self.file_paths += list(zip(noisy_paths, clean_paths))

        # Shuffle once over all folders, and only when requested.
        if self.shuffle:
            random.shuffle(self.file_paths)

        self.n = len(self.file_paths)

    def on_epoch_end(self):
        """Reshuffle the pair list between epochs when ``shuffle`` is set."""
        if self.shuffle:
            random.shuffle(self.file_paths)

    def __load_noisy(self, path):
        """Return the first ``numSegments`` rows of ``path``, transposed, with a trailing channel axis."""
        mel_image = np.load(path)[0:self.numSegments]
        mel_image = mel_image.T
        return np.expand_dims(mel_image, axis=-1)

    def __load_clear(self, path):
        """Return row ``numSegments`` of ``path`` with two trailing singleton axes."""
        # NOTE(review): the target is the row right after the noisy context
        # window, not the last row inside it — confirm this is intended.
        mel_image = np.load(path)[self.numSegments]
        mel_image = mel_image.T
        mel_image = np.expand_dims(mel_image, axis=-1)
        return np.expand_dims(mel_image, axis=-1)

    def __get_data(self, file_path_batches):
        """Load every (noisy, clean) pair in the batch and stack them into two arrays."""
        X_batch = np.array([self.__load_noisy(noisy_path) for noisy_path, _ in file_path_batches])
        y_batch = np.array([self.__load_clear(clean_path) for _, clean_path in file_path_batches])

        return X_batch, y_batch

    def __getitem__(self, index):
        """Return the ``index``-th batch as an ``(X, y)`` tuple of NumPy arrays."""
        start = index * self.batch_size
        file_path_batches = self.file_paths[start:start + self.batch_size]
        return self.__get_data(file_path_batches)

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return self.n // self.batch_size


In [6]:
# Batch generators for the training and validation splits, 3 file pairs per batch.
# Both use the default shuffle=True, so the validation order is shuffled too.
traingen = DenoisingDataGen(data_folders=["data/train.part1"], batch_size=3)
valgen = DenoisingDataGen(data_folders=["data/val"], batch_size=3)

In [7]:
# Sanity check: shape of the target (y) array of the first training batch.
traingen[0][1].shape

(3, 80, 1, 1)

In [18]:
def _conv_relu_bn(x, filters, kernel_size, l2_strength, padding='same', residual=None):
  """Apply a bias-free Conv2D, an optional residual add, ReLU, then BatchNormalization.

  Args:
    x: Input Keras tensor.
    filters: Number of convolution filters.
    kernel_size: Convolution kernel size as ``[height, width]``.
    l2_strength: L2 factor for the kernel regularizer.
    padding: Padding mode passed to ``Conv2D``.
    residual: Optional tensor added to the convolution output before ReLU.

  Returns:
    ``(activated, conv_out)``: the normalized activation, and the raw
    convolution output (before the add and the activation) so callers can
    keep it as a skip connection.
  """
  conv_out = Conv2D(filters=filters, kernel_size=kernel_size, strides=[1, 1], padding=padding, use_bias=False,
                    kernel_regularizer=tf.keras.regularizers.l2(l2_strength))(x)
  pre_activation = conv_out if residual is None else conv_out + residual
  activated = Activation('relu')(pre_activation)
  activated = BatchNormalization()(activated)
  return activated, conv_out


def build_model(l2_strength):
  """Build and compile the convolutional denoising network.

  The input has shape ``[numFeatures, numSegments, 1]``. The first
  convolution spans the whole ``numSegments`` axis with valid padding,
  collapsing it to width 1. Five groups of three Conv-ReLU-BN layers
  (18, 30 and 8 filters) follow; the 30-filter convolution outputs of
  groups 1 and 2 are added back into groups 5 and 4 respectively.

  Args:
    l2_strength: L2 regularization factor applied to every regularized
      convolution kernel.

  Returns:
    A ``Model`` compiled with Adam (learning rate 3e-4), MSE loss and an
    RMSE metric named ``'rmse'``.
  """
  inputs = Input(shape=[numFeatures, numSegments, 1])

  # 1 -----
  # Pad the feature axis by 4 on each side so the valid 9-tall convolution
  # keeps its length. The kernel width is numSegments (not a literal 8) so
  # the segment axis always collapses to 1.
  x = tf.keras.layers.ZeroPadding2D(((4,4), (0,0)))(inputs)
  x, _ = _conv_relu_bn(x, 18, [9, numSegments], l2_strength, padding='valid')
  x, skip0 = _conv_relu_bn(x, 30, [5, 1], l2_strength)
  x, _ = _conv_relu_bn(x, 8, [9, 1], l2_strength)

  # 2 -----
  x, _ = _conv_relu_bn(x, 18, [9, 1], l2_strength)
  x, skip1 = _conv_relu_bn(x, 30, [5, 1], l2_strength)
  x, _ = _conv_relu_bn(x, 8, [9, 1], l2_strength)

  # 3 ----
  x, _ = _conv_relu_bn(x, 18, [9, 1], l2_strength)
  x, _ = _conv_relu_bn(x, 30, [5, 1], l2_strength)
  x, _ = _conv_relu_bn(x, 8, [9, 1], l2_strength)

  # 4 ---- (mirrors group 2 through skip1)
  x, _ = _conv_relu_bn(x, 18, [9, 1], l2_strength)
  x, _ = _conv_relu_bn(x, 30, [5, 1], l2_strength, residual=skip1)
  x, _ = _conv_relu_bn(x, 8, [9, 1], l2_strength)

  # 5 ---- (mirrors group 1 through skip0)
  x, _ = _conv_relu_bn(x, 18, [9, 1], l2_strength)
  x, _ = _conv_relu_bn(x, 30, [5, 1], l2_strength, residual=skip0)
  x, _ = _conv_relu_bn(x, 8, [9, 1], l2_strength)

  # 6 ----
  x = tf.keras.layers.SpatialDropout2D(0.2)(x)
  # NOTE(review): the 129-tall kernel is taller than the numFeatures axis;
  # 'same' padding keeps the output length, but confirm 129 is intentional.
  x = Conv2D(filters=1, kernel_size=[129,1], strides=[1, 1], padding='same')(x)

  model = Model(inputs=inputs, outputs=x)

  optimizer = tf.keras.optimizers.Adam(3e-4)

  model.compile(optimizer=optimizer, loss='mse',
                metrics=[tf.keras.metrics.RootMeanSquaredError('rmse')])
  return model

In [19]:
# Build the network with L2 regularization disabled (factor 0.0).
model = build_model(l2_strength=0.0)

In [20]:
# Train for 3 epochs on the training generator, validating on valgen after each epoch.
model.fit(traingen,
          validation_data=valgen,
          epochs=3
        )

Epoch 1/3

KeyboardInterrupt: 

In [14]:
# Print the layer-by-layer summary (output shapes and parameter counts) of `model`.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 80, 8, 1)]        0         
                                                                 
 zero_padding2d_1 (ZeroPaddi  (None, 88, 8, 1)         0         
 ng2D)                                                           
                                                                 
 conv2d_1 (Conv2D)           (None, 80, 1, 18)         1296      
                                                                 
 activation_1 (Activation)   (None, 80, 1, 18)         0         
                                                                 
 batch_normalization (BatchN  (None, 80, 1, 18)        72        
 ormalization)                                                   
                                                                 
Total params: 1,368
Trainable params: 1,332
Non-trainable p