In [68]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import random
from glob import glob
import librosa

import soundfile

from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model


In [60]:
# Report which device class training will run on. The CUDA-build query is
# issued as well, but its return value is not used.
tf.test.is_built_with_cuda()
print("Training GPU" if tf.config.list_physical_devices('GPU') else "Training CPU")

Training GPU


In [69]:
def reconstruct_audio(filepth="data/train.part1/noisy/25/25_88353_25-88353-0017.npy",
                      out_path='inv.flac',
                      sr=16000,
                      n_fft=1024,
                      hop_length=256,
                      fmin=20,
                      fmax=8000,
                      n_iter=32):
    """Invert a stored mel spectrogram to audio and write it to disk.

    The array in ``filepth`` is mapped back to linear scale with
    ``exp((x - 1) * 10)``, transposed, and inverted with
    ``librosa.feature.inverse.mel_to_audio``.

    Args:
        filepth: Path to the ``.npy`` spectrogram file.
            NOTE(review): the transpose implies the file is stored as
            (frames, n_mels) -- confirm against the feature extractor.
        out_path: Destination audio file passed to ``soundfile.write``.
        sr: Sample rate used for both the inversion and the written file.
        n_fft: FFT size used for the inversion.
        hop_length: Hop length (in samples) used for the inversion.
        fmin: Lowest frequency of the mel filter bank.
        fmax: Highest frequency of the mel filter bank.
        n_iter: Number of Griffin-Lim iterations (32 is librosa's default).

    Returns:
        The reconstructed waveform as returned by ``mel_to_audio``.
    """
    mel_spec = np.load(filepth).astype(np.float64)

    # Undo the log compression and put mel bins on the first axis.
    mel_spec = np.exp((mel_spec - 1) * 10).T

    y_inv = librosa.feature.inverse.mel_to_audio(
        M=mel_spec,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        fmin=fmin,
        fmax=fmax,
        n_iter=n_iter,
    )
    soundfile.write(out_path, y_inv, samplerate=sr)
    return y_inv
    # audio-preview

In [12]:
# (height, width, channels): default `input_size` of ClassificationDataGen and
# the `input_shape` handed to the Denoise autoencoder.
IMG_SHAPE = (224, 224, 3)

In [31]:
class ClassificationDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of (noisy, clean) array pairs.

    Samples are ``.npy`` files matched by ``<folder>/noisy/*/*.npy`` and
    ``<folder>/clean/*/*.npy``. Each array is replicated along a new channel
    axis, resized to ``input_size`` and min-max scaled to [0, 1].
    """

    def __init__(self, data_folders : list,
                 batch_size,
                 input_size=IMG_SHAPE,
                 shuffle=True):
        """Collect the (noisy, clean) file pairs from every data folder.

        Args:
            data_folders: Folders that each contain ``noisy/`` and ``clean/`` trees.
            batch_size: Number of pairs per batch.
            input_size: (height, width, channels) of every yielded array.
            shuffle: Shuffle the pairs at construction and after every epoch.

        Raises:
            ValueError: If a folder has a different number of noisy and clean files.
        """
        super().__init__()

        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle

        self.file_paths = []
        for data_folder in data_folders:
            # glob returns files in arbitrary order, so sort both listings
            # before pairing them positionally.
            # NOTE(review): assumes noisy/ and clean/ use matching relative
            # file names so that sorted order lines the pairs up -- confirm.
            noisy_paths = sorted(glob(f"{data_folder}/noisy/*/*.npy"))
            clean_paths = sorted(glob(f"{data_folder}/clean/*/*.npy"))
            if len(noisy_paths) != len(clean_paths):
                raise ValueError(
                    f"{data_folder}: found {len(noisy_paths)} noisy files "
                    f"but {len(clean_paths)} clean files"
                )
            self.file_paths += list(zip(noisy_paths, clean_paths))

        # Shuffle once, after all folders are collected, and only on request.
        if self.shuffle:
            random.shuffle(self.file_paths)

        self.n = len(self.file_paths)

    def on_epoch_end(self):
        """Reshuffle the file pairs between epochs when shuffling is enabled."""
        if self.shuffle:
            random.shuffle(self.file_paths)

    def __load_image(self, path, target_size):
        """Load one ``.npy`` array as a ``target_size`` array scaled to [0, 1]."""
        image_arr = np.expand_dims(np.load(path), axis=-1)
        # Replicate the single channel target_size[-1] times.
        image_arr = np.dstack([image_arr for _ in range(target_size[-1])])
        image_arr = tf.image.resize(image_arr, (target_size[0], target_size[1])).numpy()

        # Scale over the whole array. An L2 normalization along the channel
        # axis would collapse every pixel to +-1/sqrt(channels), because the
        # channels are identical copies of each other.
        lowest = image_arr.min()
        value_range = image_arr.max() - lowest
        if value_range == 0:
            return np.zeros_like(image_arr)
        return (image_arr - lowest) / value_range

    def __get_data(self, file_path_batches):
        """Load the noisy inputs and clean targets for a list of path pairs."""
        X_batch = np.asarray([self.__load_image(pth[0], self.input_size) for pth in file_path_batches])
        y_batch = np.asarray([self.__load_image(pth[1], self.input_size) for pth in file_path_batches])
        return X_batch, y_batch

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        file_path_batches = self.file_paths[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = self.__get_data(file_path_batches)
        return X, y

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return self.n // self.batch_size


In [32]:
# Batched (noisy, clean) generators for the training and validation splits.
traingen = ClassificationDataGen(
    batch_size=8,
    data_folders=["data/train.part1"],
)
valgen = ClassificationDataGen(
    batch_size=8,
    data_folders=["data/val"],
)

In [36]:
class Denoise(Model):
    """Small convolutional autoencoder used for denoising.

    The encoder downsamples twice with stride-2 convolutions. The decoder
    upsamples twice with stride-2 transposed convolutions and ends with a
    sigmoid convolution that emits as many channels as the input has, so the
    output can be compared element-wise with a target of the same shape.
    """

    def __init__(self, input_shape):
        """Build the encoder and decoder stacks.

        Args:
            input_shape: (height, width, channels) of one input sample.
                Height and width should be divisible by 4 so the two stride-2
                stages restore the original spatial size.
        """
        super().__init__()
        self.encoder = tf.keras.Sequential([
            layers.Input(shape=input_shape),
            layers.Conv2D(16, (3, 3), activation='relu', padding='same', strides=2),
            layers.Conv2D(8, (3, 3), activation='relu', padding='same', strides=2)])

        self.decoder = tf.keras.Sequential([
            layers.Conv2DTranspose(8, kernel_size=3, strides=2, activation='relu', padding='same'),
            layers.Conv2DTranspose(16, kernel_size=3, strides=2, activation='relu', padding='same'),
            # Match the input channel count instead of relying on the loss to
            # broadcast a single output channel against multi-channel targets.
            layers.Conv2D(input_shape[-1], kernel_size=(3, 3), activation='sigmoid', padding='same')])

    def call(self, x):
        """Encode ``x`` and decode it back to the input shape."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

autoencoder = Denoise(input_shape=IMG_SHAPE)


In [37]:
# Train the autoencoder with Adam on mean-squared error, validating each epoch.
autoencoder.compile(loss=losses.MeanSquaredError(), optimizer='adam')
history = autoencoder.fit(
    traingen,
    validation_data=valgen,
    epochs=10,
    shuffle=True,
)


Epoch 1/10

KeyboardInterrupt: 