# Notebook to make DCUnet code run

### Data preparation from MS-SNSD dataset

In [99]:
import os
import shutil
import numpy as np
import pandas as pd

In [16]:
files_dir = '../datasets/raw/NoisySpeech_training'

In [17]:
files_list = os.listdir(files_dir) 

In [18]:
files_list

['clnsp244.wav',
 'clnsp268.wav',
 'clnsp197.wav',
 'clnsp32.wav',
 'clnsp182.wav',
 'clnsp395.wav',
 'clnsp233.wav',
 'clnsp125.wav',
 'clnsp402.wav',
 'clnsp537.wav',
 'clnsp223.wav',
 'clnsp6.wav',
 'clnsp262.wav',
 'clnsp376.wav',
 'clnsp110.wav',
 'clnsp494.wav',
 'clnsp292.wav',
 'clnsp411.wav',
 'clnsp578.wav',
 'clnsp55.wav',
 'clnsp589.wav',
 'clnsp308.wav',
 'clnsp599.wav',
 'clnsp335.wav',
 'clnsp344.wav',
 'clnsp248.wav',
 'clnsp358.wav',
 'clnsp81.wav',
 'clnsp593.wav',
 'clnsp112.wav',
 'clnsp192.wav',
 'clnsp270.wav',
 'clnsp387.wav',
 'clnsp78.wav',
 'clnsp564.wav',
 'clnsp251.wav',
 'clnsp531.wav',
 'clnsp169.wav',
 'clnsp468.wav',
 'clnsp40.wav',
 'clnsp240.wav',
 'clnsp273.wav',
 'clnsp289.wav',
 'clnsp134.wav',
 'clnsp437.wav',
 'clnsp271.wav',
 'clnsp318.wav',
 'clnsp542.wav',
 'clnsp394.wav',
 'clnsp333.wav',
 'clnsp178.wav',
 'clnsp95.wav',
 'clnsp555.wav',
 'clnsp69.wav',
 'clnsp521.wav',
 'clnsp73.wav',
 'clnsp356.wav',
 'clnsp386.wav',
 'clnsp206.wav',
 'clnsp

In [19]:
# rename in the way to drop the prefix

In [20]:
for count, filename in enumerate(os.listdir(files_dir)):
    os.rename(files_dir + '/' + filename, files_dir + '/' +  filename.split('_')[-1])

# Fix-length training samples off-line creation

In [270]:
def get_file_list_from_dir(datadir):
    all_files = os.listdir(os.path.abspath(datadir))
    data_files = list(filter(lambda file: file.endswith('.wav'), all_files))
    return data_files

In [271]:
noisy_raw_recordings_dir = '../datasets/raw/NoisySpeech_training/'
clean_raw_recordings_dir = '../datasets/raw/CleanSpeech_training/'

In [272]:
noisy_preprocessed_samples_dir = '../datasets/preprocessed/noisy/'
clean_preprocessed_samples_dir = '../datasets/preprocessed/clean/'

In [273]:
os.makedirs(noisy_preprocessed_samples_dir, exist_ok=True)
os.makedirs(clean_preprocessed_samples_dir, exist_ok=True )

In [274]:
def get_cut_padding(y, required_length, random_state, deterministic=False):

    if isinstance(y, list):
        audio_length = y[0].shape[-1]
    else:
        audio_length = y.shape[-1]

    if audio_length < required_length:
        if deterministic:
            pad_left = 0
        else:
            pad_left = random_state.randint(required_length - audio_length + 1)  # 0 ~ 50 random
        pad_right = required_length - audio_length - pad_left  # 50~ 0

        if isinstance(y, list):
            for i in range(len(y)):
                y[i] = F.pad(y[i], (pad_left, pad_right))
            audio_length = y[0].shape[-1]
        else:
            y = F.pad(y, (pad_left, pad_right))
            audio_length = y.shape[-1]

    if deterministic:
        audio_begin = 0
    else:
        audio_begin = random_state.randint(audio_length - required_length + 1)
    audio_end = required_length + audio_begin
    if isinstance(y, list):
        for i in range(len(y)):
            y[i] = y[i][..., audio_begin:audio_end]
    else:
        y = y[..., audio_begin:audio_end]
    return y

In [275]:
seed = 101
random_state = np.random.RandomState(seed)
required_length = 16384
samples_per_recording_second = 3
sample_rate = 16_000

In [276]:
import librosa, librosa.display
import soundfile as sf

In [277]:
#### cut -out training samples

In [278]:
files_list = get_file_list_from_dir(noisy_raw_recordings_dir)
for file_name in files_list:
    noisy_wave, sr = sf.read(noisy_raw_recordings_dir + file_name,  dtype="float32")
    clean_wave, sr = sf.read(clean_raw_recordings_dir + file_name,  dtype="float32")
    
    assert sr == sample_rate
        
    assert len(noisy_wave) == len(clean_wave)
    
    for sample_idx in range(0,samples_per_recording_second * floor(len(noisy_wave)/sample_rate)):
            
        noisy_sample, clean_sample = cut_padding([noisy_wave,clean_wave], required_length, random_state, deterministic=False)
        
        sf.write(noisy_preprocessed_samples_dir + file_name.split('.')[0] + "_" + \
                                 str(sample_idx) + ".wav", noisy_sample, sample_rate, subtype='FLOAT')
        sf.write(clean_preprocessed_samples_dir + file_name.split('.')[0] + "_" + \
                         str(sample_idx) + ".wav", clean_sample, sample_rate, subtype='FLOAT')


In [279]:
import scipy
scipy.io.wavfile.read("../datasets/preprocessed/clean/clnsp244_10.wav")

  scipy.io.wavfile.read("../datasets/preprocessed/clean/clnsp244_10.wav")


(16000,
 array([ 0.00079346, -0.01022339,  0.0090332 , ...,  0.00457764,
         0.00463867,  0.00482178], dtype=float32))

## train-test split

In [280]:
training_dataset_directory = '../datasets/'


In [281]:
from math import floor

def get_training_and_testing_sets(file_list):
    split = 0.7
    split_index = floor(len(file_list) * split)
    training = file_list[:split_index]
    testing = file_list[split_index:]
    return training, testing

In [282]:
files_list = get_file_list_from_dir(noisy_preprocessed_samples_dir)


In [283]:
training, testing = get_training_and_testing_sets(files_list)

In [284]:
for case in ['train', 'test']:
    for xy in ['clean', 'noisy']:
        dir_path = training_dataset_directory + case + "_" + xy
        os.makedirs(dir_path, exist_ok=True)              

In [285]:
for f in training:
    shutil.copy(noisy_preprocessed_samples_dir + f, training_dataset_directory + 'train_noisy')
    shutil.copy(clean_preprocessed_samples_dir + f, training_dataset_directory + 'train_clean')
    

In [286]:
for f in testing:
    shutil.copy(noisy_preprocessed_samples_dir + f, training_dataset_directory + 'test_noisy')
    shutil.copy(clean_preprocessed_samples_dir + f, training_dataset_directory + 'test_clean')

### testing data generator module

In [244]:
import sys
sys.path.insert(0,'..')

In [245]:
import model_data

In [246]:
train_noisy_path = '../datasets/train_noisy/'
train_clean_path = '../datasets/train_clean/'
batch_size = 64

In [247]:
train_arguments = {"inputs_ids" : os.listdir(train_noisy_path), 
                    "outputs_ids" : os.listdir(train_clean_path),
                    "inputs_dir" : train_noisy_path, 
                    "outputs_dir" : train_clean_path,
                    "batch_size" : batch_size}

In [248]:
train_generator = model_data.datagenerator(**train_arguments)

In [249]:
train_generator.indexes

array([12433,  2796,  1352, ...,  3851,  1953, 11318])

In [250]:
for index, (train_noisy_speech, train_clean_speech) in enumerate(train_generator):
    
    break


In [251]:
train_noisy_speech

array([[[ 1395],
        [ 1387],
        [ 1328],
        ...,
        [ -212],
        [ -323],
        [ -373]],

       [[   26],
        [    2],
        [   -3],
        ...,
        [ 1290],
        [ 1468],
        [ 1499]],

       [[   95],
        [  112],
        [ -229],
        ...,
        [  464],
        [  105],
        [  336]],

       ...,

       [[  753],
        [  857],
        [  710],
        ...,
        [   71],
        [ 1039],
        [ -206]],

       [[ 1783],
        [ 1861],
        [ 1881],
        ...,
        [-3100],
        [-3864],
        [-4543]],

       [[ -486],
        [ -182],
        [ -117],
        ...,
        [ -425],
        [-1263],
        [-1487]]], dtype=int16)

In [186]:
self = train_generator

In [187]:
index = 0

In [188]:
indexes = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]


In [189]:
indexes

array([11927,  4726,  1582,  4723,  6875,  1241, 11502,  4009, 13296,
       10718,  7826, 11918,  9261,  5333, 12557,  1427,  8381,  5206,
        5413,  7640,  4040, 12924,  6295, 10386,  9805,  4601,  8201,
        2535, 13150,  8779,   361, 11524, 13605,  5467,  3515,  8378,
        2340,  4742,  8084,  6673,  7496, 12773,  4079,  5751,  4088,
        6272, 11019, 10042,  2167,  7547,  6125,  3002,   931,  9359,
       12076, 12376,  2560, 13298,  9202,  8794,  4886, 12801, 12506,
        8744])

In [190]:
inputs_batch_ids  = [self.inputs_ids[k] for k in indexes]
outputs_batch_ids = [self.outputs_ids[k] for k in indexes]

In [191]:
inputs_batch_ids

['clnsp3_28.wav',
 'clnsp211_24.wav',
 'clnsp500_28.wav',
 'clnsp166_16.wav',
 'clnsp174_32.wav',
 'clnsp485_22.wav',
 'clnsp125_5.wav',
 'clnsp86_15.wav',
 'clnsp466_11.wav',
 'clnsp469_20.wav',
 'clnsp587_4.wav',
 'clnsp503_19.wav',
 'clnsp173_7.wav',
 'clnsp280_24.wav',
 'clnsp431_14.wav',
 'clnsp58_8.wav',
 'clnsp118_24.wav',
 'clnsp153_26.wav',
 'clnsp110_1.wav',
 'clnsp585_3.wav',
 'clnsp299_28.wav',
 'clnsp327_16.wav',
 'clnsp17_18.wav',
 'clnsp239_14.wav',
 'clnsp380_31.wav',
 'clnsp338_20.wav',
 'clnsp42_3.wav',
 'clnsp346_7.wav',
 'clnsp362_20.wav',
 'clnsp490_32.wav',
 'clnsp71_3.wav',
 'clnsp233_8.wav',
 'clnsp256_32.wav',
 'clnsp7_15.wav',
 'clnsp45_10.wav',
 'clnsp158_4.wav',
 'clnsp363_5.wav',
 'clnsp113_22.wav',
 'clnsp308_17.wav',
 'clnsp364_23.wav',
 'clnsp556_14.wav',
 'clnsp571_8.wav',
 'clnsp288_4.wav',
 'clnsp338_21.wav',
 'clnsp481_23.wav',
 'clnsp273_4.wav',
 'clnsp413_29.wav',
 'clnsp208_16.wav',
 'clnsp126_18.wav',
 'clnsp236_8.wav',
 'clnsp200_4.wav',
 'clnsp

In [192]:
outputs_batch_ids

['clnsp3_28.wav',
 'clnsp211_24.wav',
 'clnsp500_28.wav',
 'clnsp166_16.wav',
 'clnsp174_32.wav',
 'clnsp485_22.wav',
 'clnsp125_5.wav',
 'clnsp86_15.wav',
 'clnsp466_11.wav',
 'clnsp469_20.wav',
 'clnsp587_4.wav',
 'clnsp503_19.wav',
 'clnsp173_7.wav',
 'clnsp280_24.wav',
 'clnsp431_14.wav',
 'clnsp58_8.wav',
 'clnsp118_24.wav',
 'clnsp153_26.wav',
 'clnsp110_1.wav',
 'clnsp585_3.wav',
 'clnsp299_28.wav',
 'clnsp327_16.wav',
 'clnsp17_18.wav',
 'clnsp239_14.wav',
 'clnsp380_31.wav',
 'clnsp338_20.wav',
 'clnsp42_3.wav',
 'clnsp346_7.wav',
 'clnsp362_20.wav',
 'clnsp490_32.wav',
 'clnsp71_3.wav',
 'clnsp233_8.wav',
 'clnsp256_32.wav',
 'clnsp7_15.wav',
 'clnsp45_10.wav',
 'clnsp158_4.wav',
 'clnsp363_5.wav',
 'clnsp113_22.wav',
 'clnsp308_17.wav',
 'clnsp364_23.wav',
 'clnsp556_14.wav',
 'clnsp571_8.wav',
 'clnsp288_4.wav',
 'clnsp338_21.wav',
 'clnsp481_23.wav',
 'clnsp273_4.wav',
 'clnsp413_29.wav',
 'clnsp208_16.wav',
 'clnsp126_18.wav',
 'clnsp236_8.wav',
 'clnsp200_4.wav',
 'clnsp

In [114]:
import natsort
inputs_batch_ids  = natsort.natsorted(inputs_batch_ids, reverse = False)
outputs_batch_ids = natsort.natsorted(outputs_batch_ids, reverse = False)


In [115]:
inputs_batch_ids

['clnsp4.wav',
 'clnsp26.wav',
 'clnsp27.wav',
 'clnsp34.wav',
 'clnsp53.wav',
 'clnsp69.wav',
 'clnsp73.wav',
 'clnsp74.wav',
 'clnsp75.wav',
 'clnsp80.wav',
 'clnsp81.wav',
 'clnsp89.wav',
 'clnsp107.wav',
 'clnsp123.wav',
 'clnsp129.wav',
 'clnsp138.wav',
 'clnsp159.wav',
 'clnsp164.wav',
 'clnsp166.wav',
 'clnsp180.wav',
 'clnsp183.wav',
 'clnsp197.wav',
 'clnsp201.wav',
 'clnsp203.wav',
 'clnsp225.wav',
 'clnsp226.wav',
 'clnsp263.wav',
 'clnsp267.wav',
 'clnsp271.wav',
 'clnsp277.wav',
 'clnsp290.wav',
 'clnsp299.wav',
 'clnsp336.wav',
 'clnsp340.wav',
 'clnsp345.wav',
 'clnsp356.wav',
 'clnsp360.wav',
 'clnsp364.wav',
 'clnsp366.wav',
 'clnsp368.wav',
 'clnsp376.wav',
 'clnsp380.wav',
 'clnsp388.wav',
 'clnsp392.wav',
 'clnsp393.wav',
 'clnsp395.wav',
 'clnsp401.wav',
 'clnsp416.wav',
 'clnsp435.wav',
 'clnsp440.wav',
 'clnsp441.wav',
 'clnsp445.wav',
 'clnsp449.wav',
 'clnsp452.wav',
 'clnsp465.wav',
 'clnsp493.wav',
 'clnsp500.wav',
 'clnsp515.wav',
 'clnsp522.wav',
 'clnsp548

In [116]:
inputs_list = list()
output_list = list()

In [117]:
import scipy

In [118]:
inputs

array([-115, -134, -120, ...,  -10,  -18,  -39], dtype=int16)

In [120]:
# inputs_path  = os.path.join(self.inputs_dir + inputs)


In [121]:
scipy.io.wavfile.read(inputs_path)

(48000, array([-115, -134, -120, ...,  -10,  -18,  -39], dtype=int16))

In [79]:
import numpy as np
for inputs, outputs in zip(inputs_batch_ids, outputs_batch_ids):

    x, y = self.__data_generation__(inputs, outputs)
    print("shape x: ", x.shape)
    # x = tf.zeros([16384] - tf.shape(x), dtype=tf.float32)
    # y = tf.zeros([16384] - tf.shape(y), dtype=tf.float32)

    x = np.reshape(x, (16384, 1))
    y = np.reshape(y, (16384, 1))

    inputs_list.append(x)
    output_list.append(y)

shape x:  (175211,)


ValueError: cannot reshape array of size 175211 into shape (16384,1)

In [80]:
x

array([128, 211, 201, ..., 267, 170, 541], dtype=int16)

In [88]:
inputs_path = '/home/jerzy/filler-words-rm/data/clean_trainset_28spk_wav/p226_001.wav'

In [92]:
 _, inputs  = scipy.io.wavfile.read(inputs_path,)

In [93]:
 _, inputs 

(48000, array([-115, -134, -120, ...,  -10,  -18,  -39], dtype=int16))

In [95]:
inputs.shape

(109440,)

In [98]:
inputs.shape[0]/3

36480.0

In [90]:
# return sampling_rate, sound (smapling_rate = 16000)
_, x  = scipy.io.wavfile.read(inputs_path)
_, y = scipy.io.wavfile.read(inputs_path)

In [91]:
# x, y = self.__data_generation__(inputs, outputs)
print("shape x: ", x.shape)
# x = tf.zeros([16384] - tf.shape(x), dtype=tf.float32)
# y = tf.zeros([16384] - tf.shape(y), dtype=tf.float32)

x = np.reshape(x, (16384, 1))
y = np.reshape(y, (16384, 1))

inputs_list.append(x)
output_list.append(y)

shape x:  (109440,)


ValueError: cannot reshape array of size 109440 into shape (16384,1)

In [195]:
import tensorflow as tf


In [198]:
x = tf.constant([1.8, 2.2], dtype=tf.float32)
x = tf.cast(x, tf.int32)

In [199]:
x

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>