This notebook creates one long file in which the Fourier-transform (FT) magnitudes of each clip are transposed and concatenated into a single long series of samples.

It is designed for a consumer that simply grabs a window of samples from this series when it needs one, instead of reading pre-built frames, because storing prepared (overlapping) frames creates massive data duplication.

In [1]:
import numpy as np
import librosa
import librosa.display
from IPython.display import Audio, display
import numpy as np

import os
import time
import cmath

In [2]:
# Get shared constants and functions
%run "NN Audio Core.py"

In [3]:
# Number of all-zero sample rows written after each accepted clip so that
# consecutive clips are separated in the concatenated series (0 disables padding)
PAD_SIZE = 50

In [6]:
def frames_file(data_path, max_samples):
    """Return the path of the frames data file stored under ``data_path``.

    The file name encodes the SAMPLE_BINS constant and ``max_samples`` so
    that data sets built with different settings do not overwrite each other.
    NOTE: the separator is a hard-coded Windows backslash.
    """
    suffix = f"\\fsamples-S{SAMPLE_BINS}-{max_samples}"
    return os.fsdecode(data_path + suffix)

def targets_file(data_path, max_samples):
    """Return the path of the targets data file stored under ``data_path``.

    The file name encodes the SAMPLE_BINS constant and ``max_samples`` so
    that data sets built with different settings do not overwrite each other.
    NOTE: the separator is a hard-coded Windows backslash.
    """
    suffix = f"\\ftargets-S{SAMPLE_BINS}-{max_samples}"
    return os.fsdecode(data_path + suffix)

# Iterate over clean & noisy folders to create frames and targets
# Updated to select clips based on difference between clean & noisy versions
def create_data(wav_root, data_path, max_samples=10000, min_diff=70, max_diff=300, mmap=True):
    """Build aligned noisy ("frames") and clean ("targets") sample arrays.

    Every file name found in ``<wav_root>\\Clean\\`` is looked up under the
    same name in ``<wav_root>\\Noisy\\``. A clip pair is used only when
    ``diff_ft(noisy_ft, clean_ft)`` lies strictly between ``min_diff`` and
    ``max_diff``. Accepted clips are appended row by row, each followed by
    up to PAD_SIZE all-zero rows, until ``max_samples`` rows are filled or
    the clips run out.

    Args:
        wav_root: Folder containing the ``Clean`` and ``Noisy`` sub-folders
            (paths are built with Windows-style backslashes).
        data_path: Folder in which the memmap files are created
            (only used when ``mmap`` is true).
        max_samples: Total number of rows in each returned array.
        min_diff: Exclusive lower bound on the clean/noisy difference.
        max_diff: Exclusive upper bound on the clean/noisy difference.
        mmap: If true, back the arrays with ``np.memmap`` files named by
            ``frames_file`` / ``targets_file``; otherwise use in-memory arrays.

    Returns:
        ``(frames, targets)``, both of shape ``(max_samples, SAMPLE_BINS)``
        and dtype DTYPE. Rows past the last written sample are zero.
    """
    clean_dir = wav_root + "\\Clean\\"
    noisy_dir = wav_root + "\\Noisy\\"
    shape = (max_samples, SAMPLE_BINS)
    if mmap:
        # mode='w+' creates (or overwrites) the file; new files are zero-filled.
        frames = np.memmap(frames_file(data_path, max_samples), mode='w+', dtype=DTYPE, shape=shape)
        targets = np.memmap(targets_file(data_path, max_samples), mode='w+', dtype=DTYPE, shape=shape)
    else:
        # Zero-initialise so unwritten rows match the memmap case instead of
        # exposing uninitialised memory (np.empty) when the clips run out early.
        frames = np.zeros(shape, dtype=DTYPE)
        targets = np.zeros(shape, dtype=DTYPE)

    sample_index = 0
    # os.listdir returns entries in arbitrary order; sort so that the same
    # clips are selected on every run and on every platform.
    for file in sorted(os.listdir(clean_dir)):
        if sample_index >= max_samples:
            break
        noisy_path = noisy_dir + file
        clean_path = clean_dir + file
        noisy_ft = get_ft_from_file(noisy_path)
        clean_ft = get_ft_from_file(clean_path)
        diff = diff_ft(noisy_ft, clean_ft)
        if not ((diff > min_diff) and (diff < max_diff)):
            continue

        new_frames = get_samples(noisy_path)
        new_targets = get_samples(clean_path)
        # Copy only as many rows as BOTH clips provide and as still fit, so
        # frames and targets stay row-aligned and the index never overshoots.
        step = min(new_frames.shape[0], new_targets.shape[0], max_samples - sample_index)
        frames[sample_index:sample_index + step, :] = new_frames[:step, :]
        targets[sample_index:sample_index + step, :] = new_targets[:step, :]
        sample_index += step

        # Separate this clip from the next one with explicit zero rows.
        pad_width = min(PAD_SIZE, max_samples - sample_index)
        if pad_width > 0:
            frames[sample_index:sample_index + pad_width, :] = 0
            targets[sample_index:sample_index + pad_width, :] = 0
            sample_index += pad_width

    print("Reached sample # " + str(sample_index))
    if mmap:
        # Make sure everything written so far reaches the backing files.
        frames.flush()
        targets.flush()
    return frames, targets

In [9]:
# Build the memory-mapped frames/targets files (mmap=True) under f:\Audiodata.
# Alternative wav_root values:
# small test data "Assets\\DataShareArchive\\Test"
# 28K "F:\\Audiodata\\Train28Spk"
f, t = create_data("F:\\Audiodata\\Train28Spk", "f:\\Audiodata", max_samples=10000, min_diff=30, max_diff=250, mmap=True)

Reached sample # 10000


In [30]:
# Drop our references to the arrays returned by create_data so that, when they
# are memmaps, the backing files can be released if we need to
del f, t