In [None]:
import os
import glob
import librosa
import random
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
import time
from pydub import AudioSegment
from scipy.special import comb
%matplotlib inline


# Directories, all resolved relative to the notebook's working directory:
#   source_dir  - input .wav clips
#   sample_dir  - generated mixed .wav samples
#   fdomain_dir - saved STFT arrays (.npy)
work_dir = os.getcwd()
source_dir = os.path.join(work_dir, "audio_data")
sample_dir = os.path.join(work_dir, "samples")
fdomain_dir = os.path.join(work_dir, "fdomain")

# Create the output directories up front. exist_ok=True keeps the cell safe to
# re-run and avoids the check-then-create race of os.path.exists + os.mkdir.
for out_dir in (sample_dir, fdomain_dir):
    os.makedirs(out_dir, exist_ok=True)


In [None]:
def LoadAudioFile(wave_names, sounds):
    """Load every ``*.wav`` file found directly under ``source_dir``.

    Files are processed in sorted path order. For each file the decoded
    ``AudioSegment`` is appended to ``sounds`` and the part of the file's base
    name before its first ``.`` is appended to ``wave_names``; both lists are
    mutated in place and stay index-aligned.

    Args:
        wave_names: list that receives one label per loaded file.
        sounds: list that receives one ``AudioSegment`` per loaded file.

    Returns:
        The largest ``len()`` of any loaded segment, or 0 when no file matched.
        (pydub reports ``len()`` of a segment in milliseconds, so the
        "samples" wording in the log line is loose.)
    """
    longest = 0
    for path in sorted(glob.glob(os.path.join(source_dir, "*.wav"))):
        clip = AudioSegment.from_file(path, format="wav")
        sounds.append(clip)
        base = os.path.basename(path)
        # The label is everything before the first dot of the file name.
        wave_names.append(base.split('.', 1)[0])
        print("wave: %s loaded %d samples" % (base, len(clip)))
        if len(clip) > longest:
            longest = len(clip)
    return longest

def GenerateRandomMixedSamples(wave_names_in, sounds_in, num, length = 6000):
    """Write ``num`` random mixes for every non-empty combination of sources.

    All existing ``*.wav`` files in ``sample_dir`` are deleted first. Then, for
    each non-empty subset of the input clips, ``num`` mixes are produced:

    * start from a silent canvas of ``2 * length``;
    * overlay each selected clip at a random position in ``[0, len(clip))``
      with ``loop=True``;
    * keep only the last ``length`` of the canvas;
    * apply a random integer gain drawn from ``[-10, 9]``;
    * export to ``sample_dir`` as ``<nameA>-<nameB>-..._<j>.wav``.

    Args:
        wave_names_in: labels of the source clips, index-aligned with
            ``sounds_in``.
        sounds_in: source ``AudioSegment`` objects.
        num: number of mixes to write per combination.
        length: duration of each exported mix, in pydub duration units
            (milliseconds).

    Raises:
        AssertionError: if the two input lists differ in length.
        ValueError: if a selected clip has zero length (``randrange`` is asked
            for a value from an empty range).
    """
    # Remove samples left over from a previous run.
    for stale in glob.glob(os.path.join(sample_dir, "*.wav")):
        os.remove(stale)

    assert len(wave_names_in) == len(sounds_in)
    src_wav_cnt = len(wave_names_in)

    # Explode the mixing combinations: bit k of `i` selects source k, so
    # i = 1 .. 2**n - 1 enumerates every non-empty subset.
    total_combi = 2 ** src_wav_cnt
    for i in range(1, total_combi):
        selected = [k for k in range(src_wav_cnt) if i & (1 << k)]
        # Use the labels passed in by the caller, not the notebook globals.
        name = "-".join(wave_names_in[k] for k in selected)
        for j in range(num):
            mix = AudioSegment.silent(duration=length * 2)
            for k in selected:
                start = random.randrange(0, len(sounds_in[k]))
                mix = mix.overlay(sounds_in[k], position=start, loop=True)
            # Keep only the tail of the canvas.
            mix = mix[-length:]
            # randrange needs integer bounds; float arguments raise TypeError
            # on Python 3.12+. The drawn values (-10..9) are unchanged.
            gain = random.randrange(-10, 10)
            mix = mix.apply_gain(gain)
            mix.export(os.path.join(sample_dir, name) + "_" + str(j) + ".wav", format="wav")

def GenerateFrequencyDomainSample(source_dir, dest_dir):
    """Compute and save the STFT of every ``*.wav`` file in ``source_dir``.

    All existing ``*.npy`` files in ``dest_dir`` are deleted first. Each wave
    file is then decoded with ``librosa.load`` (library defaults), transformed
    with ``librosa.stft`` (library defaults) and saved with ``np.save`` under
    ``dest_dir`` using the wave file's base name without its extension
    (``np.save`` appends ``.npy``). The saved path stem and the array shape are
    printed per file, followed by the total elapsed wall-clock time.

    Args:
        source_dir: directory containing the input ``.wav`` files.
        dest_dir: directory that receives the ``.npy`` STFT arrays.
    """
    # Clean out results from a previous run.
    for stale in glob.glob(os.path.join(dest_dir, "*.npy")):
        os.remove(stale)

    # time.clock() was removed in Python 3.8; perf_counter is its replacement
    # for measuring elapsed time.
    start = time.perf_counter()
    for file_name in sorted(glob.glob(os.path.join(source_dir, "*.wav"))):
        # The waveform must be loaded before it can be transformed; the sample
        # rate returned alongside it is not needed here.
        y, _sr = librosa.load(file_name)
        D = librosa.stft(y)
        fdomain_file_name = os.path.join(dest_dir, os.path.splitext(os.path.basename(file_name))[0])
        np.save(fdomain_file_name, D)
        print(fdomain_file_name, D.shape)
    print("time elapse: %.2f s" % (time.perf_counter() - start))
    
# not used
def GenerateSample(wave_names_in, sounds_in, num_samples):
    """Export ``num_samples`` randomly rotated copies of each source clip.

    Each clip is concatenated with itself, and a window as long as the original
    clip is cut from that doubled audio starting at a random offset in
    ``[0, len(clip))``. The window is written to ``sample_dir`` as
    ``<wave_name>-<j>.wav``.

    Args:
        wave_names_in: labels of the source clips, index-aligned with
            ``sounds_in``.
        sounds_in: source ``AudioSegment`` objects.
        num_samples: number of files to write per clip.

    Raises:
        AssertionError: if the two input lists differ in length.
    """
    assert len(wave_names_in) == len(sounds_in)
    for wave_name, clip in zip(wave_names_in, sounds_in):
        clip_len = len(clip)
        # Doubling the clip lets a full-length window start at any offset.
        doubled = clip + clip
        for idx in range(num_samples):
            offset = random.randrange(0, clip_len)
            out_path = os.path.join(sample_dir, wave_name + "-" + str(idx) + ".wav")
            doubled[offset:offset + clip_len].export(out_path, format="wav")
    return

#not used
def ExplodeCombination(wave_names_in, sounds_in):
    """Build one overlaid mix for every non-empty combination of sources.

    Each mix starts as a silent canvas as long as the longest input clip, onto
    which every selected clip is overlaid at the default position. The mix's
    label is the selected clips' labels joined with ``-``.

    Args:
        wave_names_in: labels of the source clips, index-aligned with
            ``sounds_in``.
        sounds_in: source ``AudioSegment`` objects.

    Returns:
        A ``(wave_names_out, samples_out)`` pair of index-aligned lists with
        ``2**n - 1`` entries for ``n`` inputs (both empty when ``n`` is 0).

    Raises:
        AssertionError: if the two input lists differ in length.
    """
    assert len(wave_names_in) == len(sounds_in)
    src_cnt = len(wave_names_in)
    # Derive the canvas length from the arguments rather than from the
    # notebook-level `max_length`, so the function works on whatever it is given.
    canvas_len = max((len(s) for s in sounds_in), default=0)

    wave_names_out = []
    samples_out = []
    # Bit k of `mask` selects source k; 1 .. 2**n - 1 covers all non-empty subsets.
    for mask in range(1, 2 ** src_cnt):
        picked = [k for k in range(src_cnt) if mask & (1 << k)]
        mix = AudioSegment.silent(duration=canvas_len)
        for k in picked:
            mix = mix.overlay(sounds_in[k])
        wave_names_out.append("-".join(wave_names_in[k] for k in picked))
        samples_out.append(mix)
    return wave_names_out, samples_out


In [None]:
# Seed without an explicit value, so each run draws different random mixes.
random.seed()

# Clip labels and decoded audio; LoadAudioFile fills both lists in place and
# returns the length of the longest clip.
wave_names, sounds = [], []
max_length = LoadAudioFile(wave_names, sounds)
print("max_length=%d" % max_length)

# Write two random mixes for every non-empty combination of the loaded clips.
GenerateRandomMixedSamples(wave_names, sounds, num=2)

# Convert every generated mix into a saved frequency-domain (STFT) array.
GenerateFrequencyDomainSample(sample_dir, fdomain_dir)

In [None]:
# load and plot and check if they match
import time
import IPython

# time.clock() was removed in Python 3.8; perf_counter measures elapsed time.
start = time.perf_counter()
files = sorted(glob.glob(os.path.join(fdomain_dir, "*.npy")))
file_cnt = len(files)
plt.figure(figsize=(40,60))
for i, file_name in enumerate(files):
    D_loaded = np.load(file_name)
    # The saved STFT is complex, so take the magnitude before squaring to get
    # power. power_to_db(..., ref=np.max) replaces the removed
    # librosa.logamplitude(..., ref_power=np.max).
    log_power_loaded = librosa.power_to_db(np.abs(D_loaded) ** 2, ref=np.max)
    plt.subplot(file_cnt, 1, i + 1)
    librosa.display.specshow(log_power_loaded, x_axis="time", y_axis='log')
    # Label each panel so it can be matched to the sample it came from.
    plt.title(os.path.basename(file_name))
plt.show()
print("time elapse: %.2f s" % (time.perf_counter() - start))