In [1]:
from IPython.display import Audio
import soundfile as sf
import sounddevice as sd
import librosa

import os
import glob
import random
import scipy.io

from database import Sample
import pickle

In [2]:
# Root directory that the loading cell below walks (os.walk) looking for .flac files.
path = "raw_data/clean_speech"

In [3]:
# max_dur = 32.645
# min_dur = 1.445

# Walk `path` and, for every .flac file strictly longer than `t` seconds, keep
# its first `t` seconds plus the file name without the extension.
t = 5
speech_data, speech_tags = [], []

for root, dirs, files in os.walk(path):
    for file in files:
        if not file.endswith(".flac"):
            continue

        filepath = f"{root}/{file}"
        speech, sr = sf.read(filepath)

        # Number of samples that make up `t` seconds at this file's own rate.
        n_samples = t * sr
        dur = len(speech) / sr
        if dur <= t:
            continue

        # Truncate so each stored clip is exactly `t * sr` samples long.
        speech_data.append(speech[:n_samples])
        speech_tags.append(file.removesuffix(".flac"))

print(f"There are {len(speech_data)} clean speech files longer than {t} seconds.")


There are 1585 clean speech files longer than 5 seconds.


In [10]:
# play with sounddevice
# NOTE(review): the 16000 Hz playback rate is hard-coded; presumably it matches
# the sample rate of the .flac files loaded above — confirm.
sd.play(speech_data[2], 16000)

ALSA lib pcm.c:8306:(snd_pcm_recover) underrun occurred
ALSA lib pcm.c:8306:(snd_pcm_recover) underrun occurred


In [53]:
# play with IPython
# NOTE(review): rate=16000 is hard-coded; presumably the sample rate of the
# loaded speech clips — confirm.
Audio(speech_data[0], rate=16000)

In [4]:
# Collect one random `t`-second excerpt from every .wav file under `noise_path`
# that is at least `t` seconds long, converted to a single channel at
# `target_sr`, together with the file name without the extension.
noise_path = "raw_data/noise"

t = 5
target_sr = 16000  # same rate the playback cells in this notebook use
noise_data = []
noise_tags = []

for root, dirs, files in os.walk(noise_path):
    for file in files:
        if not file.endswith(".wav"):
            continue

        filepath = os.path.join(root, file)

        # Read only the header: this gives the exact frame count and the file's
        # real sample rate instead of assuming 48 kHz.
        info = sf.info(filepath)
        n_samples = t * info.samplerate

        if info.frames < n_samples:
            continue

        # Pick a random window that lies completely inside the file.
        # The window is not reproducible unless `random.seed(...)` is called first.
        init_sample = random.randint(0, info.frames - n_samples)
        noise, sr = sf.read(filepath, start=init_sample, frames=n_samples)

        # Multi-channel files come back as (frames, channels); keep the first
        # channel. Mono files are already one-dimensional.
        if noise.ndim > 1:
            noise = noise[:, 0]

        # Proper band-limited resampling; plain slicing (noise[::3]) would fold
        # everything above the new Nyquist frequency back into the signal.
        if sr != target_sr:
            noise = librosa.resample(noise, orig_sr=sr, target_sr=target_sr)

        noise_data.append(noise)
        noise_tags.append(file.removesuffix(".wav"))

print(f"There are {len(noise_data)} noise files longer than {t} seconds.")

There are 16 noise files longer than 5 seconds.


In [5]:
# Pair every clean speech clip with every noise clip, wrap each pair in a
# Sample named "<speech_tag>-<noise_tag>" and pickle it to data/<name>.pkl.
os.makedirs("data", exist_ok=True)

for speech_tag, speech in zip(speech_tags, speech_data):
    for noise_tag, noise in zip(noise_tags, noise_data):

        name = speech_tag + "-" + noise_tag
        sample_obj = Sample(speech, noise, name)

        # Use a context manager so the handle is closed after every dump;
        # this loop writes one file per speech/noise combination.
        with open("data/" + name + ".pkl", "wb") as file:
            pickle.dump(sample_obj, file)
        print(f"{name} is dumped")

        del sample_obj


3536-23268-0024-REVERB-POOL-2 is dumped
3536-23268-0024-REVERB-POOL-1 is dumped
3536-23268-0024-REVERB-CARPARK-2 is dumped
3536-23268-0024-REVERB-CARPARK-1 is dumped
3536-23268-0024-STREET-CITY-1 is dumped
3536-23268-0024-STREET-CITY-2 is dumped
3536-23268-0024-STREET-KG-1 is dumped
3536-23268-0024-STREET-KG-2 is dumped
3536-23268-0024-HOME-LIVINGB-2 is dumped
3536-23268-0024-HOME-LIVINGB-1 is dumped
3536-23268-0024-HOME-KITCHEN-2 is dumped
3536-23268-0024-HOME-KITCHEN-1 is dumped
3536-23268-0024-CAR-WINUPB-2 is dumped
3536-23268-0024-CAR-WINDOWNB-2 is dumped
3536-23268-0024-CAR-WINDOWNB-1 is dumped
3536-23268-0024-CAR-WINUPB-1 is dumped
3536-23268-0016-REVERB-POOL-2 is dumped
3536-23268-0016-REVERB-POOL-1 is dumped
3536-23268-0016-REVERB-CARPARK-2 is dumped
3536-23268-0016-REVERB-CARPARK-1 is dumped
3536-23268-0016-STREET-CITY-1 is dumped
3536-23268-0016-STREET-CITY-2 is dumped
3536-23268-0016-STREET-KG-1 is dumped
3536-23268-0016-STREET-KG-2 is dumped
3536-23268-0016-HOME-LIVINGB-2 i

In [30]:
# Convert up to 50 pickled samples into MATLAB .mat files under data/mat_data/.
# glob returns files in arbitrary order, so which 50 files are picked can vary.
pickle_files = glob.glob("data/*.pkl")
pickle_files = pickle_files[:50]

# savemat does not create missing directories.
os.makedirs("data/mat_data", exist_ok=True)

for pickle_file in pickle_files:
    # NOTE: unpickling can execute arbitrary code; only load files produced by
    # this notebook.
    with open(pickle_file, "rb") as file:
        sample_obj = pickle.load(file)

    # The pickle handle is closed before the .mat file is written.
    scipy.io.savemat(
        "data/mat_data/" + sample_obj.name + ".mat",
        mdict={
            "speech": sample_obj.speech,
            "noise": sample_obj.noise,
            "name": sample_obj.name,
        },
    )

    print(f"{sample_obj.name} saved")

1993-147966-0001-HOME-LIVINGB-1 saved
3170-137482-0035-HOME-KITCHEN-2 saved
5536-43358-0018-CAR-WINUPB-1 saved
5895-34615-0014-REVERB-POOL-1 saved
1272-135031-0024-HOME-KITCHEN-1 saved
6345-93302-0005-STREET-KG-1 saved
5694-64038-0017-CAR-WINDOWNB-1 saved
84-121123-0025-CAR-WINUPB-1 saved
5694-64029-0016-REVERB-POOL-2 saved
1988-148538-0008-REVERB-CARPARK-2 saved
1462-170138-0023-REVERB-CARPARK-2 saved
3170-137482-0030-HOME-LIVINGB-2 saved
3576-138058-0018-STREET-KG-2 saved
251-136532-0011-CAR-WINDOWNB-2 saved
7850-111771-0007-CAR-WINUPB-1 saved
2277-149897-0002-CAR-WINUPB-2 saved
3536-8226-0004-HOME-LIVINGB-2 saved
174-50561-0001-HOME-KITCHEN-1 saved
3170-137482-0043-STREET-KG-1 saved
3536-23268-0023-HOME-KITCHEN-1 saved
6319-275224-0019-HOME-KITCHEN-1 saved
6313-66125-0022-HOME-LIVINGB-1 saved
2078-142845-0015-STREET-CITY-1 saved
8842-302196-0002-CAR-WINDOWNB-1 saved
1272-141231-0027-HOME-KITCHEN-2 saved
2035-147961-0011-CAR-WINUPB-2 saved
84-121550-0030-HOME-LIVINGB-2 saved
2428-837

In [8]:
# Load one previously dumped sample back from disk for inspection.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open("data/84-121123-0002-STREET-CITY-1.pkl", "rb") as file:
    sample_obj = pickle.load(file)

In [13]:
# Play the noise stored in the loaded sample.
# NOTE(review): 16000 Hz is hard-coded; presumably the rate the noise was
# converted to when the sample was built — confirm.
sd.play(sample_obj.noise, 16000)

ALSA lib pcm.c:8306:(snd_pcm_recover) underrun occurred
ALSA lib pcm.c:8306:(snd_pcm_recover) underrun occurred


In [None]:
# Build a DataBase with argument 5. The executed copy of this cell further down
# printed "5 pickle files are loaded.", so the argument presumably limits how
# many pickle files are read — confirm against database.py.
from database import DataBase

db = DataBase(5)
# del db

In [None]:
# Print the name of every sample held by the database.
for sample in db.samples:
    print(sample.name)

In [10]:
# NOTE(review): this cell duplicates the unexecuted DataBase cell above; one of
# the two can probably be removed.
# Build a DataBase with argument 5; the recorded output of this cell is
# "5 pickle files are loaded.".
from database import DataBase

db = DataBase(5)
# del db

5 pickle files are loaded.


In [16]:
# NOTE(review): duplicate of the unexecuted name-listing cell above.
# Print the name of every sample held by the database.
for sample in db.samples:
    print(sample.name)

1993-147966-0001-HOME-LIVINGB-1
3170-137482-0035-HOME-KITCHEN-2
5536-43358-0018-CAR-WINUPB-1
5895-34615-0014-REVERB-POOL-1
1272-135031-0024-HOME-KITCHEN-1
