In [1]:
import os
import numpy as np
import pandas as pd
from scipy.io import wavfile
from keras.utils.np_utils import to_categorical
from tqdm import tqdm
from utils import which_set, BASE_TRAIN_FOLDER_SPEC, BASE_TRAIN_FOLDER_WAV, labels_to_ints

Using TensorFlow backend.


In [6]:
# One bucket per dataset split: "x" accumulates the loaded waveforms and
# "y" the matching integer label ids. Every split gets its own fresh lists.
sets = {
    split_name: {"x": [], "y": []}
    for split_name in ("train", "validation", "testing")
}

In [7]:
# Sample rate handed to wav2arr1s (as `sample_rate=`) for every clip that is loaded.
# NOTE(review): presumably in Hz, i.e. 8 kHz — confirm against wav2arr1s.
RESAMPLE_RATE = 8000

In [8]:
# Walk every label folder under BASE_TRAIN_FOLDER_WAV, load each .wav clip and
# append it (plus its integer label id) to the split chosen by which_set().
#
# This cell appends to `sets`; re-run the cell that initialises `sets` first,
# otherwise every sample ends up in the lists twice.
#
# sorted(): os.listdir() returns entries in arbitrary order, so sorting keeps
# the sample order of the compiled arrays reproducible across machines/runs.
for label in sorted(os.listdir(BASE_TRAIN_FOLDER_WAV)):
    label_folder_wav = os.path.join(BASE_TRAIN_FOLDER_WAV, label)
    # Ignore stray files and underscore-prefixed folders.
    if not os.path.isdir(label_folder_wav) or label.startswith('_'):
        continue

    label_files = sorted(
        filename for filename in os.listdir(label_folder_wav)
        if filename.endswith('.wav')
    )
    # Hand tqdm the list itself: wrapping it in enumerate() hides len(), which
    # leaves tqdm with a bare iteration counter instead of a bar with an ETA.
    for filename in tqdm(label_files, desc=label):
        file_path = os.path.join(label_folder_wav, filename)
        # NOTE(review): wav2arr1s is not imported in the visible import cell;
        # it must already exist in the kernel namespace — confirm its origin
        # and import it explicitly so Restart & Run All works.
        _sample_rate, samples = wav2arr1s(file_path, sample_rate=RESAMPLE_RATE)
        # Stored as half precision (float16) to keep the in-memory dataset small.
        samples = samples.astype(np.float16)
        # The two 10s are passed straight to which_set.
        # NOTE(review): presumably validation/testing percentages — confirm in utils.
        set_name = which_set(filename, 10, 10)

        sets[set_name]["x"].append(samples)
        sets[set_name]["y"].append(labels_to_ints[label])

right: 2367it [00:05, 396.88it/s]
eight: 2352it [00:07, 319.35it/s]
cat: 1733it [00:05, 300.07it/s]
tree: 1733it [00:08, 202.98it/s]
bed: 1713it [00:11, 146.83it/s]
happy: 1742it [00:11, 158.19it/s]
go: 2372it [00:15, 149.31it/s]
silence: 402it [00:01, 226.59it/s]
dog: 1746it [00:11, 150.68it/s]
no: 2375it [00:16, 143.39it/s]
wow: 1745it [00:11, 146.29it/s]
nine: 2364it [00:13, 170.33it/s]
left: 2353it [00:13, 174.93it/s]
stop: 2380it [00:16, 143.85it/s]
three: 2356it [00:16, 144.94it/s]
sheila: 1734it [00:12, 141.58it/s]
one: 2370it [00:17, 138.16it/s]
bird: 1731it [00:11, 133.68it/s]
zero: 2376it [00:14, 166.18it/s]
seven: 2377it [00:15, 155.52it/s]
up: 2375it [00:18, 130.58it/s]
marvin: 1746it [00:12, 142.56it/s]
two: 2373it [00:16, 141.60it/s]
house: 1750it [00:12, 138.72it/s]
down: 2359it [00:16, 141.31it/s]
six: 2369it [00:15, 157.82it/s]
yes: 2377it [00:14, 165.79it/s]
on: 2367it [00:15, 156.37it/s]
five: 2357it [00:14, 160.38it/s]
off: 2357it [00:15, 149.34it/s]
four: 2372it [0

In [9]:
# Sanity check: shapes of the stacked training inputs and training labels.
tuple(np.array(sets["train"][key]).shape for key in ("x", "y"))

((51490, 8000), (51490,))

In [8]:
# Directory that receives the compiled x_<split>.npy / y_<split>.npy files.
COMPILED_FOLDER = "compiled-wav"

In [11]:
# Freeze every split into NumPy arrays, one-hot encode its labels and persist
# both arrays as x_<split>.npy / y_<split>.npy inside COMPILED_FOLDER.
os.makedirs(COMPILED_FOLDER, exist_ok=True)
for set_name in ["train", "validation", "testing"]:
    features = np.asarray(sets[set_name]["x"])
    targets = np.asarray(sets[set_name]["y"])
    # Encode only while the labels are still a flat vector of integer ids.
    # This cell stores the encoded matrix back into `sets`, so on a re-run an
    # unconditional to_categorical() would receive the one-hot matrix itself
    # and silently save a 3-D array instead of the labels.
    if targets.ndim == 1:
        targets = to_categorical(targets, num_classes=len(labels_to_ints))
    # Keep the original side effect: `sets` holds the final arrays afterwards.
    sets[set_name]["x"] = features
    sets[set_name]["y"] = targets
    np.save(os.path.join(COMPILED_FOLDER, f"x_{set_name}.npy"), features)
    np.save(os.path.join(COMPILED_FOLDER, f"y_{set_name}.npy"), targets)