# Prepare audio data for image recognition

The data is pretty good, but there are a few samples that aren't exactly 1 second long, and some samples that are either truncated or don't contain very much of the word.

The code in the notebook attempts to filter out the broken audio so that we are only using good audio.

We then generate spectrograms of each word. We mix in background noise with the words to make it a more realistic audio sample.

## Download data set
Download from: https://storage.cloud.google.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz - approx 2.3 GB

Create the `speech_data` folder if it does not already exist (`tar -C` will not create it), and then run
```
tar -xzf data_speech_commands_v0.02.tar.gz -C speech_data
```

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.io import gfile
import tensorflow_io as tfio
from tensorflow.python.ops import gen_audio_ops as audio_ops
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [2]:
# Root folder of the extracted data set; get_files() globs '<SPEECH_DATA>/<word>/*.wav' beneath it
SPEECH_DATA='speech_data'

In [3]:
# The audio is all sampled at 16KHz and should all be 1 second in length - so 1 second is 16000 samples
EXPECTED_SAMPLES=16000
# Noise floor to detect if any audio is present (passed as `epsilon` to tfio.audio.trim on peak-normalised audio)
NOISE_FLOOR=0.1
# How many samples should be above the noise floor?
# NOTE: `/` is true division, so this is the float 4000.0 rather than an int
MINIMUM_VOICE_LENGTH=EXPECTED_SAMPLES/4

In [4]:
# List of class names. A sample's label is the index of its class in this list.
# Every entry without an underscore is also the name of a folder inside the speech_data folder.
# NOTE(review): the import below repeats one already made in the first code cell.
from tensorflow.python.ops import gen_audio_ops as audio_ops
words = [
    'backward',
    'bed',
    'bird',
    'cat',
    'dog',
    'down',
    'eight',
    'five',
    'follow',
    'forward',
    'four',
    'friday',
    'go',
    'happy',
    'house',
    'learn',
    'left',
    'marvin',
    'nine',
    'no',
    'off',
    'on',
    'one',
    'right',
    'seven',
    'sheila',
    'six',
    'stop',
    'three',
    'tree',
    'two',
    'up',
    'visual',
    'wow',
    'yes',
    'zero',
    # label only, not a folder: the word loop skips names containing '_', and the
    # background/problem cells look this entry up with words.index("_background")
    '_background',
]

In [5]:
# list every .wav file stored in the folder for a word
def get_files(word):
    """Return the paths matching '<SPEECH_DATA>/<word>/*.wav'."""
    pattern = f"{SPEECH_DATA}/{word}/*.wav"
    return gfile.glob(pattern)

# find where the voice sits inside the audio
def get_voice_position(audio, noise_floor):
    """Return the result of tfio.audio.trim on the centred, peak-normalised audio.

    The audio has its mean removed and is divided by its largest absolute
    value before being trimmed along axis 0 with `noise_floor` as epsilon.
    """
    centred = audio - np.mean(audio)
    peak = np.max(np.abs(centred))
    normalised = centred / peak
    return tfio.audio.trim(normalised, axis=0, epsilon=noise_floor)

# How many samples of the audio are actually voice?
def get_voice_length(audio, noise_floor):
    """Return (stop - start) of the voice position found by get_voice_position, as a NumPy value."""
    start_stop = get_voice_position(audio, noise_floor)
    voice_start, voice_end = start_stop[0], start_stop[1]
    return (voice_end - voice_start).numpy()

# does the audio contain at least the required amount of voice?
def is_voice_present(audio, noise_floor, required_length):
    """Return whether the detected voice length is at least `required_length` samples."""
    return get_voice_length(audio, noise_floor) >= required_length

# does the audio have exactly the expected number of samples?
def is_correct_length(audio, expected_length):
    """Compare the first entry of `audio.shape` with `expected_length`.

    The comparison result must expose `.numpy()`, which is what gets returned.
    """
    lengths_match = audio.shape[0] == expected_length
    return lengths_match.numpy()


def is_valid_file(file_name):
    """Return True when the file has EXPECTED_SAMPLES samples and contains enough voice.

    "Enough" means at least MINIMUM_VOICE_LENGTH samples are found above
    NOISE_FLOOR once the audio has been centred and peak-normalised.
    """
    recording = tfio.audio.AudioIOTensor(file_name)
    # reject recordings that are not exactly the expected length
    if not is_correct_length(recording, EXPECTED_SAMPLES):
        return False
    # turn the samples into floats, remove the mean and scale to between -1 and 1
    samples = tf.cast(recording[:], tf.float32)
    samples = samples - np.mean(samples)
    samples = samples / np.max(np.abs(samples))
    # the file is valid only if enough voice is present
    return bool(is_voice_present(samples, NOISE_FLOOR, MINIMUM_VOICE_LENGTH))


In [6]:
def get_spectrogram(audio):
    """Turn a block of audio samples into a pooled, log-scaled spectrogram.

    The audio is centred and peak-normalised, passed through the TensorFlow
    audio_spectrogram op (window 320, stride 160, squared magnitudes),
    average-pooled with a [1, 6] window and converted with log10.

    Args:
        audio: the samples to convert. Callers in this notebook pass float32
            data shaped (samples, 1) - presumably what the op requires; verify
            before passing anything else.

    Returns:
        The result of np.log10 applied to the pooled spectrogram with the
        leading axis squeezed away.
    """
    # normalise the audio
    audio = audio - np.mean(audio)
    peak = np.max(np.abs(audio))
    # a completely flat (e.g. silent) input has a peak of 0; dividing would
    # turn every sample - and so the whole spectrogram - into NaN
    if peak > 0:
        audio = audio / peak
    # create the spectrogram
    spectrogram = audio_ops.audio_spectrogram(audio,
                                              window_size=320,
                                              stride=160,
                                              magnitude_squared=True).numpy()
    # reduce the number of frequency bins in our spectrogram to a more sensible level
    spectrogram = tf.nn.pool(
        input=tf.expand_dims(spectrogram, -1),
        window_shape=[1, 6],
        strides=[1, 6],
        pooling_type='AVG',
        padding='SAME')
    spectrogram = tf.squeeze(spectrogram, axis=0)
    # the small offset keeps log10 finite where the pooled energy is zero
    spectrogram = np.log10(spectrogram + 1e-6)
    return spectrogram

In [7]:
# process a file into its spectrogram
def process_file(file_path):
    """Load a recording, shift the voice to a random position, add noise and return its spectrogram.

    Steps:
      1. Load the file, centre it and scale it to between -1 and 1.
      2. Locate the voice and roll the audio by a random amount chosen so that
         the voice stays between the start and the end of the clip.
      3. Mix in a randomly chosen EXPECTED_SAMPLES-long slice of a file from
         the '_background_noise_' folder at a random volume below 0.1.
      4. Convert the mix with get_spectrogram.

    Args:
        file_path: path of the audio file to load.

    Returns:
        The spectrogram produced by get_spectrogram for the augmented audio.
    """
    # load the audio file
    audio_tensor = tfio.audio.AudioIOTensor(file_path)
    # convert the audio to an array of floats and scale it to between -1 and 1
    audio = tf.cast(audio_tensor[:], tf.float32)
    audio = audio - np.mean(audio)
    audio = audio / np.max(np.abs(audio))
    # randomly reposition the audio in the sample
    position = get_voice_position(audio, NOISE_FLOOR)
    # np.roll needs an integer shift, so reduce the start/stop positions to plain
    # Python ints instead of relying on implicit tensor/float conversions
    voice_start = int(np.ravel(position[0])[0])
    voice_end = int(np.ravel(position[1])[0])
    end_gap = len(audio) - voice_end
    random_offset = int(np.random.uniform(0, voice_start + end_gap))
    # a shift of end_gap pushes the voice to the very end of the clip; each unit of
    # random_offset moves it one sample back towards the start
    audio = np.roll(audio, end_gap - random_offset)
    # add some random background noise
    background_volume = np.random.uniform(0, 0.1)
    # get the background noise files
    background_files = get_files('_background_noise_')
    background_file = np.random.choice(background_files)
    background_tensor = tfio.audio.AudioIOTensor(background_file)
    background_start = np.random.randint(0, len(background_tensor) - EXPECTED_SAMPLES)
    # normalise the background noise
    background = tf.cast(background_tensor[background_start:background_start + EXPECTED_SAMPLES], tf.float32)
    background = background - np.mean(background)
    background = background / np.max(np.abs(background))
    # mix the audio with the scaled background
    audio = audio + background_volume * background
    # get the spectrogram
    return get_spectrogram(audio)

In [8]:
# Accumulators for (spectrogram, label) samples; the processing cells below extend them in place,
# so re-running one of those cells without re-running this one adds its samples a second time
train = []
validate = []
test = []

# Fraction of each source's samples that goes into the train and validation splits
TRAIN_SIZE=0.8
VALIDATION_SIZE=0.1
# NOTE: the test split is always "whatever is left" after train and validation,
# so TEST_SIZE does not influence where the split boundaries fall
TEST_SIZE=0.1

In [9]:
# sanity check: how many of the "friday" recordings pass validation?
friday_candidates = tqdm(get_files("friday"), desc="Checking", leave=False)
file_names = list(filter(is_valid_file, friday_candidates))
print("Valid Friday Samples: ")
len(file_names)

Checking:   0%|          | 0/728 [00:00<?, ?it/s]

Valid Friday Samples: 


718

In [10]:
def process_files(file_names, label, repeat=1):
    """Convert a list of files into (spectrogram, label) pairs.

    Args:
        file_names: paths of the files to process.
        label: index into `words` stored with every spectrogram; also used to
            name the progress bar.
        repeat: how many times each file is processed. process_file draws its
            augmentation from the random generator on every call.

    Returns:
        A list of (spectrogram, label) tuples, `repeat` entries per file.
    """
    # tf.repeat places the copies of each file name next to each other
    file_names = tf.repeat(file_names, repeat).numpy()
    # name the progress bar from the label itself instead of reading the
    # notebook-global loop variable `word`, which only exists while the word loop runs
    description = f"{words[label]} ({label})"
    return [(process_file(file_name), label) for file_name in tqdm(file_names, desc=description, leave=False)]

# process the files for a word into spectrograms paired with the word's integer label
def process_word(word, repeat=1):
    """Validate, shuffle and split the recordings of one word, then add them to the data sets.

    The valid files are shuffled and cut into train / validation / test groups
    using TRAIN_SIZE and VALIDATION_SIZE; the test group is everything left
    over. Each group is converted with process_files and appended to the
    module-level `train`, `validate` and `test` lists.

    Args:
        word: entry of `words`; also the name of the folder holding its files.
        repeat: forwarded to process_files for all three groups.
    """
    # the index of the word we are processing
    label = words.index(word)
    # get a list of file names for the word, keeping only the valid recordings
    file_names = [file_name for file_name in tqdm(get_files(word), desc="Checking", leave=False) if is_valid_file(file_name)]
    # randomly shuffle the filenames
    np.random.shuffle(file_names)
    # split the files into train, validate and test buckets
    train_size = int(TRAIN_SIZE*len(file_names))
    validation_size = int(VALIDATION_SIZE*len(file_names))
    # the split happens before any repetition, so copies of one file never
    # end up in more than one bucket
    # get the training samples
    train.extend(
        process_files(
            file_names[:train_size],
            label,
            repeat=repeat
        )
    )
    # and the validation samples
    validate.extend(
        process_files(
            file_names[train_size:train_size+validation_size],
            label,
            repeat=repeat
        )
    )
    # and the test samples
    test.extend(
        process_files(
            file_names[train_size+validation_size:],
            label,
            repeat=repeat
        )
    )

# process all the words and all the files
for word in tqdm(words, desc="Processing words"):
    # names containing an underscore are not handled by this loop
    if '_' in word:
        continue
    # add more examples of friday to balance our training set
    if word == 'friday':
        repeat = 190
    else:
        repeat = 1
    process_word(word, repeat=repeat)

print(len(train), len(test), len(validate))

Processing words:   0%|          | 0/37 [00:00<?, ?it/s]

Checking:   0%|          | 0/1664 [00:00<?, ?it/s]

backward (0):   0%|          | 0/1225 [00:00<?, ?it/s]

backward (0):   0%|          | 0/153 [00:00<?, ?it/s]

backward (0):   0%|          | 0/154 [00:00<?, ?it/s]

Checking:   0%|          | 0/2014 [00:00<?, ?it/s]

bed (1):   0%|          | 0/998 [00:00<?, ?it/s]

bed (1):   0%|          | 0/124 [00:00<?, ?it/s]

bed (1):   0%|          | 0/126 [00:00<?, ?it/s]

Checking:   0%|          | 0/2064 [00:00<?, ?it/s]

bird (2):   0%|          | 0/1293 [00:00<?, ?it/s]

bird (2):   0%|          | 0/161 [00:00<?, ?it/s]

bird (2):   0%|          | 0/163 [00:00<?, ?it/s]

Checking:   0%|          | 0/2031 [00:00<?, ?it/s]

cat (3):   0%|          | 0/921 [00:00<?, ?it/s]

cat (3):   0%|          | 0/115 [00:00<?, ?it/s]

cat (3):   0%|          | 0/116 [00:00<?, ?it/s]

Checking:   0%|          | 0/2128 [00:00<?, ?it/s]

dog (4):   0%|          | 0/1306 [00:00<?, ?it/s]

dog (4):   0%|          | 0/163 [00:00<?, ?it/s]

dog (4):   0%|          | 0/164 [00:00<?, ?it/s]

Checking:   0%|          | 0/3917 [00:00<?, ?it/s]

down (5):   0%|          | 0/2623 [00:00<?, ?it/s]

down (5):   0%|          | 0/327 [00:00<?, ?it/s]

down (5):   0%|          | 0/329 [00:00<?, ?it/s]

Checking:   0%|          | 0/3787 [00:00<?, ?it/s]

eight (6):   0%|          | 0/1858 [00:00<?, ?it/s]

eight (6):   0%|          | 0/232 [00:00<?, ?it/s]

eight (6):   0%|          | 0/233 [00:00<?, ?it/s]

Checking:   0%|          | 0/4052 [00:00<?, ?it/s]

five (7):   0%|          | 0/2376 [00:00<?, ?it/s]

five (7):   0%|          | 0/297 [00:00<?, ?it/s]

five (7):   0%|          | 0/297 [00:00<?, ?it/s]

Checking:   0%|          | 0/1579 [00:00<?, ?it/s]

follow (8):   0%|          | 0/1063 [00:00<?, ?it/s]

follow (8):   0%|          | 0/132 [00:00<?, ?it/s]

follow (8):   0%|          | 0/134 [00:00<?, ?it/s]

Checking:   0%|          | 0/1557 [00:00<?, ?it/s]

forward (9):   0%|          | 0/1113 [00:00<?, ?it/s]

forward (9):   0%|          | 0/139 [00:00<?, ?it/s]

forward (9):   0%|          | 0/140 [00:00<?, ?it/s]

Checking:   0%|          | 0/3728 [00:00<?, ?it/s]

four (10):   0%|          | 0/2250 [00:00<?, ?it/s]

four (10):   0%|          | 0/281 [00:00<?, ?it/s]

four (10):   0%|          | 0/282 [00:00<?, ?it/s]

Checking:   0%|          | 0/728 [00:00<?, ?it/s]

friday (11):   0%|          | 0/109060 [00:00<?, ?it/s]

friday (11):   0%|          | 0/13490 [00:00<?, ?it/s]

friday (11):   0%|          | 0/13870 [00:00<?, ?it/s]

Checking:   0%|          | 0/3880 [00:00<?, ?it/s]

go (12):   0%|          | 0/2157 [00:00<?, ?it/s]

go (12):   0%|          | 0/269 [00:00<?, ?it/s]

go (12):   0%|          | 0/271 [00:00<?, ?it/s]

Checking:   0%|          | 0/2054 [00:00<?, ?it/s]

happy (13):   0%|          | 0/1285 [00:00<?, ?it/s]

happy (13):   0%|          | 0/160 [00:00<?, ?it/s]

happy (13):   0%|          | 0/162 [00:00<?, ?it/s]

Checking:   0%|          | 0/2113 [00:00<?, ?it/s]

house (14):   0%|          | 0/1132 [00:00<?, ?it/s]

house (14):   0%|          | 0/141 [00:00<?, ?it/s]

house (14):   0%|          | 0/142 [00:00<?, ?it/s]

Checking:   0%|          | 0/1575 [00:00<?, ?it/s]

learn (15):   0%|          | 0/1096 [00:00<?, ?it/s]

learn (15):   0%|          | 0/137 [00:00<?, ?it/s]

learn (15):   0%|          | 0/137 [00:00<?, ?it/s]

Checking:   0%|          | 0/3801 [00:00<?, ?it/s]

left (16):   0%|          | 0/1876 [00:00<?, ?it/s]

left (16):   0%|          | 0/234 [00:00<?, ?it/s]

left (16):   0%|          | 0/235 [00:00<?, ?it/s]

Checking:   0%|          | 0/2100 [00:00<?, ?it/s]

marvin (17):   0%|          | 0/1438 [00:00<?, ?it/s]

marvin (17):   0%|          | 0/179 [00:00<?, ?it/s]

marvin (17):   0%|          | 0/181 [00:00<?, ?it/s]

Checking:   0%|          | 0/3934 [00:00<?, ?it/s]

nine (18):   0%|          | 0/2758 [00:00<?, ?it/s]

nine (18):   0%|          | 0/344 [00:00<?, ?it/s]

nine (18):   0%|          | 0/346 [00:00<?, ?it/s]

Checking:   0%|          | 0/3941 [00:00<?, ?it/s]

no (19):   0%|          | 0/2464 [00:00<?, ?it/s]

no (19):   0%|          | 0/308 [00:00<?, ?it/s]

no (19):   0%|          | 0/309 [00:00<?, ?it/s]

Checking:   0%|          | 0/3745 [00:00<?, ?it/s]

off (20):   0%|          | 0/1188 [00:00<?, ?it/s]

off (20):   0%|          | 0/148 [00:00<?, ?it/s]

off (20):   0%|          | 0/150 [00:00<?, ?it/s]

Checking:   0%|          | 0/3845 [00:00<?, ?it/s]

on (21):   0%|          | 0/2390 [00:00<?, ?it/s]

on (21):   0%|          | 0/298 [00:00<?, ?it/s]

on (21):   0%|          | 0/300 [00:00<?, ?it/s]

Checking:   0%|          | 0/3890 [00:00<?, ?it/s]

one (22):   0%|          | 0/2324 [00:00<?, ?it/s]

one (22):   0%|          | 0/290 [00:00<?, ?it/s]

one (22):   0%|          | 0/292 [00:00<?, ?it/s]

Checking:   0%|          | 0/3778 [00:00<?, ?it/s]

right (23):   0%|          | 0/2111 [00:00<?, ?it/s]

right (23):   0%|          | 0/263 [00:00<?, ?it/s]

right (23):   0%|          | 0/265 [00:00<?, ?it/s]

Checking:   0%|          | 0/3998 [00:00<?, ?it/s]

seven (24):   0%|          | 0/2753 [00:00<?, ?it/s]

seven (24):   0%|          | 0/344 [00:00<?, ?it/s]

seven (24):   0%|          | 0/345 [00:00<?, ?it/s]

Checking:   0%|          | 0/2022 [00:00<?, ?it/s]

sheila (25):   0%|          | 0/1384 [00:00<?, ?it/s]

sheila (25):   0%|          | 0/173 [00:00<?, ?it/s]

sheila (25):   0%|          | 0/173 [00:00<?, ?it/s]

Checking:   0%|          | 0/3860 [00:00<?, ?it/s]

six (26):   0%|          | 0/2321 [00:00<?, ?it/s]

six (26):   0%|          | 0/290 [00:00<?, ?it/s]

six (26):   0%|          | 0/291 [00:00<?, ?it/s]

Checking:   0%|          | 0/3872 [00:00<?, ?it/s]

stop (27):   0%|          | 0/2001 [00:00<?, ?it/s]

stop (27):   0%|          | 0/250 [00:00<?, ?it/s]

stop (27):   0%|          | 0/251 [00:00<?, ?it/s]

Checking:   0%|          | 0/3727 [00:00<?, ?it/s]

three (28):   0%|          | 0/2276 [00:00<?, ?it/s]

three (28):   0%|          | 0/284 [00:00<?, ?it/s]

three (28):   0%|          | 0/286 [00:00<?, ?it/s]

Checking:   0%|          | 0/1759 [00:00<?, ?it/s]

tree (29):   0%|          | 0/1118 [00:00<?, ?it/s]

tree (29):   0%|          | 0/139 [00:00<?, ?it/s]

tree (29):   0%|          | 0/141 [00:00<?, ?it/s]

Checking:   0%|          | 0/3880 [00:00<?, ?it/s]

two (30):   0%|          | 0/2164 [00:00<?, ?it/s]

two (30):   0%|          | 0/270 [00:00<?, ?it/s]

two (30):   0%|          | 0/271 [00:00<?, ?it/s]

Checking:   0%|          | 0/3723 [00:00<?, ?it/s]

up (31):   0%|          | 0/790 [00:00<?, ?it/s]

up (31):   0%|          | 0/98 [00:00<?, ?it/s]

up (31):   0%|          | 0/100 [00:00<?, ?it/s]

Checking:   0%|          | 0/1592 [00:00<?, ?it/s]

visual (32):   0%|          | 0/1149 [00:00<?, ?it/s]

visual (32):   0%|          | 0/143 [00:00<?, ?it/s]

visual (32):   0%|          | 0/145 [00:00<?, ?it/s]

Checking:   0%|          | 0/2123 [00:00<?, ?it/s]

wow (33):   0%|          | 0/1356 [00:00<?, ?it/s]

wow (33):   0%|          | 0/169 [00:00<?, ?it/s]

wow (33):   0%|          | 0/170 [00:00<?, ?it/s]

Checking:   0%|          | 0/4044 [00:00<?, ?it/s]

yes (34):   0%|          | 0/2364 [00:00<?, ?it/s]

yes (34):   0%|          | 0/295 [00:00<?, ?it/s]

yes (34):   0%|          | 0/297 [00:00<?, ?it/s]

Checking:   0%|          | 0/4052 [00:00<?, ?it/s]

zero (35):   0%|          | 0/2957 [00:00<?, ?it/s]

zero (35):   0%|          | 0/369 [00:00<?, ?it/s]

zero (35):   0%|          | 0/371 [00:00<?, ?it/s]

170938 21639 21209


In [11]:
# process the background noise files
def process_background(file_name, label):
    """Turn one background noise recording into labelled spectrogram samples.

    Two kinds of sample are produced:
      * sliding windows of EXPECTED_SAMPLES samples taken every 8000 samples;
      * 1000 "simulated words": a random window of the recording where only a
        randomly placed, Hamming-shaped burst is kept and the rest is silence.

    The samples are shuffled, split with TRAIN_SIZE / VALIDATION_SIZE (the
    test split is the remainder) and appended to the module-level `train`,
    `validate` and `test` lists.

    Args:
        file_name: path of the background noise recording.
        label: label stored with every sample.
    """
    # load the audio file
    audio_tensor = tfio.audio.AudioIOTensor(file_name)
    audio = tf.cast(audio_tensor[:], tf.float32)
    audio_length = len(audio)
    samples = []
    for section_start in tqdm(range(0, audio_length-EXPECTED_SAMPLES, 8000), desc=file_name, leave=False):
        section_end = section_start + EXPECTED_SAMPLES
        section = audio[section_start:section_end]
        # get the spectrogram
        spectrogram = get_spectrogram(section)
        samples.append((spectrogram, label))

    # simulate random utterances
    for section_index in tqdm(range(1000), desc="Simulated Words", leave=False):
        section_start = np.random.randint(0, audio_length - EXPECTED_SAMPLES)
        section_end = section_start + EXPECTED_SAMPLES
        section = np.reshape(audio[section_start:section_end], (EXPECTED_SAMPLES))

        # silence everywhere except the pseudo voice; float32 to match the
        # dtype of the audio that the other callers hand to get_spectrogram
        result = np.zeros((EXPECTED_SAMPLES), dtype=np.float32)
        # create a pseudo bit of voice
        voice_length = np.random.randint(int(MINIMUM_VOICE_LENGTH/2), EXPECTED_SAMPLES)
        voice_start = np.random.randint(0, EXPECTED_SAMPLES - voice_length)
        hamming = np.hamming(voice_length)
        # shape the voice section with the window
        result[voice_start:voice_start+voice_length] = hamming * section[voice_start:voice_start+voice_length]
        # get the spectrogram of the shaped burst - previously the unshaped
        # `section` was used here, which left `result` unused
        spectrogram = get_spectrogram(np.reshape(result, (EXPECTED_SAMPLES, 1)))
        samples.append((spectrogram, label))

    np.random.shuffle(samples)

    # split into train / validation / test; test takes everything left over
    train_size = int(TRAIN_SIZE*len(samples))
    validation_size = int(VALIDATION_SIZE*len(samples))

    train.extend(samples[:train_size])

    validate.extend(samples[train_size:train_size+validation_size])

    test.extend(samples[train_size+validation_size:])

        
# every background recording is stored under the "_background" label
background_label = words.index("_background")
background_paths = get_files('_background_noise_')
for file_name in tqdm(background_paths, desc="Processing Background Noise"):
    process_background(file_name, background_label)

print(len(train), len(test), len(validate))

Processing Background Noise:   0%|          | 0/6 [00:00<?, ?it/s]

speech_data\_background_noise_\doing_the_dishes.wav:   0%|          | 0/189 [00:00<?, ?it/s]

Simulated Words:   0%|          | 0/1000 [00:00<?, ?it/s]

speech_data\_background_noise_\dude_miaowing.wav:   0%|          | 0/122 [00:00<?, ?it/s]

Simulated Words:   0%|          | 0/1000 [00:00<?, ?it/s]

speech_data\_background_noise_\exercise_bike.wav:   0%|          | 0/121 [00:00<?, ?it/s]

Simulated Words:   0%|          | 0/1000 [00:00<?, ?it/s]

speech_data\_background_noise_\pink_noise.wav:   0%|          | 0/118 [00:00<?, ?it/s]

Simulated Words:   0%|          | 0/1000 [00:00<?, ?it/s]

speech_data\_background_noise_\running_tap.wav:   0%|          | 0/121 [00:00<?, ?it/s]

Simulated Words:   0%|          | 0/1000 [00:00<?, ?it/s]

speech_data\_background_noise_\white_noise.wav:   0%|          | 0/118 [00:00<?, ?it/s]

Simulated Words:   0%|          | 0/1000 [00:00<?, ?it/s]

176366 22324 21885


In [12]:
def process_problem_noise(file_name, label):
    """Turn a recording of problem noise into labelled spectrogram samples.

    The recording is cut into windows of EXPECTED_SAMPLES samples taken every
    400 samples. The spectrograms are shuffled, split with TRAIN_SIZE /
    VALIDATION_SIZE (the test split is the remainder) and appended to the
    module-level `train`, `validate` and `test` lists.

    Args:
        file_name: path of the recording.
        label: label stored with every sample.
    """
    # load the audio file
    audio_tensor = tfio.audio.AudioIOTensor(file_name)
    audio = tf.cast(audio_tensor[:], tf.float32)
    audio_length = len(audio)
    samples = []
    for section_start in tqdm(range(0, audio_length-EXPECTED_SAMPLES, 400), desc=file_name, leave=False):
        section_end = section_start + EXPECTED_SAMPLES
        section = audio[section_start:section_end]
        # get the spectrogram
        spectrogram = get_spectrogram(section)
        samples.append((spectrogram, label))

    np.random.shuffle(samples)

    # split into train / validation / test; test takes everything left over
    train_size = int(TRAIN_SIZE*len(samples))
    validation_size = int(VALIDATION_SIZE*len(samples))

    train.extend(samples[:train_size])
    validate.extend(samples[train_size:train_size+validation_size])
    test.extend(samples[train_size+validation_size:])


# problem noise is stored under the "_background" label
problem_noise_label = words.index("_background")
for file_name in tqdm(get_files("_problem_noise_"), desc="Processing problem noise"):
    process_problem_noise(file_name, problem_noise_label)

Processing problem noise: 0it [00:00, ?it/s]

In [13]:
def process_problem_words(file_name, label):
    """Turn a recording of a problem word into noisy, labelled spectrogram samples.

    The recording is cut into windows of EXPECTED_SAMPLES samples taken every
    4000 samples. Each window is centred, peak-normalised and mixed with a
    random slice of a random '_background_noise_' file at a random volume
    below 0.1. The spectrograms are shuffled, split with TRAIN_SIZE /
    VALIDATION_SIZE (the test split is the remainder) and appended to the
    module-level `train`, `validate` and `test` lists.

    Args:
        file_name: path of the recording.
        label: label stored with every sample.
    """
    # load the audio file
    audio_tensor = tfio.audio.AudioIOTensor(file_name)
    audio = tf.cast(audio_tensor[:], tf.float32)
    audio_length = len(audio)
    # the list of background noise files is the same for every section, so fetch it once
    background_files = get_files('_background_noise_')
    samples = []
    for section_start in tqdm(range(0, audio_length-EXPECTED_SAMPLES, 4000), desc=file_name, leave=False):
        section_end = section_start + EXPECTED_SAMPLES
        section = audio[section_start:section_end]
        section = section - np.mean(section)
        section = section / np.max(np.abs(section))
        # add some random background noise
        background_volume = np.random.uniform(0, 0.1)
        background_file = np.random.choice(background_files)
        background_tensor = tfio.audio.AudioIOTensor(background_file)
        background_start = np.random.randint(0, len(background_tensor) - EXPECTED_SAMPLES)
        # normalise the background noise
        background = tf.cast(background_tensor[background_start:background_start+EXPECTED_SAMPLES], tf.float32)
        background = background - np.mean(background)
        background = background / np.max(np.abs(background))
        # mix the audio with the scaled background
        section = section + background_volume * background
        # get the spectrogram
        spectrogram = get_spectrogram(section)
        samples.append((spectrogram, label))

    np.random.shuffle(samples)

    # split into train / validation / test; test takes everything left over
    train_size = int(TRAIN_SIZE*len(samples))
    validation_size = int(VALIDATION_SIZE*len(samples))

    train.extend(samples[:train_size])
    validate.extend(samples[train_size:train_size+validation_size])
    test.extend(samples[train_size+validation_size:])


# problem words are stored under the "_background" label
problem_words_label = words.index("_background")
for file_name in tqdm(get_files("_problem_words_"), desc="Processing problem words"):
    process_problem_words(file_name, problem_words_label)

Processing problem words:   0%|          | 0/172 [00:00<?, ?it/s]

speech_data\_problem_words_\Day_133de547acd0414d80e8b860e0ec3273_025b0e6b06f9423f9d2d1bf96160efa7.ogg.wav.wav:…

speech_data\_problem_words_\Day_133de547acd0414d80e8b860e0ec3273_1a2de54eec2448349f7c242c5208ea68.ogg.wav.wav:…

speech_data\_problem_words_\Day_133de547acd0414d80e8b860e0ec3273_8448180ce17b4f779b18d78a0c0b9c3f.ogg.wav.wav:…

speech_data\_problem_words_\Day_133de547acd0414d80e8b860e0ec3273_d1a93b03c9274ef38ecc60c0707b0e40.ogg.wav.wav:…

speech_data\_problem_words_\Day_133de547acd0414d80e8b860e0ec3273_f84e42618b544e1195f62e4465380300.ogg.wav.wav:…

speech_data\_problem_words_\Day_1b6cebdebc034cb5920589cb4e537e6c_49414a7c25084240bb2f8e20f3582095.ogg.wav.wav:…

speech_data\_problem_words_\Day_1b6cebdebc034cb5920589cb4e537e6c_64dfc6edd4b34b4f8fd146bbca6c01d4.ogg.wav.wav:…

speech_data\_problem_words_\Day_1b6cebdebc034cb5920589cb4e537e6c_8c93f2287e6a4972a18f1757b4a8cd46.ogg.wav.wav:…

speech_data\_problem_words_\Day_1b6cebdebc034cb5920589cb4e537e6c_9817696b32dd4e76885efcda9ad2d2e1.ogg.wav.wav:…

speech_data\_problem_words_\Day_1b6cebdebc034cb5920589cb4e537e6c_acfb098625db40ab84249d14af286d55.ogg.wav.wav:…

speech_data\_problem_words_\Day_4b3864617d0b4654b0092a2fce240b7f_1222c36b70af484c94284b44d1bdfa30.ogg.wav.wav:…

speech_data\_problem_words_\Day_4b3864617d0b4654b0092a2fce240b7f_9d2e772235ba4e29862c3075fb64a75c.ogg.wav.wav:…

speech_data\_problem_words_\Day_4b3864617d0b4654b0092a2fce240b7f_d1a8313d0c3742be8738ae9025a4c2bd.ogg.wav.wav:…

speech_data\_problem_words_\Day_4b3864617d0b4654b0092a2fce240b7f_ee4275c2dff742d9aa7de4b0edf82117.ogg.wav.wav:…

speech_data\_problem_words_\Day_9e65b54f99534c78ae65527292c4d9fa_421217b703f54b1f809299139c1633bc.ogg.wav.wav:…

speech_data\_problem_words_\Day_9e65b54f99534c78ae65527292c4d9fa_c9c7fc2a47a941e0a402c39b45b93ed3.ogg.wav.wav:…

speech_data\_problem_words_\Day_9e65b54f99534c78ae65527292c4d9fa_ca51302c2c02412b905dc7852e5af543.ogg.wav.wav:…

speech_data\_problem_words_\Day_9e65b54f99534c78ae65527292c4d9fa_ccb2248e9ac7430ba2a9833e040d0114.ogg.wav.wav:…

speech_data\_problem_words_\Day_9e65b54f99534c78ae65527292c4d9fa_ed84b41608fc44879f19bacc63ecdd89.ogg.wav.wav:…

speech_data\_problem_words_\Day_af84c8af996d4de9a8573bae812b7030_2cc124cad1844f4a9b9e07c76d78318f.ogg.wav.wav:…

speech_data\_problem_words_\Day_af84c8af996d4de9a8573bae812b7030_3db7b25e4f724dcaaf8b3bf311859c8b.ogg.wav.wav:…

speech_data\_problem_words_\Day_af84c8af996d4de9a8573bae812b7030_6e7ec13dafe346929dc50fe51a310af6.ogg.wav.wav:…

speech_data\_problem_words_\Day_af84c8af996d4de9a8573bae812b7030_6ecfdba2c2534c0580237bebdc0f342b.ogg.wav.wav:…

speech_data\_problem_words_\Day_af84c8af996d4de9a8573bae812b7030_a70fcdfd1c454cf3a7b0e85279203786.ogg.wav.wav:…

speech_data\_problem_words_\Day_b936b500c4cc47f3a76af390020c6c34_00e000b3f0c44608af815d73ac4d032b.ogg.wav.wav:…

speech_data\_problem_words_\Day_b936b500c4cc47f3a76af390020c6c34_1e679cba9395426bae2951362aa59719.ogg.wav.wav:…

speech_data\_problem_words_\Day_b936b500c4cc47f3a76af390020c6c34_61f0b303c4814f32a6b2f288bf9a7258.ogg.wav.wav:…

speech_data\_problem_words_\Day_b936b500c4cc47f3a76af390020c6c34_6753d628dd8c4fa796ae9316e19df33a.ogg.wav.wav:…

speech_data\_problem_words_\Day_b936b500c4cc47f3a76af390020c6c34_7737bf1d775b41ab95cf09c7d3d755d8.ogg.wav.wav:…

speech_data\_problem_words_\Day_ddc5553de59541f085e1959a8c204d3a_6824c3f48f914666bffbac2d01e208da.ogg.wav.wav:…

speech_data\_problem_words_\Day_ddc5553de59541f085e1959a8c204d3a_9a82dba53cc644b2826f4dc86fd0fbec.ogg.wav.wav:…

speech_data\_problem_words_\Day_ddc5553de59541f085e1959a8c204d3a_b0081f898d214d09b57709d810a91e44.ogg.wav.wav:…

speech_data\_problem_words_\Day_ddc5553de59541f085e1959a8c204d3a_be99f23c04724f41aa8f4c171064ce9e.ogg.wav.wav:…

speech_data\_problem_words_\Day_ddc5553de59541f085e1959a8c204d3a_f4ee7ded29474ae881e33cea27f528db.ogg.wav.wav:…

speech_data\_problem_words_\Ei_133de547acd0414d80e8b860e0ec3273_21b4ee7e1c6e464b97682a13d2b4952b.ogg.wav.wav: …

speech_data\_problem_words_\Ei_133de547acd0414d80e8b860e0ec3273_32c78b53d99a4fa6ae409b0349bd9894.ogg.wav.wav: …

speech_data\_problem_words_\Ei_133de547acd0414d80e8b860e0ec3273_8f6da06ff8ce45d1a9cc66d7fb4ba8bc.ogg.wav.wav: …

speech_data\_problem_words_\Ei_133de547acd0414d80e8b860e0ec3273_e5983cdaa01d4ab3a3d643f755361e2f.ogg.wav.wav: …

speech_data\_problem_words_\Ei_133de547acd0414d80e8b860e0ec3273_f27b061bafaa414dad25389ff9b0ac01.ogg.wav.wav: …

speech_data\_problem_words_\Ei_1b6cebdebc034cb5920589cb4e537e6c_32bdf8311ee7486f854ffeb3a90acab9.ogg.wav.wav: …

speech_data\_problem_words_\Ei_1b6cebdebc034cb5920589cb4e537e6c_50cdb1fee5134aaf946f6cca132088cb.ogg.wav.wav: …

speech_data\_problem_words_\Ei_1b6cebdebc034cb5920589cb4e537e6c_5f77550765c841349e04ab8606d1ba6a.ogg.wav.wav: …

speech_data\_problem_words_\Ei_1b6cebdebc034cb5920589cb4e537e6c_7862593836dc4760909215d0924ca32d.ogg.wav.wav: …

speech_data\_problem_words_\Ei_1b6cebdebc034cb5920589cb4e537e6c_b5b4cdab8011438baf8e5eb2749e1dc4.ogg.wav.wav: …

speech_data\_problem_words_\Ei_4b3864617d0b4654b0092a2fce240b7f_0820e0a0764b46a3a674888fa730b64d.ogg.wav.wav: …

speech_data\_problem_words_\Ei_4b3864617d0b4654b0092a2fce240b7f_58486a3e53ed4657b1f252ab90d5c3ee.ogg.wav.wav: …

speech_data\_problem_words_\Ei_4b3864617d0b4654b0092a2fce240b7f_e92c1dbf0a8342058322c1faf0db551a.ogg.wav.wav: …

speech_data\_problem_words_\Ei_4b3864617d0b4654b0092a2fce240b7f_f08c52250c7e4a53b5a4bb54b5ce31af.ogg.wav.wav: …

speech_data\_problem_words_\Ei_4b3864617d0b4654b0092a2fce240b7f_f373975b19664df5806ed63514009016.ogg.wav.wav: …

speech_data\_problem_words_\Ei_9e65b54f99534c78ae65527292c4d9fa_072df621b8894f6ca0f14d398eb13941.ogg.wav.wav: …

speech_data\_problem_words_\Ei_9e65b54f99534c78ae65527292c4d9fa_afc5e07806fb4e30a210d9ecf595139c.ogg.wav.wav: …

speech_data\_problem_words_\Ei_9e65b54f99534c78ae65527292c4d9fa_b5c261ac3973440f88d4404a1e5e41f3.ogg.wav.wav: …

speech_data\_problem_words_\Ei_9e65b54f99534c78ae65527292c4d9fa_e56d555406264ab2b4a6f40e12be7247.ogg.wav.wav: …

speech_data\_problem_words_\Ei_9e65b54f99534c78ae65527292c4d9fa_f7a204da8d7144afaa1ce58271ca92e8.ogg.wav.wav: …

speech_data\_problem_words_\Ei_af84c8af996d4de9a8573bae812b7030_158eb15f300e415e9a8669458151d1dc.ogg.wav.wav: …

speech_data\_problem_words_\Ei_af84c8af996d4de9a8573bae812b7030_2e9271902fad4064a3de0387591c3ff0.ogg.wav.wav: …

speech_data\_problem_words_\Ei_af84c8af996d4de9a8573bae812b7030_5f373cb85a134f31a3a39843a00bebbf.ogg.wav.wav: …

speech_data\_problem_words_\Ei_af84c8af996d4de9a8573bae812b7030_a28d9a7478f343a6b51dc5f0933fd8b5.ogg.wav.wav: …

speech_data\_problem_words_\Ei_af84c8af996d4de9a8573bae812b7030_da1337a1a8cf408fb2ba3c9e0b733cfe.ogg.wav.wav: …

speech_data\_problem_words_\Ei_b936b500c4cc47f3a76af390020c6c34_29539b418ce24210877b3c55e859d8c1.ogg.wav.wav: …

speech_data\_problem_words_\Ei_b936b500c4cc47f3a76af390020c6c34_3f22df22edb746aaa1488fc3deaae0d2.ogg.wav.wav: …

speech_data\_problem_words_\Ei_b936b500c4cc47f3a76af390020c6c34_c1e0a2a6f5b14656a4fba80b6f4fb790.ogg.wav.wav: …

speech_data\_problem_words_\Ei_b936b500c4cc47f3a76af390020c6c34_e88a991496594f5eb44b149139403db5.ogg.wav.wav: …

speech_data\_problem_words_\Ei_b936b500c4cc47f3a76af390020c6c34_fe20c71abb97411f8658097dbbb38e08.ogg.wav.wav: …

speech_data\_problem_words_\Ei_ddc5553de59541f085e1959a8c204d3a_3ac1ac6f1e7d424094d16dbecfbe6131.ogg.wav.wav: …

speech_data\_problem_words_\Ei_ddc5553de59541f085e1959a8c204d3a_778e0b753864419f9420f72e05bfe747.ogg.wav.wav: …

speech_data\_problem_words_\Ei_ddc5553de59541f085e1959a8c204d3a_91d0a11c02304290a1bdb7eaff4d59c3.ogg.wav.wav: …

speech_data\_problem_words_\Ei_ddc5553de59541f085e1959a8c204d3a_fa18743763fe44d090595470353b876e.ogg.wav.wav: …

speech_data\_problem_words_\Ei_ddc5553de59541f085e1959a8c204d3a_fbd10ebd34b74399b9984b44c2dacc6a.ogg.wav.wav: …

speech_data\_problem_words_\Freitag_133de547acd0414d80e8b860e0ec3273_2c45b0624c464adaadcd36c1439a7c79.ogg.wav.…

speech_data\_problem_words_\Freitag_133de547acd0414d80e8b860e0ec3273_79bd530f59e5438eb5142169f5f0355f.ogg.wav.…

speech_data\_problem_words_\Freitag_133de547acd0414d80e8b860e0ec3273_acd7bc96a5494e1787b573d919140b58.ogg.wav.…

speech_data\_problem_words_\Freitag_133de547acd0414d80e8b860e0ec3273_ad821a2e0968408e87c18a472b83d4bd.ogg.wav.…

speech_data\_problem_words_\Freitag_133de547acd0414d80e8b860e0ec3273_d7795ef1fc32449a904a0f8f705fbd06.ogg.wav.…

speech_data\_problem_words_\Freitag_1b6cebdebc034cb5920589cb4e537e6c_1e3c617c7a2b4bfab5fe538a05f4f5a5.ogg.wav.…

speech_data\_problem_words_\Freitag_1b6cebdebc034cb5920589cb4e537e6c_592335cbdfd34497bd3fe3cf97036178.ogg.wav.…

speech_data\_problem_words_\Freitag_1b6cebdebc034cb5920589cb4e537e6c_7d6cd000edf64c71879e5f6f230c3a70.ogg.wav.…

speech_data\_problem_words_\Freitag_1b6cebdebc034cb5920589cb4e537e6c_9070dd7515214d3fbcf0e22452cf9fe0.ogg.wav.…

speech_data\_problem_words_\Freitag_1b6cebdebc034cb5920589cb4e537e6c_cfdc709016de484b9d6a361cd7411af9.ogg.wav.…

speech_data\_problem_words_\Freitag_4b3864617d0b4654b0092a2fce240b7f_25a7b48c145f4f72af66ea0184052f11.ogg.wav.…

speech_data\_problem_words_\Freitag_4b3864617d0b4654b0092a2fce240b7f_2a826bd75c0042b28896168574ed1d90.ogg.wav.…

speech_data\_problem_words_\Freitag_4b3864617d0b4654b0092a2fce240b7f_7d90ba82d76747a9a928c7f1aec0f067.ogg.wav.…

speech_data\_problem_words_\Freitag_4b3864617d0b4654b0092a2fce240b7f_e8af7b83f76041ecac1c97ea7a781873.ogg.wav.…

speech_data\_problem_words_\Freitag_9e65b54f99534c78ae65527292c4d9fa_1e8a5cee3624419d8861f04f72bbe73a.ogg.wav.…

speech_data\_problem_words_\Freitag_9e65b54f99534c78ae65527292c4d9fa_300c91d25f454cb39ac58a7d2519a766.ogg.wav.…

speech_data\_problem_words_\Freitag_9e65b54f99534c78ae65527292c4d9fa_797042fddb0a4b96b77fbf37d22603c5.ogg.wav.…

speech_data\_problem_words_\Freitag_9e65b54f99534c78ae65527292c4d9fa_ad62f7c2a5094445a8c853913b98415c.ogg.wav.…

speech_data\_problem_words_\Freitag_9e65b54f99534c78ae65527292c4d9fa_d397e97ad569456482655a434d8d628f.ogg.wav.…

speech_data\_problem_words_\Freitag_af84c8af996d4de9a8573bae812b7030_0f1c7546c2e3431aad5833e3b55960cb.ogg.wav.…

speech_data\_problem_words_\Freitag_af84c8af996d4de9a8573bae812b7030_110bb9e73e0e4957ae67d6338a2d732a.ogg.wav.…

speech_data\_problem_words_\Freitag_af84c8af996d4de9a8573bae812b7030_3812362a80a746a887b3808512e0ba3f.ogg.wav.…

speech_data\_problem_words_\Freitag_af84c8af996d4de9a8573bae812b7030_3e78a0f61ae345bfafffc2e7278699b4.ogg.wav.…

speech_data\_problem_words_\Freitag_af84c8af996d4de9a8573bae812b7030_90e0852dfdbb42d2a09f081d11cfe8e9.ogg.wav.…

speech_data\_problem_words_\Freitag_b936b500c4cc47f3a76af390020c6c34_2210eafe229f43ce970a32627f2b8d01.ogg.wav.…

speech_data\_problem_words_\Freitag_b936b500c4cc47f3a76af390020c6c34_6be8ebcde4534f3b9c5c61ee95eea984.ogg.wav.…

speech_data\_problem_words_\Freitag_b936b500c4cc47f3a76af390020c6c34_8d8476cc5b704dfaa9237045b38e621b.ogg.wav.…

speech_data\_problem_words_\Freitag_b936b500c4cc47f3a76af390020c6c34_c4871cda5a4545698a14661ea527e080.ogg.wav.…

speech_data\_problem_words_\Freitag_b936b500c4cc47f3a76af390020c6c34_f71aa47f43634ddd8ce1b1cf10c10137.ogg.wav.…

speech_data\_problem_words_\Freitag_ddc5553de59541f085e1959a8c204d3a_2a29136104c94372a50259e3406ecfcf.ogg.wav.…

speech_data\_problem_words_\Freitag_ddc5553de59541f085e1959a8c204d3a_395de9e84c124d818a4d9447fc8080e7.ogg.wav.…

speech_data\_problem_words_\Freitag_ddc5553de59541f085e1959a8c204d3a_95484a66e5d640708e653297eb4fb1b1.ogg.wav.…

speech_data\_problem_words_\Freitag_ddc5553de59541f085e1959a8c204d3a_b5a4b654fd8b45818f1b4479b4f76bf9.ogg.wav.…

speech_data\_problem_words_\Freitag_ddc5553de59541f085e1959a8c204d3a_eab47c0feeed4c41aea1a4471e62224c.ogg.wav.…

speech_data\_problem_words_\Frei_133de547acd0414d80e8b860e0ec3273_44f7b6429c7946418e1a93bdf3637949.ogg.wav.wav…

speech_data\_problem_words_\Frei_133de547acd0414d80e8b860e0ec3273_584ad253987f4a069ab041df3b214930.ogg.wav.wav…

speech_data\_problem_words_\Frei_133de547acd0414d80e8b860e0ec3273_6536e0cabca349a9844ebb2a2a66477f.ogg.wav.wav…

speech_data\_problem_words_\Frei_133de547acd0414d80e8b860e0ec3273_c26dffff77614eaa84ab1833b1ad0ebb.ogg.wav.wav…

speech_data\_problem_words_\Frei_133de547acd0414d80e8b860e0ec3273_dfdbe70d7a344d1484770e74a99d833c.ogg.wav.wav…

speech_data\_problem_words_\Frei_1b6cebdebc034cb5920589cb4e537e6c_7919df59ac774210b61a948d489e5341.ogg.wav.wav…

speech_data\_problem_words_\Frei_1b6cebdebc034cb5920589cb4e537e6c_836e57febe344cdb98812b92c79fc917.ogg.wav.wav…

speech_data\_problem_words_\Frei_1b6cebdebc034cb5920589cb4e537e6c_a1d3527166a04747a7ec03629e5c0838.ogg.wav.wav…

speech_data\_problem_words_\Frei_1b6cebdebc034cb5920589cb4e537e6c_b4bc61f9a9e041bfbe0eca46e05fdbd8.ogg.wav.wav…

speech_data\_problem_words_\Frei_1b6cebdebc034cb5920589cb4e537e6c_d31e465f459f47fbaed8ffc2e2fa9d12.ogg.wav.wav…

speech_data\_problem_words_\Frei_4b3864617d0b4654b0092a2fce240b7f_357f5ed9d6a8444196c19503d2b3a58a.ogg.wav.wav…

speech_data\_problem_words_\Frei_4b3864617d0b4654b0092a2fce240b7f_b57fd630c1144d59b11aab7881e0ab56.ogg.wav.wav…

speech_data\_problem_words_\Frei_4b3864617d0b4654b0092a2fce240b7f_c159be8b6af04201bb66ce665d2a41b4.ogg.wav.wav…

speech_data\_problem_words_\Frei_4b3864617d0b4654b0092a2fce240b7f_c51939cdd3a949aab99d984dd3b64d1d.ogg.wav.wav…

speech_data\_problem_words_\Frei_9e65b54f99534c78ae65527292c4d9fa_0646de42769b43e6b9bcc43e916507fd.ogg.wav.wav…

speech_data\_problem_words_\Frei_9e65b54f99534c78ae65527292c4d9fa_23d9b85e5acb4b5db8e4f75dec498265.ogg.wav.wav…

speech_data\_problem_words_\Frei_9e65b54f99534c78ae65527292c4d9fa_334df648e2714e1aa073a191562211f4.ogg.wav.wav…

speech_data\_problem_words_\Frei_9e65b54f99534c78ae65527292c4d9fa_929c1421ec194249a44320baad2ed95f.ogg.wav.wav…

speech_data\_problem_words_\Frei_9e65b54f99534c78ae65527292c4d9fa_b351ce23f0a643aea922616c661b17c7.ogg.wav.wav…

speech_data\_problem_words_\Frei_af84c8af996d4de9a8573bae812b7030_24d71310d2aa439883732366ab7602b2.ogg.wav.wav…

speech_data\_problem_words_\Frei_af84c8af996d4de9a8573bae812b7030_6194412e1e644a88adce663e5a5f13d7.ogg.wav.wav…

speech_data\_problem_words_\Frei_af84c8af996d4de9a8573bae812b7030_64a6dc0e239049b0bf10b54b79f2763d.ogg.wav.wav…

speech_data\_problem_words_\Frei_af84c8af996d4de9a8573bae812b7030_78cb3b52d69e445a8ca31f20295188de.ogg.wav.wav…

speech_data\_problem_words_\Frei_af84c8af996d4de9a8573bae812b7030_d25130355a4d45c1ae5996bacd9accc5.ogg.wav.wav…

speech_data\_problem_words_\Frei_b936b500c4cc47f3a76af390020c6c34_1e23c8b361d9481eb705bee5bcd0c1a7.ogg.wav.wav…

speech_data\_problem_words_\Frei_b936b500c4cc47f3a76af390020c6c34_3af5fca5604e4d669f0f333d89ef2f76.ogg.wav.wav…

speech_data\_problem_words_\Frei_b936b500c4cc47f3a76af390020c6c34_7fcbb86884354e1c86d8ce771fc167ef.ogg.wav.wav…

speech_data\_problem_words_\Frei_b936b500c4cc47f3a76af390020c6c34_c903b818c862404595b02c96d8eb0345.ogg.wav.wav…

speech_data\_problem_words_\Frei_b936b500c4cc47f3a76af390020c6c34_d476d3e504b944939062727fbac40122.ogg.wav.wav…

speech_data\_problem_words_\Frei_ddc5553de59541f085e1959a8c204d3a_31e08acfeb274ccc868e5ba6b3f4c0dd.ogg.wav.wav…

speech_data\_problem_words_\Frei_ddc5553de59541f085e1959a8c204d3a_3f7db46224454978aecee58a4fb1e9eb.ogg.wav.wav…

speech_data\_problem_words_\Frei_ddc5553de59541f085e1959a8c204d3a_75c998b93f0943849d0181623d14d709.ogg.wav.wav…

speech_data\_problem_words_\Frei_ddc5553de59541f085e1959a8c204d3a_aa5ac32f9179487f90588a1c0704b580.ogg.wav.wav…

speech_data\_problem_words_\Frei_ddc5553de59541f085e1959a8c204d3a_bdbd024b3d4545a5827b514faa8c1af0.ogg.wav.wav…

speech_data\_problem_words_\Hi_133de547acd0414d80e8b860e0ec3273_6347187f232b4c5db49292c152a244b4.ogg.wav.wav: …

speech_data\_problem_words_\Hi_133de547acd0414d80e8b860e0ec3273_b97984fcd0be48ac9608b02ec60826d9.ogg.wav.wav: …

speech_data\_problem_words_\Hi_133de547acd0414d80e8b860e0ec3273_c027993cd4e148a4938cfa4095534ae3.ogg.wav.wav: …

speech_data\_problem_words_\Hi_133de547acd0414d80e8b860e0ec3273_f752df80f17a45cbb5a5c81b085e9ca1.ogg.wav.wav: …

speech_data\_problem_words_\Hi_133de547acd0414d80e8b860e0ec3273_fccbc179f97843b193c21da19ebde6e8.ogg.wav.wav: …

speech_data\_problem_words_\Hi_1b6cebdebc034cb5920589cb4e537e6c_2b2cabc4f0164d53a5630d0fc87bce17.ogg.wav.wav: …

speech_data\_problem_words_\Hi_1b6cebdebc034cb5920589cb4e537e6c_317d312ea9e94f8aad8920c8c2826f8a.ogg.wav.wav: …

speech_data\_problem_words_\Hi_1b6cebdebc034cb5920589cb4e537e6c_3aae3670dc194fc5aa3df8a9a21ff90f.ogg.wav.wav: …

speech_data\_problem_words_\Hi_1b6cebdebc034cb5920589cb4e537e6c_e1cfe9d8dfaf4c7982b1d86fd0ed861a.ogg.wav.wav: …

speech_data\_problem_words_\Hi_1b6cebdebc034cb5920589cb4e537e6c_f68e71d5343e49aa8c9e8580a16683a6.ogg.wav.wav: …

speech_data\_problem_words_\Hi_4b3864617d0b4654b0092a2fce240b7f_aaeaea96f29d4cc8b610993b668365e8.ogg.wav.wav: …

speech_data\_problem_words_\Hi_4b3864617d0b4654b0092a2fce240b7f_b0f4d48ee4f048b6ae2b0eef60b4bf1d.ogg.wav.wav: …

speech_data\_problem_words_\Hi_4b3864617d0b4654b0092a2fce240b7f_d2af43798db544a481f31a553f1a6945.ogg.wav.wav: …

speech_data\_problem_words_\Hi_4b3864617d0b4654b0092a2fce240b7f_e7b69b9bcfac4d5aab9b007445c0feb1.ogg.wav.wav: …

speech_data\_problem_words_\Hi_4b3864617d0b4654b0092a2fce240b7f_eccb5dd78abc4a8590b9980c7af8e190.ogg.wav.wav: …

speech_data\_problem_words_\Hi_9e65b54f99534c78ae65527292c4d9fa_548dfae62e474ca8b9202ef84bc366ed.ogg.wav.wav: …

speech_data\_problem_words_\Hi_9e65b54f99534c78ae65527292c4d9fa_5a1d06bf93b54c12b6e722dd9c533e2b.ogg.wav.wav: …

speech_data\_problem_words_\Hi_9e65b54f99534c78ae65527292c4d9fa_c02da9a11f634e0883e8226bae0e353e.ogg.wav.wav: …

speech_data\_problem_words_\Hi_9e65b54f99534c78ae65527292c4d9fa_da55756991da46e59035e55e1a7f5a45.ogg.wav.wav: …

speech_data\_problem_words_\Hi_9e65b54f99534c78ae65527292c4d9fa_e3affdd9c87b4235bb519569af29b0cb.ogg.wav.wav: …

speech_data\_problem_words_\Hi_af84c8af996d4de9a8573bae812b7030_149a47633c0e4c13af117507e04321ec.ogg.wav.wav: …

speech_data\_problem_words_\Hi_af84c8af996d4de9a8573bae812b7030_15471b48f0ec46edacef27dd112cd681.ogg.wav.wav: …

speech_data\_problem_words_\Hi_af84c8af996d4de9a8573bae812b7030_5d46440f2b694574ad12d076dd68dc73.ogg.wav.wav: …

speech_data\_problem_words_\Hi_af84c8af996d4de9a8573bae812b7030_774370ffceda4e2e8d04fe89fec043bc.ogg.wav.wav: …

speech_data\_problem_words_\Hi_af84c8af996d4de9a8573bae812b7030_f7f264b960c346a38cf512a8f5d05fb8.ogg.wav.wav: …

speech_data\_problem_words_\Hi_b936b500c4cc47f3a76af390020c6c34_66f721adc363409994ad9ef28770323e.ogg.wav.wav: …

speech_data\_problem_words_\Hi_b936b500c4cc47f3a76af390020c6c34_772c67ef42454d91a2f377d8ad231a26.ogg.wav.wav: …

speech_data\_problem_words_\Hi_b936b500c4cc47f3a76af390020c6c34_80893b994f184b5c9a32f14c5da13cc0.ogg.wav.wav: …

speech_data\_problem_words_\Hi_b936b500c4cc47f3a76af390020c6c34_e8a21043ddbb42b0a736ef2160da7b23.ogg.wav.wav: …

speech_data\_problem_words_\Hi_b936b500c4cc47f3a76af390020c6c34_f4a33c8a70a047f9ba99b70c1bba9835.ogg.wav.wav: …

speech_data\_problem_words_\Hi_ddc5553de59541f085e1959a8c204d3a_285e9c5c4e1c4a7f92dfe6a0e9081a46.ogg.wav.wav: …

speech_data\_problem_words_\Hi_ddc5553de59541f085e1959a8c204d3a_893f468530044e52a1bcf982dfbe55c4.ogg.wav.wav: …

speech_data\_problem_words_\Hi_ddc5553de59541f085e1959a8c204d3a_8b1f2f23734d4d428f54c33386cf52a7.ogg.wav.wav: …

speech_data\_problem_words_\Hi_ddc5553de59541f085e1959a8c204d3a_959be379e58742e18039e30e432df427.ogg.wav.wav: …

speech_data\_problem_words_\Hi_ddc5553de59541f085e1959a8c204d3a_e764d092d39342819ecaf19a1f9fb392.ogg.wav.wav: …

In [14]:
# report the number of samples in each split, in the order: train, test, validate
print(*map(len, (train, test, validate)))

176366 22324 21885


In [15]:
# Randomise the order of the training samples in place.
# A dedicated, seeded generator makes the resulting order identical on every
# "Restart Kernel -> Run All" and leaves NumPy's global random state untouched.
SHUFFLE_SEED = 42
np.random.default_rng(SHUFFLE_SEED).shuffle(train)

In [16]:
# Turn each split (a sequence of pairs) into two parallel tuples:
# X_* holds the first item of every pair, Y_* the second.
(X_train, Y_train), (X_validate, Y_validate), (X_test, Y_test) = (
    zip(*split) for split in (train, validate, test)
)

In [17]:
# Write every split to its own compressed .npz archive (arrays stored under
# the keys "X" and "Y") and report progress once each file has been written.
for _npz_name, _inputs, _targets, _done_message in (
    ("training_spectrogram.npz", X_train, Y_train, "Saved training data"),
    ("validation_spectrogram.npz", X_validate, Y_validate, "Saved validation data"),
    ("test_spectrogram.npz", X_test, Y_test, "Saved test data"),
):
    np.savez_compressed(_npz_name, X=_inputs, Y=_targets)
    print(_done_message)

Saved training data
Saved validation data
Saved test data


In [None]:
# The first training spectrogram supplies the "image" dimensions:
# axis 0 of its shape is used as the width and axis 1 as the height.
IMG_WIDTH, IMG_HEIGHT = X_train[0].shape[0], X_train[0].shape[1]

In [None]:
def plot_images2(images_arr, imageWidth, imageHeight):
    """Show up to 25 images on a fixed 5x5 grid of subplots.

    Args:
        images_arr: iterable of array-likes; each one is reshaped to
            (imageWidth, imageHeight) before being drawn. Images beyond the
            25th are ignored because the grid has only 25 cells.
        imageWidth: size of the first axis each image is reshaped to.
        imageHeight: size of the second axis each image is reshaped to.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, axes = plt.subplots(5, 5, figsize=(10, 20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(np.reshape(img, (imageWidth, imageHeight)))
    # Switch off the frame and ticks on every cell, including the ones that
    # received no image, so a partially filled grid has no empty boxes.
    for ax in axes:
        ax.axis("off")
    plt.tight_layout()
    plt.show()
    

In [None]:
# Show the first 20 training spectrograms labelled "friday", then print their labels.
word_index = words.index("friday")

# Build the boolean selection mask once and reuse it for both samples and labels.
friday_mask = np.array(Y_train) == word_index
X_friday = np.array(X_train)[friday_mask]
Y_friday = np.array(Y_train)[friday_mask]
plot_images2(X_friday[:20], IMG_WIDTH, IMG_HEIGHT)
print(Y_friday[:20])

In [None]:
# Show the first 20 training spectrograms labelled "yes", then print their labels.
word_index = words.index("yes")

train_label_array = np.array(Y_train)
yes_mask = train_label_array == word_index
X_yes = np.array(X_train)[yes_mask]
Y_yes = train_label_array[yes_mask]
plot_images2(X_yes[:20], IMG_WIDTH, IMG_HEIGHT)
print(Y_yes[:20])