This repository has been archived by the owner on Sep 2, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
wav2MFCC.py
70 lines (47 loc) · 1.94 KB
/
wav2MFCC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import tensorflow as tf
from shutil import rmtree
from librosa.feature import mfcc
import numpy as np
from tensorflow.io import gfile
import uuid
from constants import *
def read_dir():
    """Collect the WAV files available for every wanted word.

    For each entry of ``WANTED_WORDS`` a sub-directory of the same name is
    expected inside ``SOUNDS_DIR``; every ``*.wav`` file found directly in
    that sub-directory is recorded.

    Returns:
        A list of dicts of the form ``{'word': <word>, 'file': <wav path>}``,
        one per WAV file found.

    Raises:
        Exception: if ``SOUNDS_DIR`` or one of the per-word sub-directories
            does not exist.
    """
    if not os.path.isdir(SOUNDS_DIR):
        raise Exception('Sound directory with name \'' + SOUNDS_DIR + '\' not found!')

    data = []
    for word in WANTED_WORDS:
        # Join the path components instead of concatenating strings so the
        # lookup works whether or not SOUNDS_DIR ends with a separator.
        word_dir = os.path.join(SOUNDS_DIR, word)
        if not os.path.isdir(word_dir):
            raise Exception('Sounds directory for \'' + word + '\' not found at ' + word_dir + '!')

        search_path = os.path.join(word_dir, '*.wav')
        data.extend(
            {'word': word, 'file': wav_path}
            for wav_path in gfile.glob(search_path)
        )
    return data
def get_features():
    """Extract MFCC features for every WAV file found and save them.

    Each file returned by ``read_dir()`` is passed to ``get_MFCC()``; the
    result is paired with the file's word as its label and the whole list is
    handed to ``save_features()``. Nothing is returned.
    """
    print('Extracting MFCC features from WAV files')
    labelled = [
        {'data': get_MFCC(entry['file']), 'label': entry['word']}
        for entry in read_dir()
    ]
    save_features(labelled)
def get_MFCC(wav_path):
    """Compute the MFCC features of a single WAV file.

    The file is decoded as single-channel audio, truncated to at most
    ``DESIRED_SAMPLES`` samples and, when shorter, zero-padded at the front
    so that every clip has exactly ``DESIRED_SAMPLES`` samples before the
    MFCCs are computed.

    Args:
        wav_path: path of the WAV file to read.

    Returns:
        The result of ``librosa.feature.mfcc`` (``FEATURES_COUNT``
        coefficients requested) with one extra trailing axis appended.
    """
    wav_loader = tf.io.read_file(wav_path)
    wav_decoded = tf.audio.decode_wav(wav_loader, desired_channels=1).audio[:DESIRED_SAMPLES]

    # Pad only the sample axis, and only before the existing samples; the
    # channel axis is left untouched.
    padding = tf.constant([[DESIRED_SAMPLES - len(wav_decoded), 0], [0, 0]])
    audio_data = tf.pad(wav_decoded, padding)

    # After truncation and padding the clip holds DESIRED_SAMPLES samples, so
    # flatten to that length. Flattening to SAMPLE_RATE only worked when the
    # two constants happened to be equal.
    reshaped_data = np.array(tf.reshape(audio_data, (DESIRED_SAMPLES,)))

    # Pass the signal and sample rate by keyword: recent librosa releases
    # reject positional audio arguments, and keywords work on older ones too.
    feature = mfcc(y=reshaped_data, sr=SAMPLE_RATE, n_mfcc=FEATURES_COUNT)
    return tf.expand_dims(feature, -1)
def save_features(features):
    """Write every feature to its own serialized-tensor file.

    Any existing ``MFCCS_DIR`` tree is removed first. Each feature is then
    cast to ``float32``, serialized, and written under
    ``MFCCS_DIR + <label> + '/'`` with a random hex file name ending in
    ``.mfcc``.

    Args:
        features: iterable of dicts with a ``'data'`` entry (the feature
            values) and a ``'label'`` entry (the sub-directory name).
    """
    # Drop the output of earlier runs so old files do not mix with new ones.
    if os.path.isdir(MFCCS_DIR):
        rmtree(MFCCS_DIR)

    print('Saving MFCC features as tensor files')
    for entry in features:
        unique_name = uuid.uuid4().hex + '.mfcc'
        target_path = MFCCS_DIR + entry['label'] + '/' + unique_name
        as_float32 = tf.dtypes.cast(entry['data'], dtype=tf.float32)
        serialized = tf.io.serialize_tensor(as_float32)
        # NOTE(review): no directory is created here; this relies on
        # tf.io.write_file creating missing parent directories - confirm.
        tf.io.write_file(target_path, serialized)