In [2]:
#!pip install --user tflearn
#!pip install --user librosa 'llvmlite==0.19.0'
#!pip install --user samplerate

In [3]:
from scipy.io import wavfile
from tflearn.data_utils import to_categorical
import numpy as np
import random
import librosa
import glob
import samplerate

random.seed(1)

dataset = []
label_to_idx = dict()
idx_to_label = dict()
time_size = 16
feat_size = 40

rate, noise_data = wavfile.read('dataset/noise.wav')
noise_rest = len(noise_data) - rate

def normalize(data):
    data = data.astype(float)
    mn, mx = data.min(), data.max()
    data = (data - mn) / (mx - mn) * 65535 - 32768
    return data.astype('short')

def read_dataset():
    for fname in glob.glob('dataset/*/*.wav', recursive=True):
        label = fname.split('.')[0].split('/')[1:]
        rate, data = wavfile.read(fname)
        data = normalize(data)
        yield label, data

def augment_resample(dataset):
    for info, data in dataset:
        for rs in (1, 0.75, 1.15, 0.85, 0.9):
            rsdata = samplerate.resample(data, rs, 'sinc_best').astype('short')
            yield (*info, rs), rsdata

def augment_pad(dataset):
    for info, data in dataset:
        for _ in range(5):
            file_rest = rate - len(data)
            if file_rest <= 0:
                file_data = data[:rate]
            else:
                file_offset = int(random.uniform(0, file_rest))
                file_data = np.pad(data, (file_offset, file_rest - file_offset), 'constant', constant_values=(0,0))
            yield info, file_data

def augment_noise(dataset):
    for info, data in dataset:
        for noise_level in range(5):
            noise_offset = int(random.uniform(0, noise_rest))
            noise_sample = noise_data[noise_offset:noise_offset + rate] * noise_level
            file_data = data.astype(float) + noise_sample.astype(float)
            file_data = normalize(file_data)
            yield (*info, noise_level), file_data

for info, data in augment_noise(augment_pad(augment_resample(read_dataset()))):
#     wavfile.write('temp/%s.wav' % (info,), rate, data)

    mfcc = np.transpose(librosa.feature.mfcc(data, rate, n_mfcc=feat_size))

    label = info[0]
    idx = label_to_idx.get(label, None)
    if idx is None:
        idx = len(label_to_idx)
        label_to_idx[label] = idx
        idx_to_label[idx] = label
    dataset.append((mfcc, idx, info))
random.shuffle(dataset)

inp_data = [r[0] for r in dataset]
out_size = len(label_to_idx)
out_data = to_categorical([r[1] for r in dataset], out_size)

len(inp_data), out_size, label_to_idx.keys(), inp_data[0].shape

(3500, 7, dict_keys(['1', '2', '3', '4', '5', '6', 'sil']), (16, 40))

In [4]:
import tensorflow as tf
import tflearn

tf.reset_default_graph()

check_size = 20 #int(len(inp_data) * 0.1)
learn_size = len(inp_data) - check_size
trainX = inp_data[:learn_size]
trainY = out_data[:learn_size]
testX = inp_data[-check_size:]
testY = out_data[-check_size:]

g = tflearn.input_data(shape=[None,time_size,feat_size])
g = tflearn.reshape(g, [-1,time_size,feat_size,1])

g = tflearn.conv_2d(g, 16, (20,8), activation='relu')
g = tflearn.dropout(g, 0.8)
g = tflearn.max_pool_2d(g, 4)
g = tflearn.local_response_normalization(g)
g = tflearn.reshape(g, [-1,time_size,feat_size])

g = tflearn.gru(g, 384, dropout=0.7)

g = tflearn.fully_connected(g, out_size, activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)
m = tflearn.DNN(g)
m.fit(trainX, trainY, n_epoch=20, show_metric=True, snapshot_epoch=False, batch_size=64)

Training Step: 1100  | total loss: [1m[32m0.02805[0m[0m | time: 7.321s
[2K| Adam | epoch: 020 | loss: 0.02805 - acc: 0.9924 -- iter: 3480/3480


In [5]:
hits, miss = 0, 0
mxpe, mnpv = 0, 10
for i, out in enumerate(m.predict(testX)):
    i0 = out.argsort()[-1]
    lbl = idx_to_label[i0]
    info = dataset[learn_size + i][2]
    p = int(out[i0]*10)
    ok = info[0] == lbl
    if ok:
        hits += 1
        mnpv = min(mnpv, p)
    else:
        miss += 1
        mxpe = max(mxpe, p)
    print('%s %d%% %s: %s' % (('+' if ok else '-'), p, lbl, info))
print(hits, miss, mxpe, mnpv)
print('---')
for f, _, info in dataset[100:140]:
    out = m.predict([f])[0]
    i0 = out.argsort()[-1]
    lbl = idx_to_label[i0]
    print(('v' if info[0] == lbl else '-'), int(out[i0]*10), lbl, info)

+ 9% 1: ('1', 'b', 1, 1)
+ 9% sil: ('sil', 'g', 0.75, 2)
+ 9% 1: ('1', 'a', 0.75, 2)
+ 9% 3: ('3', 'a', 0.85, 1)
+ 9% 2: ('2', 'a', 1.15, 4)
+ 9% sil: ('sil', 'g', 1.15, 3)
+ 9% 3: ('3', 'd', 0.85, 0)
+ 9% sil: ('sil', 'd', 0.9, 0)
+ 9% sil: ('sil', 'e', 0.9, 0)
+ 9% 1: ('1', 'a', 1.15, 3)
+ 9% sil: ('sil', 'd', 0.75, 3)
+ 9% sil: ('sil', 'd', 0.9, 0)
+ 9% 4: ('4', 'b', 0.9, 2)
+ 9% sil: ('sil', 'g', 1, 0)
+ 6% 1: ('1', 'e', 0.75, 3)
+ 9% 2: ('2', 'd', 1.15, 3)
+ 9% sil: ('sil', 'b', 0.75, 1)
+ 9% 2: ('2', 'a', 0.85, 3)
+ 9% 1: ('1', 'c', 0.75, 0)
+ 9% 3: ('3', 'c', 1, 0)
20 0 0 6
---
v 9 1 ('1', 'c', 0.75, 3)
v 9 sil ('sil', 'f', 0.85, 3)
v 9 5 ('5', 'b', 1.15, 0)
v 9 3 ('3', 'c', 0.75, 1)
v 9 3 ('3', 'e', 1.15, 2)
v 9 2 ('2', 'd', 1, 1)
v 9 sil ('sil', 'e', 0.9, 4)
v 9 5 ('5', 'b', 0.85, 2)
v 9 3 ('3', 'e', 1.15, 3)
v 9 3 ('3', 'a', 0.9, 4)
v 9 4 ('4', 'b', 1.15, 3)
v 9 4 ('4', 'b', 0.9, 1)
v 9 4 ('4', 'a', 0.75, 0)
v 9 1 ('1', 'd', 1.15, 3)
v 9 5 ('5', 'b', 0.9, 3)
v 9 sil ('sil', '

In [11]:
rate, wave = wavfile.read('dataset/sil/a.wav')
rate, wave = wavfile.read('test3.wav')
# rate, wave = wavfile.read('approach2/tt.wav')
# rate, wave = wavfile.read('../approach2/123.wav') #23s2s21sxs3
# rate, wave = wavfile.read('approach2/123456.wav')
# rate, wave = wavfile.read('approach2/testn.wav')
buffer = wave[:4000]
labels = ''
for o in range(4000, wave.shape[0], 4000):
    chunk = wave[o:o + 4000]
    buffer = np.append(buffer, chunk)[-8000:]
    norm = normalize(buffer)
#     wavfile.write('temp/x-%d.wav' % (o,), rate, norm)
    mfcc = np.transpose(librosa.feature.mfcc(norm, rate, n_mfcc=feat_size))
    for out in m.predict([mfcc]):
        i0 = out.argsort()[-1]
        lbl = idx_to_label[i0]
        labels += lbl
print(labels)

33


In [12]:
m.save('model.tflearn')

INFO:tensorflow:/home/jovyan/anti-swear/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.
