In [0]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
import os
from keras.models import load_model
from python_speech_features import mfcc, logfbank, delta
from pydub import AudioSegment
import pyaudio
import IPython
from keras.preprocessing.sequence import pad_sequences
import scipy
import scipy.signal
import sys
import wave
from queue import Queue
from threading import Thread

Using TensorFlow backend.


In [0]:
# Load the saved Keras model from the HDF5 file in the current directory.
model = load_model("./train.h5")

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [0]:
# Print the layer-by-layer architecture and parameter counts of the loaded model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200, 128)          3456      
_________________________________________________________________
dropout_1 (Dropout)          (None, 200, 128)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 200, 32)           20608     
_________________________________________________________________
dropout_2 (Dropout)          (None, 200, 32)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 200, 64)           24832     
_________________________________________________________________
dropout_3 (Dropout)          (None, 200, 64)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               98816     
__________

In [0]:
# Feature-extraction settings shared by the helper functions in this notebook.
NUMB_CEP = 26    # number of cepstral coefficients requested from mfcc() in get_feature
NUMB_FILT = 26   # NOTE(review): not referenced by any code visible in this notebook
NFFT = 512       # NOTE(review): not referenced by any code visible in this notebook
DELTA = 2        # second argument passed to delta() in get_feature
MFCC_LEN = 200   # length each feature sequence is padded/truncated to in get_data

In [0]:
def get_feature(y, fs):
    """Compute the delta-of-MFCC feature matrix for one audio signal.

    The signal is peak-normalised, converted to MFCCs with ``NUMB_CEP``
    coefficients, and the MFCCs are then replaced by their ``delta`` features
    (computed with ``DELTA`` as the second argument).

    Args:
        y: array of audio samples (integer PCM or float).
        fs: sample rate of ``y`` in Hz.

    Returns:
        The array returned by ``delta()``; presumably one row per analysis
        frame and ``NUMB_CEP`` columns (per python_speech_features) -- confirm.
    """
    y = np.asarray(y)
    # Promote integer PCM to float first: abs() of the most negative int16
    # sample (-32768) overflows in integer arithmetic and corrupts the peak.
    if y.dtype.kind in "iub":
        y = y.astype(np.float64)

    peak = np.max(np.abs(y))
    # A completely silent buffer has peak 0; dividing would fill the signal
    # with NaNs, so leave it unscaled in that case.
    if peak > 0:
        y = y / peak

    mfcc_feat = mfcc(y, fs, numcep=NUMB_CEP)
    mfcc_feat = delta(mfcc_feat, DELTA)

    return mfcc_feat

def preprocess_data(y, fs):
    """Band-pass filter a signal to the 300-3400 Hz band.

    A 255-tap FIR band-pass filter is designed for sample rate ``fs`` and
    applied forward and backward with ``filtfilt``.

    Args:
        y: array of audio samples.
        fs: sample rate of ``y`` in Hz.

    Returns:
        The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    taps = scipy.signal.firwin(numtaps=255, cutoff=[300, 3400],
                               pass_zero=False, fs=fs)
    return scipy.signal.filtfilt(taps, 1, y)
    
def get_data(y, fs):
    """Turn one audio signal into a single-example batch of features.

    Features come from ``get_feature``; they are padded with zeros or
    truncated (both at the end) to ``MFCC_LEN`` along the first axis, and a
    leading batch axis of size 1 is added.

    Note: filtering with ``preprocess_data`` is currently not applied.

    Args:
        y: array of audio samples.
        fs: sample rate of ``y`` in Hz.

    Returns:
        Float array of shape ``(1, MFCC_LEN, n_features)``.
    """
    features = get_feature(y, fs)

    # pad_sequences pads along the last axis of a list of sequences, so the
    # feature matrix is transposed going in and transposed back coming out.
    fixed_len = pad_sequences(
        features.T,
        MFCC_LEN,
        dtype=float,
        padding='post',
        truncating='post',
    ).T

    return np.array([fixed_len])

# Class index -> command label, in the order the labels were printed by the
# original if/elif chain (index 0 is the background class).
COMMAND_LABELS = (
    "Background",
    "Bat den",
    "Tat den",
    "Bat dieu hoa",
    "Tat dieu hoa",
    "Bat quat",
    "Tat quat",
    "Bat tivi",
    "Tat tivi",
    "Mo cua",
    "Dong cua",
    "Khoa cua",
    "Mo cong",
    "Dong cong",
    "Khoa cong",
    "Doremon",
)


def print_command(probabilities, threshold=0.75):
    """Print the command whose probability is highest, if it is confident enough.

    Args:
        probabilities: 2-D array; only row 0 is inspected.
        threshold: minimum probability the top class must reach for its label
            to be printed. Defaults to 0.75.

    Side effects:
        Prints the label followed by a newline when the top probability is
        ``>= threshold``; otherwise writes a single "-" with no newline.
        Nothing is printed for a confident class index that has no label.
    """
    arg_max = int(np.argmax(probabilities[0]))
    if probabilities[0, arg_max] >= threshold:
        if arg_max < len(COMMAND_LABELS):
            print(COMMAND_LABELS[arg_max])
    else:
        # Low confidence: emit a progress mark instead of a label.
        sys.stdout.write("-")

In [0]:
### Record audio from the microphone and write it to a WAV file
FORMAT = pyaudio.paInt16 
CHANNELS = 1
RATE = 8000
CHUNK = 4000
WAVE_OUTPUT_FILENAME = "test.wav"

seconds_rec_numb = 2    ## record for 2 seconds
frames = []

audio = pyaudio.PyAudio()
# Query the sample width before the PyAudio instance is terminated.
sample_width = audio.get_sample_size(FORMAT)
try:
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        for i in range(0, int(seconds_rec_numb * RATE / CHUNK)):
            sys.stdout.write("-")
            data = stream.read(CHUNK)
            frames.append(data)
    except (KeyboardInterrupt, SystemExit):
        # Interrupted: stop early and keep whatever was captured so far.
        pass
    finally:
        # Stop/close exactly once, whether the loop finished, was
        # interrupted, or failed.
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

# The context manager closes the file even if writing fails.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(sample_width)
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(frames))

----

In [0]:
## Play back the audio that was just recorded
IPython.display.Audio("test.wav")

In [0]:
## Run the model on the recorded file
file_name = "./test.wav"
# wavfile.read returns the sample rate and the sample array.
fs, y = wavfile.read(file_name)

# Build the single-example feature batch and classify it.
x = get_data(y, fs)
prob = model.predict(x, verbose=1)
print(prob)
# Prints the command label, or "-" when the top probability is below the threshold.
print_command(prob)

[[0.40333965 0.0058311  0.01046597 0.06893883 0.01479391 0.01390462
  0.00329601 0.01543166 0.01847607 0.00259428 0.05679455 0.01335789
  0.01149432 0.20914482 0.014479   0.13765731]]
-

In [0]:
### Record from the microphone and predict directly (no intermediate file)
FORMAT = pyaudio.paInt16 
CHANNELS = 1
RATE = 8000
CHUNK = 1000

seconds_rec_numb = 2    ## record for 2 seconds
# Pre-allocated buffer; any part not recorded (e.g. after an interrupt) stays zero.
y = np.zeros((RATE * seconds_rec_numb, ), dtype='int16')

audio = pyaudio.PyAudio()
try:
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        for i in range(0, int(seconds_rec_numb * RATE / CHUNK)):
            sys.stdout.write("-")
            data = stream.read(CHUNK)
            y[CHUNK * i : CHUNK * (i+1)] = np.frombuffer(data, dtype='int16')
    except (KeyboardInterrupt, SystemExit):
        # Interrupted: stop early and predict on what was captured so far.
        pass
    finally:
        # Stop/close exactly once, whether the loop finished, was
        # interrupted, or failed.
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

x = get_data(y, RATE)
prob = model.predict(x, verbose=1)
print(prob)
print_command(prob)

[[7.0185654e-02 1.2297672e-02 3.9782579e-04 1.4417687e-01 3.8070183e-03
  3.9218567e-02 5.4381136e-03 5.1692829e-02 6.7309779e-03 7.4570730e-02
  3.0323103e-02 7.6123187e-03 4.7142193e-01 6.3199379e-02 1.0789462e-02
  8.1375511e-03]]
-

In [0]:
### Record and predict in real time
FORMAT = pyaudio.paInt16 
CHANNELS = 1
RATE = 8000
CHUNK = 4000
#WAVE_OUTPUT_FILENAME = "record.wav"

# Queue used by audio_callback to hand audio windows to the prediction loop.
q = Queue()
seconds_rec_numb = 2    ## length of the analysis window in seconds
# Number of samples in one analysis window.
feed_samples = int(RATE * seconds_rec_numb)

# Rolling buffer of the most recent samples; starts as one window of zeros.
data = np.zeros(feed_samples, dtype='int16')

def audio_callback(in_data, frame_count, time_info, status):
    """Stream callback: extend the rolling buffer and queue the latest window.

    The incoming bytes are decoded as int16 samples and appended to the global
    ``data`` buffer. When the buffer is longer than ``feed_samples`` it is
    trimmed to its last ``feed_samples`` samples and that window is put on
    the global queue ``q``.

    Args:
        in_data: raw bytes of the captured chunk.
        frame_count, time_info, status: unused.

    Returns:
        ``(in_data, pyaudio.paContinue)``.
    """
    global q, data
    window = np.concatenate((data, np.frombuffer(in_data, dtype='int16')))

    if len(window) > feed_samples:
        # Keep only the newest samples and publish them for prediction.
        window = window[-feed_samples:]
        q.put(window)

    data = window
    return (in_data, pyaudio.paContinue)

audio = pyaudio.PyAudio()
try:
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK, stream_callback=audio_callback)
    try:
        stream.start_stream()
        # Runs until interrupted: take each queued audio window, classify it,
        # and print the result.
        while True:
            y = q.get()
            x = get_data(y, RATE)
            prob = model.predict(x)

            print_command(prob)
    except (KeyboardInterrupt, SystemExit):
        # Interrupting the kernel is the normal way to leave the loop.
        pass
    finally:
        # Stop/close exactly once; doing it again on an already-closed
        # stream would raise and skip audio.terminate().
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

Background
Bat dieu hoa
Background
Background
Background
---------Background
-Bat tivi
-Doremon
Doremon
Doremon
---Bat den
-Tat tivi
Tat tivi
-Background
--Doremon
----Bat dieu hoa
---Doremon
-Bat tivi
Doremon
Doremon
--Background
-Doremon
Bat tivi
Bat tivi
Bat tivi
Doremon
-Background
-----Background
Background
--Bat tivi
--Doremon
Background
Background
Background
Doremon
Doremon
Background
-Doremon
Bat tivi
--Doremon
--Bat tivi
Background
-Tat tivi
Tat tivi
Tat tivi
--Tat tivi
--Background
---Bat tivi
Bat tivi
Tat tivi
---Bat dieu hoa
---Background
-Bat tivi
Bat dieu hoa
-Doremon
Bat tivi
Bat dieu hoa
Bat dieu hoa
Bat dieu hoa
-Background
Doremon
Doremon
-Background
-Doremon
Background
Background
-Tat tivi
-----Bat tivi
--Background
Background
Bat tivi
Background
-Tat tivi
--Background
Background
Background
Background
Background
-Khoa cong
Background
----Tat tivi
Tat tivi
Tat tivi
Tat tivi
Bat tivi
Bat tivi
Khoa cua
-Bat tivi
Background
Bat tivi
Bat tivi
Bat tivi
Background
Tat tivi


--Doremon
-Doremon
Doremon
-Doremon
-Mo cong
---Mo cong
-Background
----Doremon
Doremon
Background
-Doremon
-Bat tivi
------Doremon
Doremon
-Bat dieu hoa
-Doremon
Khoa cong
-Background
Background
Background
Background
Background
--Background
-Background
---Doremon
--Background
------Bat tivi
Doremon
Bat tivi
-Background
Background
-Background
Background
--Background
Background
Background
Background
--Bat dieu hoa
Bat dieu hoa
Bat dieu hoa
-Background
Background
Background
Background
---Doremon
-Doremon
Background
---Doremon
Doremon
Doremon
-Background
Background
Background
-----Background
Background
------Doremon
--Doremon
Doremon
Doremon
Background
Background
Doremon
Khoa cong
Khoa cong
Khoa cong
Bat tivi
---Bat den
Bat tivi
-Mo cong
--Background
---Background
Background
--Background
Bat dieu hoa
Doremon
Doremon
Doremon
-Background
----Background
Background
-Tat tivi
Background
---Bat tivi
Background
Background
Background
Background
Background
Background
-Background
-Background
Backgr