In [1]:
import librosa
from skimage import feature
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

In [2]:
from scipy.signal import butter, sosfilt, lfilter

def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter in transfer-function form.

    Parameters
    ----------
    lowcut, highcut : float
        Lower and upper band edges, in the same units as ``fs``.
    fs : float
        Sampling frequency.
    order : int, optional
        Order passed to ``scipy.signal.butter`` (default 5).

    Returns
    -------
    b, a : ndarray
        Numerator and denominator polynomial coefficients of the filter.
    """
    nyquist = fs / 2.0
    # butter() expects the band edges normalised to the Nyquist frequency.
    normalized_band = [edge / nyquist for edge in (lowcut, highcut)]
    return butter(order, normalized_band, btype='band')


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` with a Butterworth filter.

    The filter is designed and applied as cascaded second-order sections
    (``output='sos'`` + ``sosfilt``) instead of a single ``(b, a)`` transfer
    function. A band-pass of order ``order`` is a filter of order
    ``2 * order``; in polynomial form its coefficients can become badly
    conditioned, whereas the second-order-section form stays numerically
    stable. For well-conditioned designs the output matches ``lfilter(b, a,
    data)`` up to floating-point rounding.

    Parameters
    ----------
    data : array_like
        Signal to filter; filtering runs along the last axis.
    lowcut, highcut : float
        Lower and upper band edges, in the same units as ``fs``.
    fs : float
        Sampling frequency of ``data``.
    order : int, optional
        Butterworth order passed to ``scipy.signal.butter`` (default 5).

    Returns
    -------
    ndarray
        The filtered signal, same shape as ``data``. The filter starts from a
        zero initial state, so the first samples contain a start-up transient.
    """
    nyq = 0.5 * fs
    # butter() expects the band edges normalised to the Nyquist frequency.
    sos = butter(order, [lowcut / nyq, highcut / nyq], btype='band', output='sos')
    return sosfilt(sos, data)

In [3]:
# Read the recording; librosa resamples it to the requested 22050 Hz rate.
sig, rate = librosa.load('Record4_mono.wav', sr=22050)

FFT and filtering parameters

In [1]:
# Band-pass filter edges and STFT settings shared by the cells below.
band_freq_min = 2000   # lower band edge passed to the band-pass filter
band_freq_max = 10500  # upper band edge passed to the band-pass filter
windowlength = 512     # STFT segment length in samples; also used as NFFT
N_overlap = 400        # samples shared by consecutive STFT segments

Load the Keras CNN model

In [5]:
from tensorflow.keras.models import load_model
import h5py

# Location of the saved Keras model, relative to the working directory.
filename = '../models/my_model_epochs20.h5'
model = load_model(filename)

In [6]:
# Duration of the loaded recording in seconds: sample count / sample rate.
t_max = len(sig) / rate

6562.344036281179

Predict segment results with the model (segment length: 2 seconds)

In [7]:
# Analysis window: the first segment spans [t_begin, t_end) seconds.
segment_length = 2
t_begin = 0
t_end = t_begin + segment_length

In [8]:
# Empty accumulators; `result` starts with zero rows and two columns so that
# per-segment model outputs can be stacked onto it row by row.
result = np.zeros(shape=(0, 2))
time_axis = np.zeros(shape=(0, 0))

In [9]:
# Slide a non-overlapping window of `segment_length` seconds over the
# recording. Each segment is band-pass filtered, converted to a dB-scaled
# STFT magnitude and passed to the model; the outputs are stacked onto
# `result` (one row per segment).
segment_results = []  # collected here and concatenated once after the loop

# `<=` keeps the final full segment when the recording length is an exact
# multiple of `segment_length`; a trailing partial segment is still skipped.
while t_end <= t_max:
    # Slice bounds must be integers even if `segment_length` is fractional.
    index_begin = int(t_begin * rate)
    index_end = int(t_end * rate)
    segment = sig[index_begin:index_end]

    filtered = butter_bandpass_filter(segment, band_freq_min, band_freq_max, fs=rate, order=5)
    f, t, Syy = signal.stft(filtered, fs=rate, window='hann', noverlap=N_overlap,
                            nfft=windowlength, nperseg=windowlength)
    Syy = abs(Syy) * 33.0  # Mic sensitivity conversion
    # Level in dB relative to 0.00002 (presumably the 20 uPa reference
    # sound pressure -- confirm against the training pipeline).
    Syy = 20 * np.log10(Syy / 0.00002)

    # Add batch and channel axes: (freq, time) -> (1, freq, time, 1). With the
    # notebook's default parameters this is (1, 257, 395, 1).
    S = Syy[np.newaxis, :, :, np.newaxis]

    segment_result = model.predict(S)
    segment_results.append(segment_result)

    t_begin += segment_length
    t_end += segment_length

# One concatenation instead of np.append per iteration, which would copy the
# whole accumulated array every time.
if segment_results:
    result = np.concatenate([result, *segment_results], axis=0)

In [10]:
# Show the accumulated model outputs: one row per processed segment, two columns.
result

array([[1.84203500e-10, 1.00000000e+00],
       [3.65592210e-29, 1.00000000e+00],
       [4.57639512e-34, 1.00000000e+00],
       ...,
       [8.00600827e-01, 1.99399143e-01],
       [7.23817348e-01, 2.76182622e-01],
       [6.89712286e-01, 3.10287684e-01]])