In [1]:
import os
import gc
import psutil

from os.path import isdir, join
from pathlib import Path
import pandas as pd

import numpy as np
# from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
# import librosa
from sklearn.decomposition import PCA

from IPython.display import display
proc = psutil.Process(os.getpid())

print('Finish Import')

Finish Import


In [None]:
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, times, spec = signal.spectrogram(audio,
                                    fs=sample_rate,
                                    window='hann',
                                    nperseg=nperseg,
                                    noverlap=noverlap,
                                    detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

func = lambda x2, x1: 100.0 * (x2 - x1) / x1

def get_data(path, nsamples=16000):
    '''Get data from the path and create a pandas dataframe to store it'''
    arr = []
    valid_label = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']
    train_path = path
    dir_names = next(os.walk(train_path))[1]
        
    for label in dir_names:
        file_names = next(os.walk(train_path + label))[2]
        
        # Valid label name
        data_size = 600
        if label == '_background_noise_':
            label_name = 'silence'
        elif label not in valid_label:
            label_name = 'unknown'
        else:
            label_name = label
            data_size = 1000
                
        for name in file_names[:data_size]:
            path = train_path + label + '/' + name
            try:
                sample_rate, samples = wavfile.read(path)
                
                # Scale the length of samples to 16000
                if len(samples) < 16000:
                    samples = np.pad(samples, (nsamples - sample_len, 0), mode='constant')
                else:
                    samples = samples[0:nsamples]
                    
                # Get spectrogram
                _, _, spectrogram = log_specgram(samples, sample_rate)
                spectrogram = np.reshape(spectrogram, spectrogram.shape + (1, ))
                arr.append((path, label_name, sample_rate, samples, spectrogram))
                del spectrogram, samples, sample_rate
            except:
                pass
#                 print(path)
            del path
        print('Finish ', label,)
    print('Finish appending array')
    return pd.DataFrame(arr, columns=['file_name', 'label', 'sample_rate', 'samples', 'spectrogram'])
df = get_data('./train/audio/')

# df.head(5)

In [None]:
display(df.describe())
display(df.info())
# display(df.head(20))
# print(df.isnull().sum())
gc.collect()
# df.to_csv('./resource/train.csv')

In [2]:
from time import time
import gc
import tensorflow as tf
from random import randint
from tensorflow import keras
from tensorflow.keras import activations, models
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Dense, Input, Dropout, Flatten
from tensorflow.python.keras.callbacks import TensorBoard, ReduceLROnPlateau
print('Finish import model library')

Finish import model library


In [None]:
def get_model(shape):
    '''Create a keras functional model'''
    
    inputlayer = Input(shape=shape)
    nclass = 12
    
    norm_input = BatchNormalization()(inputlayer)
    model = Conv2D(16, kernel_size=2, padding='same', activation=activations.relu)(norm_input)
    model = Conv2D(16, kernel_size=2, padding='same', activation=activations.relu)(model)
    model = MaxPool2D(pool_size=(2, 2))(model)
    model = Dropout(rate=0.2)(model)
    model = Conv2D(32, kernel_size=3, padding='same', activation=activations.relu)(model)
    model = Conv2D(32, kernel_size=3, padding='same', activation=activations.relu)(model)
    model = MaxPool2D(pool_size=(2, 2))(model)
    model = Dropout(rate=0.2)(model)
    model = Conv2D(64, kernel_size=3, padding='same', activation=activations.relu)(model)
    model = MaxPool2D(pool_size=(2, 2))(model)
    model = Dropout(rate=0.2)(model)
    model = Flatten()(model) 

    dense_1 = BatchNormalization()(Dense(128, activation=activations.relu)(model))
    dense_1 = BatchNormalization()(Dense(128, activation=activations.relu)(dense_1))
    dense_1 = Dense(nclass, activation=activations.softmax)(dense_1)

    model = models.Model(inputs=inputlayer, outputs=dense_1)
    model.compile(optimizer='adam', loss=tf.losses.binary_crossentropy, metrics=['accuracy'])
    
    return model

In [None]:
shape = (99, 161, 1)
model = get_model(shape)
# model.summary()

In [None]:
train_set = df.sample(frac=0.8, replace=False, random_state=42)
eval_set = df.loc[set(df.index) - set(train_set.index)]

y_train = np.array(train_set.label)
y_train = pd.get_dummies(y_train, dtype=bool)
x_train = np.array([spec for spec in train_set.spectrogram])

y_eval = np.array(eval_set.label)
y_eval = pd.get_dummies(y_eval, dtype=bool)
x_eval = np.array([spec for spec in eval_set.spectrogram])

print(x_train.dtype, x_eval.shape)
gc.collect()

In [None]:
def batch_generator(x, y, batch_size=16):
    # Return a random image from X, y
    ylen = len(y)
    loopcount = ylen // batch_size
    while True:
        i = randint(0,loopcount)
        yield x[i * batch_size:(i + 1) * batch_size], y[i * batch_size:(i + 1) * batch_size]

# print(len(x_train))
batch_size = 10
epochs = 12
path = './tensorboard/keras_' + str(time())
history = model.fit_generator(
    generator=batch_generator(x_train, y_train, batch_size),
    validation_data=batch_generator(x_eval, y_eval, batch_size),
    epochs=epochs,
    steps_per_epoch=y_train.shape[0] // batch_size,
    validation_steps=y_eval.shape[0] // batch_size,
    callbacks=[TensorBoard(log_dir=path)],
    verbose=1,
)

model.save('./model/model_' + str(time()) + '.h5')

In [None]:
path = './model/model_' + str(time()) + '.h5'
model.save(path)

In [5]:
from tensorflow.keras.models import load_model

model1 = load_model('./model/model_1553894437.5339348.h5')

In [14]:
def get_predict_data(path):
    '''Get prediction data from the path and create a pandas dataframe to store it'''
    arr = []
    train_path = path
    file_names = next(os.walk(train_path))[1]
    print(file_names)
    for name in file_names:
        path = train_path + name
        try:
            sample_rate, samples = wavfile.read(path)

            # Scale the length of samples to 16000
            if len(samples) < 16000:
                samples = np.pad(samples, (nsamples - sample_len, 0), mode='constant')
            else:
                samples = samples[0:nsamples]

            # Get spectrogram
            _, _, spectrogram = log_specgram(samples, sample_rate)
            spectrogram = np.reshape(spectrogram, spectrogram.shape + (1, ))
            arr.append((path, label_name, sample_rate, samples, spectrogram))
            del spectrogram, samples, sample_rate
        except:
            pass
#                 print(path)
        del path
    print('Finish appending array')
    return pd.DataFrame(arr, columns=['file_name', 'label', 'sample_rate', 'samples', 'spectrogram'])
df = get_predict_data('./test/audio/')

[]
Finish appending array


In [17]:
print(os.listdir("./test/audio"))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

