In [35]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.io import wavfile
from cfg import Config
from sklearn.utils.class_weight import compute_class_weight
from python_speech_features import mfcc
from keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from keras.layers import Dropout, Dense, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
import pickle
from keras.callbacks import ModelCheckpoint

In [36]:
def build_rand_feat():
    """Build a randomly sampled, min-max scaled MFCC data set.

    For each of ``n_samples`` draws, a class is picked according to
    ``prob_dist``, a clip of that class is picked uniformly from ``df``, and a
    random window of ``config.step`` samples from that clip is converted to
    MFCC features.

    Reads the notebook-level names ``df``, ``classes``, ``class_dist``,
    ``prob_dist``, ``n_samples`` and ``config``.

    Side effects:
        * ``config.min`` / ``config.max`` are set to the smallest / largest
          MFCC value seen (the values used for scaling).
        * ``config.data`` is set to ``(X, y)``.
        * ``config`` is pickled to ``config.p_path``.

    Returns:
        tuple: ``(X, y)`` where ``X`` holds the scaled features, reshaped with
        a trailing channel axis when ``config.mode == 'conv'``, and ``y`` holds
        the one-hot encoded class ids.

    Raises:
        ValueError: if a selected clip has fewer than ``config.step`` samples.
    """
    X = []
    y = []

    _min, _max = float('inf'), -float('inf')

    for _ in tqdm(range(n_samples)):
        rand_class = np.random.choice(class_dist.index, p=prob_dist)
        # `f` is an index *label*, so every lookup below is label-based
        # (mixing in positional `.iloc` is only right for a default index).
        f = np.random.choice(df[df['class'] == rand_class].index)
        wav_path = df.at[f, 'c_path']
        rate, signal = wavfile.read(wav_path)
        label = df.at[f, 'class']

        last_start = signal.shape[0] - config.step
        if last_start < 0:
            raise ValueError(
                f"{wav_path!r} has {signal.shape[0]} samples, "
                f"fewer than config.step={config.step}"
            )
        # randint's upper bound is exclusive: +1 keeps the final window
        # reachable and lets a clip of exactly `config.step` samples through.
        rand_index = np.random.randint(0, last_start + 1)
        sample = signal[rand_index:rand_index+config.step]
        X_sample = mfcc(sample, rate, numcep=config.nfeat, nfilt=config.nfilt, nfft=config.nfft)

        # Track global extrema so the whole set is scaled with one range.
        _min = min(np.amin(X_sample), _min)
        _max = max(np.amax(X_sample), _max)

        X.append(X_sample)
        y.append(classes.index(label))

    # Stored on config so the same scaling can be reused outside this function.
    config.min = _min
    config.max = _max

    X, y = np.array(X), np.array(y)
    X = (X - _min) / (_max - _min)

    if config.mode == 'conv':
        # Conv2D expects a trailing channel axis.
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    elif config.mode == 'time':
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2])

    # NOTE(review): 10 is hard-coded and matches the Dense(10) softmax heads of
    # the models in this notebook; change both together if `classes` changes.
    y = to_categorical(y, num_classes=10)
    config.data = (X, y)

    with open(config.p_path, 'wb') as handle:
        pickle.dump(config, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return X, y

In [37]:
def get_conv_model():
    """Build, summarise and compile the convolutional classifier.

    The per-sample input shape is read from the notebook-level
    ``input_shape``. The network is four 3x3, stride-1, same-padded ReLU
    convolutions (16, 32, 64, 128 filters), a 2x2 max-pool, 50% dropout, then
    dense layers of 128 and 64 ReLU units and a 10-way softmax output.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, ``'acc'`` metric). ``model.summary()`` is printed as a
        side effect.
    """
    model = Sequential()
    # Only the first layer of a Sequential model needs `input_shape`; the
    # following layers take their input shape from the layer before them.
    model.add(Conv2D(16, (3, 3), activation='relu', strides=(1, 1), padding='same', input_shape=input_shape))
    for n_filters in (32, 64, 128):
        model.add(Conv2D(n_filters, (3, 3), activation='relu', strides=(1, 1), padding='same'))
    model.add(MaxPool2D((2,2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    return model

In [38]:
def get_recurrent_model():
    """Assemble, summarise and compile the stacked-LSTM classifier.

    Data for the RNN is shaped (n, time, feat); the per-sample shape is read
    from the notebook-level ``input_shape``. Two sequence-returning LSTM layers
    of 128 units are followed by 50% dropout, a funnel of time-distributed
    ReLU dense layers (64, 32, 16, 8 units), a flatten and a 10-way softmax.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, ``'acc'`` metric). ``model.summary()`` is printed as a
        side effect.
    """
    layer_stack = [
        LSTM(128, return_sequences=True, input_shape=input_shape),
        LSTM(128, return_sequences=True),
        Dropout(0.5),
    ]
    # Per-timestep dense funnel applied to every step of the LSTM output.
    layer_stack += [
        TimeDistributed(Dense(units, activation='relu'))
        for units in (64, 32, 16, 8)
    ]
    layer_stack += [Flatten(), Dense(10, activation='softmax')]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    return model

In [39]:
# Clip catalogue; the 'class' and 'length' columns are used below.
df = pd.read_csv('./sounds.csv')

# Unique class labels in sorted order; a label's position is its integer id.
classes = list(np.unique(df['class']))

# Mean clip length per class, normalised into the class sampling distribution.
class_dist = df.groupby(['class'])['length'].mean()
prob_dist = class_dist / class_dist.sum()

# Two random windows per 0.1 units of total clip length
# (presumably seconds — TODO confirm the unit of the 'length' column).
total_length = df['length'].sum()
n_samples = 2 * int(total_length / 0.1)

In [31]:
config = Config(mode='time')

# Fail fast on an unsupported mode instead of silently leaving X, y and model
# undefined and hitting a NameError in a later cell.
if config.mode not in ('conv', 'time'):
    raise ValueError(f"Unsupported config.mode {config.mode!r}; expected 'conv' or 'time'")

# Feature extraction is identical for both modes; only the model differs.
X, y = build_rand_feat()
# Integer class ids recovered from the one-hot labels (used for class weights).
y_flat = np.argmax(y, axis=1)

if config.mode == 'conv':
    # (time, feat, channel) for the Conv2D stack.
    input_shape = (X.shape[1], X.shape[2], 1)
    model = get_conv_model()
else:
    # (time, feat) for the LSTM stack.
    input_shape = (X.shape[1], X.shape[2])
    model = get_recurrent_model()

100%|██████████| 51230/51230 [00:38<00:00, 1336.12it/s]


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 9, 128)            72704     
                                                                 
 lstm_1 (LSTM)               (None, 9, 128)            131584    
                                                                 
 dropout_1 (Dropout)         (None, 9, 128)            0         
                                                                 
 time_distributed (TimeDist  (None, 9, 64)             8256      
 ributed)                                                        
                                                                 
 time_distributed_1 (TimeDi  (None, 9, 32)             2080      
 stributed)                                                      
                                                                 
 time_distributed_2 (TimeDi  (None, 9, 16)            

In [32]:
# Map each class id present in y_flat to its 'balanced' weight, so classes
# that were sampled less often count more in the loss.
class_weight = dict(
    zip(
        np.unique(y_flat),
        compute_class_weight('balanced', classes=np.unique(y_flat), y=y_flat),
    )
)

In [33]:
# Save the full model to config.model_path whenever validation accuracy
# improves. The deprecated `period=1` argument is omitted: checking once per
# epoch is already the default (`save_freq='epoch'`).
checkpoint = ModelCheckpoint(
    config.model_path,
    monitor='val_acc',
    verbose=1,
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)





In [34]:
# Train with the last 10% of the samples held out for validation.
# The checkpoint callback already writes the best-val_acc model to
# config.model_path, so there is deliberately no unconditional model.save()
# afterwards: it would overwrite that best model with the last-epoch one.
history = model.fit(
    X,
    y,
    epochs=10,
    batch_size=32,
    shuffle=True,
    class_weight=class_weight,
    validation_split=0.1,
    callbacks=[checkpoint],
)

Epoch 1/10
Epoch 1: val_acc improved from -inf to 0.90904, saving model to models/time.model
INFO:tensorflow:Assets written to: models/time.model/assets


INFO:tensorflow:Assets written to: models/time.model/assets


Epoch 2/10
Epoch 2: val_acc improved from 0.90904 to 0.94593, saving model to models/time.model
INFO:tensorflow:Assets written to: models/time.model/assets


INFO:tensorflow:Assets written to: models/time.model/assets


Epoch 3/10
Epoch 3: val_acc improved from 0.94593 to 0.98126, saving model to models/time.model
INFO:tensorflow:Assets written to: models/time.model/assets


INFO:tensorflow:Assets written to: models/time.model/assets


Epoch 4/10
Epoch 4: val_acc did not improve from 0.98126
Epoch 5/10
Epoch 5: val_acc did not improve from 0.98126
Epoch 6/10
Epoch 6: val_acc did not improve from 0.98126
Epoch 7/10
Epoch 7: val_acc did not improve from 0.98126
Epoch 8/10
Epoch 8: val_acc improved from 0.98126 to 0.98516, saving model to models/time.model
INFO:tensorflow:Assets written to: models/time.model/assets


INFO:tensorflow:Assets written to: models/time.model/assets


Epoch 9/10
Epoch 9: val_acc did not improve from 0.98516
Epoch 10/10
Epoch 10: val_acc did not improve from 0.98516
INFO:tensorflow:Assets written to: models/time.model/assets


INFO:tensorflow:Assets written to: models/time.model/assets
