In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))
import librosa
import random

# Any results you write to the current directory are saved as output.


[]


In [2]:
def roll(data):
    data_roll = np.roll(data, 5000)
    return data_roll
def stretch(data, rate=2):
    input_length = 16000*3
    data = librosa.effects.time_stretch(data, rate)
    if len(data)>input_length:
        data = data[:input_length]
    else:
        data = np.pad(data, (0, max(0, input_length - len(data))), "constant")

    return data

def wnoise(data):
    wn = np.random.randn(len(data))
    data_wn = data + 0.005*wn
    return data_wn

In [3]:
# put dataset in folder named dataset

Cloning into 'dataset'...
remote: Enumerating objects: 1509, done.[K
remote: Total 1509 (delta 0), reused 0 (delta 0), pack-reused 1509[K
Receiving objects: 100% (1509/1509), 105.42 MiB | 24.75 MiB/s, done.
Resolving deltas: 100% (41/41), done.


In [4]:
import os

def get_all_files(datapath, dataset_type="keyword"):
    data_dir = datapath
    data = []
    all_files = os.listdir(data_dir)
    all_files.remove('.DS_Store')
    labels = set()
    for file in all_files:
        filelabels = file.split("-")[:3]
        data_dict = {
            "filepath": data_dir + file,
            "stress": filelabels[2],
            "environment": filelabels[1],
            "keyword":filelabels[0]
        }
        labels.add(data_dict[dataset_type])
        data.append(data_dict)

    return data,labels

In [5]:
import os
import librosa
import numpy as np
from tqdm import tqdm

class AudioFeatureDataset():

    ''' To create audio dataset
        @param dataset_type = ( keyword | stress | environment   )
    '''

    def __init__(self,datapath, samplingrate=16000, dt="keyword"):
        print(dt)
        datafiles, labels = get_all_files(datapath,dataset_type=dt)
        self.datafiles = datafiles
        self.samplingrate = samplingrate
        self.target_labels = list(labels)
        self.dataset_type = dt

    def process(self, file, max_len=16000):
        ''' extracts raw audio  and returns samps '''
        try:
            samps, sr = librosa.load(file, mono=True, sr=None)
            pad_len = max_len - samps.shape[0]
            if pad_len >= 0:
                samps = np.pad(samps, (0, pad_len), 'constant')
            return np.array(samps[:max_len])
        except:
            print(file)

    def get_dataset(self, include_background=False):
        labels = []
        features = []
        for file_data in tqdm(self.datafiles):
            labels.append(file_data[self.dataset_type])
            samps = self.process(file_data["filepath"], self.samplingrate * 3)
            features.append(samps)
            # with roll
            labels.append(file_data[self.dataset_type])
            features.append(roll(samps)) 
            # with strech
            labels.append(file_data[self.dataset_type])
            features.append(stretch(samps)) 
            # white noise
            labels.append(file_data[self.dataset_type])
            features.append(wnoise(samps)) 
        labels = np.array(labels)
        features = np.array(features)
        return features, labels


In [6]:
a = AudioFeatureDataset(datapath="dataset/data/",dt="environment")

environment


In [7]:
features,labels = a.get_dataset()

100%|██████████| 1444/1444 [03:15<00:00,  4.16it/s]


In [8]:
x = np.array(features)
Y = np.array(labels)

In [9]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced',np.unique(Y),Y)

In [10]:
x.shape

(5776, 48000)

In [11]:
x = x.reshape(x.shape[0], 1, 16000 * 3)

In [12]:
df = pd.Series(Y)
target_count = df.value_counts()
print(target_count)

safe         4948
dangerous     828
dtype: int64


In [13]:
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import LabelEncoder


labelencoder = LabelEncoder()
Y = labelencoder.fit_transform(Y)

Using TensorFlow backend.


In [14]:
from sklearn.model_selection import train_test_split

def get_train_test(split_ratio=0.7, random_state=42):
    return train_test_split(x, Y, test_size= (1 - split_ratio), random_state=random_state, shuffle=True)

In [15]:
X_train, X_test, y_train, y_test = get_train_test()


In [16]:
import tensorflow as tf

In [17]:
tensorboard_callback = tf.keras.callbacks.TensorBoard("logs")


In [18]:
# y_train = labelencoder.fit_transform(y_train)
mapping = dict(zip(labelencoder.classes_, range(len(labelencoder.classes_))))
mapping

{'dangerous': 0, 'safe': 1}

In [19]:
y_test = to_categorical(y_test)
y_train = to_categorical(y_train)

In [20]:
!pip install kapre

Collecting kapre
  Downloading https://files.pythonhosted.org/packages/3f/2e/f540d1d1f05c764686163fdb5bb1e5c703f1528076d2829bfc3900683f06/kapre-0.1.4-py3-none-any.whl
Installing collected packages: kapre
Successfully installed kapre-0.1.4


In [21]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [22]:
import keras
import kapre
from keras.models import Sequential
from keras.layers import Dense,AveragePooling2D
from kapre.time_frequency import Melspectrogram
from kapre.utils import Normalization2D
from kapre.augmentation import AdditiveNoise

# 6 channels (!), maybe 1-sec audio signal, for an example.

sr = 16000
input_shape = (1,sr*3)
model = Sequential()
# A mel-spectrogram layer
model.add(Melspectrogram(n_dft=512, n_hop=512, input_shape=input_shape,
                         padding='same', sr=sr, n_mels=128,
                         fmin=0.0, fmax=sr/2, power_melgram=1.0,
                         return_decibel_melgram=True,trainable_fb=False,
                         trainable_kernel=False,
                         name='trainable_stft'))
# Maybe some additive white noise.
model.add(AdditiveNoise(power=0.1))
# If you wanna normalise it per-frequency
model.add(Normalization2D(str_axis='freq')) # or 'channel', 'time', 'batch', 'data_sample'
# After this, it's just a usual keras workflow. For example..
# Add some layers, e.g., model.add(some convolution layers..)
# Compile the model
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu', input_shape=(20, 35, 1)))
model.add(Conv2D(20, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Dropout(0.25))
model.add(Conv2D(15, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Dropout(0.25))
model.add(Conv2D(25, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Dropout(0.25))
model.add(Conv2D(22, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Dropout(0.25))
model.add(Conv2D(22, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Dropout(0.25))
model.add(Conv2D(25, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(39, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(Conv2D(45, kernel_size=(3, 3), activation='relu',dim_ordering="th"))
model.add(AveragePooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(2, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.adam(),metrics=['accuracy'])
model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
trainable_stft (Melspectrogr (None, 128, 94, 1)        296064    
_________________________________________________________________
additive_noise_1 (AdditiveNo (None, 128, 94, 1)        0         
_________________________________________________________________
normalization2d_1 (Normaliza (None, 128, 94, 1)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 126, 92, 39)       390       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 20, 90, 37)        22700     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 39, 88, 35)        7059      
_________________________________________________________________
dropout_1 (Dropout)          (None, 39, 88, 35)        0         
__________

In [23]:
tensorboard_callback = tf.keras.callbacks.TensorBoard("logs/esna-environment")

In [24]:
model.fit(X_train, y_train, batch_size=50, epochs=50, verbose=1,class_weight=class_weights, validation_data=(X_test, y_test), callbacks=[tensorboard_callback])

Train on 4043 samples, validate on 1733 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fb10e006be0>

In [25]:
model.save("keyword.h5")
print(model.input.op.name)
print(model.output.op.name)

trainable_stft_input
dense_1/Softmax


In [26]:
!rm -rf dataset

In [27]:
!ls

__notebook__.ipynb  __output__.json  keyword.h5  logs


In [28]:
%load_ext tensorboard.notebook
%tensorboard --logdir logs

In [29]:
!rm -rf logs

In [30]:
!ls

__notebook__.ipynb  __output__.json  keyword.h5
