In [None]:
## Sanity check: display the path of the Python interpreter backing this kernel,
## to confirm the intended conda environment is the one in use.
import sys
sys.executable

#### Working with Environment Sound Classification Dataset (https://github.com/karoldvl/ESC-50)
Testing feature extraction and several classifiers with the ESC-10 data set (the 10-class subset of ESC-50).

#### Feature extraction

In [3]:
# coding= UTF-8
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf

def feature_extraction(file_name):
    """Compute time-averaged spectral features for a single audio file.

    Parameters
    ----------
    file_name : str
        Path of an audio file that ``librosa.load`` can decode.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfccs, chroma, mel, contrast, tonnetz)``. Each element is a 1-D
        vector obtained by averaging the corresponding librosa feature matrix
        over its time frames. ``mfccs`` has 40 values (``n_mfcc=40``); the
        caller expects the five vectors to total 193 values.

    Raises
    ------
    Exception
        Whatever ``librosa`` raises when the file cannot be read or decoded;
        nothing is caught here.
    """
    # Request a mono signal explicitly so X is always a 1-D sample vector.
    X, sample_rate = librosa.load(file_name, mono=True)

    # Magnitude spectrogram, reused by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))

    # Each librosa feature is (n_bins, n_frames); transpose and average over frames.
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as the keyword `y`: recent librosa releases
    # no longer accept it as a positional argument.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonal centroid features, computed on the harmonic component of the signal.
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)

    return mfccs, chroma, mel, contrast, tonnetz
    
def parse_audio_files(parent_dir, sub_dirs, file_ext='*.ogg'):
    """Extract features for every audio file under ``parent_dir/<sub_dir>``.

    Parameters
    ----------
    parent_dir : str
        Root directory of the data set.
    sub_dirs : sequence of str
        One sub-directory per class; the position of a sub-directory in this
        sequence is used as the integer label of the files it contains.
    file_ext : str, optional
        Glob pattern selecting the audio files (default ``'*.ogg'``).

    Returns
    -------
    features : numpy.ndarray, float64, shape (n_files, n_features)
        One row per successfully processed file. When no file is processed the
        shape is ``(0, 193)``.
    labels : numpy.ndarray, int, shape (n_files,)
        Integer class label of each row in ``features``.

    Notes
    -----
    Files whose feature extraction raises are reported and skipped, so one
    unreadable file does not abort the whole run.
    """
    feature_rows, label_list = [], []

    for label, sub_dir in enumerate(sub_dirs):
        pattern = os.path.join(parent_dir, sub_dir, file_ext)
        # glob order is OS-dependent; sort so the saved arrays are reproducible.
        for file_name in sorted(glob.glob(pattern)):
            try:
                mfccs, chroma, mel, contrast, tonnetz = feature_extraction(file_name)
            except Exception as e:
                print("[Error] there was an error in feature extraction. %s" % (e))
                continue

            # Concatenate the five feature vectors into one flat row.
            feature_rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
            label_list.append(label)
        print("Extracted features from %s, done" % (sub_dir))

    # Stack once at the end instead of re-allocating the matrix for every file.
    if feature_rows:
        features = np.vstack(feature_rows).astype(np.float64)
    else:
        features = np.empty((0, 193))

    # Built-in int replaces the np.int alias, which NumPy removed in 1.24.
    return features, np.array(label_list, dtype=int)

def one_hot_encode(labels):
    """Convert a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : array-like of int
        Class index of each sample. Float arrays holding whole numbers are
        accepted and cast to int.

    Returns
    -------
    numpy.ndarray, float64, shape (n_samples, n_classes)
        Row ``i`` has a single 1 in column ``labels[i]``. ``n_classes`` is wide
        enough for the largest label, so a class id that is absent from
        ``labels`` no longer causes an out-of-range index. Empty input yields
        a ``(0, 0)`` array.
    """
    # Index arrays must be integer-typed; labels often arrive as floats.
    labels = np.asarray(labels, dtype=int)
    n_labels = len(labels)
    if n_labels == 0:
        return np.zeros((0, 0))

    # At least one column per distinct label, and at least max(label) + 1 columns.
    n_classes = max(len(np.unique(labels)), int(labels.max()) + 1)
    encoded = np.zeros((n_labels, n_classes))
    encoded[np.arange(n_labels), labels] = 1
    return encoded

# Read sub-directories (audio classes).
# Only visible directories count as classes: stray files (e.g. .DS_Store) and
# hidden folders (e.g. .ipynb_checkpoints) would otherwise become bogus labels.
audio_directories = sorted(
    entry for entry in os.listdir("audio-data/")
    if not entry.startswith('.') and os.path.isdir(os.path.join("audio-data/", entry))
)
print('Audio Classes: ', audio_directories)

('Audio Classes: ', ['001 - Dog bark', '002 - Rain', '003 - Sea waves', '004 - Baby cry', '005 - Clock tick', '006 - Person sneeze', '007 - Helicopter', '008 - Chainsaw', '009 - Rooster', '010 - Fire crackling'])


In [7]:
## Run feature extraction over every class folder, then cache both arrays on
## disk as binary .npy files so later cells can reload them without recomputing.
features, labels = parse_audio_files(parent_dir='audio-data', sub_dirs=audio_directories)
np.save('feat.npy', arr=features)
np.save('label.npy', arr=labels)

Extracted features from 001 - Dog bark, done
Extracted features from 002 - Rain, done
Extracted features from 003 - Sea waves, done
Extracted features from 004 - Baby cry, done
Extracted features from 005 - Clock tick, done




Extracted features from 006 - Person sneeze, done
Extracted features from 007 - Helicopter, done
Extracted features from 008 - Chainsaw, done
Extracted features from 009 - Rooster, done
Extracted features from 010 - Fire crackling, done


In [8]:
# Reload the integer-encoded labels written by the extraction cell (10 classes).
labels = np.load('label.npy')

# Class names indexed by integer label, used later to decode predictions.
label_classes = np.array([
    'Dog bark',
    'Rain',
    'Sea waves',
    'Baby cry',
    'Clock tick',
    'Person sneeze',
    'Helicopter',
    'Chainsaw',
    'Rooster',
    'Fire crackling',
])
print(label_classes)

['Dog bark' 'Rain' 'Sea waves' 'Baby cry' 'Clock tick' 'Person sneeze'
 'Helicopter' 'Chainsaw' 'Rooster' 'Fire crackling']


In [9]:
# Reload the cached feature matrix: one row per audio clip, one column per feature.
features = np.load('feat.npy')
print(features.shape[0])  # number of clips (400 here), not the number of features
print(features)

400
[[-3.12936138e+02  1.53756654e+02 -8.42777452e+01 ...  5.63282588e-02
   2.08516431e-02 -1.87674272e-02]
 [-3.20112853e+02  1.42200505e+02 -4.77227058e+01 ...  7.24356147e-02
   1.58665382e-02 -2.03768118e-03]
 [-5.69369487e+02  3.15002805e+01  9.34787663e-01 ... -9.74065657e-03
  -2.01712929e-03  2.35111669e-02]
 ...
 [-2.86730051e+02  6.26902425e+01  1.85678427e+01 ...  9.72864029e-03
   8.37429690e-03  1.61102826e-02]
 [-3.26840023e+02  9.06312882e+01  7.19092342e+01 ...  5.48059287e-02
   1.16528212e-02  9.53665151e-03]
 [-2.96461065e+02  7.54222837e+01  1.30356668e+01 ...  9.11465921e-02
   2.24434430e-02  1.89593115e-02]]


##### Data visualization

In [17]:
# pandas is imported here because no earlier cell of this notebook brings `pd`
# into scope; without it this cell fails on a fresh Restart & Run All.
import pandas as pd

# DataFrame with one row per audio clip and one column per feature (193 columns)
df = pd.DataFrame(features)

# Add a new column for the class (label); this is the prediction target
df['Audio class'] = pd.Categorical.from_codes(labels, label_classes)

# Preview the first 12 feature columns next to the target
df[[0,1,2,3,4,5,6,7,8,9,10,11,'Audio class']]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,Audio class
0,-359.907670,176.984312,-28.405258,-77.346419,-23.394936,-8.336801,-27.351105,-21.991833,-0.599230,3.381202,-1.022481,10.735957,Dog bark
1,-364.606377,160.027782,-10.176146,-49.205103,-47.292736,-24.890667,-23.486948,-17.898796,-8.003186,-14.999630,-10.643615,3.901059,Dog bark
2,-569.158646,30.929713,5.359668,0.996636,1.891372,2.343744,-1.432037,0.311365,2.254691,1.684495,1.046472,3.674637,Dog bark
3,-419.991445,158.331816,-5.460790,-2.632195,10.701890,13.474268,9.900013,7.775121,7.893351,6.628814,7.100705,9.033844,Dog bark
4,-331.014025,124.460048,-20.819073,-29.484597,-36.932396,-7.367482,-16.957456,-7.053254,-2.576714,-8.870600,-0.729187,7.784047,Dog bark
5,-393.339552,78.339811,4.843938,-7.676779,-12.608208,-4.106547,-6.196130,-4.184623,-0.346573,2.493216,-0.460569,-3.794559,Dog bark
6,-387.847069,112.019940,11.978814,-11.534052,-15.415533,0.335486,-1.905306,-9.614974,-2.887099,-1.144487,-1.357641,7.187376,Dog bark
7,-430.738218,94.625625,11.179736,-7.445123,-2.928210,-3.732962,-8.826805,0.202373,-0.317225,-6.483365,-3.919070,-3.755173,Dog bark
8,-466.842492,57.002892,-22.152894,-25.998953,-18.681116,-5.563477,-12.930120,-5.421485,0.002677,-1.839884,-1.536207,-2.283316,Dog bark
9,-366.304506,120.885096,12.036169,-3.433854,-24.169440,-13.973501,-6.264325,-3.052905,-12.012035,-6.378392,-2.972299,-8.906094,Dog bark


#### SVM Classification 

In [91]:
# coding= UTF-8
import numpy as np
import sklearn
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Load the cached arrays written by the feature-extraction cell
X = np.load('feat.npy') # feature matrix, one row per audio clip
y = np.load('label.npy').ravel() # integer class labels (the target)

# Split into train and test sets (400 clips total, 40% held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# Standardised copies of the data. They are computed but NOT used below:
# the model is deliberately fitted on the raw features (see the results note after this cell).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

# Support vector classifier. No `kernel` is passed, so scikit-learn's default
# kernel applies (RBF per the SVC docs) -- this is not a linear SVM.
svm_clf = SVC(C=28.0, gamma = 0.00001, decision_function_shape="ovr") # C and gamma are tunable

# Fit model on the unscaled training features
svm_clf.fit(X_train, y_train) #From Beif github
# Alternative kept for reference: fit on the standardised features instead
#svm_clf.fit(X_train_scaled, y_train) # HandsOn book

# Predict the held-out clips (y_predict is only consumed by the disabled debug prints below)
#y_pred = svm_clf.predict(X_train_scaled)
y_predict = svm_clf.predict(X_test)

# Debug output, disabled
#print('Prediction')
#print(y_predict)
#print
#print("Actual")
#print(y_test)

# Mean accuracy on the held-out set
acc = svm_clf.score(X_test, y_test)
# NOTE: a bare `print` emits a blank line under Python 2 and is a no-op expression under Python 3
print
print("accuracy=%0.3f" %acc)

Prediction
[3 2 0 4 6 1 3 6 3 5 5 3 2 8 0 1 7 3 5 4 4 6 3 5 4 1 1 0 2 2 4 2 8 5 1 6 2
 4 4 9 2 0 0 0 7 1 9 7 3 5 1 3 7 4 7 2 1 5 8 1 1 7 0 8 4 6 8 0 4 7 0 6 1 0
 2 0 9 7 9 8 8 1 1 1 7 5 3 9 9 8 9 6 1 8 4 3 3 4 7 9 6 1 7 3 1 8 2 8 3 7 2
 1 7 0 9 4 6 1 5 8 2 5 5 5 9 3 0 6 8 4 3 9 1 6 5 2 2 0 8 0 4 4 5 2 6 2 8 6
 5 2 8 3 6 2 5 3 4 3 9 7]

Actual
[3 7 8 4 6 1 3 6 3 5 0 3 2 8 0 1 7 3 5 4 4 6 3 8 4 7 1 0 7 2 4 7 8 5 1 1 2
 4 4 9 2 8 0 0 7 1 9 6 3 5 6 3 7 4 7 7 1 0 0 1 1 6 0 8 4 9 3 0 3 7 0 6 1 0
 2 0 9 7 9 8 8 1 1 2 7 5 3 9 9 8 9 6 1 0 4 3 3 9 6 9 6 1 7 3 7 8 2 8 3 7 2
 2 7 0 9 5 6 1 5 8 2 5 5 5 9 3 0 6 8 4 3 9 1 6 5 2 7 4 8 0 4 4 0 2 6 2 8 6
 5 2 8 3 7 2 5 3 6 3 4 7]

[ 0  5  8  0  0  0  0  0  0  0 -5  0  0  0  0  0  0  0  0  0  0  0  0  3
  0  6  0  0  5  0  0  5  0  0  0 -5  0  0  0  0  0  8  0  0  0  0  0 -1
  0  0  5  0  0  0  0  5  0 -5 -8  0  0 -1  0  0  0  3 -5  0 -1  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0 -8  0  0
  0  5 -1  0  0  0  0  0  6  0  0  0

#### Accuracy results (Reporting Beif value)

- Without data scaling: 0.817 (C = 28)
- With data scaling: 0.375

#### CNN Classification (Keras Tensorflow)

In [8]:
# coding= UTF-8
import numpy as np
import keras 
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split

# Load the cached feature matrix and the integer labels
X = np.load("feat.npy")
y = np.load('label.npy').ravel()

# Hold out 40% of the clips for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = 233)

# Neural Network Construction
model = Sequential()

# Architecture: two conv blocks over the 193-long feature vector treated as a
# 1-channel sequence, global average pooling, dropout, 10-way softmax output.
model.add(Conv1D(64, 3, activation='relu', input_shape = (193, 1)))
model.add(Conv1D(64, 3, activation='relu'))
model.add(MaxPooling1D(3))
model.add(Conv1D(128, 3, activation='relu'))
model.add(Conv1D(128, 3, activation='relu'))
model.add(GlobalAveragePooling1D())
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Categorical cross-entropy pairs with the softmax output and one-hot targets
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Convert labels to one-hot. The labels are already zero-based (0-9), so they
# are passed through unchanged: subtracting 1 turned class 0 into -1, which
# wraps around to the last column and shifts every other class by one.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

# Add a trailing channel axis: (samples, 193) -> (samples, 193, 1), as Conv1D expects
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

# Train Network
model.fit(X_train, y_train, batch_size=64, epochs=100)

# Loss and accuracy on the held-out data
score, acc = model.evaluate(X_test, y_test, batch_size=16)

print('Test score:', score) #loss
print('Test accuracy:', acc)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [2]:
# Sanity-check the shapes of the network inputs and one-hot targets
for _split in (X_train, X_test, y_train, y_test):
    print(_split.shape)

(240, 193, 1)
(160, 193, 1)
(240, 10)
(160, 10)


- Common Keras NN workflow: Sequential, Add,Compile, Fit
- Accuracy with CNN: 0.78125 (1000 epochs)
- Accuracy: 0.7125 (100 epochs)

#### Multilayer Perceptron Classification (Keras Tensorflow)

In [1]:
# coding= UTF-8
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split

# Load the cached feature matrix and the integer labels
X =  np.load('feat.npy')
y =  np.load('label.npy').ravel() # flatten to a 1-D label vector

# Hold out 40% of the clips for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# Build the Neural Network: two 512-unit hidden layers with dropout, 10-way softmax output
model = Sequential()
model.add(Dense(512, activation='relu', input_dim=193)) # one input per extracted feature
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Convert labels to one-hot. The labels are already zero-based (0-9), so they
# are passed through unchanged: subtracting 1 turned class 0 into -1, which
# wraps around to the last column and shifts every other class by one.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

# Train and test
model.fit(X_train, y_train, epochs=100, batch_size=64) # Epochs are tunable
score, acc = model.evaluate(X_test, y_test, batch_size=32)
print('Test score:', score)
print('Test accuracy:', acc)

  from ._conv import register_converters as _register_converters
Using Theano backend.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

- Accuracy with MLP: 0.80625 (250 epochs)
- Accuracy: 0.63125 (100 epochs)
- MLP is the fastest of the three neural networks to train
- A classification predictor can be visualized by drawing the boundary line.

#### Naive Bayes (NB) Classification (Sklearn)

In [3]:
# coding= UTF-8
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import pandas as pd 

# Reload the cached features and labels
X = np.load('feat.npy')
y = np.load('label.npy').ravel()

# Hold out 33% of the clips for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Gaussian Naive Bayes with default parameters; fit() returns the estimator itself
gnb_clf = GaussianNB().fit(X_train, y_train)

# Predicted class index for every held-out clip
prediction = gnb_clf.predict(X_test)

# Mean accuracy on the held-out set
# (the bare `print` is a blank-line statement under Python 2)
print
acc = gnb_clf.score(X_test, y_test)
print("Accuracy = %0.3f" %acc)

Predicted values
[6 2 0 9 5 9 8 2 7 3 0 4 1 1 7 1 6 4 8 9 1 0 9 5 8 4 2 5 1 2 9 4 6 2 5 2 2
 8 0 3 4 1 6 3 2 0 3 4 0 4 9 6 8 1 4 0 5 4 8 3 3 6 7 7 2 0 8 6 9 1 8 3 0 1
 9 6 5 9 4 7 0 3 0 6 1 1 6 7 2 2 5 8 0 5 4 6 9 9 5 1 6 2 7 1 3 0 0 3 8 8 0
 4 7 9 7 1 9 7 7 1 0 2 0 8 0 0 8 8 4 6 6 3]

Actual values
[5 7 0 5 2 2 8 2 6 3 0 9 1 1 3 1 6 9 5 9 1 0 9 6 0 9 2 5 6 2 9 4 6 1 5 2 2
 8 3 3 9 1 1 0 2 9 3 4 0 4 1 8 8 1 9 0 5 4 0 3 3 7 6 1 2 0 8 6 9 1 8 3 0 1
 9 6 5 9 4 7 0 0 5 6 6 1 6 7 2 2 5 8 2 5 4 7 9 9 5 1 6 2 0 1 3 0 0 3 8 8 3
 9 7 9 7 1 1 7 7 7 3 2 0 8 0 4 8 8 4 6 7 3]

[ 1 -5  0  4  3  7  0  0  1  0  0 -5  0  0  4  0  0 -5  3  0  0  0  0 -1
  8 -5  0  0 -5  0  0  0  0  1  0  0  0  0 -3  0 -5  0  5  3  0 -9  0  0
  0  0  8 -2  0  0 -5  0  0  0  8  0  0 -1  1  6  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  3 -5  0 -5  0  0  0  0  0  0  0 -2  0  0 -1
  0  0  0  0  0  0  7  0  0  0  0  0  0  0 -3 -5  0  0  0  0  8  0  0 -6
 -3  0  0  0  0 -4  0  0  0  0 -1  0]

Accuracy = 0.697


- Accuracy with NB: 0.697 (33% Test data)

#### Random Forest Classification (Sklearn)

In [4]:
# coding= UTF-8
from sklearn.ensemble import RandomForestClassifier #Random Forest classifier
import pandas as pd 
import numpy as np
# Seed NumPy's global random generator so any NumPy-based randomness in later cells is repeatable
np.random.seed(0)

In [5]:
# Reload the cached features and labels
X = np.load('feat.npy')
y = np.load('label.npy').ravel()

# Hold out 30% of the clips for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Random forest with a fixed seed, trained on two workers; fit() returns the estimator itself
rf_clf = RandomForestClassifier(n_jobs=2, random_state=0).fit(X_train, y_train)

# Predicted class index for every held-out clip
y_prediction = rf_clf.predict(X_test)

# Mean accuracy on the held-out set
# (the bare `print` is a blank-line statement under Python 2)
print
acc = rf_clf.score(X_test, y_test)
print("Accuracy = %0.3f" %acc)

Predicted values
[5 7 0 9 7 1 8 2 6 3 4 9 1 1 7 1 6 9 3 9 2 0 9 0 0 9 2 5 6 2 9 4 6 1 5 2 2
 8 3 3 9 1 6 0 2 9 3 4 3 4 1 6 8 1 9 0 5 4 0 3 3 7 7 2 2 0 8 6 9 1 0 3 0 1
 9 6 5 9 4 7 0 4 0 6 1 1 6 7 2 2 5 4 2 5 4 2 4 9 1 1 6 2 7 1 3 0 0 3 3 8 3
 9 7 9 7 1 1 7 7 2]

Actual values
[5 7 0 5 2 2 8 2 6 3 0 9 1 1 3 1 6 9 5 9 1 0 9 6 0 9 2 5 6 2 9 4 6 1 5 2 2
 8 3 3 9 1 1 0 2 9 3 4 0 4 1 8 8 1 9 0 5 4 0 3 3 7 6 1 2 0 8 6 9 1 8 3 0 1
 9 6 5 9 4 7 0 0 5 6 6 1 6 7 2 2 5 8 2 5 4 7 9 9 5 1 6 2 0 1 3 0 0 3 8 8 3
 9 7 9 7 1 1 7 7 7]

[ 0  0  0  4  5 -1  0  0  0  0  4  0  0  0  4  0  0  0 -2  0  1  0  0 -6
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  5  0  0  0  0  0
  3  0  0 -2  0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0  0 -8  0
  0  0  0  0  0  0  0  0  0  4 -5  0 -5  0  0  0  0  0  0 -4  0  0  0 -5
 -5  0 -4  0  0  0  7  0  0  0  0  0 -5  0  0  0  0  0  0  0  0  0  0 -5]

Accuracy = 0.800


- Accuracy with Random Forest: 0.800 (30% test data)

In [6]:
# Per-class probability estimates for the first 10 held-out clips
rf_clf.predict_proba(X_test)[:10]

array([[0. , 0. , 0.1, 0. , 0. , 0.8, 0.1, 0. , 0. , 0. ],
       [0. , 0. , 0.2, 0. , 0. , 0. , 0. , 0.8, 0. , 0. ],
       [0.7, 0. , 0. , 0.1, 0.1, 0. , 0. , 0. , 0.1, 0. ],
       [0. , 0.1, 0. , 0.1, 0.3, 0.1, 0. , 0. , 0. , 0.4],
       [0. , 0.1, 0.2, 0. , 0. , 0.2, 0.2, 0.3, 0. , 0. ],
       [0. , 0.3, 0.3, 0. , 0. , 0. , 0.3, 0.1, 0. , 0. ],
       [0. , 0. , 0. , 0.1, 0.1, 0. , 0. , 0. , 0.8, 0. ],
       [0. , 0.1, 0.8, 0. , 0. , 0.1, 0. , 0. , 0. , 0. ],
       [0. , 0.1, 0.1, 0. , 0. , 0.1, 0.4, 0.2, 0. , 0.1],
       [0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. ]])

In [9]:
# Decode predicted and actual classes from integer labels to class names
prediction_decoded = label_classes.take(y_prediction)
actual_value_decoded = label_classes.take(y_test)

print('Prediction decoded')
print(prediction_decoded)
print
print('Actual class decoded')
print(actual_value_decoded)

Prediction decoded
['Person sneeze' 'Chainsaw' 'Dog bark' 'Fire crackling' 'Chainsaw' 'Rain'
 'Rooster' 'Sea waves' 'Helicopter' 'Baby cry' 'Clock tick'
 'Fire crackling' 'Rain' 'Rain' 'Chainsaw' 'Rain' 'Helicopter'
 'Fire crackling' 'Baby cry' 'Fire crackling' 'Sea waves' 'Dog bark'
 'Fire crackling' 'Dog bark' 'Dog bark' 'Fire crackling' 'Sea waves'
 'Person sneeze' 'Helicopter' 'Sea waves' 'Fire crackling' 'Clock tick'
 'Helicopter' 'Rain' 'Person sneeze' 'Sea waves' 'Sea waves' 'Rooster'
 'Baby cry' 'Baby cry' 'Fire crackling' 'Rain' 'Helicopter' 'Dog bark'
 'Sea waves' 'Fire crackling' 'Baby cry' 'Clock tick' 'Baby cry'
 'Clock tick' 'Rain' 'Helicopter' 'Rooster' 'Rain' 'Fire crackling'
 'Dog bark' 'Person sneeze' 'Clock tick' 'Dog bark' 'Baby cry' 'Baby cry'
 'Chainsaw' 'Chainsaw' 'Sea waves' 'Sea waves' 'Dog bark' 'Rooster'
 'Helicopter' 'Fire crackling' 'Rain' 'Dog bark' 'Baby cry' 'Dog bark'
 'Rain' 'Fire crackling' 'Helicopter' 'Person sneeze' 'Fire crackling'
 'Clock tick' '

In [10]:
## Confusion matrix: rows are the actual classes, columns the predicted classes
pd.crosstab(index=actual_value_decoded, columns=prediction_decoded)

col_0,Baby cry,Chainsaw,Clock tick,Dog bark,Fire crackling,Helicopter,Person sneeze,Rain,Rooster,Sea waves
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Baby cry,10,1,0,0,0,0,0,0,0,0
Chainsaw,0,8,0,0,0,0,0,0,0,2
Clock tick,0,0,6,0,0,0,0,0,0,0
Dog bark,1,1,2,11,0,0,0,0,0,0
Fire crackling,0,0,1,0,15,0,0,0,0,0
Helicopter,0,1,0,1,0,9,0,1,0,0
Person sneeze,1,0,0,1,1,0,7,1,0,0
Rain,0,0,0,0,0,1,0,14,0,2
Rooster,1,0,1,1,0,1,0,0,5,0
Sea waves,0,1,0,0,0,0,0,1,0,11


#### RNN Classification (Keras Tensorflow)

- Sigmoid vs Softmax: The sigmoid function is used for the two-class logistic regression (0 or 1, speech or non-speech), whereas the softmax function is used for the multiclass logistic regression (a.k.a. MaxEnt, multinomial logistic regression, softmax Regression, Maximum Entropy Classifier). (dog bark, sea waves, ...)
- Network Architecture: Regarding more general choices, there is rarely a "right" way to construct the architecture. Instead that should be something you test with different meta-params (such as layer sizes, number of layers, amount of drop-out), and should be results-driven (including any limits you might have on resource use for training time/memory use etc).
- https://datascience.stackexchange.com/questions/10048/what-is-the-best-keras-model-for-multi-class-classification

In [1]:
# coding= UTF-8
import os
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers.recurrent import LSTM
from keras.layers import Dense
from keras.optimizers import Adam

# Load the cached feature matrix and the integer labels
X = np.load("feat.npy")
y = np.load('label.npy').ravel()

# Hold out 25% of the clips for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 233)

# Reshape data for LSTM (samples, timesteps, features): each of the 193
# feature values is fed as one timestep carrying a single feature.
X_train = np.expand_dims(X_train, axis=2) # (300, 193, 1) for the 400-clip data set
X_test = np.expand_dims(X_test, axis=2)

# Convert labels to one-hot. The labels are already zero-based (0-9), so they
# are passed through unchanged: subtracting 1 turned class 0 into -1, which
# wraps around to the last column and shifts every other class by one.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

# Build RNN Neural Network: two stacked LSTM layers followed by a softmax classifier
print('Build LSTM RNN model ...')
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=X_train.shape[1:]))
model.add(LSTM(32, return_sequences=False))
model.add(Dense(y_train.shape[1], activation='softmax')) # one output unit per class

print("Compiling ...")
model.compile(loss='categorical_crossentropy', # for multiple classes
              optimizer='adam', 
              metrics=['accuracy'])

print(model.summary())

print("Training ...")
model.fit(X_train, y_train, batch_size=35, epochs=100)

print("\nValidating ...")
score, accuracy = model.evaluate(X_test, y_test, batch_size=35, verbose=1)
print("Loss:  ", score)
print("Accuracy:  ", accuracy)

  from ._conv import register_converters as _register_converters
Using Theano backend.


Build LSTM RNN model ...
Compiling ...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 193, 128)          66560     
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                20608     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 87,498
Trainable params: 87,498
Non-trainable params: 0
_________________________________________________________________
None
Training ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epo

- RNN Accuracy: 0.660 (25% Test data, 100 epochs)

#### Overall Accuracy Results 


In [2]:
import pandas as pd

# Accuracy summary (%), one column per classifier. The two rows hold the two
# accuracy figures quoted in the notes above; 0 marks a missing second figure.
df = pd.DataFrame(dict(
    SVM=[81.7, 0],
    CNN=[78.125, 71.25],
    MLP=[80.625, 63.125],
    NB=[69.7, 0],
    RF=[80, 0],
    RNN=[66, 0],
))
df

Unnamed: 0,CNN,MLP,NB,RF,RNN,SVM
0,78.125,80.625,69.7,80,66,81.7
1,71.25,63.125,0.0,0,0,0.0
