In [1]:
from preprocess import *
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, LSTM, Activation
from keras.utils import to_categorical
import wandb
from wandb.keras import WandbCallback
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

Using TensorFlow backend.


In [2]:
wandb.init()
config = wandb.config

config.max_len = 21
config.buckets = 50

# Save data to array file first
save_data_to_array(max_len=config.max_len, n_mfcc=config.buckets)

#labels=np.array(["chirping_birds", "crickets", "crow", 
#                 "frog", "insects"])
labels=np.array(["GOC", "GRA", "GST", 
                 "GWG", "GWC"])

Saving vectors of label - 'GOC': 100%|█████████████████████████████████████████████████| 15/15 [00:00<00:00, 28.56it/s]
Saving vectors of label - 'GRA': 100%|███████████████████████████████████████████████| 133/133 [00:03<00:00, 40.92it/s]
Saving vectors of label - 'GST': 100%|█████████████████████████████████████████████████| 20/20 [00:00<00:00, 76.07it/s]
Saving vectors of label - 'GWC': 100%|███████████████████████████████████████████████| 100/100 [00:01<00:00, 59.24it/s]
Saving vectors of label - 'GWG': 100%|█████████████████████████████████████████████████| 52/52 [00:01<00:00, 37.26it/s]


In [3]:
# Loading train/test set
X_train, X_test, X_val, y_train, y_test, y_val = get_train_test()

In [4]:
# Setting channels to 1 to generalize stereo sound to 1 channel
channels = 1
config.epochs = 50
config.batch_size = 100

# Number of classes
num_classes = 5

# Reshape X_train and X_test to include a 4th dimension (channels)
X_train = X_train.reshape(X_train.shape[0], config.buckets, config.max_len, channels)
X_test = X_test.reshape(X_test.shape[0], config.buckets, config.max_len, channels)
X_val = X_val.reshape(X_val.shape[0], config.buckets, config.max_len, channels)

In [5]:
# Spectrogram visualized of 0th element
print(X_train.shape)
#plt.imshow(X_train[500, :, :, 0])

(153, 50, 21, 1)


In [6]:
# Getting vector number where each number corresponds to a label
y_train_hot = to_categorical(y_train)
y_test_hot = to_categorical(y_test)
y_val_hot = to_categorical(y_val)

In [7]:
# Building the model
model = Sequential()

input_shape= (config.buckets, config.max_len, channels)

model.add(Conv2D(24, (3, 3), strides=(1, 1), input_shape=input_shape))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Activation('relu'))

model.add(Conv2D(48, (3, 3), padding="valid"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Activation('relu'))

model.add(Conv2D(48, (3, 1), padding="valid"))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dropout(rate=0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(rate=0.5))

model.add(Dense(len(labels)))
model.add(Activation('softmax'))
model.summary()
# Conv2D: 
#    Filters: 32
#    Kernel_size: (3,3) (height/width of the 2D convolution window)     
'''model.add(Conv2D(32, (3, 3),
    input_shape=(config.buckets, config.max_len, channels),
    activation='relu'))

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))'''


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 48, 19, 24)        240       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 9, 24)         0         
_________________________________________________________________
activation_1 (Activation)    (None, 24, 9, 24)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 22, 7, 48)         10416     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 11, 3, 48)         0         
_________________________________________________________________
activation_2 (Activation)    (None, 11, 3, 48)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 9, 3, 48)        

"model.add(Conv2D(32, (3, 3),\n    input_shape=(config.buckets, config.max_len, channels),\n    activation='relu'))\n\nmodel.add(MaxPooling2D(pool_size=(2, 2)))\n\nmodel.add(Flatten())\n\nmodel.add(Dense(128, activation='relu'))\nmodel.add(Dense(num_classes, activation='softmax'))"

In [8]:
# Configure CNN for training
model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=['accuracy'])

In [9]:
wandb.init()
print(y_train_hot.shape)
print(labels.shape)
print(X_train.shape)
# Train the CNN model
#    X_train: Input data
#    y_train_hot: Target data
model.fit(X_train, y_train_hot, epochs=config.epochs, validation_data=(X_val, y_val_hot), callbacks=[WandbCallback(data_type="image", labels=labels)])

(153, 5)
(5,)
(153, 50, 21, 1)

Train on 153 samples, validate on 103 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x1c113933bc8>

In [10]:
# Save the keras model
model.save("geo_cnn_model.h5")
print("Model has been saved.")

Model has been saved.


## Running the IntelliChirp Biophony CNN

In [11]:
from keras.models import load_model

# Load the model
loaded_model = load_model('ant_cnn_model.h5')

In [12]:
# Summarize the model
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 48, 19, 24)        240       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 9, 24)         0         
_________________________________________________________________
activation_1 (Activation)    (None, 24, 9, 24)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 22, 7, 48)         10416     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 11, 3, 48)         0         
_________________________________________________________________
activation_2 (Activation)    (None, 11, 3, 48)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 9, 3, 48)         

In [13]:
y_pred_ohe = loaded_model.predict(X_test)  # shape=(n_samples, 12)
y_pred_labels = np.argmax(y_pred_ohe, axis=1)  # only necessary if output has one-hot-encoding, shape=(n_samples)

confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_labels)  # shape
print(confusion_matrix)

for class_i in range(len(labels)) :
    indices = np.argwhere(y_test == class_i)
    sum = 0
    for index in indices:
        sum += (y_test[index] == y_pred_labels[index])
    if(len(indices) > 0) : mean = sum/len(indices)
    else : mean = "N/A"
    print("Accuracy for class", labels[class_i], ":", mean)

print("Overall Accuracy :", np.mean(y_test == y_pred_labels))

[[ 4  0  0  0  0]
 [ 0 21  0  5  0]
 [ 0  2  0  1  0]
 [ 0  9  0 10  4]
 [ 0  2  0  2  4]]
Accuracy for class GOC : [1.]
Accuracy for class GRA : [0.80769231]
Accuracy for class GST : [0.]
Accuracy for class GWG : [0.43478261]
Accuracy for class GWC : [0.5]
Overall Accuracy : 0.609375


In [15]:
## Running the model

n_mfcc = config.buckets
max_len = config.max_len
# convert file to wav2mfcc
# Mel-frequency cepstral coefficients
file_path = "./prediction/nature_sc.wav"
big_wave, sr = librosa.load(file_path, mono=True, sr=None)
#print(wave.shape, sr)

classification = []

for sec_index in range( int(big_wave.shape[0] / sr) ) :
    start_sec = sec_index
    end_sec = sec_index + 1
    
    sec_to_trim = np.array( [ float(start_sec), float(end_sec) ] )
    print(sec_to_trim)
    sec_to_trim = np.ceil( sec_to_trim * sr )

    wave = big_wave[int(sec_to_trim[0]) : int(sec_to_trim[1])]
    print(wave)

    wave = np.asfortranarray(wave[::3])
    mfcc = librosa.feature.mfcc(wave, sr=16000, n_mfcc=n_mfcc)

    # If maximum length exceeds mfcc lengths then pad the remaining ones
    if (max_len > mfcc.shape[1]):
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

    # Else cutoff the remaining parts
    else:
        mfcc = mfcc[:, :max_len]

    # Convert wav to MFCC
    prediction_data = wav2mfcc('./prediction/nature_sc.wav')
    prediction_data = mfcc
    print(prediction_data.shape)
    #print(wav2mfcc())
    # Reshape to 4 dimensions
    prediction_data = prediction_data.reshape(1, config.buckets, config.max_len, channels)
    #prediction_data = prediction_data.reshape(1, 20, config.max_len, channels)

    # Run the model on the inputted file
    predicted = loaded_model.predict(prediction_data)

    # Output the prediction values for each class
    print ('PREDICTED VALUES')
    labels_indices = range(len(labels))
    max_value = 0
    max_value_index = 0
    for index in labels_indices:
        print('\n', labels[index], ": ", '%.08f' % predicted[0,index])
        if predicted[0,index] > max_value:
            max_value_index = index
            max_value = predicted[0,index]

    # Output the prediction
    if max_value < 0.5:
        print("GUESS: Nothing")
        classification.append( { "class" : "Nothing", "timestamp" : start_sec } )
    else:
        print('\n\nGUESS: ', labels[max_value_index])
        classification.append( { "class" : labels[max_value_index], "timestamp" : start_sec } )

print(classification)

[0. 1.]
[ 0.0000000e+00  1.5258789e-05  0.0000000e+00 ...  3.3020020e-02
  1.2680054e-02 -8.7432861e-03]
(50, 21)
PREDICTED VALUES

 GOC :  0.00110971

 GRA :  0.00687433

 GST :  0.00060519

 GWG :  0.28046575

 GWC :  0.71094495


GUESS:  GWC
[1. 2.]
[-0.03717041 -0.05769348 -0.06455994 ...  0.01766968  0.01895142
  0.01779175]
(50, 21)
PREDICTED VALUES

 GOC :  0.00000150

 GRA :  0.66413796

 GST :  0.00009404

 GWG :  0.00712661

 GWC :  0.32863984


GUESS:  GRA
[2. 3.]
[ 0.02345276  0.02101135  0.01712036 ... -0.01161194 -0.0141449
 -0.01431274]
(50, 21)
PREDICTED VALUES

 GOC :  0.00008299

 GRA :  0.22556210

 GST :  0.00302639

 GWG :  0.10296817

 GWC :  0.66836035


GUESS:  GWC
[3. 4.]
[-0.01583862 -0.01066589 -0.00762939 ... -0.0377655  -0.03556824
 -0.02685547]
(50, 21)
PREDICTED VALUES

 GOC :  0.00076802

 GRA :  0.19427927

 GST :  0.00463253

 GWG :  0.03950845

 GWC :  0.76081169


GUESS:  GWC
[4. 5.]
[-0.02836609 -0.02510071 -0.02012634 ...  0.0138855  -0.00386047
 -

 GWG :  0.40232626

 GWC :  0.34449333
GUESS: Nothing
[{'class': 'GWC', 'timestamp': 0}, {'class': 'GRA', 'timestamp': 1}, {'class': 'GWC', 'timestamp': 2}, {'class': 'GWC', 'timestamp': 3}, {'class': 'Nothing', 'timestamp': 4}, {'class': 'GRA', 'timestamp': 5}, {'class': 'GWC', 'timestamp': 6}, {'class': 'GRA', 'timestamp': 7}, {'class': 'GRA', 'timestamp': 8}, {'class': 'GWC', 'timestamp': 9}, {'class': 'GRA', 'timestamp': 10}, {'class': 'GWG', 'timestamp': 11}, {'class': 'Nothing', 'timestamp': 12}, {'class': 'GWG', 'timestamp': 13}, {'class': 'Nothing', 'timestamp': 14}, {'class': 'GWC', 'timestamp': 15}, {'class': 'GRA', 'timestamp': 16}, {'class': 'GRA', 'timestamp': 17}, {'class': 'GRA', 'timestamp': 18}, {'class': 'GRA', 'timestamp': 19}, {'class': 'GRA', 'timestamp': 20}, {'class': 'GRA', 'timestamp': 21}, {'class': 'GRA', 'timestamp': 22}, {'class': 'GRA', 'timestamp': 23}, {'class': 'GRA', 'timestamp': 24}, {'class': 'GRA', 'timestamp': 25}, {'class': 'Nothing', 'timestamp':