In [49]:
# NOTE(review): np, librosa, save_data_to_array, get_train_test and wav2mfcc are
# used below without an explicit import, so they presumably arrive through this
# star import. It is kept first so the explicit imports that follow still win
# on any name clash.
from preprocess import *
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, LSTM, Activation
from keras.utils import to_categorical
import wandb
from wandb.keras import WandbCallback
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

In [50]:
# Start the Weights & Biases run; hyperparameters are stored on its config object
wandb.init()
config = wandb.config

# Feature geometry: `buckets` is passed on as n_mfcc, `max_len` as the frame count
config.buckets = 50
config.max_len = 21

# Save data to array file first, so the later loading step has something to read
save_data_to_array(n_mfcc=config.buckets, max_len=config.max_len)

# Class names, indexed by integer class id.
# Previous label set, kept for reference:
#   "chirping_birds", "crickets", "crow", "frog", "insects"
labels = np.array(["GOC", "GRA", "GST", "GWG", "GWC"])

Saving vectors of label - 'chirping_birds': 100%|██████████████████████████████████████| 40/40 [00:01<00:00, 38.71it/s]
Saving vectors of label - 'crickets': 100%|████████████████████████████████████████████| 40/40 [00:00<00:00, 49.76it/s]
Saving vectors of label - 'crow': 100%|████████████████████████████████████████████████| 40/40 [00:01<00:00, 38.64it/s]
Saving vectors of label - 'frog': 100%|████████████████████████████████████████████████| 40/40 [00:00<00:00, 65.97it/s]
Saving vectors of label - 'insects': 100%|█████████████████████████████████████████████| 40/40 [00:00<00:00, 47.98it/s]


In [51]:
# Loading train/test set
# The helper returns six arrays; they are unpacked as features (train, test,
# validation) followed by labels (train, test, validation).
# NOTE(review): test comes before validation in this order — confirm it matches
# the return order of get_train_test() in the preprocess module.
X_train, X_test, X_val, y_train, y_test, y_val = get_train_test()

In [52]:
# Setting channels to 1 to generalize stereo sound to 1 channel
channels = 1

# Training hyperparameters, stored on the wandb config so they are logged with the run
config.epochs = 50
config.batch_size = 100

# Number of classes, derived from the label list so it cannot drift from the
# model's output layer (which is sized with len(labels))
num_classes = len(labels)

# Reshape X_train, X_test and X_val to include a 4th dimension (channels):
# (samples, buckets, max_len) -> (samples, buckets, max_len, channels)
feature_shape = (config.buckets, config.max_len, channels)
X_train = X_train.reshape(X_train.shape[0], *feature_shape)
X_test = X_test.reshape(X_test.shape[0], *feature_shape)
X_val = X_val.reshape(X_val.shape[0], *feature_shape)

In [53]:
# Sanity check: print the shape of the reshaped training tensor,
# expected to be (samples, buckets, max_len, channels)
print(X_train.shape)
# Disabled preview of a single training sample's MFCC image.
# NOTE(review): index 500 is out of range whenever the training set has fewer
# than 501 samples — pick a valid index (e.g. 0) before re-enabling this line.
#plt.imshow(X_train[500, :, :, 0])

(96, 50, 21, 1)


In [54]:
# One-hot encode the integer class ids of each split.
# num_classes is given explicitly: without it to_categorical sizes the output
# from the largest label present in that split, so a split that happens to miss
# the highest class would come out narrower than the model's softmax layer.
y_train_hot = to_categorical(y_train, num_classes=len(labels))
y_test_hot = to_categorical(y_test, num_classes=len(labels))
y_val_hot = to_categorical(y_val, num_classes=len(labels))

In [55]:
# Building the model
# Input is one MFCC "image" per clip: (buckets, max_len, channels)
input_shape = (config.buckets, config.max_len, channels)

model = Sequential([
    # Block 1: 24 filters of 3x3, 2x2 down-sampling, then ReLU
    Conv2D(24, (3, 3), strides=(1, 1), input_shape=input_shape),
    MaxPooling2D((2, 2), strides=(2, 2)),
    Activation('relu'),

    # Block 2: 48 filters of 3x3, 2x2 down-sampling, then ReLU
    Conv2D(48, (3, 3), padding="valid"),
    MaxPooling2D((2, 2), strides=(2, 2)),
    Activation('relu'),

    # Block 3: 48 filters of 3x1 (spans the first spatial axis only), no pooling
    Conv2D(48, (3, 1), padding="valid"),
    Activation('relu'),

    # Classifier head: flatten, regularise with dropout, one hidden layer
    Flatten(),
    Dropout(rate=0.5),

    Dense(64),
    Activation('relu'),
    Dropout(rate=0.5),

    # One output unit per class; softmax turns them into class probabilities
    Dense(len(labels)),
    Activation('softmax'),
])
model.summary()

In [56]:
# Configure CNN for training: Adam optimizer, categorical cross-entropy loss
# (the targets are one-hot vectors) and accuracy reported alongside the loss
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [57]:
# NOTE(review): wandb.init() was already called when `config` was created;
# confirm whether a second call here is intended (it may start a separate run).
wandb.init()

# Quick shape check of targets, label names and inputs before training
print(y_train_hot.shape)
print(labels.shape)
print(X_train.shape)

# Train the CNN model
#    X_train: Input data
#    y_train_hot: Target data (one-hot)
#    batch_size: taken from the config; previously config.batch_size was set and
#                logged but never passed, so Keras silently used its default
#    validation_data: evaluated at the end of every epoch, never trained on
model.fit(
    X_train,
    y_train_hot,
    batch_size=config.batch_size,
    epochs=config.epochs,
    validation_data=(X_val, y_val_hot),
    callbacks=[WandbCallback(data_type="image", labels=labels)],
)

(96, 5)
(5,)
(96, 50, 21, 1)
Train on 96 samples, validate on 64 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x1666e4e1c88>

In [58]:
# Save the keras model
# Written to ant_cnn_model.h5, relative to the current working directory; the
# same file name is read back with load_model() later in this notebook.
model.save("ant_cnn_model.h5")
print("Model has been saved.")

Model has been saved.


## Running the IntelliChirp Biophony CNN

In [59]:
# Load the model back from the file written by model.save() above, so the
# evaluation and prediction cells run against exactly what was persisted.
# (load_model is imported with the other Keras imports in the first cell.)
loaded_model = load_model('ant_cnn_model.h5')

In [60]:
# Summarize the model that was loaded from disk — the one the prediction cells
# below actually use — rather than the in-memory training model
loaded_model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 48, 19, 32)        320       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 24, 9, 32)         0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 6912)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               884864    
_________________________________________________________________
dense_6 (Dense)              (None, 5)                 645       
Total params: 885,829
Trainable params: 885,829
Non-trainable params: 0
_________________________________________________________________


In [61]:
# Class probabilities for every test sample: one row per sample, one column per
# output unit of the model
y_pred_ohe = loaded_model.predict(X_test)
# Collapse each probability row to the index of its most likely class, shape=(n_samples,)
y_pred_labels = np.argmax(y_pred_ohe, axis=1)

# Rows are true classes, columns are predicted classes
confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_labels)
print(confusion_matrix)

# Per-class accuracy: of the samples whose true class is class_i, the fraction
# that the model also predicted as class_i
for class_i, class_name in enumerate(labels):
    is_class = (y_test == class_i)
    n_in_class = int(np.sum(is_class))
    if n_in_class == 0:
        # A class with no test samples would otherwise divide by zero
        print("Accuracy for class", class_name, ": n/a (no test samples)")
        continue
    class_accuracy = np.mean(y_pred_labels[is_class] == class_i)
    print("Accuracy for class", class_name, ":", class_accuracy)

print("Overall Accuracy :", np.mean(y_test == y_pred_labels))

# Results recorded from an earlier run (with the previous label set), kept for
# comparison:
#   Accuracy for class chirping_birds : [0.8]
#   Accuracy for class crickets : [0.25]
#   Accuracy for class crow : [0.83333333]
#   Accuracy for class frog : [0.3]
#   Accuracy for class insects : [0.85714286]
#   Overall Accuracy : 0.525

[[2 1 1 0 1]
 [5 6 0 0 1]
 [0 0 6 0 0]
 [1 0 6 3 0]
 [0 0 0 0 7]]
Accuracy for class chirping_birds : [0.4]
Accuracy for class crickets : [0.5]
Accuracy for class crow : [1.]
Accuracy for class frog : [0.3]
Accuracy for class insects : [1.]
Overall Accuracy : 0.6


'Accuracy for class chirping_birds : [0.8]\nAccuracy for class crickets : [0.25]\nAccuracy for class crow : [0.83333333]\nAccuracy for class frog : [0.3]\nAccuracy for class insects : [0.85714286]\nOverall Accuracy : 0.525'

In [62]:
## Running the model
# Slides a one-second window over a recording, turns each window into an MFCC
# matrix shaped like the training inputs, and records the model's guess for it.

n_mfcc = config.buckets
max_len = config.max_len

# Minimum softmax probability needed to report a class instead of "Nothing"
confidence_threshold = 0.5

# Load the recording once, mixed down to mono, at its native sample rate
file_path = "./prediction/nature_sc.wav"
big_wave, sr = librosa.load(file_path, mono=True, sr=None)

# One {"class", "timestamp"} entry per analysed second
classification = []

# Only whole seconds are analysed; a trailing partial second is skipped
for sec_index in range( int(big_wave.shape[0] / sr) ) :
    start_sec = sec_index
    end_sec = sec_index + 1

    sec_to_trim = np.array( [ float(start_sec), float(end_sec) ] )
    print(sec_to_trim)
    # Convert the window bounds from seconds to sample indices
    sec_to_trim = np.ceil( sec_to_trim * sr )

    wave = big_wave[int(sec_to_trim[0]) : int(sec_to_trim[1])]
    print(wave)

    # Keep every 3rd sample, then compute the MFCCs.
    # NOTE(review): the decimated rate is sr / 3, which matches the hard-coded
    # sr=16000 only for a 48 kHz recording; presumably this mirrors the
    # training-time preprocessing — confirm before changing either value.
    wave = np.asfortranarray(wave[::3])
    # The audio is passed as y=: newer librosa releases reject it positionally
    mfcc = librosa.feature.mfcc(y=wave, sr=16000, n_mfcc=n_mfcc)

    # If maximum length exceeds mfcc lengths then pad the remaining ones
    if (max_len > mfcc.shape[1]):
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

    # Else cutoff the remaining parts
    else:
        mfcc = mfcc[:, :max_len]

    # The window's MFCC matrix is the model input. (A former call that ran
    # wav2mfcc on the whole file here was dropped: its result was overwritten
    # straight away, so it only re-read the file on every iteration.)
    prediction_data = mfcc
    print(prediction_data.shape)
    # Reshape to 4 dimensions: (batch of 1, buckets, max_len, channels)
    prediction_data = prediction_data.reshape(1, config.buckets, config.max_len, channels)

    # Run the model on this window
    predicted = loaded_model.predict(prediction_data)

    # Output the prediction values for each class
    print ('PREDICTED VALUES')
    labels_indices = range(len(labels))
    for index in labels_indices:
        print('\n', labels[index], ": ", '%.08f' % predicted[0,index])

    # Most likely class among the labelled outputs, and its probability
    class_probs = predicted[0, :len(labels)]
    max_value_index = int(np.argmax(class_probs))
    max_value = class_probs[max_value_index]

    # Output the prediction; anything below the threshold (or NaN) counts as "Nothing"
    if max_value >= confidence_threshold:
        print('\n\nGUESS: ', labels[max_value_index])
        classification.append( { "class" : labels[max_value_index], "timestamp" : start_sec } )
    else:
        print("GUESS: Nothing")
        classification.append( { "class" : "Nothing", "timestamp" : start_sec } )

print(classification)

[0. 1.]
[ 0.0000000e+00  1.5258789e-05  0.0000000e+00 ...  3.3020020e-02
  1.2680054e-02 -8.7432861e-03]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.30736196

 crickets :  0.64452684

 crow :  0.01276931

 frog :  0.00179228

 insects :  0.03354968


GUESS:  crickets
[1. 2.]
[-0.03717041 -0.05769348 -0.06455994 ...  0.01766968  0.01895142
  0.01779175]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.23428890

 crickets :  0.62110490

 crow :  0.02115585

 frog :  0.00654036

 insects :  0.11690991


GUESS:  crickets
[2. 3.]
[ 0.02345276  0.02101135  0.01712036 ... -0.01161194 -0.0141449
 -0.01431274]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.36313936

 crickets :  0.43238679

 crow :  0.02465060

 frog :  0.01954458

 insects :  0.16027857
GUESS: Nothing
[3. 4.]
[-0.01583862 -0.01066589 -0.00762939 ... -0.0377655  -0.03556824
 -0.02685547]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.82177597

 crickets :  0.12068707

 crow :  0.00271698

 frog :  0.01081211

 insects :  0

 -0.12313843]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.03494295

 crickets :  0.26417795

 crow :  0.63337618

 frog :  0.00193922

 insects :  0.06556365


GUESS:  crow
[33. 34.]
[-0.09968567 -0.06376648 -0.03105164 ... -0.0138092  -0.01574707
 -0.01896667]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.00177779

 crickets :  0.04300207

 crow :  0.93167436

 frog :  0.00749555

 insects :  0.01605024


GUESS:  crow
[34. 35.]
[-0.00811768  0.00149536  0.00953674 ... -0.004776   -0.0010376
  0.00231934]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.30037296

 crickets :  0.10383671

 crow :  0.47047505

 frog :  0.01371895

 insects :  0.11159633
GUESS: Nothing
[35. 36.]
[ 0.00238037  0.00236511  0.00231934 ... -0.00193787  0.0068512
  0.00695801]
(50, 21)
PREDICTED VALUES

 chirping_birds :  0.35100570

 crickets :  0.48698586

 crow :  0.01208235

 frog :  0.01082602

 insects :  0.13910010
GUESS: Nothing
[{'class': 'crickets', 'timestamp': 0}, {'class': 'crickets', 'timest