# Imports

In [41]:
import pandas as pd
import numpy as np

import librosa
import librosa.display

import matplotlib.pyplot as plt

from tensorflow.keras.utils import to_categorical

from tqdm import tqdm

# Load Labels and Filenames

In [2]:
# Location of the CSV that lists every audio clip with its label.
labels_file = 'labels.csv'

# Load the label file, using the first CSV column as the row index.
df = pd.read_csv(labels_file, index_col=0)

# Preview the first ten rows to check that it loaded correctly.
df.head(10)

Unnamed: 0,file,label
0,data/train/train1.aiff,0
1,data/train/train2.aiff,0
2,data/train/train3.aiff,0
3,data/train/train4.aiff,0
4,data/train/train5.aiff,0
5,data/train/train6.aiff,1
6,data/train/train7.aiff,1
7,data/train/train8.aiff,0
8,data/train/train9.aiff,1
9,data/train/train10.aiff,0


# Preprocessing

### Verify that all files are the same length and framerate.

In [3]:
import aifc

In [4]:
# Column of audio file paths taken from the label table.
files = df['file']
# Preview the first few paths.
files.head()

0    data/train/train1.aiff
1    data/train/train2.aiff
2    data/train/train3.aiff
3    data/train/train4.aiff
4    data/train/train5.aiff
Name: file, dtype: object

# Collect the AIFF header parameters of every listed file so they can be
# compared. Keys remain 'param1', 'param2', ... in the order of `files`.
param_dict = {}

for count, f in enumerate(files, start=1):
    # Open the path listed in the label table (instead of rebuilding it from a
    # counter) and close the handle as soon as the header has been read, so
    # the loop does not accumulate one open file per clip.
    with aifc.open(f, 'r') as obj:
        param_dict['param%s' % count] = obj.getparams()

# Transpose so that each row is one file and each column one header field.
param_df = pd.DataFrame(param_dict).T

In [32]:
# Summarise each header field; a `unique` count of 1 means every file shares that value.
param_df.describe()

Unnamed: 0,0,1,2,3,4,5
count,30000,30000,30000,30000,30000,30000
unique,1,1,1,1,1,1
top,1,2,2000,4000,b'NONE',b'not compressed'
freq,30000,30000,30000,30000,30000,30000


All files have the same parameters. For future data, steps will need to be added to the transform to make sure the files have the same parameters.

### Data Augmentation

Types of Augmentation Include: <br>
1) Time Shift <br>
2) Pitch Shift  <br>
3) Time Stretch <br>
4) Adding Noise <br>
 <br>
These steps can be performed on the raw audio or on the spectrogram.

The audio files already contain noise, different kinds of whale sounds, and different time intervals. For initial modeling, no augmentation will be performed. This process can also be built into a data transformer.

### Generate MFCC

In [None]:
def mfcc_extractor(file, n_mfcc=40):
    """Return one averaged MFCC feature vector for an audio file.

    Parameters
    ----------
    file : str or path-like
        Path of the audio file to load. This argument is what gets loaded;
        the function does not depend on any global variable.
    n_mfcc : int, optional
        Number of MFCC coefficients to request from librosa (default 40).

    Returns
    -------
    numpy.ndarray
        The MFCC matrix averaged along its second axis, i.e. one mean value
        per coefficient.
    """
    # NOTE(review): no `sr` is passed, so librosa's default target sample rate
    # applies. The files were reported at 2000 Hz earlier in this notebook;
    # confirm whether `sr=None` (keep native rate) is the intended behaviour.
    audio, sr = librosa.load(file, res_type='kaiser_fast')
    mfccs_feature = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    # Transposing and averaging over axis 0 collapses the second axis of the
    # MFCC matrix, leaving a single vector per file.
    mfccs_scaled_feature = np.mean(mfccs_feature.T, axis=0)
    return mfccs_scaled_feature

In [None]:
# Build a list of [mfcc_vector, label] pairs, one per row of the label table.
extracted_mfcc = []
# iterrows() is a generator with no length, so pass total= explicitly to let
# tqdm display a real progress bar with percentage and ETA.
for index_num, row in tqdm(df.iterrows(), total=len(df)):
    file_name = str(row['file'])
    class_labels = row['label']
    data = mfcc_extractor(file_name)
    extracted_mfcc.append([data, class_labels])

In [16]:
# Turn the [mfcc, label] pairs into a two-column frame: 'mfcc' and 'class'.
extracted_mfcc_df = pd.DataFrame(extracted_mfcc,columns=['mfcc','class'])
# Preview the first ten rows.
extracted_mfcc_df.head(10)

Unnamed: 0,mfcc,class
0,"[-651.90155, 249.67929, 159.31563, 57.305202, ...",0
1,"[-744.4306, 237.36618, 151.30487, 53.98658, -2...",0
2,"[-726.25397, 242.09875, 153.3899, 52.72366, -2...",0
3,"[-718.62427, 237.44089, 152.56413, 52.075317, ...",0
4,"[-667.9275, 223.0544, 141.7749, 47.895996, -27...",0
5,"[-674.3936, 235.28004, 147.79329, 48.110336, -...",1
6,"[-624.9562, 246.36066, 153.83499, 45.812805, -...",1
7,"[-722.72986, 241.86436, 153.31255, 51.218334, ...",0
8,"[-712.4814, 237.32451, 151.02075, 52.3069, -26...",1
9,"[-683.81396, 231.87447, 146.75104, 47.550854, ...",0


In [67]:
# Replace the numeric labels (0 / 1) with readable class names in one pass.
# Building a new column avoids writing strings into a numeric column through
# .loc, which recent pandas versions flag as an incompatible-dtype assignment.
# The mapping leaves already-renamed values untouched, so re-running the cell
# is harmless.
extracted_mfcc_df['class'] = extracted_mfcc_df['class'].replace({0: 'noise', 1: 'whale'})

In [68]:
# Check that the 'class' column now holds 'noise' / 'whale'.
extracted_mfcc_df.head(10)

Unnamed: 0,mfcc,class
0,"[-651.90155, 249.67929, 159.31563, 57.305202, ...",noise
1,"[-744.4306, 237.36618, 151.30487, 53.98658, -2...",noise
2,"[-726.25397, 242.09875, 153.3899, 52.72366, -2...",noise
3,"[-718.62427, 237.44089, 152.56413, 52.075317, ...",noise
4,"[-667.9275, 223.0544, 141.7749, 47.895996, -27...",noise
5,"[-674.3936, 235.28004, 147.79329, 48.110336, -...",whale
6,"[-624.9562, 246.36066, 153.83499, 45.812805, -...",whale
7,"[-722.72986, 241.86436, 153.31255, 51.218334, ...",noise
8,"[-712.4814, 237.32451, 151.02075, 52.3069, -26...",whale
9,"[-683.81396, 231.87447, 146.75104, 47.550854, ...",noise


In [69]:
# Persist the extracted features next to the data.
# NOTE(review): each 'mfcc' cell is the NumPy array returned by mfcc_extractor,
# so it is presumably written as text in the CSV — confirm the file can be
# read back as numbers, or consider a binary format for the feature matrix.
extracted_mfcc_df.to_csv('data/mfcc.csv')

### Define features and target.

In [70]:
# Feature matrix: one row per clip, built from the per-clip MFCC vectors.
mfcc_vectors = list(extracted_mfcc_df['mfcc'])
x = np.array(mfcc_vectors)

# Target vector: the class name of each clip, in the same row order.
class_names = list(extracted_mfcc_df['class'])
y = np.array(class_names)

In [72]:
from sklearn.preprocessing import LabelEncoder
# Encode the class names as integer ids, then one-hot encode those ids so the
# targets match the softmax output used with categorical cross-entropy below.
# NOTE(review): presumably 'noise' -> 0 and 'whale' -> 1; confirm via le.classes_.
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

### Split the Dataset

In [82]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    yy,
    test_size=0.2,
    random_state=42,
)

In [83]:
# Show the feature-matrix shape of the training split, then the test split.
for feature_split in (x_train, x_test):
    print(feature_split.shape)

(24000, 40)
(6000, 40)


In [84]:
# Show the one-hot target shape of the training split, then the test split.
for target_split in (y_train, y_test):
    print(target_split.shape)

(24000, 2)
(6000, 2)


# Modeling

### Model Imports

In [85]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout
from sklearn import metrics

### Construct Model

In [86]:
# Number of output classes; sets the width of the final Dense layer.
num_labels = 2

In [87]:
# Fully connected classifier over the 40-value MFCC vector:
# two hidden stages of Dense(256) -> ReLU -> Dropout(0.5), followed by a
# softmax output with one unit per class.
model = Sequential([
    # Hidden stage 1
    Dense(256, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),
    # Hidden stage 2
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    # Output stage
    Dense(num_labels),
    Activation('softmax'),
])

### Compile the Model

In [88]:
# Configure training: Adam optimiser, categorical cross-entropy on the one-hot
# targets, and accuracy reported as the monitoring metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [89]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 256)               10496     
_________________________________________________________________
activation_8 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 256)               65792     
_________________________________________________________________
activation_9 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 2)                

### Fit the Model

In [98]:
# Training hyperparameters passed to model.fit below.
# NOTE(review): batch_size is larger than the 24000 training rows printed
# above, so each epoch appears to be a single full-batch update — confirm this
# is intended rather than a smaller mini-batch size.
batch_size = 26000
epochs = 1000

In [99]:
# Train the network and keep the returned History object so the per-epoch
# loss/accuracy values can be inspected or plotted afterwards instead of
# being discarded.
# NOTE(review): the held-out test split is also used as validation data here;
# confirm that no model selection is driven by these validation numbers.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<tensorflow.python.keras.callbacks.History at 0x7fbbd00c7f70>

### Evaluating the model on the training and testing set

In [100]:
# Accuracy after the 1000-epoch fit, reported for the training split first
# and the test split second.
for split_name, split_features, split_targets in (
    ("Training", x_train, y_train),
    ("Testing", x_test, y_test),
):
    score = model.evaluate(split_features, split_targets, verbose=0)
    # score[1] is the accuracy metric configured at compile time.
    print(split_name + " Accuracy: ", score[1])

Training Accuracy:  0.8331249952316284
Testing Accuracy:  0.8341666460037231


In [95]:
# Result recorded from an earlier run with 100 epochs.
# NOTE(review): this cell's execution count (95) is lower than the fit cell's
# (99), so the output below comes from a previous training run. Re-running
# the notebook top to bottom would evaluate the current `model`, not a
# 100-epoch one.
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.8267916440963745
Testing Accuracy:  0.8301666378974915


In [92]:
# Result recorded from an earlier run with 10 epochs.
# NOTE(review): this cell's execution count (92) is lower than the fit cell's
# (99), so the output below comes from a previous training run. Re-running
# the notebook top to bottom would evaluate the current `model`, not a
# 10-epoch one.
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.8197916746139526
Testing Accuracy:  0.8174999952316284
