In this notebook we show how to run a simple deep learning model to classify whale calls. We use the [keras](https://keras.io/) library. We will use convolutional neural networks, since they are robust to temporal and spatial shifts. 

In [None]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# importing multiple visualization libraries
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mlab
import pylab as pl
import seaborn

In [None]:
# importing libraries to manipulate the data files
import os
from glob import glob

In [None]:
# import a library to read the .aiff format
import aifc

In [None]:
ls ../input/

In [None]:
# collect the path of every .aiff recording in the training folder
train_dir = os.path.join(
    '../input/whaledatatrainonly/whale_data_train_only/whale_data_train_only/',
    'train',
)
filenames = glob(os.path.join(train_dir, '*.aiff'))

In [None]:
# put the paths in alphabetical order, then show how many were found
filenames.sort()
len(filenames)

In [None]:
from scipy import signal

In [None]:
# keyword arguments forwarded to mlab.specgram
params = dict(NFFT=256, Fs=2000, noverlap=192)
# number of leading spectrogram rows kept for each recording
m = 60

In [None]:
from scipy import interpolate
from skimage.transform import resize

In [None]:
# read each recording and turn it into a fixed-size spectrogram image
feature_dict = {}
spec_dict = {}
fs = 2000
N = 5000 #sample size
target_shape = (128, 128)  # every spectrogram is resized to this shape
for filename in filenames[:N]:
    # the context manager closes the file handle even if reading fails
    with aifc.open(filename, 'r') as aiff:
        whale_strSig = aiff.readframes(aiff.getnframes())
    # AIFF samples are big-endian; decode them explicitly instead of relying on
    # the host byte order, then convert to native int16.
    # assumes 16-bit single-channel audio -- TODO confirm with aiff.getsampwidth()
    whale_array = np.frombuffer(whale_strSig, dtype='>i2').astype(np.int16)
    # create the spectrogram
    P, freqs, bins = mlab.specgram(whale_array, **params)

    # keep the first m rows and bring every spectrogram to a common shape
    spec_dict[filename] = resize(P[:m,:], target_shape, anti_aliasing=True)

# save the dimensions of the resized spectrograms
spec_dim = target_shape
print(spec_dim)

In [None]:
# np.stack needs a real sequence (list/tuple); a dict view is rejected by recent NumPy
X = np.stack(list(spec_dict.values()))

In [None]:
# dimensions of the stacked spectrogram array
np.shape(X)

In [None]:
import pandas as pd
import numpy as np
import os
import keras
import matplotlib.pyplot as plt
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.preprocessing import image
from keras.applications.mobilenet import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.optimizers import Adam

from keras.layers import Dropout, Activation
from keras.layers.normalization import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard

In [None]:
# label table; its first column becomes the index
# (presumably the clip file name -- it is looked up by file name further down)
labels_csv = os.path.join(
    '../input/whaledatatrainonly/whale_data_train_only/whale_data_train_only/',
    'train.csv',
)
labels = pd.read_csv(labels_csv, index_col=0)

In [None]:
# X = np.rollaxis(np.array([X,X,X]),0,4)

In [None]:
# Log-compress the spectrogram power. Values are clamped to the smallest
# positive float first: a bin with zero power would otherwise become -inf and
# turn the min/max normalisation that follows into NaN for the whole dataset.
# NOTE: this overwrites X, so run the cell only once per kernel session.
X = np.log(np.maximum(X, np.finfo(X.dtype).tiny))

In [None]:
# Colormaps expect floats in [0, 1]. Min-max scaling uses that whole range;
# centring on the mean produced negative values, which the colormap clips to
# a single colour, discarding everything below the mean.
# The result has a trailing RGBA axis.
X_colored = plt.cm.viridis((X - X.min()) / (X.max() - X.min()))

In [None]:
# file name of every spectrogram (directory part removed), in insertion order;
# os.path.basename handles the platform's path separator
keys = [os.path.basename(path) for path in spec_dict]

In [None]:
# the spectrograms were stacked into X and their file names copied into keys
del spec_dict

In [None]:
# only X_colored is used from here on
del X

In [None]:
# y holds one label per spectrogram, looked up by file name in the order of keys
label_column = labels['label']
y = np.array(label_column[keys])[:N]

Now we have the inputs (`X_colored`) and the labels (`y`), so we can train the neural network. But first we will split the data into training and testing datasets.

In [None]:
#from keras.applications.inception_v3 import InceptionV3
#base_model=MobileNet(input_shape = (128,128,3), weights='imagenet',include_top=False) #imports the mobilenet m

In [None]:
import ssl

# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate verification is switched off for every
# default-context HTTPS request made afterwards in this process.
# The MobileNet weights below are read from a local .h5 file, so this override
# may no longer be needed -- confirm before removing.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
#from keras.applications.vgg19 import VGG19
#base_model=VGG19(input_shape = (224,224,3), weights='imagenet',include_top=False) #imports the mobilenet m

In [None]:
# record the installed Keras version for reproducibility
keras.__version__

In [None]:
# MobileNet backbone for 128x128 RGB inputs, built without its top
# classification layer and initialised from a local weights file
base_model = MobileNet(
    input_shape=(128, 128, 3),
    weights='../input/mobilenet-1-0-128-tf-no-top/mobilenet_1_0_128_tf_no_top.h5',
    include_top=False,
)

In [None]:
# classification head stacked on top of the MobileNet feature maps
x = GlobalAveragePooling2D()(base_model.output)
# three fully connected ReLU layers give the head capacity to learn the task
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)
# single sigmoid unit: one probability per sample for the binary label
preds = Dense(1, activation='sigmoid')(x)

In [None]:
# full network: MobileNet input through to the sigmoid prediction
model = Model(
    inputs=base_model.input,
    outputs=preds,
)

In [None]:
# total number of layers; the next cell freezes all but the last 10 of them
len(model.layers)

In [None]:
# fine-tune only the last 10 layers; every layer before them stays frozen
n_trainable = 10
first_trainable = len(model.layers) - n_trainable
for idx, layer in enumerate(model.layers):
    layer.trainable = idx >= first_trainable

In [None]:

#for layer in model.layers:
#    layer.trainable=False

In [None]:
# binary classification set-up: cross-entropy loss, accuracy reported per epoch
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# number of labels; should match the number of images
np.shape(y)

In [None]:
from sklearn.model_selection import train_test_split
target_names = ['Upcall', 'NO_Upcall']

# the colormap output is RGBA; keep only the three colour channels
rgb_images = X_colored[..., :3]

# hold out 20% of the samples for evaluation, with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    rgb_images, y, test_size=0.20, random_state=2018
)

# labels are left as a single column (no one-hot encoding) for the one-unit output
print(X_train.shape)

In [None]:
# generator yielding shuffled mini-batches of 32 training images, each passed
# through MobileNet's preprocess_input
# NOTE(review): preprocess_input is documented for pixel values in [0, 255],
# while X_train comes from a colormap (floats in [0, 1]) -- confirm the scaling
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_generator = train_datagen.flow(
    X_train,
    y_train,
    batch_size=32,
    shuffle=True,
)

In [None]:
# Training callbacks:
#  - stop when validation accuracy has not improved for 6 epochs
#  - cut the learning rate by 10x after 3 epochs without improvement
#  - append per-epoch metrics to a CSV log
# NOTE(review): newer Keras releases report this metric as 'val_accuracy';
# adjust `monitor` if the installed version does not log 'val_acc'.
callbacks_list = [
    EarlyStopping(monitor = 'val_acc', patience = 6, verbose = 1),
    ReduceLROnPlateau(monitor = 'val_acc', factor = 0.1, patience = 3, verbose = 1),
    # use the model's name: str(model) is the object repr (with a memory
    # address), which gives a different, awkward file name on every run
    CSVLogger('model_' + model.name + '.log')]

In [None]:
# number of full batches per epoch
step_size_train = train_generator.n // train_generator.batch_size

# The generator runs preprocess_input on every training image, but arrays given
# as validation_data are fed to the model untouched. Apply the same
# preprocessing to a float32 copy (preprocess_input may modify float arrays in
# place, so X_test itself is left unchanged).
X_val = preprocess_input(np.array(X_test, dtype='float32'))

model.fit_generator(generator=train_generator,
                    steps_per_epoch=step_size_train,
                    epochs=20,
                    validation_data=(X_val, y_test),
                    callbacks=callbacks_list)

## Extra Stuff Below

In [None]:
# # f1 based metric
# import numpy as np
# from keras.callbacks import Callback
# from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
# class Metrics(Callback):
#    def on_train_begin(self, logs={}):
# self.val_f1s = []
#       self.val_recalls = []
#       self.val_precisions = []
#
#   def on_epoch_end(self, epoch, logs={}):
#       val_predict = (np.asarray(self.model.predict(self.validation_data[0]))).round() 
#        val_targ = self.validation_data[1]
#        _val_f1 = f1_score(val_targ, val_predict)
#        _val_recall = recall_score(val_targ, val_predict)
#        _val_precision = precision_score(val_targ, val_predict)
#        self.val_f1s.append(_val_f1)
#        self.val_recalls.append(_val_recall)
#        self.val_precisions.append(_val_precision)
#        print(' — val_f1: %f — val_precision: %f — val_recall %f' %(_val_f1, _val_precision, _val_recall))
#        return
 
#metrics = Metrics()

We see a lot of jargon:

|Term| Explanation|
|---|---|
|Convolutional Layers | layers which are robust to time shifts|
|Max Pooling/ Average Pooling |dimension reduction, robustness|
|Batch normalization |normalizes the activations within each batch so that their distribution stays stable during training |
|Epochs |number of complete passes over the training data (one epoch = every batch processed once) |
|Adam Optimizer | an adaptive optimization scheme|
|Cross Entropy | cost function|
|ReLU | nonlinear activation function| 
|Batch Size| size of subset to process to update the estimates|
|Learning Rate | step size of the optimization algorithm|

**References:**

[Deep Learning Glossary](http://www.wildml.com/deep-learning-glossary/)

[Keras and NN Tutorial](https://indico.cern.ch/event/506145/contributions/2132944/attachments/1258124/1858154/NNinKeras_MPaganini.pdf)

[Keras Cheatsheet](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Keras_Cheat_Sheet_Python.pdf)

Free GPU usage: [Google Colaboratory notebooks](https://colab.research.google.com/notebooks/welcome.ipynb#recent=true) & [Kaggle Kernels](https://www.kaggle.com/kernels).