In [29]:
import numpy as np
import pandas as pd
import datetime
from scipy import signal
from scipy import stats
from scipy import interpolate
import xarray as xr
from sklearn.model_selection import train_test_split
import keras
from sklearn.utils import resample

#visualizing results
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Widen pandas' display limits so that wide/long tables are shown in full.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [3]:
# Read the annotation/feature table from CSV and wrap it in a DataFrame.
features_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/annot_features_full.csv'
annot_features_data = pd.read_csv(features_path)
annot_features_full = pd.DataFrame(annot_features_data)

# Sanity output: table size, then the index labels of rows whose 'spec_pur'
# value is missing (an empty index means there are none).
print(annot_features_full.shape)
missing_spec_pur = pd.isna(annot_features_full['spec_pur'])
print(annot_features_full[missing_spec_pur].index)
annot_features_full.head()

(1084, 13)
Int64Index([], dtype='int64')


Unnamed: 0.1,Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll
0,0,533,5,low slug,46305.0,32714.904,0.368722,39977.724643,9966.541464,2.313672,9.578221,0.000271,37978.838411
1,1,533,5,low slug,149692.5,43037.465,0.306856,37931.297975,20543.321215,4.308415,22.308306,-0.000239,36034.733076
2,2,533,5,low slug,243157.5,270193.53,0.062226,11483.575333,37201.715521,6.940257,50.135018,-0.001537,10909.396566
3,3,533,5,low slug,243270.0,63014.45,0.232295,32242.081971,21947.090189,5.325133,33.372614,-0.000671,30629.977872
4,4,533,5,low multi,295560.0,74933.164,0.206291,17095.059221,11740.918797,5.372725,34.224633,-0.001093,16240.30626


In [4]:
# Keep only the rows whose annotation is something other than 'noise';
# the trailing expression displays the resulting table size.
keep_rows = annot_features_full['Annotation'].ne('noise')
annot_features_full = annot_features_full[keep_rows]
annot_features_full.shape

(1009, 13)

In [5]:
# Collapse the annotation strings into a binary target stored in 'class'.
# The dict is passed as `regex=`, so every key is applied as a regular
# expression pattern; matching cells are replaced by the mapped integer.
class_by_pattern = {r'low slug': 1, 'high slug': 1, 'low multi': 1, 'high multi': 1, 'bbc': 1, 'noise': 0, 'rand_noise': 0}
annot_labels_up = annot_features_full['Annotation'].replace(regex=class_by_pattern)
annot_features_full['class'] = annot_labels_up
annot_features_full.head()

Unnamed: 0.1,Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll,class
0,0,533,5,low slug,46305.0,32714.904,0.368722,39977.724643,9966.541464,2.313672,9.578221,0.000271,37978.838411,1
1,1,533,5,low slug,149692.5,43037.465,0.306856,37931.297975,20543.321215,4.308415,22.308306,-0.000239,36034.733076,1
2,2,533,5,low slug,243157.5,270193.53,0.062226,11483.575333,37201.715521,6.940257,50.135018,-0.001537,10909.396566,1
3,3,533,5,low slug,243270.0,63014.45,0.232295,32242.081971,21947.090189,5.325133,33.372614,-0.000671,30629.977872,1
4,4,533,5,low multi,295560.0,74933.164,0.206291,17095.059221,11740.918797,5.372725,34.224633,-0.001093,16240.30626,1


In [6]:
# Hold out 30% of the rows as the final test set. The split is stratified on
# the annotation label and uses a fixed seed so it is repeatable.
train_orig, test_orig = train_test_split(
    annot_features_full,
    test_size=0.3,
    random_state=1,
    stratify=annot_features_full['Annotation'],
)

In [7]:
# Split the remaining training rows again: 15% become the validation set used
# while fitting, the rest is the actual training set (stratified, fixed seed).
train_train, test_validate = train_test_split(
    train_orig,
    test_size=0.15,
    random_state=1,
    stratify=train_orig['Annotation'],
)
train_train.shape

(600, 14)

In [8]:
# Balance the training rows: everything that is not 'rand_noise' is resampled
# with replacement until it has as many rows as the 'rand_noise' group.
is_rand_noise = train_train['Annotation'] == 'rand_noise'
train_orig_maj = train_train[is_rand_noise]
train_orig_min = train_train[~is_rand_noise]

train_orig_min_upsamples = resample(
    train_orig_min,
    replace=True,
    n_samples=len(train_orig_maj),
    random_state=123,
)

# Upsampled rows first, untouched 'rand_noise' rows after them.
train_orig_upsamples = pd.concat([train_orig_min_upsamples, train_orig_maj])

print(train_orig_upsamples['Annotation'].value_counts())

train_orig_upsamples.head()

rand_noise    536
low slug      327
low multi      80
bbc            65
high slug      34
high multi     30
Name: Annotation, dtype: int64


Unnamed: 0.1,Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll,class
70,70,542,4,low slug,428580.0,51511.1,0.258035,23226.66136,18480.461686,5.006498,27.41251,-0.000941,22065.328292,1
84,84,554,4,bbc,195075.0,119023.84,0.116528,29715.373003,174674.45414,4.503008,20.474682,-0.002724,28229.604353,1
70,70,542,4,low slug,428580.0,51511.1,0.258035,23226.66136,18480.461686,5.006498,27.41251,-0.000941,22065.328292,1
94,94,555,4,low slug,143482.5,103010.07,0.137079,21004.692279,70392.707522,7.224641,54.442881,-0.001588,19954.457665,1
106,106,559,5,bbc,276277.5,146611.69,0.094746,31107.938729,154898.940229,4.70323,23.947356,-0.002707,29552.541793,1


In [9]:
def square_rgb(xr_slice, size=128):
    """Resample one spectrogram slice to a square RGB image for Keras.

    Changes relative to the previous version:

    * The grid dimensions are taken from ``xr_slice`` itself instead of a
      module-level ``data`` variable, so the function no longer depends on
      whatever dataset the calling loop happened to leave behind.
    * The resampling grid ends at ``len - 1`` (the last real sample) rather
      than at ``len``, which lies outside the source grid.
    * ``scipy.interpolate.interp2d`` (deprecated, removed in recent SciPy) is
      replaced by the equivalent linear ``RectBivariateSpline``.

    Parameters
    ----------
    xr_slice : object exposing ``.values``
        A 2-D array-like spectrogram slice. It is treated as laid out
        (frequency, time), which is the layout the original ``interp2d`` call
        required for its ``z`` argument.
    size : int, optional
        Edge length of the square output image. Defaults to 128.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size, 3)``: the viridis RGBA colours of the
        normalised log-spectrogram with the alpha channel removed.
    """
    spec = np.asarray(xr_slice.values, dtype=float)
    n_freqs, n_times = spec.shape

    # make square for Keras - build a linear interpolant on the integer grid
    # and evaluate it on `size` evenly spaced points per axis
    x = np.arange(n_times)
    y = np.arange(n_freqs)
    # RectBivariateSpline expects z with shape (len(x), len(y)), hence the
    # transposes on the way in and on the way out
    f = interpolate.RectBivariateSpline(x, y, spec.T, kx=1, ky=1)
    xnew = np.linspace(0, n_times - 1, num=size)
    ynew = np.linspace(0, n_freqs - 1, num=size)
    znew = f(xnew, ynew).T

    X = np.log(znew)

    # normalize and change to rgba (remove a)
    # NOTE(review): subtracting the mean leaves part of the scaled range below
    # zero, while matplotlib colormaps document [0, 1] for float input -
    # confirm whether (X - X.min()) was intended. Left unchanged here because
    # it alters every image fed to the network.
    X_colored = plt.cm.viridis((X - X.mean())/(X.max() - X.min()))
    X_colored = X_colored[:, :, :3]

    return X_colored

In [10]:
# Build the training image tensor: one square RGB spectrogram per row of the
# upsampled training frame, in row order so it stays aligned with the labels.
dataset_dir = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/'
rgbs = []
for animal, time_stamp in zip(train_orig_upsamples['Animal'], train_orig_upsamples['time_stamp']):
    # Open the animal's dataset in a `with` block so the file handle is closed
    # again after each row (previously every opened dataset stayed open).
    # The name `data` is kept on purpose: the image helper may look it up as
    # a module-level variable.
    with xr.open_dataset(dataset_dir + str(animal) + '_xr_Dataset.nc') as data:
        xr_slice = data['__xarray_dataarray_variable__'].sel(slices=time_stamp)
        # the conversion reads the values, so it must run while the file is open
        rgbs.append(square_rgb(xr_slice))

X = np.stack(rgbs)
print(X.shape)

(1072, 128, 128, 3)


In [11]:
# Binary targets for the training images, taken from the same frame (and in
# the same row order) that the image tensor was built from.
y = train_orig_upsamples.loc[:, 'class']
y.shape

(1072,)

In [12]:
# Build the validation image tensor: one square RGB spectrogram per row of
# the validation frame, in row order so it stays aligned with the labels.
dataset_dir = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/'
rgbs_test = []
for animal, time_stamp in zip(test_validate['Animal'], test_validate['time_stamp']):
    # Open the animal's dataset in a `with` block so the file handle is closed
    # again after each row (previously every opened dataset stayed open).
    # The name `data` is kept on purpose: the image helper may look it up as
    # a module-level variable.
    with xr.open_dataset(dataset_dir + str(animal) + '_xr_Dataset.nc') as data:
        xr_slice = data['__xarray_dataarray_variable__'].sel(slices=time_stamp)
        # the conversion reads the values, so it must run while the file is open
        rgbs_test.append(square_rgb(xr_slice))

X_test = np.stack(rgbs_test)
print(X_test.shape)

(106, 128, 128, 3)


In [13]:
# Binary targets for the validation images, in the validation frame's row order.
y_test = test_validate.loc[:, 'class']
y_test.shape

(106,)

In [14]:
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.preprocessing import image
from keras.applications.mobilenet import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.optimizers import Adam

from keras.layers import Dropout, Activation
from keras.layers.normalization import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, Callback
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score

In [15]:
# MobileNet backbone for 128x128 RGB inputs. include_top=False leaves out the
# network's own classification layers so a custom head can be attached.
base_model = MobileNet(
    include_top=False,
    input_shape=(128, 128, 3),
)

In [16]:
# Classification head on top of the backbone: global average pooling, three
# fully connected ReLU layers (1024, 1024, 512 units), then one sigmoid unit
# that produces the binary prediction.
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)
preds = Dense(1, activation='sigmoid')(x)  # final layer: single sigmoid output

In [17]:
# Full network: backbone input through to the sigmoid prediction.
model = Model(outputs=preds, inputs=base_model.input)

In [18]:
# Only the last five layers of the model are left trainable; every layer
# before them is frozen.
n_trainable_tail = 5
first_trainable = len(model.layers) - n_trainable_tail
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable

In [19]:
from keras import backend as K

def f1(y_true, y_pred):
    """Batch-wise F1 score built from backend tensor operations.

    Precision and recall are computed from counts obtained by rounding the
    clipped (0..1) values, each with the backend epsilon added to the
    denominator, and are then combined into their harmonic mean. The value
    is computed on whatever tensors it is handed, i.e. per batch.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores.

    Returns:
        Scalar tensor holding the F1 score of the given tensors.
    """
    eps = K.epsilon()

    # Rounded counts: hits, actual positives and predicted positives
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))

    prec = hits / (predicted_pos + eps)
    rec = hits / (actual_pos + eps)

    return 2*((prec*rec)/(prec+rec+eps))

In [20]:
# Binary cross-entropy objective with the Adam optimizer; the custom f1
# function is the only metric reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
    metrics=[f1],
)

In [21]:
# Training-time image generator: applies MobileNet's preprocess_input and a
# random horizontal shift of up to 20% of the width, with featurewise
# centering / std-normalisation switched on.
# NOTE(review): no train_datagen.fit(...) call is visible in this notebook.
# Keras documents that the featurewise statistics have to be computed with
# fit() before they take effect - confirm whether these two options are
# actually doing anything here.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    width_shift_range=0.2,
    featurewise_center=True,
    featurewise_std_normalization=True,
)

# Shuffled mini-batches of 32 images with their labels
train_generator = train_datagen.flow(X, y, shuffle=True, batch_size=32)

In [45]:
# Directory that receives the TensorBoard event files for this run
log_dir="C:/Users/Schindler/Documents/ProgrammingFun/USV_python/TF_logs/run_d"

tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1, batch_size=32, write_graph=True, write_grads=False, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch')

# Both callbacks used to monitor 'val_acc', but the model is compiled with the
# custom f1 metric only, so no accuracy value is ever logged and neither
# callback could trigger. 'val_loss' is logged whenever validation data is
# supplied, and lower-is-better is what the callbacks assume for it.
callbacks_list = [
    EarlyStopping(monitor = 'val_loss', patience = 6, verbose = 1),
    ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 3, verbose = 1),
    tensorboard,
]

In [46]:
# Whole batches per epoch (a trailing partial batch is not counted)
step_size_train=train_generator.n//train_generator.batch_size

# Training batches are standardised by train_datagen (preprocessing function
# included) before they reach the network, so the validation images must go
# through the same standardisation; otherwise the validation loss and metric
# are computed on differently scaled inputs. standardize() works in place,
# hence the explicit float32 copy. No random augmentation is applied here.
X_val_std = train_datagen.standardize(np.array(X_test, dtype='float32'))

model.fit_generator(generator=train_generator,
                   steps_per_epoch=step_size_train,
                   epochs=20,
                   validation_data = (X_val_std, y_test), callbacks = callbacks_list)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
 1/33 [..............................] - ETA: 2:22 - loss: 0.0386 - f1: 1.0000

KeyboardInterrupt: 

In [None]:
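# Command for starting TensorBoard from a terminal. Note: the path below points at run_a, whereas the callback cell in this notebook writes its logs to run_d; set --logdir to the run you want to inspect.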

#python -m tensorboard.main --logdir="C:/Users/Schindler/Documents/ProgrammingFun/USV_python/TF_logs/run_a"

In [None]:
from imblearn.keras import balanced_batch_generator
from imblearn.under_sampling import NearMiss

# Alternative training run that draws class-balanced mini-batches of 10 via
# imbalanced-learn's Keras helper, using NearMiss under-sampling.
# NOTE(review): X as built earlier in this notebook is 4-D (n, 128, 128, 3);
# confirm that the sampler accepts input that is not 2-D before relying on
# this cell (it has no execution count, so it may never have been run).
training_generator, steps_per_epoch = balanced_batch_generator(
    X,
    y,
    sampler=NearMiss(),
    batch_size=10,
    random_state=42,
)

# Ten silent epochs; the returned History object is kept for inspection
callback_history = model.fit_generator(
    generator=training_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    verbose=0,
)