In [1]:
import numpy as np
import pandas as pd
from scipy import signal
from scipy import stats
from scipy import interpolate
import xarray as xr
from sklearn.model_selection import train_test_split
import keras

#visualizing results
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Suppresses every warning for the rest of the session (no category or module
# filter is given), including deprecation and data-handling warnings raised by
# pandas / Keras in later cells.
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [2]:
# Widen pandas' console/HTML display limits so wide feature tables are shown in full.
PANDAS_DISPLAY_OPTIONS = {
    'display.max_rows': 500,
    'display.max_columns': 500,
    'display.width': 1000,
}
for option_name, option_value in PANDAS_DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

In [3]:
# Read the annotated feature table from disk and report its size, plus the
# index labels of any rows whose 'spec_pur' value is missing.
features_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/annot_features_full.csv'
annot_features_data = pd.read_csv(features_path)
annot_features_full = pd.DataFrame(data=annot_features_data)
spec_pur_missing = pd.isna(annot_features_full['spec_pur'])
print(annot_features_full.shape)
print(annot_features_full.loc[spec_pur_missing].index)
annot_features_full.head()

(1084, 13)
Int64Index([], dtype='int64')


Unnamed: 0.1,Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll
0,0,533,5,low slug,46305.0,32714.904,0.368722,39977.724643,9966.541464,2.313672,9.578221,0.000271,37978.838411
1,1,533,5,low slug,149692.5,43037.465,0.306856,37931.297975,20543.321215,4.308415,22.308306,-0.000239,36034.733076
2,2,533,5,low slug,243157.5,270193.53,0.062226,11483.575333,37201.715521,6.940257,50.135018,-0.001537,10909.396566
3,3,533,5,low slug,243270.0,63014.45,0.232295,32242.081971,21947.090189,5.325133,33.372614,-0.000671,30629.977872
4,4,533,5,low multi,295560.0,74933.164,0.206291,17095.059221,11740.918797,5.372725,34.224633,-0.001093,16240.30626


In [4]:
# Drop rows annotated as 'noise'. The explicit .copy() makes the result an
# independent frame, so adding columns to it in later cells is not a write to a
# slice of another DataFrame (which pandas reports as SettingWithCopyWarning).
annot_features_full = annot_features_full[annot_features_full['Annotation'] != 'noise'].copy()
annot_features_full.shape

(1009, 13)

In [26]:
# Binary target derived from the annotation text: 1 for every call type,
# 0 for both noise labels. Labels are matched exactly; the previous regex-based
# replace matched by pattern search, so e.g. 'noise' also matched 'rand_noise'
# and any unexpected label was silently left as a string.
CLASS_BY_ANNOTATION = {
    'low slug': 1,
    'high slug': 1,
    'low multi': 1,
    'high multi': 1,
    'bbc': 1,
    'noise': 0,
    'rand_noise': 0,
}
annot_labels_up = annot_features_full['Annotation'].map(CLASS_BY_ANNOTATION)

# map() yields NaN for labels missing from the table; fail loudly instead of
# letting a non-numeric or missing target reach the model.
unmapped_labels = annot_features_full.loc[annot_labels_up.isna(), 'Annotation'].unique()
if len(unmapped_labels) > 0:
    raise ValueError('Annotation values without a class mapping: ' + repr(list(unmapped_labels)))

annot_features_full['class'] = annot_labels_up
annot_features_full.head()

Unnamed: 0.1,Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll,class
0,0,533,5,low slug,46305.0,32714.904,0.368722,39977.724643,9966.541464,2.313672,9.578221,0.000271,37978.838411,1
1,1,533,5,low slug,149692.5,43037.465,0.306856,37931.297975,20543.321215,4.308415,22.308306,-0.000239,36034.733076,1
2,2,533,5,low slug,243157.5,270193.53,0.062226,11483.575333,37201.715521,6.940257,50.135018,-0.001537,10909.396566,1
3,3,533,5,low slug,243270.0,63014.45,0.232295,32242.081971,21947.090189,5.325133,33.372614,-0.000671,30629.977872,1
4,4,533,5,low multi,295560.0,74933.164,0.206291,17095.059221,11740.918797,5.372725,34.224633,-0.001093,16240.30626,1


In [27]:
# Hold out 30% of the rows as the final test set, keeping the per-annotation
# proportions the same on both sides of the split.
train_orig, test_orig = train_test_split(
    annot_features_full,
    test_size=0.3,
    random_state=1,
    stratify=annot_features_full['Annotation'],
)

In [28]:
# Split the remaining training rows again: 15% become the validation set used
# during fitting, stratified by annotation label.
train_train, test_validate = train_test_split(
    train_orig,
    test_size=0.15,
    random_state=1,
    stratify=train_orig['Annotation'],
)
train_train.shape

(600, 14)

In [29]:
# Number of training rows per annotation label.
train_train['Annotation'].value_counts()

rand_noise    536
low slug       37
low multi      10
bbc             9
high slug       5
high multi      3
Name: Annotation, dtype: int64

In [30]:
def square_rgb(xr_slice, size=128):
    """Resample a 2-D spectrogram slice to a square RGB image.

    The slice is linearly interpolated onto a ``size`` x ``size`` grid spanning
    its full extent, log-transformed, min-max scaled to [0, 1] and passed
    through the viridis colormap; the alpha channel is dropped.

    Parameters
    ----------
    xr_slice : object with a 2-D ``.values`` array
        Axis 0 is treated as the frequency axis and axis 1 as the time axis
        (the layout the previous ``interp2d``-based version required).
        Values should be strictly positive, because the natural log is taken;
        zeros or negatives produce non-finite values.
    size : int, optional
        Edge length of the output image. Defaults to 128.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size, 3)`` holding RGB values in [0, 1].

    Raises
    ------
    ValueError
        If ``xr_slice.values`` is not 2-D.
    """
    spectrogram = np.asarray(xr_slice.values, dtype=float)
    # Grid sizes come from the slice itself rather than from a notebook-level
    # dataset variable, so the function only depends on its argument.
    n_freqs, n_times = spectrogram.shape

    time_idx = np.arange(n_times)
    freq_idx = np.arange(n_freqs)

    # Degree-1 RectBivariateSpline is the regular-grid replacement for the
    # removed interp2d(kind='linear'); it takes z indexed as [x, y], hence the
    # transposes on the way in and out.
    spline = interpolate.RectBivariateSpline(time_idx, freq_idx, spectrogram.T, kx=1, ky=1)

    # Stop at the last valid index (n - 1) so no sample lies outside the grid.
    time_new = np.linspace(0, n_times - 1, num=size)
    freq_new = np.linspace(0, n_freqs - 1, num=size)
    resampled = spline(time_new, freq_new).T

    log_power = np.log(resampled)

    # Min-max scaling keeps every value inside [0, 1]; centring on the mean
    # produced negative inputs that the colormap clipped to its lowest colour.
    lowest = log_power.min()
    span = log_power.max() - lowest
    if span > 0:
        scaled = (log_power - lowest) / span
    else:
        # Constant (or non-finite) input: avoid dividing by zero.
        scaled = np.zeros_like(log_power)

    # The colormap returns RGBA; keep only the RGB channels.
    return plt.cm.viridis(scaled)[:, :, :3]

In [31]:
# Build the training image tensor: one RGB image per row of train_train, taken
# from that animal's NetCDF dataset at the row's time stamp.
rgbs = []
for index, row in train_train.iterrows():
    dataset_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/' + str(row['Animal']) + '_xr_Dataset.nc'

    # Open with a context manager so the file handle is released after each
    # row. The dataset stays bound to the notebook-level name `data` because
    # square_rgb may read that name as a global.
    with xr.open_dataset(dataset_path) as data:
        xr_slice = data['__xarray_dataarray_variable__'].sel(slices=row['time_stamp'])
        # Convert while the file is still open so the values can be read.
        rgb = square_rgb(xr_slice)

    rgbs.append(rgb)

X = np.stack(rgbs)
print(X.shape)

(600, 128, 128, 3)


In [33]:
# Training target: the 'class' column of the training rows.
y = train_train.loc[:, 'class']
y.shape

(600,)

In [34]:
# Build the validation image tensor: one RGB image per row of test_validate,
# taken from that animal's NetCDF dataset at the row's time stamp.
rgbs_test = []
for index, row in test_validate.iterrows():
    dataset_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/' + str(row['Animal']) + '_xr_Dataset.nc'

    # Open with a context manager so the file handle is released after each
    # row. The dataset stays bound to the notebook-level name `data` because
    # square_rgb may read that name as a global.
    with xr.open_dataset(dataset_path) as data:
        xr_slice = data['__xarray_dataarray_variable__'].sel(slices=row['time_stamp'])
        # Convert while the file is still open so the values can be read.
        rgb = square_rgb(xr_slice)

    rgbs_test.append(rgb)

X_test = np.stack(rgbs_test)
print(X_test.shape)

(106, 128, 128, 3)


In [35]:
# Validation target: the 'class' column of the validation rows.
y_test = test_validate.loc[:, 'class']
y_test.shape

(106,)

In [11]:
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.preprocessing import image
from keras.applications.mobilenet import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.optimizers import Adam

from keras.layers import Dropout, Activation
from keras.layers.normalization import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard

In [12]:
# MobileNet convolutional base for 128x128 RGB inputs; include_top=False leaves
# out the network's original classification layers.
base_model = MobileNet(
    input_shape=(128, 128, 3),
    include_top=False,
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.6/mobilenet_1_0_128_tf_no_top.h5


In [13]:
# Classification head stacked on the MobileNet features: global average
# pooling, three ReLU dense layers (1024, 1024, 512 units), then a single
# sigmoid unit that outputs the binary class probability.
x = GlobalAveragePooling2D()(base_model.output)
for hidden_units in (1024, 1024, 512):
    x = Dense(hidden_units, activation='relu')(x)
preds = Dense(1, activation='sigmoid')(x)

In [14]:
# Full network: MobileNet input through to the sigmoid prediction.
model = Model(
    inputs=base_model.input,
    outputs=preds,
)

In [15]:
# Only the last five layers of the model are trained; every earlier layer is frozen.
first_trainable = len(model.layers) - 5
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable

In [16]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [36]:
def scale_for_mobilenet(image):
    """Rescale a [0, 1] RGB image so MobileNet's preprocess_input yields [-1, 1].

    preprocess_input assumes 0-255 pixel values; the images built in this
    notebook come from a matplotlib colormap, which returns floats in [0, 1].
    Without the rescale every input lands in a narrow band just above -1.
    """
    return preprocess_input(image * 255.0)


# The featurewise_center / featurewise_std_normalization flags are left out:
# they only take effect after ImageDataGenerator.fit() has been called, which
# this notebook never does, so they were ignored (with a warning).
train_datagen = ImageDataGenerator(
    width_shift_range=0.2,
    preprocessing_function=scale_for_mobilenet,
)

train_generator = train_datagen.flow(X, y, batch_size=32, shuffle=True)

In [37]:
# Both callbacks watch validation accuracy: training stops after 6 epochs
# without improvement, and the learning rate is cut to a tenth after 3.
early_stopping = EarlyStopping(monitor='val_acc', patience=6, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=3, verbose=1)
callbacks_list = [early_stopping, reduce_lr]

In [40]:
step_size_train = train_generator.n // train_generator.batch_size

# Training batches pass through train_datagen's normalisation, so the
# validation images must receive the same treatment or the two sets are on
# different input scales. standardize() works in place, hence the astype()
# copy that leaves X_test untouched.
X_validate = train_datagen.standardize(X_test.astype('float32'))

model.fit_generator(
    generator=train_generator,
    steps_per_epoch=step_size_train,
    epochs=20,
    validation_data=(X_validate, y_test),  # Keras documents a tuple here
    callbacks=callbacks_list,
    # Errors on class 1 count ten times as much as errors on class 0.
    class_weight={0: 1, 1: 10},
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20

Epoch 00004: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 5/20
Epoch 6/20
Epoch 7/20

Epoch 00007: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 00007: early stopping


<keras.callbacks.History at 0x1b438a165f8>

In [None]:
from imblearn.keras import balanced_batch_generator
from imblearn.under_sampling import NearMiss

# Train on class-balanced mini-batches produced by under-sampling with NearMiss.
# NOTE(review): this cell has no execution count, so it appears never to have
# been run. X is a 4-D image tensor here; confirm the sampler accepts that
# shape before relying on this cell.
training_generator, steps_per_epoch = balanced_batch_generator(
    X,
    y,
    sampler=NearMiss(),
    batch_size=10,
    random_state=42,
)

callback_history = model.fit_generator(
    generator=training_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    verbose=0,
)