In [1]:
import numpy as np
import pandas as pd
from os.path import exists, join
import datetime
from scipy import signal
from scipy import stats
from scipy import interpolate
import xarray as xr
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve

import keras
from sklearn.utils import resample

#visualizing results
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [2]:
# Let wide feature tables be displayed without truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [3]:
# Load the per-annotation spectral feature table.
features_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/annot_features_full.csv'
annot_features_data = pd.read_csv(features_path)
annot_features_full = pd.DataFrame(data=annot_features_data)

# Sanity checks: table size, and index labels of rows with a missing spec_pur.
print(annot_features_full.shape)
missing_spec_pur = annot_features_full['spec_pur'].isna()
print(annot_features_full.index[missing_spec_pur.values])

annot_features_full.head()

(959, 13)
Int64Index([], dtype='int64')


Unnamed: 0.1,Unnamed: 0,animal_number,session,time_stamp,Annotation,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll
0,0,527,cagepair,18922.5,high slug,44921.6,0.335974,40617.050954,117729.192,6.831866,48.160918,0.000787,38586.198407
1,1,527,cagepair,24750.0,low slug,53212.637,0.295599,27042.548508,3827.200195,3.556462,20.402396,-0.000227,25690.421083
2,2,527,cagepair,56002.5,bbc,94253.766,0.189555,30090.095178,68103.292456,3.982554,18.393372,-0.002314,28585.590419
3,3,527,cagepair,174352.5,bbc,51008.836,0.306177,34324.170621,52563.703613,3.336529,10.448164,-0.00125,32607.96209
4,4,527,cagepair,342877.5,bbc,164846.11,0.144833,33090.921412,22861.514782,1.852232,3.185318,-0.001784,31436.375341


In [4]:
# Binary target: every annotated call type becomes 1, random noise becomes 0.
class_by_annotation = {
    r'low slug': 1,
    'high slug': 1,
    'low multi': 1,
    'high multi': 1,
    'bbc': 1,
    'rand_noise': 0,
}
annot_labels_up = annot_features_full['Annotation'].replace(regex=class_by_annotation)
annot_features_full['class'] = annot_labels_up
annot_features_full.head()

Unnamed: 0.1,Unnamed: 0,animal_number,session,time_stamp,Annotation,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll,class
0,0,527,cagepair,18922.5,high slug,44921.6,0.335974,40617.050954,117729.192,6.831866,48.160918,0.000787,38586.198407,1
1,1,527,cagepair,24750.0,low slug,53212.637,0.295599,27042.548508,3827.200195,3.556462,20.402396,-0.000227,25690.421083,1
2,2,527,cagepair,56002.5,bbc,94253.766,0.189555,30090.095178,68103.292456,3.982554,18.393372,-0.002314,28585.590419,1
3,3,527,cagepair,174352.5,bbc,51008.836,0.306177,34324.170621,52563.703613,3.336529,10.448164,-0.00125,32607.96209,1
4,4,527,cagepair,342877.5,bbc,164846.11,0.144833,33090.921412,22861.514782,1.852232,3.185318,-0.001784,31436.375341,1


In [5]:
# Hold out 30% of the rows as the final test set, keeping the proportion of
# each annotation type the same in both parts.
train_orig, test_orig = train_test_split(
    annot_features_full,
    test_size=0.3,
    random_state=1,
    stratify=annot_features_full['Annotation'],
)

In [6]:
# Split the remaining rows again: 85% for fitting, 15% for validation during
# training (stratified by annotation type).
train_train, test_validate = train_test_split(
    train_orig,
    test_size=0.15,
    random_state=1,
    stratify=train_orig['Annotation'],
)
train_train.shape

(570, 14)

In [7]:
# Balance the two classes: noise rows are the majority, so the annotated calls
# are resampled with replacement until both groups have the same size.
is_noise = train_train['Annotation'] == 'rand_noise'
train_orig_maj = train_train[is_noise]
train_orig_min = train_train[~is_noise]

train_orig_min_upsamples = resample(
    train_orig_min,
    replace=True,
    n_samples=len(train_orig_maj),
    random_state=123,
)

train_orig_upsamples = pd.concat([train_orig_min_upsamples, train_orig_maj])

print(train_orig_upsamples.Annotation.value_counts())

train_orig_upsamples.head()

rand_noise    476
low slug      170
bbc           125
high slug      88
low multi      80
high multi     13
Name: Annotation, dtype: int64


Unnamed: 0.1,Unnamed: 0,animal_number,session,time_stamp,Annotation,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll,class
124,124,556,cagepair,41512.5,bbc,283922.6,0.105281,35128.433948,27613.08297,1.746885,2.369667,-0.001916,33372.01225,1
49,49,535,CPApair,201847.5,low slug,114278.914,0.151498,16245.921214,18541.40053,5.060017,27.84086,-0.00124,15433.625153,1
131,131,556,cagepair,169807.5,high slug,47409.77,0.329336,30112.858088,13748.241415,6.074303,42.912236,-0.000247,28607.215183,1
137,137,556,cagepair,459967.5,high slug,88313.625,0.185218,34136.696251,175651.225991,6.314609,40.124449,-0.001435,32429.861439,1
147,147,557,cagepair,79177.5,bbc,121944.5,0.143545,31621.628971,96620.582439,3.46339,12.10088,-0.002297,30040.547522,1


In [8]:
def square_rgb(xr_slice, size=128):
    """Turn one 2-D slice into a square RGB image suitable as CNN input.

    The slice is resampled onto a ``size`` x ``size`` grid with bilinear
    interpolation over its own row/column indices, log-transformed, min-max
    scaled to [0, 1] and colour-mapped with viridis (alpha channel dropped).

    Parameters
    ----------
    xr_slice : object with a 2-D ``.values`` array
        Strictly positive values are expected because of the log transform;
        non-positive samples are clamped to the smallest positive float.
        NOTE(review): presumably rows are frequency bins and columns are time
        bins, as in the datasets opened by this notebook - confirm.
    size : int, default 128
        Edge length of the returned image.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size, 3)`` with float RGB values in [0, 1].

    Raises
    ------
    ValueError
        If ``xr_slice.values`` is not 2-D or has fewer than two samples along
        either axis (nothing to interpolate between).
    """
    spec = np.asarray(xr_slice.values, dtype=float)
    if spec.ndim != 2 or min(spec.shape) < 2:
        raise ValueError(
            'square_rgb expects a 2-D slice with at least 2 samples per axis, '
            'got shape %r' % (spec.shape,)
        )

    # The grid is taken from the slice itself rather than from a notebook-level
    # dataset variable, so the result depends only on the argument.
    n_rows, n_cols = spec.shape
    row_idx = np.arange(n_rows)
    col_idx = np.arange(n_cols)

    # kx=ky=1 gives bilinear interpolation (what interp2d did by default);
    # interp2d itself is deprecated and absent from recent SciPy releases.
    spline = interpolate.RectBivariateSpline(row_idx, col_idx, spec, kx=1, ky=1)

    # Stop at the last valid index (n - 1) so no sample lies outside the grid.
    new_rows = np.linspace(0, n_rows - 1, num=size)
    new_cols = np.linspace(0, n_cols - 1, num=size)
    resampled = spline(new_rows, new_cols)

    # Clamp before the log so an exact zero cannot produce -inf / NaN pixels.
    log_power = np.log(np.maximum(resampled, np.finfo(float).tiny))

    # The colormap expects input in [0, 1]: min-max scaling uses the full
    # colour range, whereas mean-centring sends about half the pixels below 0
    # where they are all clipped to the same colour.
    lowest = log_power.min()
    span = log_power.max() - lowest
    if span > 0:
        scaled = (log_power - lowest) / span
    else:
        scaled = np.zeros_like(log_power)

    # viridis returns RGBA; keep only the RGB channels.
    return plt.cm.viridis(scaled)[:, :, :3]

In [9]:
# Render every (upsampled) training row as an RGB image, in the row order of
# train_orig_upsamples so that X stays aligned with the labels taken from it.
rgbs = []
for index, row in train_orig_upsamples.iterrows():
    dataset_path = ('C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/'
                    + str(row['animal_number']) + '_xr_Dataset.nc')

    # The dataset is still bound to `data`; the context manager closes the file
    # once the slice has been rendered instead of leaving one handle open per row.
    with xr.open_dataset(dataset_path) as data:
        xr_slice = data['__xarray_dataarray_variable__'].sel(slices=row['time_stamp'])
        rgbs.append(square_rgb(xr_slice))

X = np.stack(rgbs)
print(X.shape)

(952, 128, 128, 3)


In [10]:
# Training labels, in the same row order as the images in X.
y = train_orig_upsamples.loc[:, 'class']
y.shape

(952,)

In [11]:
# Render every validation row as an RGB image, in the row order of
# test_validate so that X_test stays aligned with the labels taken from it.
rgbs_test = []
for index, row in test_validate.iterrows():
    dataset_path = ('C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/'
                    + str(row['animal_number']) + '_xr_Dataset.nc')

    # The dataset is still bound to `data`; the context manager closes the file
    # once the slice has been rendered instead of leaving one handle open per row.
    with xr.open_dataset(dataset_path) as data:
        xr_slice = data['__xarray_dataarray_variable__'].sel(slices=row['time_stamp'])
        rgbs_test.append(square_rgb(xr_slice))

X_test = np.stack(rgbs_test)
print(X_test.shape)

(101, 128, 128, 3)


In [12]:
# Validation labels, in the same row order as the images in X_test.
y_test = test_validate.loc[:, 'class']
y_test.shape

(101,)

In [13]:
from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, Callback
from keras.layers import Dense,GlobalAveragePooling2D
from keras.layers import Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

In [14]:
# MobileNet backbone for 128x128 RGB input; include_top=False leaves out the
# network's original classification layer.
base_model = MobileNet(
    input_shape=(128, 128, 3),
    include_top=False,
)

In [15]:
# Classification head: average-pool the backbone's feature maps into one vector
# per image and feed it to a single sigmoid unit (binary output).
x = base_model.output
x = GlobalAveragePooling2D()(x)
preds = Dense(1, activation='sigmoid', name='features')(x)

In [16]:
# Full network: backbone input -> sigmoid prediction.
model = Model(
    inputs=base_model.input,
    outputs=preds,
)

In [17]:
# Fine-tune only the last five layers; every layer before them stays frozen.
first_trainable = len(model.layers) - 5
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable

In [18]:
from keras import backend as K

def f1(y_true, y_pred):
    """Batch-wise F1 score, usable as a Keras metric.

    Predictions and labels are clipped to [0, 1] and rounded, so a prediction
    counts as positive when it rounds to 1. Precision and recall are computed
    on the current batch only and combined into their harmonic mean;
    ``K.epsilon()`` is added to each denominator to avoid division by zero.
    """
    eps = K.epsilon()

    def _positives(tensor):
        """Count the entries that round to 1 after clipping to [0, 1]."""
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _positives(y_true * y_pred)

    # recall: share of the actual positives that were found
    recall_value = hits / (_positives(y_true) + eps)
    # precision: share of the predicted positives that are correct
    precision_value = hits / (_positives(y_pred) + eps)

    harmonic = (precision_value * recall_value) / (precision_value + recall_value + eps)
    return 2 * harmonic

In [19]:
# Binary cross-entropy objective, tracked with the batch-wise F1 metric.
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=[f1],
)

In [20]:
# X already holds float RGB values in [0, 1] (viridis output) and the validation
# arrays are fed to the model on that same scale, so no extra pixel scaling is
# applied here: mobilenet's preprocess_input assumes 0-255 input and would
# squash these images into roughly [-1, -0.99]. The featurewise_* options were
# dropped as well - they only take effect after train_datagen.fit(X), which was
# never called. Only the horizontal-shift augmentation remains.
# If ImageNet-style [-1, 1] scaling is wanted, apply the same mapping to both
# the training and the validation images.
train_datagen = ImageDataGenerator(width_shift_range=0.2)

train_generator = train_datagen.flow(X, y, batch_size=32, shuffle=True)

In [21]:
# Validation images are passed through unchanged (no augmentation).
test_datagen = ImageDataGenerator()

# shuffle=False keeps the batches in the row order of X_test / y_test, so that
# predictions made from this generator can be compared with y_test row by row.
validation_generator = test_datagen.flow(X_test, y_test, batch_size=32, shuffle=False)

In [23]:
log_dir = "C:/Users/Schindler/Documents/ProgrammingFun/USV_python/TF_logs/run_e"
metadata_path = join(log_dir, 'metadata.tsv')

# Label file for the TensorBoard embedding projector: one line per validation
# image, in the same order as the rows of X_test passed as embeddings_data.
with open(metadata_path, 'w') as metadata_file:
    metadata_file.write('Index\tLabel\n')
    for index, label in enumerate(y_test):
        metadata_file.write('%d\t%d\n' % (index, label))

tensorboard = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    batch_size=32,
    write_graph=True,
    write_grads=False,
    write_images=True,
    embeddings_freq=1,
    embeddings_layer_names=['features'],
    embeddings_metadata=metadata_path,
    embeddings_data=X_test,
    update_freq='epoch',
)

# Monitor val_loss: the model is compiled with the f1 metric only, so 'val_acc'
# is never logged and callbacks watching it would never stop training or lower
# the learning rate. val_loss is always reported when validation data is given.
callbacks_list = [
    EarlyStopping(monitor='val_loss', patience=6, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1),
    tensorboard,
]

In [24]:
# Number of whole batches per epoch (a trailing partial batch is not counted).
step_size_train = train_generator.n // train_generator.batch_size

# Train on the augmented generator; validation uses the in-memory arrays.
model.fit_generator(
    generator=train_generator,
    steps_per_epoch=step_size_train,
    epochs=9,
    validation_data=[X_test, y_test],
    callbacks=callbacks_list,
)

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9

KeyboardInterrupt: 

In [None]:
#command for starting tensorboard from a terminal (point --logdir at the log_dir passed to the TensorBoard callback)

#python -m tensorboard.main --logdir="C:/Users/Schindler/Documents/ProgrammingFun/USV_python/TF_logs/run_e"

In [None]:
# Predictions for the confusion matrix and classification report.
# Predict on the validation array itself so that row i of Y_pred belongs to
# row i of y_test (a shuffled generator would return them in a different order).
Y_pred = model.predict(X_test, batch_size=32)

# The model has a single sigmoid output, so the class is obtained by
# thresholding the probability; argmax over one column would always return 0.
y_pred = (Y_pred.ravel() > 0.5).astype(int)

In [None]:
# Predictions and labels must have the same number of rows.
print(Y_pred.shape, y_test.shape, sep='\n')

In [None]:
# Precision-recall curve on the validation set. The scores are flattened
# because the model returns one column of probabilities.
precision, recall, thresholds = precision_recall_curve(y_test, np.ravel(Y_pred))

# The curve carries a label so that the legend below has an entry to show.
plt.plot(recall, precision, label='validation set')

plt.xlim([0, 1])
plt.ylim([0, 1.05])
plt.legend(loc="lower right")
plt.xlabel('Recall (Sensitivity)', fontsize = 15)
plt.ylabel('Precision', fontsize = 15);

In [None]:
# Compare the thresholded predictions with the true validation labels.
# (The iterator returned by ImageDataGenerator.flow has no `.classes`
# attribute, so y_test is used as the ground truth.)
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))
print('Classification Report')
# class 0 = 'rand_noise'; class 1 groups all other annotation types
target_names = ['rand_noise', 'call']
print(classification_report(y_test, y_pred, target_names=target_names))