### Callin Switzer
### Use RNN to process sounds



In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
import os
import csv
import time
from scipy import signal
import itertools as it
import sys
from datetime import datetime
from sklearn.preprocessing import StandardScaler
import scipy.io
import glob
import itertools

# Neural net libs
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
# Report the environment this notebook ran in: that TensorFlow imported,
# whether this TensorFlow build was compiled with CUDA (GPU) support, the
# Python version, and the time of the run.
print("TensorFlow successfully installed.")
if tf.test.is_built_with_cuda():
    print("The installed version of TensorFlow includes GPU support.")

print(sys.version, "\n")
print("last run on " + str(datetime.now()))

In [None]:

def windowsOrMacDirectories():
    """Return the working, data and figure directories for this machine.

    The Dropbox root depends on where the notebook runs: the Windows
    workstation is recognised by its ``COMPUTERNAME`` environment variable,
    a Mac by ``sys.platform``.  Linux is not configured.

    Returns
    -------
    tuple of str
        ``(baseDir, dataDir, figDir)``.  ``baseDir`` is the current working
        directory; ``dataDir`` and ``figDir`` are the ``SonBehData`` and
        ``SonBehFigs`` folders under ``<Dropbox>/SonicationBehavior``.

    Raises
    ------
    EnvironmentError
        If the machine is neither the known Windows workstation nor a Mac.
    """
    # COMPUTERNAME is only defined on Windows.  Using .get() instead of
    # indexing avoids a KeyError elsewhere, which previously made the macOS
    # branch below unreachable.
    if os.environ.get('COMPUTERNAME') == 'SHEALMACLEARN':
        # Raw string: backslash-D is not a valid escape sequence.
        DropboxDirect = r"D:\Dropbox"
    elif sys.platform.startswith('darwin'):
        DropboxDirect = "/Users/cswitzer/Dropbox"
    else:
        raise EnvironmentError('Unknown computer platform')

    baseDir = os.getcwd()
    dataDir = os.path.join(DropboxDirect, 'SonicationBehavior', 'SonBehData')
    figDir = os.path.join(DropboxDirect, 'SonicationBehavior', 'SonBehFigs')
    return baseDir, dataDir, figDir


# Resolve the machine-specific directories once; later cells use dataDir and
# figDir.  Printing dataDir shows which data folder was selected.
baseDir, dataDir, figDir = windowsOrMacDirectories()
print(dataDir)

In [None]:
def readMyFile(filename):
    """Read a space-delimited numeric text file into a transposed DataFrame.

    Every line is parsed by ``csv.reader`` with ``QUOTE_NONNUMERIC``, so each
    unquoted field is converted to ``float``.  The parsed rows are then
    transposed: line *i* of the file becomes column *i* of the result.
    (Original author's note: about 10x faster than reading with pandas.)

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        One column per line of the file and one row per field.
    """
    # The csv module asks for files opened with newline="" so that it handles
    # line endings itself.  With newline="\n", a file using bare carriage
    # returns arrives as one giant line and the reader raises csv.Error.
    with open(filename, newline="") as csvDataFile:
        csvReader = csv.reader(csvDataFile, delimiter=' ',
                               quoting=csv.QUOTE_NONNUMERIC)
        tmpdta = list(csvReader)

    return pd.DataFrame(np.transpose(tmpdta))

In [None]:
# Read the table of pre-classified recordings (one row per recording; the
# loop that follows reads its fileName, buzz1 and buzz2 columns).
# The folder is derived from dataDir rather than a hard-coded "D:\..." literal
# so the cell works on every machine windowsOrMacDirectories() recognises and
# the path string no longer contains invalid backslash escapes.
buzzClassDataDir = os.path.join(dataDir, 'BuzzPartClassification')
buzzClass = pd.read_csv(os.path.join(buzzClassDataDir, 'BuzzClassifications.csv'))
print(buzzClass.shape)
buzzClass.head()

In [None]:
# Build one labelled table per recording listed in buzzClass.  The tables are
# collected in bigList (concatenated in a later cell) and one scaled
# spectrogram per recording is collected in freqSpec.
# NOTE: tmp, f, t, Sxx and ii remain defined after the loop and hold the
# values from the LAST recording processed.
bigList = []
freqSpec = []
for ii in range(buzzClass.shape[0]):
    # Column 1 of tmp is the signal analysed below (it is renamed "acc" later);
    # column 0 is presumably time -- TODO confirm against the data files.
    tmp = readMyFile(buzzClass.fileName[ii])
    
    # Disabled experiment: zero-pad the end of each recording.
#     # pad with 0's
#     tmp = readMyFile(buzzClass.fileName[ii])
#     pad = np.arange(tmp.iloc[-1,0],tmp.iloc[-1,0]+ 0.02 - np.mean(np.diff(tmp.iloc[:,0])),  np.mean(np.diff(tmp.iloc[:,0])))
#     zx = np.repeat(0, len(pad))
#     pdff = pd.DataFrame( data = {"0":pad, "1":zx} )
#     pdff.columns = tmp.columns

#     tmp = pd.concat([tmp, pdff]).reset_index(drop = True)
    
    # Rolling variance of the mean-subtracted signal over a centred
    # 2000-sample window; min_periods=1 lets the edges use partial windows
    # instead of producing NaN.
    tmp["varia"] = pd.Series((tmp.iloc[:,1] - np.mean(tmp.iloc[:,1]))).rolling(int(2000), center = True, min_periods = 1).var().tolist()
    
    # Spectrogram of the signal, with fs=200000 passed as the sampling rate,
    # 1000-sample segments overlapping by 900 samples.
    f, t, Sxx = signal.spectrogram(tmp.iloc[:,1], 200000, noverlap = 900, nperseg = 1000)
    # Keep only the first 50 rows of Sxx (the lowest frequency bins).
    Sxx = Sxx[0:50, :]
    # Min-max scale this recording's spectrogram to the range [0, 1].
    Sxx = Sxx - np.min(Sxx)
    Sxx = Sxx / np.max(Sxx)
    
    
    
    # Stored transposed: one row per time segment, one column per kept bin.
    freqSpec.append(pd.DataFrame(np.transpose(Sxx)))
    
    # Label rows: 1 for index labels buzz1..buzz2 (.loc slicing includes both
    # end points), 0 everywhere else.
    tmp["buzz"] = 0
    tmp.loc[buzzClass.buzz1[ii]:buzzClass.buzz2[ii], "buzz"] = 1
    # Progress indicator: print every 10th recording index.
    if(np.mod(ii, 10)) == 0:
        print(ii)
    
    # Tag every row with its source file so recordings can be grouped later.
    tmp["filename"] = buzzClass.fileName[ii]
    bigList.append(tmp)

In [None]:
# Preview the table built for the last recording processed by the loop.
tmp.head()

In [None]:
# Stack every per-recording table into one frame with a fresh 0..n-1 index.
df = pd.concat(bigList, ignore_index=True)

In [None]:
# Preview the combined table of all recordings.
df.head()

In [None]:
# Give the signal column (integer label 1) a readable name.
# The previous `index=str` argument converted every row label to a string,
# which costs time and memory on a frame this long and is not used by any
# later cell, so only the column rename is kept.
df.rename(columns={1: "acc"}, inplace=True)

In [None]:
# Rescale the signal to [-1, 1] separately within each recording, so every
# file spans the same range regardless of its absolute amplitude.
from sklearn.preprocessing import minmax_scale

acc_by_file = df.groupby('filename').acc
df['acc_scaled'] = acc_by_file.transform(
    lambda x: minmax_scale(x.astype(float), feature_range=(-1, 1))
)

# Alternative tried earlier: standardising each recording instead
# from sklearn.preprocessing import robust_scale
# df['acc_scaled'] = df.groupby('filename').acc.transform(lambda x: scale(x.astype(float)))

df.head()

In [None]:

# Overview figure for the first 200000 rows (one second at the 200000 Hz rate
# passed to the spectrogram earlier): the scaled signal as a black line, with
# the hand-labelled buzz region shaded behind it.

ss = 3  # total height of the shaded band; it spans -ss/2 .. +ss/2

# Columns are selected by name instead of position (previously 3 and 5), so
# the plot keeps working if columns are added or reordered.
y1 = np.array(df["buzz"].iloc[0:200000])*ss - 0.5*ss
xx = np.linspace(0, len(y1) / 200000, num=len(y1))
y2 = np.array(df["acc_scaled"].iloc[0:200000])

fig = plt.figure(figsize=(10, 4))
ax = fig.add_subplot(1, 1, 1)

# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

# Only show ticks on the left and bottom spines
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.plot(xx, y2, linewidth=0.9, c='black')
# Shade from the bottom of the band up to y1: full height where buzz == 1,
# zero height elsewhere.
ax.fill_between(xx, y1, -0.5*ss, alpha=0.5, linewidth=0)
ax.set_xlabel("Time (s)")
ax.set_ylabel("Scaled Acceleration")  # fixed typo ("Accleration")

plt.savefig(os.path.join(figDir, "NNSeq1.png"), dpi=500)

In [None]:
# Labels (Y) and the single input signal (X) as fresh one-column frames.
# Going through .values discards df's index, so both get a default 0..n-1
# index and a single column labelled 0.
Y = pd.DataFrame(df["buzz"].values)
X = pd.DataFrame(df["acc_scaled"].values)
X.shape

In [None]:
# Build sliding windows: add 499 lagged copies of the signal, so each row
# holds the current sample (column 0) plus the 499 samples before it.
# The first 499 rows contain NaN in some lag columns and are dropped later.
# NOTE(review): X is the concatenation of all recordings, so windows near a
# file boundary mix samples from two recordings -- confirm this is acceptable.
# (The assignment used to be written twice per iteration; once is enough.)
for s in np.arange(1, 500):
    X['shift_{}'.format(s)] = X[0].shift(s)


In [None]:
# Ordered 80/20 split: the first 80% of rows are used for training and the
# remaining 20% for testing (no shuffling).
split_index = int(0.8 * X.shape[0])

train_x, test_x = X.iloc[:split_index].copy(), X.iloc[split_index:].copy()
train_y, test_y = Y.iloc[:split_index].copy(), Y.iloc[split_index:].copy()

In [None]:
# Preview the first test labels.
test_y.head()

In [None]:
# Preview the training windows (the first rows contain NaN in the lag columns).
train_x.head()

In [None]:
# Overwrite column 0 of the training frame (the un-shifted signal) with the
# labels.  Features and labels then live in one frame, so the dropna() in the
# next cell removes the same rows from both.  As a consequence the model
# inputs are only the lagged columns shift_1..shift_499.
train_x.iloc[:, 0] = train_y.iloc[:,0]

In [None]:
# Column 0 of train_x holds the labels (written there by the previous cell).
# Drop the incomplete windows once -- dropna() on a frame this wide is
# expensive and was previously run twice -- then split features from labels.
train_clean = train_x.dropna()
X_train = train_clean.drop(0, axis=1)
y_train = train_clean[[0]]

# Test side: features and labels are cleaned independently.
# NOTE(review): they stay aligned only while neither dropna() removes rows;
# the NaNs created by shift() all fall in the first 499 rows, i.e. in the
# training split -- confirm if the split or window length changes.
X_test = test_x.dropna().drop(0, axis=1)
y_test = test_y.dropna()[[0]]

In [None]:
# Plot the first complete training window (row 0 of X_train, i.e. its lag
# columns in order shift_1, shift_2, ...).
plt.plot(np.array(X_train.iloc[0,:]))

In [None]:
# Replace the pandas objects with their underlying NumPy arrays for Keras.
X_train, X_test = X_train.values, X_test.values
y_train, y_test = y_train.values, y_test.values

In [None]:
# Sanity check: feature and label arrays of each split must have matching
# row counts.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Keras building blocks, imported from the public package paths.  The internal
# modules keras.layers.core and keras.layers.recurrent are not available in
# newer Keras releases; keras.layers exposes the same classes in old and new
# versions.  Duplicate imports of EarlyStopping were merged.
from keras.layers import Activation, BatchNormalization, Dense, Dropout, LSTM
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import keras.backend as K

# Stop training when the monitored quantity (Keras' default, the validation
# loss) has not improved by at least 0.01 for 5 consecutive epochs.
earlystop = EarlyStopping(patience=5,
                          verbose=1, mode='auto', min_delta=0.01)

# fully connected network on windows of the signal

In [None]:
# Start from a clean Keras session so re-running this cell does not stack
# new layers on top of state left by a previous run.
K.clear_session()

# Small fully connected binary classifier over one window of lagged samples:
# input (X_train.shape[1] features) -> Dense(1, tanh) -> Dense(56, tanh)
# -> BatchNormalization -> Dense(1, sigmoid).
model = Sequential()
# NOTE(review): the first layer has a single unit, so the whole window is
# squeezed into one value before the 56-unit layer -- confirm this bottleneck
# is intended.
model.add(Dense(1, input_dim=X_train.shape[1], activation='tanh'))
model.add(Dense(56, activation='tanh'))
model.add(BatchNormalization())
# Disabled deeper variant (extra 40-unit layers with dropout):
# model.add(Dropout(0.2))
# model.add(Dense(40, activation='tanh'))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
# model.add(Dense(40, activation='tanh'))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))
# Sigmoid output: probability that the current sample belongs to a buzz.
model.add(Dense(1, activation = "sigmoid"))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
model.summary()


In [None]:
# Train for at most 10 epochs with very large batches (2**16 rows each).
# NOTE(review): the test split doubles as validation data, so the early
# stopping callback is driven by the same rows later used to report accuracy.
history = model.fit(X_train, y_train, epochs=10, batch_size=2**16, 
                    callbacks = [earlystop], validation_data=(X_test, y_test))

In [None]:
# Show which metrics Keras recorded, then draw training loss and training
# accuracy per epoch on a single set of axes.
print(history.history.keys())

for metric, colour in (('loss', "orange"), ('binary_accuracy', "purple")):
    plt.plot(history.history[metric], c=colour)

plt.title('Neural network loss and accuracy')
plt.ylabel('loss')
plt.xlabel('epoch')
# Legend entries follow the order in which the two curves were drawn.
plt.legend(['train_loss', 'train_acc'], loc='center')

plt.show()

In [None]:
# Model output (sigmoid value in [0, 1]) for every test window.
pred = model.predict(X_test)

In [None]:
# Accuracy on the first 50,000 test rows, thresholding the predicted
# probability at 0.5 to obtain 0/1 labels.
from sklearn.metrics import accuracy_score

true_labels = np.array(y_test)[0:50000]
predicted_labels = ((np.array(pred) > 0.5) * 1)[0:50000]
accuracy_score(true_labels, predicted_labels)

In [None]:
# Overlay true labels (pink) and predictions (green), both offset by 1.1 so
# they sit above the signal, for the first 50,000 test rows.
# `pred` was computed from X_test, so the signal and the true labels must come
# from the test split as well; previously they were taken from the training
# split, which plotted predictions against unrelated data.
plt.plot(X_test[0:50000, 0])
plt.plot(np.array(y_test)[0:50000] + 1.1, c='pink')
plt.plot(np.array(pred)[0:50000] + 1.1, c='green')

In [None]:
# Signal only: first 50,000 rows of the first feature column of X_train.
plt.plot(X_train[0:50000, 0])

In [None]:
# Final size of the training feature matrix (rows, features).
X_train.shape