# Gunshot Detection Using ML
## MSDS17001 MSDS17011 MSEE17001 PhDEE17004
### Fawad Arshad, Jawad Arshad, Zeeshan Haider, Hazoor Ahmed

#### IMPORTING LIBRARIES

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline
import librosa
from tqdm import tqdm_notebook
import os
from sklearn.decomposition import PCA
from sklearn.externals import joblib
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Dropout,Conv1D,MaxPool1D,Flatten,Conv2D,BatchNormalization,Activation
from keras.optimizers import Adam
from keras import models
from keras.callbacks import ModelCheckpoint
from sklearn import svm
from sklearn.ensemble import IsolationForest
from keras.models import Model, load_model
from keras.layers import Input
from keras import regularizers

In [2]:
# Clip index table. Later cells read its `fname` column (audio file names
# under ./BigDataset/) and its `label` column.
ds = pd.read_csv(filepath_or_buffer='gunshot.csv')

### Creating the feature dataset from Kaggle and other sources and storing it on disk

In [7]:
# Build the raw feature matrix for the first 40000 clips listed in `ds` and
# save it to 'dataset3secCompleteFeatures.npy'.
#
# Every clip is loaded at 22050 Hz and forced to exactly `size` samples
# (truncated or zero-padded), so every feature vector has the same length.
size = 3 * 22050  # 3 seconds at the 22050 Hz load rate
dataset = []
# The loop index is called `row`: the original reused `i` for one of the
# feature arrays, which silently rebound the index inside the loop body.
for row in tqdm_notebook(range(40000)):
    loadedseries = librosa.core.load(
        './BigDataset/{}'.format(ds['fname'].iloc[row]),
        sr=22050,
        res_type="kaiser_fast",
    )[0]
    loadedsize = loadedseries.shape[0]
    if loadedsize > size:
        # Too long: keep only the first `size` samples.
        timeseries = loadedseries[:size]
    else:
        # Too short (or exact): right-pad with zeros up to `size` samples.
        timeseries = np.zeros(size)
        timeseries[:loadedsize] = loadedseries

    # The 20-band mel spectrogram feeds two features (itself and its delta),
    # so it is computed once instead of twice.
    mel = librosa.feature.melspectrogram(timeseries, n_mels=20)

    # Concatenation order is part of the on-disk format; do not reorder.
    features = np.concatenate((
        librosa.feature.chroma_cens(timeseries).ravel(),
        librosa.feature.chroma_cqt(timeseries).ravel(),
        librosa.feature.chroma_stft(timeseries, S=np.abs(librosa.stft(timeseries))).ravel(),
        librosa.feature.delta(mel).ravel(),
        mel.ravel(),
        librosa.feature.mfcc(timeseries, n_mfcc=40).ravel(),
        librosa.feature.poly_features(timeseries).ravel(),
        librosa.feature.rmse(timeseries).ravel(),
        librosa.feature.spectral_bandwidth(timeseries).ravel(),
        librosa.feature.spectral_centroid(timeseries).ravel(),
        librosa.feature.spectral_contrast(timeseries).ravel(),
        librosa.feature.spectral_flatness(timeseries).ravel(),
        librosa.feature.spectral_rolloff(timeseries).ravel(),
        librosa.feature.tempogram(timeseries, win_length=10).ravel(),
        librosa.feature.tonnetz(timeseries).ravel(),
        librosa.feature.zero_crossing_rate(timeseries).ravel(),
    ))
    dataset.append(features)


# One row per clip; np.save appends the '.npy' extension.
dataset = np.array(dataset)
np.save('dataset3secCompleteFeatures', dataset)
def encode(x):
    """Map a label to a binary target: 0 for 'notAGunshot', 1 for anything else."""
    return 0 if x == 'notAGunshot' else 1

# Binary targets for the first 40000 rows of `ds` (the same rows the feature
# loop processes), saved to 'labels.npy'.
labels = ds['label'].apply(encode)[:40000]
# `Series.as_matrix()` was removed in pandas 1.0; `.values` yields the same
# NumPy array and exists in old and new pandas alike.
labels = labels.values
np.save('labels', labels)

### Creating the feature dataset from the MIVIA data and storing it on disk

In [None]:
# Build the feature matrix for every file in ./train_gunshots/ and save it,
# together with an all-ones label column, to disk.
#
# Every clip is loaded at 22050 Hz and forced to exactly `size` samples
# (truncated or zero-padded), so every feature vector has the same length.
size = 3 * 22050  # 3 seconds at the 22050 Hz load rate
dataset = []
files = os.listdir('./train_gunshots/')
# Iterate over the file names directly; the original indexed with `i` and
# then rebound `i` to a feature array inside the loop body.
for fname in tqdm_notebook(files):
    # NOTE(review): no `res_type` is passed here, unlike the ./BigDataset/
    # loader earlier in the notebook - confirm the default resampler is intended.
    loadedseries = librosa.core.load(os.path.join('./train_gunshots/', fname), sr=22050)[0]
    loadedsize = loadedseries.shape[0]
    if loadedsize > size:
        # Too long: keep only the first `size` samples.
        timeseries = loadedseries[:size]
    else:
        # Too short (or exact): right-pad with zeros up to `size` samples.
        timeseries = np.zeros(size)
        timeseries[:loadedsize] = loadedseries

    # The 20-band mel spectrogram feeds two features (itself and its delta),
    # so it is computed once instead of twice.
    mel = librosa.feature.melspectrogram(timeseries, n_mels=20)

    # Concatenation order is part of the on-disk format; do not reorder.
    features = np.concatenate((
        librosa.feature.chroma_cens(timeseries).ravel(),
        librosa.feature.chroma_cqt(timeseries).ravel(),
        librosa.feature.chroma_stft(timeseries, S=np.abs(librosa.stft(timeseries))).ravel(),
        librosa.feature.delta(mel).ravel(),
        mel.ravel(),
        librosa.feature.mfcc(timeseries, n_mfcc=40).ravel(),
        librosa.feature.poly_features(timeseries).ravel(),
        librosa.feature.rmse(timeseries).ravel(),
        librosa.feature.spectral_bandwidth(timeseries).ravel(),
        librosa.feature.spectral_centroid(timeseries).ravel(),
        librosa.feature.spectral_contrast(timeseries).ravel(),
        librosa.feature.spectral_flatness(timeseries).ravel(),
        librosa.feature.spectral_rolloff(timeseries).ravel(),
        librosa.feature.tempogram(timeseries, win_length=10).ravel(),
        librosa.feature.tonnetz(timeseries).ravel(),
        librosa.feature.zero_crossing_rate(timeseries).ravel(),
    ))
    dataset.append(features)


# One row per file; np.save appends the '.npy' extension.
dataset = np.array(dataset)
np.save('datasetNewGUNSHOTS', dataset)
# Every file in this directory is labelled 1; stored as an (n_files, 1) column.
labels = np.ones((dataset.shape[0], 1))
np.save('datasetNewGUNSHOTSlabels', labels)

#### Combining DataSet from all sources

In [None]:
# Merge the two feature sets (and their labels) produced by the extraction
# cells into a single `dataset` / `labels` pair.

# Extra gunshot-only recordings. Their labels were saved as
# 'datasetNewGUNSHOTSlabels'; the original loaded 'labels.npy' here, which is
# the label file of the 40000-clip set, so features and labels did not match.
datasetnew = np.load('datasetNewGUNSHOTS.npy')
labelsnew = np.load('datasetNewGUNSHOTSlabels.npy').ravel()

# 40000-clip set. Its labels are written by this notebook as 'labels.npy';
# 'dataset3secFullFeatureslabels.npy' is never produced by the visible cells.
dataset = np.load('dataset3secCompleteFeatures.npy')
labels = np.load('labels.npy').ravel()

dataset = np.concatenate((dataset, datasetnew))
labels = np.concatenate((labels, labelsnew))

# Fail loudly if a wrong label file slipped in again.
if dataset.shape[0] != labels.shape[0]:
    raise ValueError(
        'feature/label row mismatch: {} feature rows vs {} labels'.format(
            dataset.shape[0], labels.shape[0]))

# Drop the now-merged pieces to release memory.
del datasetnew
del labelsnew

### Apply PCA

In [None]:
# Project the feature matrix onto 50 principal components, and persist both
# the fitted PCA object and the reduced features.
# Labels become an (n, 1) column for the split cell that follows.
labels = labels.reshape(-1, 1)

# copy=False lets PCA overwrite the input array; fit_transform is used (not
# fit followed by transform) as required in that mode.
pca = PCA(n_components=50, copy=False)
dataset = pca.fit_transform(dataset)

joblib.dump(pca, 'PCAOBJECTNEW.pkl')
np.save('PCAFeaturesNEW', dataset)

### train val test split

In [None]:
# Split the PCA features into train / validation / test sets with fixed
# per-class sizes, shuffle each split, and cache the six arrays on disk.
#
# Per-class row ranges (in original row order within each class):
#   class 1: train [0, 15000)   val [15000, 15700)   test [15700, 15837)
#   class 0: train [0, 22500)   val [22500, 31000)   test [31000, 31887)
#
# Changes from the earlier version of this cell:
#   * The class boundary is derived from the labels instead of the hard-coded
#     row index 31887, so a slice can never reach into the other class.
#   * Features and labels stay in regular 2-D arrays. The earlier
#     np.array(list of (features, label) tuples) built a ragged object array,
#     which NumPy >= 1.24 rejects with a ValueError.
#   * Rows are gathered with array indexing instead of per-row Python loops.


def _take_and_shuffle(x_pos, y_pos, x_neg, y_neg, pos_span, neg_span):
    """Stack class-1 rows `pos_span` on class-0 rows `neg_span`, shuffled together."""
    x = np.concatenate((x_pos[pos_span], x_neg[neg_span]))
    y = np.concatenate((y_pos[pos_span], y_neg[neg_span]))
    # One permutation applied to both arrays keeps rows and targets aligned.
    perm = np.random.permutation(x.shape[0])
    return x[perm], y[perm]


_features = np.asarray(dataset)
_targets = np.asarray(labels).reshape(-1, 1)  # (n, 1) column, as saved before

# Boolean masks keep the original row order inside each class, which is the
# order the previous stable sort-by-label produced.
_is_neg = _targets.ravel() == 0
_x_neg, _y_neg = _features[_is_neg], _targets[_is_neg]
_x_pos, _y_pos = _features[~_is_neg], _targets[~_is_neg]

x_train, y_train = _take_and_shuffle(
    _x_pos, _y_pos, _x_neg, _y_neg,
    slice(0, 15000), slice(0, 22500))

x_val, y_val = _take_and_shuffle(
    _x_pos, _y_pos, _x_neg, _y_neg,
    slice(15000, 15000 + 700), slice(22500, 22500 + 8500))

x_test, y_test = _take_and_shuffle(
    _x_pos, _y_pos, _x_neg, _y_neg,
    slice(15000 + 700, 15000 + 700 + 137),
    slice(22500 + 8500, 22500 + 8500 + 887))

# Cache the splits; np.save appends the '.npy' extension.
np.save('x_train', x_train)
np.save('y_train', y_train)
np.save('x_val', x_val)
np.save('y_val', y_val)
np.save('x_test', x_test)
np.save('y_test', y_test)

### TRAINING MODELS

##### NEURAL NETWORK

In [None]:
# Fully connected binary classifier on the 50-dimensional inputs:
# 50 -> 30 -> 15 -> 5 -> 1, each hidden Dense followed by batch normalisation
# and a sigmoid activation; dropout (0.4) after the 15- and 5-unit stages.
model = Sequential([
    Dense(30, input_dim=50, kernel_initializer='he_normal'),
    BatchNormalization(),
    Activation('sigmoid'),

    Dense(15, kernel_initializer='he_normal'),
    BatchNormalization(),
    Activation('sigmoid'),
    Dropout(0.4),

    Dense(5, kernel_initializer='he_normal'),
    BatchNormalization(),
    Activation('sigmoid'),
    Dropout(0.4),

    # Single sigmoid unit for the binary target.
    Dense(1, kernel_initializer='he_normal', activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train silently (verbose=0); per-epoch metrics are kept in `history`.
history = model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=50,
    validation_data=(x_val, y_val),
    verbose=0,
)

##### CONVOLUTIONAL NEURAL NETWORK

In [None]:
# 1-D CNN binary classifier over the 50 input features.
#
# The network takes input of shape (50, 1), so a trailing channel axis is
# added. -1 lets NumPy infer the number of rows; the earlier hard-coded
# counts (37500 / 9200 / 1024) broke as soon as the split sizes changed.
x_traincnn = x_train.reshape((-1, 50, 1))
x_valcnn = x_val.reshape((-1, 50, 1))
x_testcnn = x_test.reshape((-1, 50, 1))


model = Sequential()
# Convolutional stage 1: 12 filters of width 5.
model.add(Conv1D(12, 5, input_shape=(50, 1), kernel_initializer='he_normal'))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
model.add(MaxPool1D(pool_size=2))
# Convolutional stage 2: 6 filters of width 5, with dropout before pooling.
model.add(Conv1D(6, 5, kernel_initializer='he_normal'))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
model.add(Dropout(0.4))
model.add(MaxPool1D(pool_size=2))
# Dense head: 10 -> 5 -> 1.
model.add(Flatten())
model.add(Dense(10, kernel_initializer='he_normal'))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
model.add(Dropout(0.4))
model.add(Dense(5, kernel_initializer='he_normal'))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Keep only the weights of the epoch with the lowest validation loss.
call = ModelCheckpoint('CNNVALBESTNEW.h5', monitor='val_loss', save_best_only=True, verbose=0)
history = model.fit(
    x_traincnn, y_train,
    batch_size=512,
    epochs=50,
    validation_data=(x_valcnn, y_val),
    verbose=0,
    callbacks=[call],
)

#### Anomaly Detection Methods

In [None]:
# Reload the cached splits from disk. Targets are flattened to 1-D so they
# can be used as boolean-mask sources below.
x_train = np.load('x_train.npy')
x_val = np.load('x_val.npy')
x_test = np.load('x_test.npy')
y_train = np.load('y_train.npy').ravel()
y_val = np.load('y_val.npy').ravel()
y_test = np.load('y_test.npy').ravel()

# Fold the validation split into the training split (targets first, then
# features, both in train-then-val order so rows stay aligned).
y_train = np.concatenate((y_train, y_val))
x_train = np.concatenate((x_train, x_val))

# Per-class views of the merged training data.
x_trainGunshots = x_train[y_train == 1]
x_trainNotGunshots = x_train[y_train == 0]
x_trainNotGunshots=x_train[y_train==0]

#####  ISOLATION FORESTS

In [None]:
# Isolation forest with default settings, fitted on gunshot samples only,
# then pickled to 'IF.pkl'.
clf = IsolationForest()
clf = clf.fit(x_trainGunshots)  # fit() returns the estimator itself

joblib.dump(clf, 'IF.pkl')

##### One-Class SVM

In [None]:
# One-class SVM fitted on gunshot samples only, then pickled to 'SVMlinear.pkl'.
#
# The kernel was previously the empty string, which is not a valid kernel
# name and makes fitting fail. "linear" matches the output file name.
# `degree` is only used by the 'poly' kernel; it is kept so the call stays
# otherwise unchanged.
clf = svm.OneClassSVM(nu=0.01, kernel="linear", degree=1)
clf.fit(x_trainGunshots)

joblib.dump(clf, 'SVMlinear.pkl')

##### AutoEncoders

In [None]:
# Dense autoencoder trained to reconstruct gunshot samples only.
# Layer widths: input_dim -> encoding_dim -> encoding_dim // 3
#               -> encoding_dim // 3 -> input_dim (linear output).
input_dim = x_trainGunshots.shape[1]
encoding_dim = 100
bottleneck_dim = int(encoding_dim // 3)

input_layer = Input(shape=(input_dim, ))

# Encoder: the first Dense layer carries an L1 activity penalty.
encoder = Dense(
    encoding_dim,
    kernel_initializer='he_normal',
    activity_regularizer=regularizers.l1(10e-3),
)(input_layer)
encoder = Activation('relu')(encoder)
encoder = Dense(bottleneck_dim, kernel_initializer='he_normal')(encoder)
encoder = Activation('relu')(encoder)

# Decoder: one hidden ReLU layer, then a linear layer back to input_dim.
decoder = Dense(bottleneck_dim, kernel_initializer='he_normal')(encoder)
decoder = Activation('relu')(decoder)
decoder = Dense(
    input_dim,
    activation='linear',
    kernel_initializer='he_normal',
)(decoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['accuracy'],
)

# Save the model whenever the training loss improves (no validation data is
# passed to fit, so the monitored quantity is 'loss').
checkpointer = ModelCheckpoint(
    filepath="autoencoderthird.h5",
    verbose=0,
    monitor='loss',
    save_best_only=True,
)

# Input and target are the same array; only the per-epoch metrics dict is kept.
fit_result = autoencoder.fit(
    x_trainGunshots,
    x_trainGunshots,
    epochs=100,
    batch_size=32,
    shuffle=True,
    verbose=0,
    callbacks=[checkpointer],
)
history = fit_result.history