In [None]:
!pip install tensorflow tensorflow-io matplotlib xgboost catboost

In [None]:
# Attach Google Drive to the Colab filesystem so files stored there can be read by path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
#necessary libraries
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio
import numpy as np
import pywt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Dropout
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score,f1_score

#function to load audio file
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a mono waveform resampled to 16 kHz.

    Args:
        filename: path of the WAV file, in any form ``tf.io.read_file`` accepts.

    Returns:
        The decoded single-channel audio with the channel axis squeezed away,
        resampled from the sample rate stored in the file to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for exactly one channel.
    audio, native_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the trailing channel axis so only the sample axis remains.
    mono = tf.squeeze(audio, axis=-1)
    # The decoded rate is cast to int64 before being handed to the resampler.
    native_rate = tf.cast(native_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=native_rate, rate_out=16000)

# Directories holding the gunshot (positive) and non-gunshot (negative) recordings.
POS = '/content/drive/MyDrive/aidataset/gunshot'
NEG = '/content/drive/MyDrive/aidataset/nongunshot'
pos = tf.data.Dataset.list_files(f'{POS}/*.wav')
neg = tf.data.Dataset.list_files(f'{NEG}/*.wav')

# Pair every file path with its class label: 1.0 for gunshot, 0.0 for non-gunshot.
pos_labels = tf.data.Dataset.from_tensor_slices(tf.ones(len(pos)))
neg_labels = tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg)))
positives = tf.data.Dataset.zip((pos, pos_labels))
negatives = tf.data.Dataset.zip((neg, neg_labels))

# Resize the gunshot set to exactly len(negatives) examples:
# `repeat_count` whole passes over it plus a partial pass of `remainder` items.
repeat_count, remainder = divmod(len(negatives), len(positives))
new_positives = positives.repeat(repeat_count).concatenate(positives.take(remainder))

# Balanced dataset: resized gunshot examples followed by all non-gunshot examples.
data = new_positives.concatenate(negatives)

# function provided in tensorflow
def preprocess(file_path, label):
    """Convert one labelled audio file into a fixed-length magnitude spectrogram.

    The waveform is truncated to at most 40000 samples and zero-padded at the
    front up to exactly 40000, so every example yields the same number of STFT
    frames.

    Args:
        file_path: path of the WAV file to load.
        label: class label paired with the file; passed through untouched.

    Returns:
        A ``(spectrogram, label)`` pair, where the spectrogram is the absolute
        value of the STFT with an extra trailing axis appended.
    """
    target_len = 40000
    clip = load_wav_16k_mono(file_path)[:target_len]
    # The padding is placed in front of the audio, not after it.
    silence = tf.zeros([target_len] - tf.shape(clip), dtype=tf.float32)
    padded = tf.concat([silence, clip], 0)
    # Short-time Fourier transform; keep only the magnitude.
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    # Append a channel axis so the result can feed a Conv2D layer.
    return tf.expand_dims(magnitudes, axis=2), label

#preprocess, shuffling n batch for data
data = data.map(preprocess)
# Cache the spectrograms so the audio is decoded only on the first pass.
data = data.cache()
# Dataset.shuffle reshuffles on every iteration by default. train/test/ttest are
# each iterated separately, so without a fixed order they would be carved out of
# different shufflings and could share examples (train/test leakage).
# reshuffle_each_iteration=False keeps one ordering for every pass.
data = data.shuffle(buffer_size=5000, reshuffle_each_iteration=False)
data = data.batch(16)
data = data.prefetch(8)

#spliting dataset to train,validation n test
# Sizes are counted in batches of 16: 130 train, then 30, then 20.
# NOTE(review): these counts assume at least 180 batches exist - confirm against the dataset size.
# NOTE(review): the positives were repeated before this split, so copies of the
# same gunshot file can still land in more than one split.
train = data.take(130)
test = data.skip(130).take(30)
ttest = data.skip(160).take(20)

#CNN model
# The network below is never compiled or fit in the visible code, so the features
# it produces come from its randomly initialised weights. Seeding the Python,
# NumPy and TensorFlow generators makes those weights - and therefore the
# extracted features - identical from run to run.
tf.keras.utils.set_random_seed(42)

model = Sequential()
# Input is one spectrogram: (frames, frequency bins, channels) = (1241, 257, 1).
model.add(Conv2D(32, (3,3), activation='relu', input_shape=(1241, 257, 1)))
model.add(Conv2D(32, (3,3), activation='relu'))
model.add(Dropout(0.5))  # Add dropout layer with dropout rate of 0.5
model.add(Flatten())
# 16-unit dense layer: this is the feature vector handed to the classifiers.
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.5))  #Add dropout layer with dropout rate of 0.5

# Create a feature extraction model
# Its output is the output of the last layer added above (the final Dropout).
feature_extractor = Model(inputs=model.input, outputs=model.layers[-1].output)

def extract_features(dataset):
    """Run every batch of `dataset` through `feature_extractor`.

    Args:
        dataset: iterable of ``(spectrogram_batch, label_batch)`` pairs.

    Returns:
        ``(features, labels)`` as NumPy arrays, with all batches concatenated
        along axis 0 in iteration order.

    Raises:
        ValueError: if `dataset` yields no batches (nothing to concatenate).
    """
    batch_features = []
    batch_labels = []
    for spectrogram, label in dataset:
        # Call the model directly instead of predict(): predict() is meant for
        # bulk inference, not for being invoked once per small batch in a loop.
        # training=False keeps the Dropout layers inactive, as predict() does.
        batch_features.append(feature_extractor(spectrogram, training=False).numpy())
        batch_labels.append(label.numpy())
    return np.concatenate(batch_features, axis=0), np.concatenate(batch_labels, axis=0)


# Extract features for training dataset
features, labels = extract_features(train)

# Extract features from test dataset
test_features, test_labels = extract_features(test)

# Candidate classifiers, keyed by the display name used when reporting results.
classifiers = dict([
    ("KNN (k=2)", KNeighborsClassifier(n_neighbors=2)),
    ("KNN (k=3)", KNeighborsClassifier(n_neighbors=3)),
    ("SVM", SVC()),
    ("Random Forests", RandomForestClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("CatBoost", CatBoostClassifier()),
    ("XGBoost", XGBClassifier()),
])

# Multi-layer perceptron: two hidden layers (64 then 32 units), ReLU activations,
# Adam optimiser, mini-batches of 16, fixed seed for repeatable training.
mlp_settings = dict(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    batch_size=16,
    random_state=42,
)
mlp_classifier = MLPClassifier(**mlp_settings)

from sklearn.model_selection import RandomizedSearchCV

# Define the parameter grid for hyperparameter tuning
# Only max_iter is tuned: candidate values 1..20.
param_grid = {'max_iter': range(1, 21)}

# Perform Randomized Search CV
# n_iter=10 samples 10 of the 20 candidates. random_state fixes which ones are
# drawn, so the search (and the reported best parameters) is repeatable.
random_search = RandomizedSearchCV(
    mlp_classifier,
    param_distributions=param_grid,
    n_iter=10,
    random_state=42,
)
random_search.fit(features, labels)

# Get the best parameters
print("Best parameters:", random_search.best_params_)
 #Repeat for Various Values of max iteration
# Train one MLP per max_iter value and record its accuracy on the test features.
# NOTE(review): param_grid covers max_iter 1..20 but this sweep stops at 19 - confirm which is intended.
max_iter_values = range(1, 20)
accuracies_project = []
for k in max_iter_values:
    mlp_classifier = MLPClassifier(
        hidden_layer_sizes=(64, 32),  # two hidden layers
        activation='relu',
        solver='adam',
        batch_size=16,
        max_iter=k,
        random_state=42,  # same seed for every run so only max_iter varies
    )
    mlp_classifier.fit(features, labels)
    accuracy_project = mlp_classifier.score(test_features, test_labels)
    accuracies_project.append(accuracy_project)

# Accuracy as a function of the max_iter value tried.
fig, ax = plt.subplots()
ax.plot(max_iter_values, accuracies_project)
ax.set_xlabel('k')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy vs k for max_iter in MLP Classifier')
plt.show()

# Record the voting classifier's scores next to those of the other classifiers.
# NOTE(review): the *_results dictionaries, confusion_matrices, y_pred_voting and
# y_train_pred_voting are not defined in the visible cells - they must be created
# before this cell runs.
voting_name = 'Voting Classifier'
for store, metric in (
    (accuracy_results, accuracy_score),
    (precision_results, precision_score),
    (recall_results, recall_score),
    (f1_results, f1_score),
    (confusion_matrices, confusion_matrix),
):
    # Every test-set metric is computed from the same (truth, prediction) pair.
    store[voting_name] = metric(test_labels, y_pred_voting)
train_accuracy_results[voting_name] = accuracy_score(labels, y_train_pred_voting)

# Print one report per classifier, in the order they were added to accuracy_results.
for name, result in accuracy_results.items():
    report_lines = [
        f"Classifier: {name}",
        f"Test Accuracy: {result}",
        f"Train Accuracy: {train_accuracy_results[name]}",
        f"Precision: {precision_results[name]}",
        f"Recall: {recall_results[name]}",
        f"F1 Score: {f1_results[name]}",
        f"Confusion Matrix:\n{confusion_matrices[name]}\n",
    ]
    print("\n".join(report_lines))