In [3]:
import os
import tempfile

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image

In [6]:
def normalize_audio(y):
    """Peak-normalize an audio signal so its samples lie in [-1, 1].

    Args:
        y: Array-like of audio samples.

    Returns:
        np.ndarray: ``y`` divided by its largest absolute sample value.
        An empty input is returned as an (empty) array, and an all-zero
        (silent) input is returned as zeros instead of NaNs, because there
        is no peak to scale against in either case.
    """
    samples = np.asarray(y)

    # np.max raises ValueError on an empty array; nothing to scale anyway.
    if samples.size == 0:
        return samples

    peak = np.max(np.abs(samples))
    if peak == 0:
        # Silent clip: dividing by the zero peak would fill the output with
        # NaNs. Dividing by 1 keeps the zeros and the usual result dtype.
        peak = 1
    return samples / peak

def preprocess_audio(input_audio_path, target_sr=44100):
    """Convert an audio file into a spectrogram image batch for the model.

    The audio is loaded at its native sampling rate, resampled to
    ``target_sr`` when the rates differ, peak-normalized, turned into a
    dB-scaled mel spectrogram, rendered with matplotlib (axes hidden) and
    read back as a 128x128 image.

    Args:
        input_audio_path: Path of the audio file to load.
        target_sr: Sampling rate in Hz the audio is resampled to before the
            spectrogram is computed.

    Returns:
        np.ndarray: The rendered spectrogram with a leading batch axis and
        pixel values scaled to [0, 1]. With Keras' default RGB colour mode
        the shape is (1, 128, 128, 3).
    """
    # Load at the file's own rate and resample only when it differs.
    y, sr = librosa.load(input_audio_path, sr=None)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    # Normalize the audio volume
    y_normalized = normalize_audio(y)

    # NOTE(review): melspectrogram returns a *power* spectrogram by default,
    # for which librosa documents power_to_db rather than amplitude_to_db.
    # The call is left unchanged because switching would alter the rendered
    # image; confirm against the preprocessing used to train the model
    # before changing it.
    spectrogram = librosa.feature.melspectrogram(y=y_normalized, sr=target_sr)
    spectrogram_db = librosa.amplitude_to_db(spectrogram, ref=np.max)

    # Render into a private temporary directory so concurrent runs cannot
    # overwrite each other's PNG and the file is removed even on failure.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, 'spectrogram.png')

        fig, ax = plt.subplots(figsize=(2, 2))  # Size to match your desired input size
        try:
            librosa.display.specshow(
                spectrogram_db, sr=target_sr, x_axis='time', y_axis='mel', ax=ax
            )
            ax.axis('off')  # Remove axes for better formatting
            fig.savefig(image_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if plotting or saving raised.
            plt.close(fig)

        # Read the PNG back while the temporary directory still exists.
        spectrogram_img = image.load_img(image_path, target_size=(128, 128))
        spectrogram_img = image.img_to_array(spectrogram_img)

    spectrogram_img = np.expand_dims(spectrogram_img, axis=0)  # Add batch dimension
    spectrogram_img = spectrogram_img / 255.0  # Normalize pixel values to [0, 1]

    return spectrogram_img

In [9]:
def predict_gunshot(input_audio_path, model_path='../Trained_Models/gunshot_model.keras'):
    """Classify the gun type heard in an audio file.

    Args:
        input_audio_path: Path of the audio file to classify.
        model_path: Path of the saved Keras model to load for inference.

    Returns:
        The index of the highest-scoring class for each item in the batch,
        as produced by ``np.argmax`` over axis 1 of the model's predictions.
    """
    # Build the model input first, then load the classifier from disk.
    model_input = preprocess_audio(input_audio_path)
    classifier = tf.keras.models.load_model(model_path)

    # Score the spectrogram and pick the most likely class per batch item.
    class_scores = classifier.predict(model_input)
    return np.argmax(class_scores, axis=1)



In [15]:
# Example usage: classify one recording from the raw dataset with the
# trained model. Both paths are relative to the notebook's working directory.
input_audio_path = '../Gunshot_Audio_Dataset/Gunshot_Audio_Dataset_Raw/IMI Desert Eagle/2 (13).wav'
model_path = '../Trained_Models/gunshot_model.keras'

# Get the predicted gun type. predict_gunshot returns the np.argmax result,
# i.e. an array of class indices (one per batch item), not a label string.
predicted_class = predict_gunshot(input_audio_path, model_path)

# Prints the index array as-is, e.g. "[0]".
print(f"The predicted gun type is: {predicted_class}")


The predicted gun type is: [0]
