In [9]:
import pandas as pd 
import tensorflow as tf
import glob
import librosa
import numpy as np
from pydub import AudioSegment
from pydub.utils import make_chunks
import soundfile as sf
import os
import pickle


# Read in file paths and labels for audio data

In [6]:
# Scan the data directory once and derive the labels (bare file names) from
# that same list, so labels[i] always corresponds to file_paths[i].
file_paths = glob.glob("E:/Users/atag3/Documents/Gunshot_Data/*.wav")
labels = [os.path.basename(x) for x in file_paths]

## Create augmented data by adding Gaussian noise, then take 0.5-second chunks of all audio clips as additional gunshot observations.

In [None]:
# Add Gaussian noise to every clip in `file_paths` and save the noisy copy
# next to the source file as "<original name>_noise_add.wav".
# (Use sr=32000 in the load call if going the PANNs transfer-learning route.)
NOISE_SCALE = 0.006  # multiplier applied to unit-variance Gaussian noise
noise_rng = np.random.default_rng(42)  # fixed seed so the augmentation is reproducible

for file in file_paths:

    # Skip clips generated by a previous run of this notebook: their names
    # also end in ".wav", so they would otherwise be augmented again.
    clip_name = os.path.basename(file)
    if '.wav_noise_add' in clip_name or '.wavchunk' in clip_name:
        continue

    # Read in the audio data with librosa at a 44.1 kHz sample rate.
    y, sr = librosa.load(file, sr=44100)

    # NOTE(review): an earlier version also called librosa.effects.trim here,
    # but the trimmed signal was never used; noise is added to the full clip,
    # exactly as before. Confirm whether trimming was meant to be applied.
    wav_n = y + NOISE_SCALE * noise_rng.normal(0, 1, len(y))
    sf.write(file + '_noise_add.wav', wav_n, sr, 'PCM_16')

def is_chunk_file(path):
    """Return True when `path` names a chunk file exported by the loop below.

    Chunk files are written as "<source name>.wavchunk<i>.wav", so the marker
    ".wavchunk" in the base name identifies them.
    """
    return '.wavchunk' in os.path.basename(path)


# Update the file paths variable to include the newly made noise clips.
file_paths = glob.glob("E:/Users/atag3/Documents/Gunshot_Data/*.wav")
chunk_length_ms = 500  # pydub calculates in milliseconds, so this is 0.5 s

for file in file_paths:

    # Chunks from an earlier run also match "*.wav"; skip them so re-running
    # this cell does not chunk the chunks.
    if is_chunk_file(file):
        continue

    # Read the clip again with pydub and cut it into 0.5-second segments.
    # NOTE(review): the final segment of a clip may be shorter than
    # chunk_length_ms - confirm that short tail chunks are wanted as samples.
    myaudio = AudioSegment.from_file(file, "wav")
    chunks = make_chunks(myaudio, chunk_length_ms)

    # Export each individual chunk as its own wav file next to the source.
    for i, chunk in enumerate(chunks):
        chunk_name = file + "chunk{0}.wav".format(i)
        chunk.export(chunk_name, format="wav")

In [7]:
# Define a function to extract Mel-Frequency Cepstral Coefficients (MFCCs) for each clip.

def extract_features(file_name, sr=44100, n_mfcc=64):
    """Return the mean MFCC vector of one audio file.

    A mel-spectrogram representation was tried originally; the MFCC matrix
    averaged over its second axis is used instead.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.
    sr : int, optional
        Sample rate passed to ``librosa.load`` (default 44100).
    n_mfcc : int, optional
        Number of MFCCs requested from ``librosa.feature.mfcc`` (default 64).

    Returns
    -------
    numpy.ndarray or None
        One averaged value per coefficient, or ``None`` when the file could
        not be loaded or processed.
    """
    try:
        audio, sample_rate = librosa.load(file_name, sr=sr)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        # Transpose so axis 0 is the second MFCC axis, then average it away.
        mfccs_processed = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort extraction: report which file failed and why, then let
        # the caller decide what to do with the missing result.
        print("Error encountered while parsing file: ", file_name, "-", repr(e))
        return None

    return mfccs_processed

## Create a DataFrame of features and labels

In [8]:
def build_feature_rows(paths, extractor=None):
    """Extract features for each path and pair them with a class label.

    Parameters
    ----------
    paths : iterable of str
        Audio file paths. The class label is the part of the file's base
        name before the first underscore.
    extractor : callable, optional
        Function mapping a path to a feature vector, or to ``None`` on
        failure. Defaults to the notebook's ``extract_features``.

    Returns
    -------
    list
        ``[feature, class_label]`` rows. Files whose extraction returned
        ``None`` are left out so they cannot end up in the training table.
    """
    rows = []
    for path in paths:
        class_label = os.path.basename(path).split("_")[0]
        data = extract_features(path) if extractor is None else extractor(path)
        if data is None:
            continue
        rows.append([data, class_label])
    return rows


# Rebuild the list of file names and labels so it includes the augmented data.
# Sorting makes the row order independent of the filesystem's listing order.
file_paths = sorted(glob.glob("E:/Users/atag3/Documents/Gunshot_Data/*.wav"))
labels = [os.path.basename(x) for x in file_paths]

# Extract the features for every sound file that can be read.
features = build_feature_rows(file_paths)

# Convert into a pandas DataFrame.
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files')
skipped_count = len(file_paths) - len(featuresdf)
if skipped_count:
    print('Skipped ', skipped_count, ' files that could not be parsed')

Finished feature extraction from  7770  files


In [10]:
# Save the feature table to disk as a pickle file.
pickle_path = 'pickled_64_feat.pkl'
featuresdf.to_pickle(pickle_path)