# **Data Augmentation for Audio**

##### **Please note paths are in Linux/Mac format**

#### Using a 4-mic array, sounds were recorded for each of the three categories of interest:
##### **1. Siren**
##### **2. Car Horn**
##### **3. Gun Shot**
#### The sound recordings from the microphone array underwent data augmentation to generate a larger dataset for training. This way, the model will be trained on environmental sounds, allowing it to differentiate more easily between noise and the actual signal.

In [2]:
# Import necessary libraries

import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd

#### Data Augmentation Functions
##### Different functions were created to inject random noise, shift the data along the time axis, speed up the audio files, and modify the pitch of the sounds. These modified files will then be fed into the model with their respective categorical classifications.

In [3]:
# Noise Injection

def add_noise(data, noise_factor=0.05):
    """Return a copy of ``data`` with Gaussian white noise mixed in.

    Args:
        data: Sequence of audio samples (NumPy array or list); assumed to be
            one-dimensional, since one noise value is drawn per element of
            ``len(data)``.
        noise_factor: Scale applied to the standard-normal noise before it
            is added to the signal.

    Returns:
        A new NumPy array with the same length and dtype as ``data``.
    """
    samples = np.asarray(data)
    noise = np.random.randn(len(samples))
    noisy = samples + noise_factor * noise
    # The sum is promoted to float64, so cast back to the input's dtype.
    # Using the array dtype (rather than the type of the first sample) also
    # works when the input is empty.
    return noisy.astype(samples.dtype)

# Shifting
def right_shift(data, sampling_rate=22050, shift_max=3):
    """Shift ``data`` along the time axis by a random offset and pad with silence.

    A random offset of up to ``sampling_rate * shift_max`` samples (exclusive)
    is drawn. The signal is rolled by the negative of that offset and the
    samples that wrapped around to the end are set to zero.

    NOTE(review): despite the name, rolling by a negative offset moves the
    content towards the start of the array and silences the tail. This
    direction is preserved from the original implementation.

    Args:
        data: Sequence of audio samples (NumPy array or list).
        sampling_rate: Samples per second used to convert ``shift_max`` into
            a number of samples.
        shift_max: Upper bound of the shift, in seconds.

    Returns:
        A new NumPy array of the same length as ``data``. If the drawn offset
        is at least ``len(data)`` the result is all zeros.
    """
    max_offset = int(sampling_rate * shift_max)
    if max_offset < 1:
        # Nothing to shift by; still hand back a fresh array like np.roll does.
        return np.array(data, copy=True)

    offset = np.random.randint(max_offset)
    augmented_data = np.roll(data, -offset)

    # Silence the samples that wrapped around to the tail. A zero offset must
    # be skipped: slicing with [-0:] selects the whole array and would wipe
    # the entire signal.
    if offset > 0:
        augmented_data[-offset:] = 0
    return augmented_data

# Speed
def speed_up(data, speed_factor=2):
    """Time-stretch ``data`` with librosa by a fixed rate.

    Args:
        data: Audio time series as accepted by
            ``librosa.effects.time_stretch``.
        speed_factor: Stretch rate passed to librosa; values above 1 speed
            the signal up, so the default of 2 roughly halves its duration.

    Returns:
        The time-stretched audio returned by librosa.
    """
    # ``rate`` is keyword-only in librosa >= 0.10; the keyword form also
    # works on older releases, unlike the positional call.
    return librosa.effects.time_stretch(data, rate=speed_factor)

# Pitch
def change_pitch(data, pitch_factor=0.2, sampling_rate=22050):
    """Pitch-shift ``data`` with librosa by a fixed number of steps.

    Args:
        data: Audio time series as accepted by
            ``librosa.effects.pitch_shift``.
        pitch_factor: Number of (fractional) steps to shift by, passed to
            librosa as ``n_steps``.
        sampling_rate: Sampling rate of ``data`` in Hz, passed to librosa as
            ``sr``.

    Returns:
        The pitch-shifted audio returned by librosa.
    """
    # ``sr`` and ``n_steps`` are keyword-only in librosa >= 0.10; the keyword
    # form also works on older releases, unlike the positional call.
    return librosa.effects.pitch_shift(
        data, sr=sampling_rate, n_steps=pitch_factor
    )

In [6]:
# Load various imports 
import pandas as pd
import os
import librosa
from scipy.io.wavfile import write
from pathlib import Path

# Locate the dataset relative to the notebook's working directory:
# <Software>/Training_Dataset/{audio,metadata}
root_path = Path(os.getcwd()).parent.parent  # Software folder
fulldatasetpath = root_path / "Training_Dataset" / "audio"
metadata = pd.read_csv(root_path / "Training_Dataset" / "metadata" / "micdata.csv")
categories = ['car_horn', 'gun_shot', 'siren']

lenVars = []

# Augmentations applied to every recording, paired with the suffix of the
# file each result is written to. The order fixes both the output file names
# and the order in which NumPy's global random state is consumed.
augmentations = [
    ("_noise1", add_noise),
    ("_noise2", speed_up),
    ("_noise3", change_pitch),
    ("_noise4", right_shift),
]

# Metadata rows are collected in a plain list and turned into a DataFrame
# once at the end: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
new_rows = []

# For each recording: load it, write the augmented variants next to it, and
# register the original plus every variant in the new metadata.
for _, row in metadata.iterrows():

    # Extract filename, fold and category
    category_str = row["class_name"]
    slicefile_name = str(row["slice_file_name"])
    fold_name = str(row["fold"])

    # pathlib keeps the path handling independent of the OS path separator
    fold_dir = Path(os.path.abspath(fulldatasetpath)) / ('fold' + fold_name)
    file_path = fold_dir / slicefile_name
    audio_1, sample_rate = librosa.load(str(file_path), res_type='kaiser_fast')

    new_rows.append({
        'slice_file_name': file_path.name,
        'fold': fold_name,
        'class_name': category_str,
    })

    # Run each augmentation, write the result beside the source recording,
    # and record it with the same fold and class as the source.
    for suffix, augment in augmentations:
        out_path = file_path.with_name(file_path.stem + suffix + ".wav")
        write(str(out_path), sample_rate, augment(audio_1))
        new_rows.append({
            'slice_file_name': out_path.name,
            'fold': fold_name,
            'class_name': category_str,
        })

# New Dataframe
dataFrame = pd.DataFrame(new_rows, columns=['slice_file_name', 'fold', 'class_name'])

# Add the remaining (empty) columns so the final column order is:
# slice_file_name, fsID, start, end, salience, fold, classID, class_name
dataFrame.insert(1, 'fsID', "")
dataFrame.insert(2, 'start', "")
dataFrame.insert(3, 'end', "")
dataFrame.insert(4, 'salience', "")
dataFrame.insert(6, 'classID', "")

# Display DataFrame
display(dataFrame)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class_name
0,siren1.wav,,,,,_siren,,siren
1,siren1_noise1.wav,,,,,_siren,,siren
2,siren1_noise2.wav,,,,,_siren,,siren
3,siren1_noise3.wav,,,,,_siren,,siren
4,siren1_noise4.wav,,,,,_siren,,siren
...,...,...,...,...,...,...,...,...
235,gunshot15.wav,,,,,_gunshot,,gun_shot
236,gunshot15_noise1.wav,,,,,_gunshot,,gun_shot
237,gunshot15_noise2.wav,,,,,_gunshot,,gun_shot
238,gunshot15_noise3.wav,,,,,_gunshot,,gun_shot


In [14]:
# Write the augmented metadata table to NewDataSet.csv in the metadata
# folder, leaving out the DataFrame's integer index.
outFile = Path(root_path, "Training_Dataset", "metadata", 'NewDataSet.csv')
dataFrame.to_csv(path_or_buf=outFile, index=False)

In [15]:
# Sanity check: read the CSV that was just written back into a DataFrame
# and show it.
df = pd.read_csv(filepath_or_buffer=outFile)
display(df)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class_name
0,siren1.wav,,,,,_siren,,siren
1,siren1_noise1.wav,,,,,_siren,,siren
2,siren1_noise2.wav,,,,,_siren,,siren
3,siren1_noise3.wav,,,,,_siren,,siren
4,siren1_noise4.wav,,,,,_siren,,siren
...,...,...,...,...,...,...,...,...
235,gunshot15.wav,,,,,_gunshot,,gun_shot
236,gunshot15_noise1.wav,,,,,_gunshot,,gun_shot
237,gunshot15_noise2.wav,,,,,_gunshot,,gun_shot
238,gunshot15_noise3.wav,,,,,_gunshot,,gun_shot
