In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa, librosa.display
import soundfile as sf
import random

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [3]:
# Scratch globals used by the augmentation cells: one output-path holder per
# augmentation variant, plus the running index that numbers the positive
# ("marvin") output files.
sig1name = sig2name = sig3name = sig4name = sig5name = ""
count = 0

## DATA AUGMENTATION FUNCTIONS

In [4]:
def addWhiteNoise(signal, noise_factor):
  """Return `signal` with additive Gaussian white noise.

  The noise is drawn from a zero-mean normal distribution whose standard
  deviation equals the standard deviation of `signal`, then scaled by
  `noise_factor` before being added.

  Args:
    signal: NumPy array of samples, of any shape.
    noise_factor: Multiplier applied to the noise before it is added; 0 adds
      no noise.

  Returns:
    A new array with the same shape as `signal`; the input is not modified.
  """
  # Draw the noise with the signal's shape rather than its flat size, so that
  # multi-dimensional input such as (channels, samples) broadcasts correctly.
  # For 1-D input this is identical to drawing `signal.size` values.
  noise = np.random.normal(0, signal.std(), signal.shape)
  augmented_signal = signal + (noise * noise_factor)
  return augmented_signal

In [5]:
def timeStretch(signal, stretch_factor):
  """Time-stretch `signal` with `librosa.effects.time_stretch`.

  Args:
    signal: Audio samples to stretch.
    stretch_factor: Forwarded as librosa's `rate`. Per the librosa
      documentation, rate > 1 speeds the audio up, rate < 1 slows it down,
      and rate == 1 leaves the duration unchanged.

  Returns:
    Whatever `librosa.effects.time_stretch` returns for the given input.
  """
  # `rate` is keyword-only in librosa >= 0.10; passing it by keyword works on
  # both older and newer releases, whereas a positional argument raises
  # TypeError on the newer ones.
  return librosa.effects.time_stretch(signal, rate=stretch_factor)

In [6]:
def pitchScaling(signal, sr, num_semitones):
  """Pitch-shift `signal` with `librosa.effects.pitch_shift`.

  Args:
    signal: Audio samples to shift.
    sr: Sampling rate of `signal`, forwarded as librosa's `sr`.
    num_semitones: Forwarded as librosa's `n_steps` (number of steps to
      shift; semitones with librosa's default bins-per-octave setting).

  Returns:
    Whatever `librosa.effects.pitch_shift` returns for the given input.
  """
  # `sr` and `n_steps` are keyword-only in librosa >= 0.10; keyword passing is
  # accepted by older releases too, so this form works across versions.
  return librosa.effects.pitch_shift(signal, sr=sr, n_steps=num_semitones)

In [7]:
def invertPolarity(signal):
  """Return `signal` multiplied by -1, i.e. with the sign of each sample flipped."""
  inverted = signal * -1
  return inverted

In [8]:
def randomGain(signal, min_gain_factor, max_gain_factor):
  """Scale `signal` by a single gain drawn with `random.uniform`.

  Args:
    signal: Value or array to scale.
    min_gain_factor: First bound handed to `random.uniform`.
    max_gain_factor: Second bound handed to `random.uniform`.

  Returns:
    `signal` multiplied by the drawn gain factor.
  """
  return signal * random.uniform(min_gain_factor, max_gain_factor)

## DATA AUGMENTATION FOR MARVIN

In [9]:
# Map class label -> list of audio file paths to augment. Label 1 holds the
# denoised "Marvin" clips.
#
# The file names are listed from the same directory the paths point into, so
# every generated path refers to a file that exists. Listing a different
# folder only works when both folders happen to hold identical file names.
# The listing is sorted so the order (and therefore the output numbering in
# the augmentation cell) is reproducible between runs; os.listdir alone
# returns entries in arbitrary order.
marvin_denoised_dir = "D:/Work/VIT/Capstone Project/Data/Total Denoised Data/Marvin Denoised/"
data_path_dict = {
    1: [marvin_denoised_dir + file_name for file_name in sorted(os.listdir(marvin_denoised_dir))]
}

In [10]:
# Augment every clip listed in `data_path_dict` five ways (white noise, time
# stretch, pitch shift, polarity inversion, random gain) and write each
# variant as "marvin (<index>) <augmentation>.wav".
#
# `count` is reset here instead of relying on the value left behind by an
# earlier cell: re-running this cell then restarts the numbering at 0 and
# overwrites the same files rather than writing a second, duplicate set under
# new indices.
marvin_aug_prefix = "D:/Work/VIT/Capstone Project/Data/Total Denoised Data/Marvin Denoised Augmented/marvin ("
count = 0
for class_label, audio_files in data_path_dict.items():
  for one_file in audio_files:
    # Loaded with librosa.load's default arguments; `fs` is the sampling rate
    # it returns and is reused for every file written below.
    sig, fs = librosa.load(one_file)

    sig1 = addWhiteNoise(sig, 0.5)
    # NOTE(review): a stretch factor of 1 leaves the duration unchanged per the
    # librosa docs, so this variant is presumably a near-copy of the input.
    # Confirm whether a factor other than 1 was intended.
    sig2 = timeStretch(sig, 1)
    sig3 = pitchScaling(sig, fs, 2)
    sig4 = invertPolarity(sig)
    # NOTE(review): a 2-4x gain may push samples outside [-1, 1]; verify how
    # sf.write handles out-of-range float data for the output subtype.
    sig5 = randomGain(sig, 2, 4)

    # Shared "<dir>/marvin (<index>) " stem for the five output files.
    file_stem = marvin_aug_prefix + str(count) + ") "
    sig1name = file_stem + "AWGN.wav"
    sig2name = file_stem + "timestretched.wav"
    sig3name = file_stem + "pitchscaled.wav"
    sig4name = file_stem + "invpolarity.wav"
    sig5name = file_stem + "gain.wav"

    for out_path, out_sig in (
        (sig1name, sig1),
        (sig2name, sig2),
        (sig3name, sig3),
        (sig4name, sig4),
        (sig5name, sig5),
    ):
      sf.write(out_path, out_sig, fs)

    count = count + 1

  
  


## DATA AUGMENTATION FOR NEGATIVE SAMPLES

In [11]:
# File names (not full paths) of every entry in the negative-sample folder.
nameArray = [
    file_path
    for file_path in os.listdir("D:/Work/VIT/Capstone Project/Data/Total Denoised Data/False Denoised/")
]

len(nameArray)

1769

In [12]:
# Augment every negative clip named in `nameArray` five ways and write each
# variant to the augmented folder as "<augmentation>_<original file name>".
false_src_dir = "D:/Work/VIT/Capstone Project/Data/Total Denoised Data/False Denoised/"
false_aug_dir = "D:/Work/VIT/Capstone Project/Data/Total Denoised Data/False Denoised Augmented/"

for file in nameArray:
  audio = false_src_dir + file
  # `fs` is the sampling rate returned by librosa.load; reused for all writes.
  sig, fs = librosa.load(audio)

  # Compute all five variants before anything is written.
  sig1 = addWhiteNoise(sig, 0.5)
  # NOTE(review): stretch factor 1 leaves the duration unchanged per the
  # librosa docs — confirm this is the intended setting.
  sig2 = timeStretch(sig, 1)
  sig3 = pitchScaling(sig, fs, 2)
  sig4 = invertPolarity(sig)
  # NOTE(review): a 2-4x gain may exceed [-1, 1]; verify how sf.write treats
  # out-of-range float samples.
  sig5 = randomGain(sig, 2, 4)

  sig1name = false_aug_dir + "AWGN_" + file
  sig2name = false_aug_dir + "timestretched_" + file
  sig3name = false_aug_dir + "pitchscaled_" + file
  sig4name = false_aug_dir + "invpolarity_" + file
  sig5name = false_aug_dir + "gain_" + file

  # Write the variants in the same order they were computed.
  for out_path, out_sig in zip(
      (sig1name, sig2name, sig3name, sig4name, sig5name),
      (sig1, sig2, sig3, sig4, sig5),
  ):
    sf.write(out_path, out_sig, fs)

  
  


## VERIFYING DATASET SIZE

In [13]:
# Positive sample count: number of entries in the augmented "Marvin" folder.
countpos = len(os.listdir("D:/Work/VIT/Capstone Project/Data/Total Denoised Data/Marvin Denoised Augmented"))

# The recorded run printed 10476, which equals 1746*6 (not 1746*5) —
# presumably the folder holds the originals plus five variants each; TODO confirm.
print(countpos)

10476


In [16]:
# Negative sample count: number of entries in the augmented negative folder.
countneg = len(os.listdir("D:/Work/VIT/Capstone Project/Data/Total Denoised Data/False Denoised Augmented/"))

# The recorded run printed 10614, which equals 1769*6 (not 1769*5) —
# presumably the folder holds the originals plus five variants each; TODO confirm.
print(countneg)

10614
