In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import librosa
import matplotlib.pyplot as plt
import os
import cv2
import IPython.display as ipd

In [4]:
def get_spectrogram(wav):
    D = librosa.stft(wav, n_fft=480, hop_length=160,
                     win_length=480, window='hamming')
    spect, phase = librosa.magphase(D)
    return spect

In [5]:
class Augmenter:
    def __init__(self,wav,sr):
        self.wav = wav
        self.sr = sr
        
    def time_shift(self):
        start_ = int(np.random.uniform(-self.wav.shape[0]*0.5,self.wav.shape[0]*0.5))
        if start_ >= 0:
            wav_time_shift = np.r_[self.wav[start_:], np.random.uniform(-0.001,0.001, start_)]
        else:
            wav_time_shift = np.r_[np.random.uniform(-0.001,0.001, -start_), self.wav[:start_]]
        return wav_time_shift,self.sr
    
    def change_pitch(self):
        # magnitude = x. 0<x<1 slows down, x=1 identity, x>1 speeds up
        magnitude = int(np.random.uniform(-10,10))
        return librosa.effects.pitch_shift(self.wav,self.sr,magnitude), self.sr
    
    def change_speed(self,lower=0.7,upper=1.3):
        speed_rate = np.random.uniform(lower,upper)
        wav_speed_tune = cv2.resize(self.wav, (1, int(len(self.wav) * speed_rate))).squeeze()
        if len(wav_speed_tune) < self.wav.shape[0]:
            pad_len = self.wav.shape[0] - len(wav_speed_tune)
            wav_speed_tune = np.r_[np.random.uniform(-0.001,0.001,int(pad_len/2)),wav_speed_tune,np.random.uniform(-0.001,0.001,int(np.ceil(pad_len/2)))]
        else: 
            cut_len = len(wav_speed_tune) - self.wav.shape[0]
            wav_speed_tune = wav_speed_tune[int(cut_len/2):int(cut_len/2)+self.wav.shape[0]]
        return wav_speed_tune, self.sr
    
    def change_volume(self,magnitude):
        # magnitude = x. 0<x<1 quieter, x=1 identity, x>1 louder
        print(self.wav)
        r = np.multiply(np.array([magnitude]),self.wav)
        print(r)
        return r,self.sr
    
    def add_background(self,sound_directory):
        bg_files = os.listdir(sound_directory)
        bg_files.remove(chosen_file)
        chosen_bg_file = bg_files[np.random.randint(len(bg_files))]
        bg, sr = librosa.load(sound_directory+chosen_bg_file, sr=None)
        ceil = max((bg.shape[0]-self.wav.shape[0]),1)
        start_ = np.random.randint(ceil)
        bg_slice = bg[start_ : start_+self.wav.shape[0]]
        if bg_slice.shape[0]<self.wav.shape[0]:
            pad_len = self.wav.shape[0] - bg_slice.shape[0]
            bg_slice = np.r_[np.random.uniform(-0.001,0.001,int(pad_len/2)),bg_slice,np.random.uniform(-0.001,0.001,int(np.ceil(pad_len/2)))]
        wav_with_bg = self.wav * np.random.uniform(0.8, 1.2) + bg_slice * np.random.uniform(0, 0.5)
        return wav_with_bg, self.sr

In [6]:
EPS = 1e-8
sound_directory = '/Users/gabe/Desktop/REU_Data_organized/Train/'
chosen_file = '35.wav'
file_path = sound_directory+chosen_file
wav, sr = librosa.load(file_path, sr=None)
aug = Augmenter(wav,sr)
wav_s, sr_s = aug.change_speed()
wav_b, sr_b = aug.add_background(sound_directory)
wav_t, sr_t = aug.time_shift()
wav_p, sr_p = aug.change_pitch()
wav_v, sr_v = aug.change_volume(0.5)
wav_v_2, sr_v_2 = aug.change_volume(2.0)
ipd.display(ipd.Audio(wav, rate=sr))
ipd.display(ipd.Audio(wav_s, rate=sr_s))
ipd.display(ipd.Audio(wav_b, rate=sr_b))
ipd.display(ipd.Audio(wav_t, rate=sr_t))
ipd.display(ipd.Audio(wav_p, rate=sr_p))
ipd.display(ipd.Audio(wav_v, rate=sr_v))
ipd.display(ipd.Audio(wav_v_2, rate=sr_v_2))

[ 0.00059509 -0.00012207  0.00028992 ...  0.          0.
  0.        ]
[ 2.97546387e-04 -6.10351562e-05  1.44958496e-04 ...  0.00000000e+00
  0.00000000e+00  0.00000000e+00]
[ 0.00059509 -0.00012207  0.00028992 ...  0.          0.
  0.        ]
[ 0.00119019 -0.00024414  0.00057983 ...  0.          0.
  0.        ]
