## Data Augmentation

In [None]:
### Install needed libraries
!apt-get install ffmpeg
!pip3 install audiomentations
!pip3 install pydub

In [None]:
import numpy as np
import pydub
import os
import re
from tqdm import tqdm
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
import multiprocessing as mp



In [None]:
##########################################################
####  mp3 read and write methods. Credit to Stackoverflow
####  user Basj for the starting point for these 
####  methods in his answer to:
####  https://stackoverflow.com/questions/53633177/how-to-read-a-mp3-audio-file-into-a-numpy-array-save-a-numpy-array-to-mp3?noredirect=1&lq=1
####
####  The calling function passes a filename to
####  be read or written
####
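####  Note the normalization - decoded MP3 samples are almost always
####  16-bit signed integers (15 magnitude bits plus a sign bit), which
####  is what the 2**15 scale factor assumes; there is a faint chance
####  this wouldn't work for a file with a different sample width.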
##########################################################

def mp3_read(filename, normalized=False):
    """Read an mp3 file into a numpy array.

    filename:   path of the mp3 file to decode (decoding is done by pydub)
    normalized: when True, scale the samples by the decoder's full-scale
                value (2**15 for 16-bit audio) and return float32;
                when False, return the raw sample values as float64

    Returns a tuple (frame_rate, samples). Mono audio is a 1-D array;
    multi-channel audio is reshaped to (n_frames, n_channels).
    """
    with open(filename, "rb") as f:
        segment = pydub.AudioSegment.from_mp3(f)

    samples = np.array(segment.get_array_of_samples()).astype('float64')

    # pydub hands back interleaved samples; give every channel its own column.
    if segment.channels > 1:
        samples = samples.reshape((-1, segment.channels))

    if normalized:
        # Full scale follows the decoded sample width instead of assuming
        # 16 bit (sample_width is in bytes, one bit is the sign).
        full_scale = 2 ** (8 * segment.sample_width - 1)
        return segment.frame_rate, np.float32(samples) / full_scale

    return segment.frame_rate, samples

    
def mp3_write(filename, sr, x, normalized=False):
    """Write a numpy array to an mp3 file as 16-bit audio.

    filename:   path of the mp3 file to create
    sr:         sample rate (frames per second) of the audio
    x:          samples; a 1-D array is written as mono, an array of shape
                (n_frames, 2) is written as stereo
    normalized: when True, x holds floats in [-1, 1) which are scaled by
                2**15; when False, x already holds 16-bit sample values

    Samples outside the 16-bit range are clipped rather than being allowed
    to wrap around in the integer conversion.
    """
    samples = np.asarray(x, dtype=np.float64)
    channels = 2 if (samples.ndim == 2 and samples.shape[1] == 2) else 1

    if normalized:  # normalized array - each item should be a float in [-1, 1)
        samples = samples * 2 ** 15

    # Saturate before the cast: a plain int16 conversion wraps out-of-range
    # values, which turns slight overshoot into full-scale clicks.
    pcm = np.clip(samples, -2 ** 15, 2 ** 15 - 1).astype(np.int16)

    clip = pydub.AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=channels)

    # Open the output only once the audio is ready, so a bad input does not
    # leave an empty file behind; the with-block closes the file.
    with open(filename, "wb") as f:
        # NOTE(review): sr * 2**16 is far above any valid mp3 bitrate and is
        # presumably limited by the encoder - confirm the intended value.
        clip.export(f, format="mp3", bitrate=f"{sr * 2**16}")


In [None]:
# Quick check of mp3_read on a single recording: decode it without
# normalization, then print the sample rate, the smallest sample value,
# the array shape and the array dtype.
sample_rate, y = mp3_read("data/birdsong-recognition/train_audio/aldfly/XC134874.mp3")
print(sample_rate)
print(y.min())
print(y.shape)
print(y.dtype)

In [None]:

def augment(source_file,dest_file,augmentation):
    """
    augment() - apply one augmentation to one recording and write the
    result as an mp3. Kept as a standalone function so it can be used
    for the multiprocessing later on.

    source_file:  path and filename of source sample
    dest_file:    path and filename of output sample
    augmentation: an instance of audiomentations.Compose
                  containing the augmentation(s)

    If passed a stereo file, it will create two mono files from the two
    channels (named <name>.0<ext> and <name>.1<ext> next to the source),
    augment each of them, and then move the stereo original into a
    "stereo" subdirectory. If the passed file no longer exists but both
    mono files do, the split has already been done and the mono files
    are augmented directly.

    Audio is processed as float32 in [-1, 1), the range the augmentation
    amplitudes (e.g. Gaussian noise) are expressed in, and is clipped to
    that range before being written.
    """

    print(f"source_file: {source_file}")

    source_root, source_ext = os.path.splitext(source_file)
    dest_root, dest_ext = os.path.splitext(dest_file)
    source_file0 = f"{source_root}.0{source_ext}"
    source_file1 = f"{source_root}.1{source_ext}"
    dest_file0 = f"{dest_root}.0{dest_ext}"
    dest_file1 = f"{dest_root}.1{dest_ext}"

    already_split = (
        os.path.isfile(source_file0)
        and os.path.isfile(source_file1)
        and not os.path.isfile(source_file)
    )
    if already_split:
        # the stereo split has already been done, so augment each of
        # the files in turn then exit
        print("File has already been split into mono files")
        augment(source_file0,dest_file0,augmentation)
        augment(source_file1,dest_file1,augmentation)
        return

    # The file is either mono already or not yet split.
    # Read it normalized so the samples are on the [-1, 1) scale.
    sample_rate, chirp = mp3_read(source_file, normalized=True)

    if chirp.ndim == 2 and chirp.shape[1] == 2:
        print("Recording is in stereo, splitting into two mono files")

        # write the split mono files
        mp3_write(source_file0, sample_rate, chirp[:,0], normalized=True)
        mp3_write(source_file1, sample_rate, chirp[:,1], normalized=True)

        # run augmentation on each
        augment(source_file0,dest_file0,augmentation)
        augment(source_file1,dest_file1,augmentation)

        # move the stereo version of the file to a new location,
        # creating the directory on first use
        stereo_dir = os.path.join(os.path.dirname(source_file), "stereo")
        stereo_file = os.path.join(stereo_dir, os.path.basename(source_file))
        os.makedirs(stereo_dir, exist_ok=True)
        print(f"moving {source_file} to {stereo_file}")
        os.rename(source_file, stereo_file)
        return

    # file is mono, and we can process directly
    print("file is already in mono")
    new_chirp = augmentation(chirp, sample_rate=sample_rate)

    print(new_chirp.shape)

    # An augmentation can push samples past full scale; saturate so the
    # 16-bit conversion in mp3_write cannot wrap around.
    new_chirp = np.clip(new_chirp, -1.0, 1.0 - 2.0 ** -15)

    # Write out the new chirp audio to an mp3
    mp3_write(dest_file, sample_rate, new_chirp, normalized=True)


In [None]:
###########################################################
###  Process a single directory of bird sounds
###########################################################

def augment_bird(bird):
    """Write the augmented variants of every original recording of one bird.

    bird: name of the sub-directory of
          data/birdsong-recognition/train_audio/ holding the recordings

    Every file whose name matches X*.mp3 is passed to augment() once per
    recipe below. Outputs are written next to the source as
    A.<name><tag>.mp3, so generated files (starting with A) are not picked
    up as sources.
    """
    print(f"Creating augmented samples for {bird}")

    input_dir = os.path.join("data/birdsong-recognition/train_audio/", bird)

    # (tag inserted in the output name, factory for the transform), in the
    # order the variants are produced. Each transform is configured with p=0.5.
    recipes = (
        # random time shift + or - up to 0.5
        (".ts0.5", lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5)),
        # random frequency shift up or down by up to 2 semitones
        (".fs2", lambda: PitchShift(min_semitones=-2, max_semitones=2, p=0.5)),
        # random time expansion / contraction up to 20%
        (".ex20", lambda: TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5)),
        # gaussian noise between 0.001 and 0.015 amplitude
        (".gn015", lambda: AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5)),
    )

    # only enhance files that start with X - the generated files start with A
    originals = [name for name in os.listdir(input_dir) if re.match(r'X.*\.mp3', name)]

    for chirp in tqdm(originals):
        chirp_file = os.path.join(input_dir, chirp)
        stem, extension = chirp[:-4], chirp[-4:]

        for tag, make_transform in recipes:
            augmentation = Compose([make_transform()])
            dest_file = os.path.join(input_dir, f"A.{stem}{tag}{extension}")
            augment(chirp_file, dest_file, augmentation)

    print(f"Augmentation complete for {os.path.basename(input_dir)}")

In [None]:
###########################################################
### Main function
###########################################################


def main():
    """Seed the random number generators and augment every listed bird.

    Both NumPy's generator and the standard-library `random` module are
    seeded: audiomentations is understood to pick its random parameters
    with `random`, so seeding NumPy alone does not make runs repeatable.
    """
    import random  # local import: only needed for seeding

    seed = 1234
    np.random.seed(seed)
    random.seed(seed)

    # BIRDS = os.listdir("data/birdsong-recognition/train_audio/")[0:20]
    birds = ['aldfly']

    # Worker count for the (currently disabled) process pool: one per
    # physical core on hyperthreaded CPUs, reserving one core for OS, IO,
    # compression etc. when using cloud object storage. On
    # non-hyperthreaded cores use int(mp.cpu_count() - 1) instead.
    # Never less than one, to handle single-core machines.
    threads = max(1, int((mp.cpu_count() / 2) - 1))

    # Birds are processed one after another. To parallelise, replace the
    # loop with:
    #     pool = mp.Pool(threads)
    #     pool.map(augment_bird, birds)
    for bird in birds:
        augment_bird(bird)


if __name__ == "__main__":
    main()
                   
        
