In [10]:
import pandas as pd
import os
import shutil
from tqdm import tqdm
import librosa
import matplotlib.pyplot as plt
import soundfile as sf
from audiomentations import AddGaussianSNR,PitchShift, Compose,AddGaussianNoise,TimeStretch,Shift
import tensorflow as tf
import data_utils as du

In [5]:
def preprocessing_resample_aug(csv,input_path,output_path,sample_rate=16000, repeat=1,transform=None,df_aug_name='esc50_augmented.csv'):
    """
    Resample every audio file listed in a metadata CSV and optionally write augmented copies.

    For each row of the CSV, the file named in the ``filename`` column is loaded
    from ``input_path`` at ``sample_rate`` and written to ``output_path`` under the
    same name. When ``transform`` is given, ``repeat`` augmented variants are
    written next to it as ``"<i>__<filename>"`` and a metadata CSV containing the
    original rows followed by one row per augmented file is saved into
    ``output_path``.

    WARNING: ``output_path`` is deleted and recreated on every call.

    Args:
        csv (str): Path to the CSV file containing metadata of audio files.
            Must contain a ``filename`` column.
        input_path (str): Directory containing the original audio files.
        output_path (str): Directory where the processed audio files are saved.
            Any existing content is removed first.
        sample_rate (int, optional): Target sampling rate. Default is 16000.
        repeat (int, optional): Number of augmented variants per file. Only used
            when ``transform`` is given. Default is 1.
        transform (callable, optional): Called as ``transform(samples, sample_rate=...)``
            and expected to return the augmented samples. Default is None (resample only).
        df_aug_name (str, optional): File name of the augmented metadata CSV,
            written inside ``output_path``. Default is 'esc50_augmented.csv'.

    Raises:
        ValueError: If ``output_path`` is the same directory as ``input_path`` or
            contains it, because clearing it would delete the source audio.
    """
    metadata = pd.read_csv(csv)

    # Clearing output_path must never wipe the directory we are about to read from.
    in_real = os.path.realpath(input_path)
    out_real = os.path.realpath(output_path)
    if in_real == out_real or in_real.startswith(os.path.join(out_real, '')):
        raise ValueError(
            "output_path must not be the same as, or a parent of, input_path: "
            "it is deleted before processing"
        )

    # Always start from an empty output directory.
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    augment = transform is not None
    # Source row label and new file name of every augmented file, collected so the
    # augmented metadata can be built once instead of growing a frame row by row.
    aug_source_idx = []
    aug_names = []

    for idx, row in tqdm(metadata.iterrows(), total=len(metadata)):
        filename = row['filename']
        y, _ = librosa.load(os.path.join(input_path, filename), sr=sample_rate)

        # The resampled original is always kept, whatever the value of `repeat`.
        sf.write(os.path.join(output_path, filename), y, sample_rate)

        if not augment:
            continue

        for i in range(repeat):
            aug_name = str(i) + '__' + filename
            augmented_sound = transform(y, sample_rate=sample_rate)
            sf.write(os.path.join(output_path, aug_name), augmented_sound, sample_rate)
            aug_source_idx.append(idx)
            aug_names.append(aug_name)

    if augment:
        if aug_names:
            # Copy the source rows (all columns, original dtypes) and only swap the file name.
            aug_rows = metadata.loc[aug_source_idx].copy()
            aug_rows['filename'] = aug_names
            df_aug = pd.concat([metadata, aug_rows], ignore_index=True)
        else:
            df_aug = metadata.copy()
        df_aug.to_csv(os.path.join(output_path, df_aug_name), index=False)


In [6]:
# Augmentation pipeline shared by the preprocessing cells.
# Every step is applied independently with probability 0.5.
transform = Compose(
    transforms=[
        # additive Gaussian noise with amplitude in [0.001, 0.015]
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        # time stretch with a rate in [0.8, 1.25]
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        # pitch shift of up to +/- 4 semitones
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        # time shift with min_shift=-0.5 / max_shift=0.5
        Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
    ]
)



In [18]:
# Dataset 1 - no augmentation: resample to 16 kHz only (2000 files in total).
preprocessing_resample_aug(
    csv='/mnt/ESC-50-master/meta/esc50.csv',
    input_path='/mnt/ESC-50-master/audio/',
    output_path='/mnt/ESC-50-master/audio_16k/',
    sample_rate=16000,
    repeat=0,
    transform=None,
)

2000it [01:45, 19.00it/s]


In [12]:

# Dataset 2 - resample to 16 kHz and add 4 augmented variants per clip
# (2000 originals + 4 * 2000 augmented = 10000 files in total).
preprocessing_resample_aug(
    csv='/mnt/ESC-50-master/meta/esc50.csv',
    input_path='/mnt/ESC-50-master/audio/',
    output_path='/mnt/ESC-50-master/audio_16k_aug_r4/',
    sample_rate=16000,
    repeat=4,
    transform=transform,
    df_aug_name='esc50_augmented_r4.csv',
)


2000it [09:02,  3.69it/s]


In [13]:
# Sanity check: the augmented metadata should list one row per written audio file.
df_aug = pd.read_csv('/mnt/ESC-50-master/audio_16k_aug_r4/esc50_augmented_r4.csv')
df_aug.shape[0]

10000

In [2]:
### Deprecated function
#previous function to preprocess the data with augmentation and split into train and validation folders


def prepreprocessing_with_folders(csv_file,audio_dir,save_dir,freq=16000,val_split=0.2, transform=transform,repeat=1,seed=None):
    """
    Deprecated: split a dataset into per-class train/val folders, augmenting the training part.

    Reads ``csv_file`` (the ``filename`` and ``category`` columns are used),
    shuffles the rows and, for every category, writes the resampled audio to
    ``<save_dir>/val/<category>/`` or ``<save_dir>/train/<category>/``.
    Validation files are never augmented. Every training file is written once
    unchanged and, when ``transform`` is not None, ``repeat`` additional augmented
    copies are written as ``<stem><k>.wav`` (``repeat=1`` means one original plus
    one augmented copy).

    Parameters:
        csv_file (str): Path to the metadata CSV.
        audio_dir (str): Directory containing the audio files named in the CSV.
        save_dir (str): Root directory in which the ``train`` and ``val`` trees are created.
        freq (int, optional): Sampling rate used for loading, augmenting and writing. Default is 16000.
        val_split (float, optional): Fraction of each category sent to validation. Default is 0.2.
        transform (callable or None, optional): Called as ``transform(samples, sample_rate=freq)``.
            NOTE: the default is the notebook-level ``transform`` object as it exists
            when this cell is executed, so that cell must be run first.
        repeat (int, optional): Number of augmented copies per training file. Default is 1.
        seed (int, optional): Random state for the shuffle; set it to make the
            train/val split reproducible. Default is None (different split on every call).
    """
    audio_data = pd.read_csv(csv_file)
    # Shuffle so that the first files of each category (the validation part) are a random sample.
    audio_data = audio_data.sample(frac=1, random_state=seed).reset_index(drop=True)

    for name, group in tqdm(audio_data.groupby('category')):
        # Rows with an empty category name are skipped.
        if not name:
            continue

        train_dir = os.path.join(save_dir, "train", name)
        val_dir = os.path.join(save_dir, "val", name)
        # exist_ok: do not fail when only one of the two folders is already there.
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(val_dir, exist_ok=True)

        # Files whose position within the category is below this threshold go to validation.
        val_threshold = len(group) * val_split

        for position, filename in enumerate(group['filename']):
            y, _ = librosa.load(os.path.join(audio_dir, filename), sr=freq)

            # validation: original sample only, no augmentation
            if position < val_threshold:
                sf.write(os.path.join(val_dir, filename), y, freq)
                continue

            # training: always keep the original sample, independent of `repeat`
            sf.write(os.path.join(train_dir, filename), y, freq)

            if transform is None:
                continue

            stem = os.path.splitext(filename)[0]
            for k in range(repeat):
                # augment at the rate the audio was loaded with, not a hard-coded 16000
                transformed = transform(y, sample_rate=freq)
                sf.write(os.path.join(train_dir, stem + str(k) + '.wav'), transformed, freq)



In [3]:
# `augment` and `prob` are not parameters of prepreprocessing_with_folders and raise a
# TypeError on a fresh kernel; augmentation is controlled by `transform`, whose steps
# already carry their own probability (p=0.5).
prepreprocessing_with_folders('/mnt/ESC-50-master/meta/esc50.csv',
                 '/mnt/ESC-50-master/audio/',
                 '/mnt/ESC-50-master/audio_aug/',
                 freq=16000,val_split=0.3,transform=transform, repeat=5)

100%|██████████| 50/50 [10:26<00:00, 12.53s/it]
