In [12]:
import pandas as pd

# Load the integer-encoded labels and shuffle the rows.
# A fixed random_state makes the shuffled order identical on every
# Restart & Run All, so anything derived from the row order is reproducible.
df = pd.read_csv('../data/labels_int.csv')
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [17]:
# Pool the left- and right-side labels and print how often each value occurs.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
print(pd.concat([df['rhonchus_l'], df['rhonchus_r']]).value_counts().sort_index())
print(pd.concat([df['whistling_l'], df['whistling_r']]).value_counts().sort_index())

0    52
1    55
2    17
3    18
dtype: int64
0    83
1    59
dtype: int64


In [34]:
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
def augment_samples(samples, sample_rate=4000):
    """Return a randomly perturbed copy of an audio signal.

    A fresh audiomentations pipeline is built on every call. Each of the four
    transforms below is applied independently with probability 0.5, so a call
    may apply any subset of them (including none).

    Args:
        samples: Audio samples to augment, passed straight to the pipeline.
        sample_rate: Sampling rate of ``samples``; defaults to 4000.

    Returns:
        Whatever the composed pipeline returns for ``samples``.
    """
    transforms = [
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        # NOTE(review): min and max are both 0.5, so the shift amount is fixed
        # rather than drawn from a range -- confirm this is intended.
        Shift(min_fraction=0.5, max_fraction=0.5, p=0.5),
    ]
    pipeline = Compose(transforms)
    return pipeline(samples, sample_rate=sample_rate)

In [44]:
import librosa
import soundfile as sf

import os

# Number of augmented copies written per qualifying recording.
N_AUGMENTATIONS = 3
AUGMENTED_DIR = '../data/augmented_recordings'
# (label column that selects the recording, suffix of the matching wav file)
SIDES = (('rhonchus_l', 'L'), ('rhonchus_r', 'R'))

# sf.write does not create missing directories, so make sure the target exists.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

augmented_data = []
idx = 0  # running number used in the augmented file names; advanced once per file written
for index, row in df.iterrows():
    # Each side is checked independently: when both the left and the right
    # recording carry a rhonchus label of 2 or 3, both are augmented (the
    # previous if/elif silently skipped the right side in that case).
    for side_column, side_suffix in SIDES:
        if not 2 <= row[side_column] <= 3:
            continue
        file_name = '../data/recordings/{id}/{recording}_{side}.wav'.format(
            id=row['seal_id'], recording=row['rec_name'], side=side_suffix)
        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(file_name, sr=None, res_type='kaiser_best')
        # Augment/transform/perturb the audio data
        for _ in range(N_AUGMENTATIONS):
            y = augment_samples(samples=audio, sample_rate=sr)
            sample_name = 'sample{idx}'.format(idx=idx)
            sf.write('{dir}/{name}.wav'.format(dir=AUGMENTED_DIR, name=sample_name), y, sr)
            # 'row_id' records which side's recording the sample was derived
            # from; the four label columns are copied unchanged from the row.
            augmented_data.append([sample_name, side_column, row['rhonchus_l'], row['rhonchus_r'], row['whistling_l'], row['whistling_r']])
            idx += 1

augmented_df = pd.DataFrame(augmented_data, columns=['rec_name', 'row_id', 'rhonchus_l', 'rhonchus_r', 'whistling_l', 'whistling_r'])
augmented_df.to_csv('../data/augmented.csv')