In [1]:
import os
import pandas as pd
import shutil
import torchaudio
import torch
import time
from tqdm import tqdm 
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift, Gain, Clip

In [2]:
# Directory that holds the original training clips (listed and loaded further down).
wav_path = "tomofun_train"

# Label table; later cells read its "fname" and "label" columns.
df = pd.read_csv("tomofun_train.csv")

In [3]:
# Augmentation pipelines, in the order they are applied to every clip.
# The position of a pipeline in this list is the index used in the
# "aug_<index>_" file-name prefix when the augmented clips are written.
apply_augmentation_list = [
    # 0: time-stretch with a rate in [0.5, 1], then pitch-shift by up to +/-4 semitones.
    Compose([
        TimeStretch(min_rate=0.5, max_rate=1, p=1),
        PitchShift(min_semitones=-4, max_semitones=4, p=1),
    ]),
    # 1: time-stretch with a rate in [1, 1.5], then pitch-shift by up to +/-4 semitones.
    Compose([
        TimeStretch(min_rate=1, max_rate=1.5, p=1),
        PitchShift(min_semitones=-4, max_semitones=4, p=1),
    ]),
    # 2: additive Gaussian noise followed by a time shift.
    # NOTE(review): a shift fraction fixed at 1 with rollover=True looks like a
    # full rotation of the clip, which would leave the samples unchanged -
    # confirm the intended fraction range.
    Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1),
        Shift(min_fraction=1, max_fraction=1, rollover=True, p=1),
    ]),
    # 3: random gain in [-10, 10] dB.
    # NOTE(review): with p=0.5 the transform is presumably skipped about half
    # the time, so those outputs would be copies of the input - confirm intended.
    Compose([
        Gain(min_gain_in_db=-10, max_gain_in_db=10, p=0.5),
    ]),
    # 4: clip samples to [-1.0, 1.0].
    # NOTE(review): if the loaded samples already lie within [-1, 1] this has
    # no effect regardless of p - verify against the loader's output range.
    Compose([
        Clip(a_min=-1.0, a_max=1.0, p=0.5),
    ]),
]

# Keep the individual pipeline names available alongside the list.
(
    apply_augmentation_0,
    apply_augmentation_1,
    apply_augmentation_2,
    apply_augmentation_3,
    apply_augmentation_4,
) = apply_augmentation_list

In [4]:
wav_aug_path = 'tomofun_aug_train'

# Create the output directory for the augmented dataset.
# exist_ok=True keeps the cell safe to re-run. Unlike the previous bare
# `except: pass`, genuine failures (permissions, a regular file already
# occupying the path, ...) are raised here instead of being silently ignored.
os.makedirs(wav_aug_path, exist_ok=True)

In [5]:
# Copy every labelled clip into the augmented directory and write one extra
# clip per augmentation pipeline next to it. `aug_list` collects the
# file-name/label rows for the newly written clips.
aug_list = []
start_time = time.time()

# Build the file-name -> label mapping once instead of filtering the whole
# DataFrame for every clip. Keeping the first occurrence matches a
# first-match lookup when a file name appears more than once.
label_by_fname = (
    df.drop_duplicates(subset="fname").set_index("fname")["label"].to_dict()
)

# Sort the listing so the processing order does not depend on the file system.
dir_entries = sorted(os.listdir(wav_path))

# Entries without a label row (stray files, sub-directories, ...) cannot be
# processed; report them instead of failing with an opaque IndexError.
unlabeled = [name for name in dir_entries if name not in label_by_fname]
if unlabeled:
    print(
        f"Skipping {len(unlabeled)} entries of {wav_path!r} "
        f"with no label in the CSV: {unlabeled[:5]}"
    )

for i in tqdm([name for name in dir_entries if name in label_by_fname]):
    src_path = os.path.join(wav_path, i)
    data, sr = torchaudio.load(src_path)
    wav_label = label_by_fname[i]

    # The original clip is kept unchanged in the augmented directory.
    shutil.copy(src_path, os.path.join(wav_aug_path, i))

    # Convert to NumPy once per clip; every pipeline receives the same input.
    samples = data.numpy()
    for j, augment in enumerate(apply_augmentation_list):
        audio_data = augment(samples=samples, sample_rate=sr)
        new_wav_name = f"aug_{j}_{i}"
        torchaudio.save(
            os.path.join(wav_aug_path, new_wav_name),
            torch.tensor(audio_data),
            sample_rate=sr,
        )
        aug_list.append({"fname": new_wav_name, "label": wav_label})

end_time = time.time()

100%|██████████| 1200/1200 [04:43<00:00,  4.24it/s]


In [6]:
# Wall-clock duration of the augmentation loop, in seconds.
elapsed_seconds = end_time - start_time
print(elapsed_seconds)

283.1262562274933


In [7]:
# Add the augmented clips to the label table and write the combined CSV.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and produces the same rows with a fresh 0..n-1 index.
if aug_list:
    aug_df = pd.DataFrame(aug_list, columns=["fname", "label"])
    # Drop rows for these file names that a previous run of this cell already
    # added, so re-running the cell does not duplicate the augmented entries.
    df = pd.concat(
        [df[~df["fname"].isin(aug_df["fname"])], aug_df],
        ignore_index=True,
    )
df.to_csv("tomofun_aug_train.csv", index=False)