In [1]:
import pandas as pd
from math import ceil
from scipy import signal
from scipy.signal import get_window
from numpy.random import RandomState
from librosa.filters import mel
import nlpaug.augmenter.audio as naa
import nlpaug.augmenter.spectrogram as nas
from tqdm import tqdm
import os
import gc
import torch
import numpy as np
import random
import librosa
import torchaudio
import soundfile as sf

In [2]:
sr = 44100
sr_a = 16000
n_fft = 1102
hop_length = 441
duration = 70
spacing = 5
use_autovc = False
use_augment = True

In [3]:
aug0 = naa.MaskAug(sampling_rate=sr_a, zone=(0,1), coverage=0.3)
aug1 = naa.LoudnessAug(zone=(0,1), factor=(0.3, 3))
aug3 = naa.PitchAug(sampling_rate=sr_a, zone=(0,1))
aug4 = naa.VtlpAug(zone=(0,1), sampling_rate=sr_a, factor=(0.5,3), coverage=1) # needs padding after!
aug5 = naa.ShiftAug(sampling_rate=sr_a, duration=0.5)
aug6 = naa.SpeedAug(factor=(0.5,1)) # needs cropping after!
aug7 = naa.NormalizeAug(coverage=1, method='max')
aug8 = nas.FrequencyMaskingAug()
aug9 = nas.TimeMaskingAug()

In [4]:
def save_wavs(segments_aug: list, output_path: str, file_name: str):
    """将增强后的文件保存"""
    for i, segment in enumerate(segments_aug):
        # Resample the audio to 16000 Hz
        waveform = torchaudio.transforms.Resample(sr, 16000)(segment)
        # save enhanced audio to file
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        torchaudio.save(f'{output_path}/{file_name}_{i}.wav', waveform, 16000)

In [5]:
def vc_to_mel(x, augmentation_type="notspec"):

    S = librosa.power_to_db(
                    librosa.feature.melspectrogram(
                        x.numpy().astype(np.float32)/80, sr=sr, n_fft=n_fft,
                        hop_length=hop_length),ref=np.max)
    if augmentation_type=="specaugment":
        S = aug8.augment(S)
        S = aug9.augment(S)
    return S

In [6]:
def random_augmentation(segment, aug_num):
    """
    There are 8 augmentations
    0 - Masking
    1 - Loudness
    2 - Noise
    3 - Pitch
    4 - VTLP
    5 - Shift
    6 - Speed
    7 - Normalization
    """
    dur_a = 705600 * 7 # sample_rate = 44100
    if aug_num == 0: # masking
        aug_segment = aug0.augment(segment.detach().numpy())[0]

    elif aug_num == 1: # loudness
        aug_segment = aug1.augment(segment.detach().numpy())[0]

    elif aug_num == 2: # noise
        noise = np.random.randn(len(segment))
        aug_segment = segment.detach().numpy() + 0.002 * noise

    elif aug_num == 3: # pitch
        aug_segment = aug3.augment(segment.detach().numpy())[0]

    elif aug_num == 4: # vtlp
        aug_segment = aug4.augment(segment.detach().numpy())[0]
        len_to_add = dur_a -  len(aug_segment)
        zeros = np.zeros(len_to_add)
        aug_segment = np.concatenate((aug_segment, zeros))

    elif aug_num == 5: # shift
        aug_segment = aug5.augment(segment.detach().numpy())[0]

    elif aug_num == 6: # speed
        aug_segment = aug6.augment(segment.detach().numpy())[0]
        aug_segment = aug_segment[:dur_a]

    elif aug_num == 7: # normalization
        aug_segment = aug7.augment(segment.detach().numpy())[0]

    elif aug_num == 8: # specaugment
        aug_segment = librosa.feature.melspectrogram(segment.detach().numpy())
        aug_segment = aug8.augment(aug_segment)
        aug_segment = aug9.augment(aug_segment)
        aug_segment = librosa.feature.inverse.mel_to_audio(aug_segment)

    return aug_segment

In [7]:
def segment_mp3_tensor(augment, logmel, autovc_data, audio_path, duration, spacing = None, augmentation_type = 'random'):
    """
    This function loads an mp3 or wav file, converts it to a tensor, splits up the
    tensor into segments of a desired length and returns these in a list.

    Arguments
        audio: a string containing the path, we assume mp3
        duration: required length of a segment in seconds
        spacing: step size between segments, in seconds

    Returns
        segments: a list of tensors
    """
    segments = []
    audio, notimp = sf.read(audio_path)
    if autovc_data:
        audio = librosa.resample(audio, 16000, 44100)
        audio = np.tile(audio, 8)
    audio = torch.Tensor(audio)

    audio_len = len(audio)
    start_pt = 0
    end_pt = duration * sr
    while end_pt < audio_len:
        segments.append(audio[start_pt:end_pt])
        if spacing is None: # non-overlapping segments
            start_pt += duration * sr
            end_pt += duration * sr
        else:
            start_pt += spacing * sr
            end_pt += spacing * sr
    print("data aug", len(segments))

    random_aug_size = 1
    if augment:
        # for a specific kind of augmentation, just create a vector of
        # the number representing the desired method
        if augmentation_type == 'random':
            aug_num = random.choices(population=list(range(8)), k=len(segments))
            if random_aug_size == 2:
                aug_num2 = random.choices(population=list(range(9)), k=len(segments))
            elif random_aug_size == 4:
                aug_num2 = random.choices(population=list(range(9)), k=len(segments))
                aug_num3 = random.choices(population=list(range(9)), k=len(segments))
                aug_num4 = random.choices(population=list(range(9)), k=len(segments))
        elif augmentation_type == 'masking':
            aug_num = [0 for i in range(len(segments))]
        elif augmentation_type == 'loudness':
            aug_num = [1 for i in range(len(segments))]
        elif augmentation_type == 'noise':
            aug_num = [2 for i in range(len(segments))]
        elif augmentation_type == 'pitch':
            aug_num = [3 for i in range(len(segments))]
        elif augmentation_type == 'vtlp':
            aug_num = [4 for i in range(len(segments))]
        elif augmentation_type == 'shift':
            aug_num = [5 for i in range(len(segments))]
        elif augmentation_type == 'speed':
            aug_num = [6 for i in range(len(segments))]
        elif augmentation_type == 'normalisation':
            aug_num = [7 for i in range(len(segments))]
        elif augmentation_type == 'specaugment':
            aug_num = [8 for i in range(len(segments))]
        if not augmentation_type == 'specaugment':
            segments_aug = [random_augmentation(segments[i], aug_num=aug_num[i]) for i in list(range(len(segments)))]
        else:
            segments_aug = [segments[i] for i in list(range(len(segments)))]
        if augmentation_type == 'random':
            if random_aug_size == 2:
                segments_aug2 = [random_augmentation(segments[i], aug_num=aug_num2[i]) for i in list(range(len(segments)))]
                segments_aug = segments_aug + segments_aug2
            elif random_aug_size == 4:
                segments_aug2 = [random_augmentation(segments[i], aug_num=aug_num2[i]) for i in list(range(len(segments)))]
                segments_aug3 = [random_augmentation(segments[i], aug_num=aug_num3[i]) for i in list(range(len(segments)))]
                segments_aug4 = [random_augmentation(segments[i], aug_num=aug_num4[i]) for i in list(range(len(segments)))]
                segments_aug = segments_aug + segments_aug2 + segments_aug3 + segments_aug4


        segments_aug = [torch.tensor(segments_aug[i]) for i in list(range(len(segments_aug)))]

        if logmel:
            # waveform-level augmentation
            segments_aug = [torch.tensor(vc_to_mel(segment, augmentation_type)) for segment in segments_aug]

    if logmel:
        segments = [torch.tensor(vc_to_mel(segment)) for segment in segments]

    if augment:
        return segments, segments_aug
    else:
        return segments

In [8]:
def segement_samples(file_lst, foldername, duration, spacing, logmel=True, augment=True, augmentation_type='random'):
    """
    ids_dict is only specified when we don't perform cross-validation, so the
    train & validation split is already given. So in a way, the presence or
    absence of ids_dict acts as a cv = True / False flag.
    """
    for filename in tqdm(file_lst):
        path = os.path.join(foldername, f'{filename}')
        print(path)
        if augment:
            segments, segments_aug = segment_mp3_tensor(augment, logmel, True, path, duration, spacing, augmentation_type)
            save_wavs(segments_aug, output_path="aug", file_name=filename[:-4])
            print(len(segments), len(segments_aug))
        else:
            segments = segment_mp3_tensor(augment, logmel, True, path, duration, spacing, augmentation_type)

In [9]:
def get_sample_lst(path: str) -> list:
    """返回sample的AD和HC"""
    sample_data = pd.read_csv(path, usecols=[0, 4])
    ad = list(sample_data[sample_data['dx'] == "ProbableAD"]["adressfname"])
    hc = list(sample_data[sample_data['dx'] == "Control"]["adressfname"])

    print("Number of AD samples:", len(ad), "\nNumber of HC samples:", len(hc))
    return hc, ad

In [10]:
sample_csv = "Data/sample-groundtruth.csv"
file_path = "Data/test/sample"
file_lst = os.listdir(file_path)

In [11]:
HC_lst, AD_lst = get_sample_lst(sample_csv)

Number of AD samples: 4 
Number of HC samples: 4


In [13]:
segement_samples(file_lst, file_path, 70, 5)

  audio = librosa.resample(audio, 16000, 44100)


Data/test/sample/madrs-smpl3.wav
data aug 58


 7.329732e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 7.7741774e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -7.7529679e-05 -8.0610844e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -0.0003388 ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  7.0527647e-05  9.1568130e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 5.0202456e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -2.7828859e-04 -2.7817825e-04] as keyword args. From version 0.10 passing these as posit

58 58
Data/test/sample/madrs-smpl8.wav
data aug 100


 -5.6242745e-04 -4.9708784e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -1.2553540e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 4.3009390e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -3.2076317e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  librosa.feature.melspectrogram(
 -8.2096383e-05 -4.8810947e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -1.

100 100
Data/test/sample/madrs-smpl4.wav
data aug 62


  0.000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  1.0142934e-04  1.1142093e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -2.9674359e-06 -3.3703041e-06] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.00027471] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  librosa.feature.melspectrogram(
  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.0000000e+00  0.000

62 62
Data/test/sample/madrs-smpl1.wav
data aug 61


  4.7416394e-04  4.1363816e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -1.15384637e-06  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  3.2366876e-07 -8.3644174e-08] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 4.1507628e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -2.3873324e-06 -2.1476596e-06] as keyword args. Fro

61 61
Data/test/sample/madrs-smpl7.wav
data aug 52


  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -8.0914627e-04 -8.3844550e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.00031962] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  librosa.feature.melspectrogram(
 -8.7549379e-05 -8.8436478e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  1.2170697e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.00086459] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.0000000e+00  0.00000

52 52
Data/test/sample/madrs-smpl5.wav
data aug 59


  librosa.feature.melspectrogram(
 -3.8536782e-05 -3.9039660e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  librosa.feature.melspectrogram(
  1.9592455e-05  1.4896240e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  3.7338337e-05  3.5848960e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  1.3372929e-05  2.5458774e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -1.7422906e-04 -1.7999540e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 1.4664898e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  li

59 59
Data/test/sample/madrs-smpl2.wav
data aug 52


  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -2.2581651e-05 -3.8423350e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 3.3078133e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -8.4708433e-04 -8.7619369e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  1.7479234e-04  1.6168179e-04] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -4.4393666e-05 -4.2356267e-05] as keyword args. From 

52 52
Data/test/sample/madrs-smpl6.wav
data aug 46


 1.9868243e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  0.0000000e+00  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 9.4263451e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  4.2402120e-05 -8.0013797e-06] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  6.9592985e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
  6.3350139e-06  1.7056498e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  librosa.feature.melspectrogram(
 -2.4364330e-05 -3.3675497e-05] as keyword args. From 

46 46



