In [1]:
import ast
import json
import os
import zipfile

import librosa
import numpy as np
import soundfile as sf
import wget

In [2]:
def get_immediate_subdirectories(a_dir):
    """Return the names of the directories located directly inside ``a_dir``.

    Only bare entry names are returned (not joined paths), in the order
    ``os.listdir`` reports them. Nested levels are not visited.
    """
    subdirectories = []
    for entry in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, entry)):
            subdirectories.append(entry)
    return subdirectories

def get_immediate_files(a_dir):
    """Return the names of the regular files located directly inside ``a_dir``.

    Only bare entry names are returned (not joined paths), in the order
    ``os.listdir`` reports them. Sub-directories are excluded and not visited.
    """
    file_names = []
    for entry in os.listdir(a_dir):
        entry_path = os.path.join(a_dir, entry)
        if os.path.isfile(entry_path):
            file_names.append(entry)
    return file_names
def resample_audio(input_path, output_path, target_sr=16000):
    """Resample one audio file with librosa and write it out with soundfile.

    The file is loaded at its native sampling rate (``sr=None``), resampled
    only when that rate differs from ``target_sr``, and then written to
    ``output_path`` at ``target_sr``.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the converted audio is written to.
        target_sr: Desired sampling rate in Hz (default 16000).

    Returns:
        True when the file was written; False when any exception occurred
        (the error is printed rather than raised, so a batch can continue).
    """
    try:
        # Load at the native rate so we can tell whether resampling is needed.
        waveform, native_sr = librosa.load(input_path, sr=None)
        needs_resampling = native_sr != target_sr
        if needs_resampling:
            waveform = librosa.resample(waveform, orig_sr=native_sr, target_sr=target_sr)
        # Write the (possibly resampled) signal at the target rate.
        sf.write(output_path, waveform, target_sr)
    except Exception as err:
        print(f"处理文件 {input_path} 时出错: {err!s}")
        return False
    return True

In [None]:
# Download ESC-50 and resample every clip to 16 kHz.
# Dataset provided in https://github.com/karolpiczak/ESC-50
base_dir = './data/ESC-50-master/'

if not os.path.exists('./data/ESC-50-master'):
    print("开始下载 ESC-50 数据集...")
    os.makedirs('./data', exist_ok=True)

    esc50_url = 'https://github.com/karoldvl/ESC-50/archive/master.zip'
    # Use the path wget reports instead of assuming the file name: when a
    # stale archive is already present, wget saves under a different name
    # (e.g. "... (1).zip") and a hard-coded path would open the old file.
    zip_path = wget.download(esc50_url, out='./data/')
    print("\n下载完成，开始解压...")

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('./data/')
    # os.remove(zip_path)  # uncomment to delete the archive after extraction
    print("解压完成")

# The conversion is intentionally NOT nested under the download guard: if a
# previous run was interrupted after extraction, the dataset directory exists
# but audio_16k is missing or incomplete, and re-running this cell must still
# finish the job. Clips that already have an output file are skipped, which
# keeps a re-run cheap.
source_audio_dir = os.path.join(base_dir, 'audio')
resampled_audio_dir = os.path.join(base_dir, 'audio_16k')
os.makedirs(resampled_audio_dir, exist_ok=True)

audio_list = [
    name for name in get_immediate_files(source_audio_dir)
    if not os.path.exists(os.path.join(resampled_audio_dir, name))
]
if audio_list:
    print("开始转换音频采样率到16kHz...")
    for audio in audio_list:
        input_path = os.path.join(source_audio_dir, audio)
        output_path = os.path.join(resampled_audio_dir, audio)
        if resample_audio(input_path, output_path):
            print(f'成功处理: {audio}')
        else:
            print(f'处理失败: {audio}')

In [4]:
# Build the display-name -> class-index lookup from the label CSV.
# Row 0 is the header; column 0 is the class index and column 2 the display
# name, which is stored wrapped in quotes and therefore has to be unquoted.
label_set = np.loadtxt('./data/esc_class_labels_indices.csv', delimiter=',', dtype='str')
label_map = {}
for label_row in label_set[1:]:
    # ast.literal_eval only parses literals, so unlike eval() it cannot
    # execute arbitrary code that might be embedded in the CSV.
    label_map[ast.literal_eval(label_row[2])] = str(label_row[0])
print(label_map)

# Make sure the directory that receives the JSON datafiles exists.
os.makedirs('./data/datafiles', exist_ok=True)

base_path = "./data/ESC-50-master/audio_16k/"
# The metadata does not depend on the fold, so it is read once up front.
# Columns used below: 0 = file name, 1 = fold number, 3 = category name.
meta = np.loadtxt('./data/ESC-50-master/meta/esc50.csv', delimiter=',', dtype='str', skiprows=1)

# Cross-validation split: for each fold, the clips of that fold form the
# evaluation set and the clips of every other fold form the training set.
for fold in [1, 2, 3, 4, 5]:
    train_wav_list = []
    eval_wav_list = []
    for meta_row in meta:
        cur_label = label_map[str(meta_row[3])]
        cur_path = str(meta_row[0])
        cur_fold = int(meta_row[1])
        # /m/07rwj is just a dummy prefix; the index is zero-padded to two digits.
        cur_dict = {"wav": base_path + cur_path, "labels": '/m/07rwj' + cur_label.zfill(2)}
        if cur_fold == fold:
            eval_wav_list.append(cur_dict)
        else:
            train_wav_list.append(cur_dict)

    print('fold {:d}: {:d} training samples, {:d} test samples'.format(fold, len(train_wav_list), len(eval_wav_list)))

    with open('./data/datafiles/esc_train_data_' + str(fold) + '.json', 'w') as f:
        json.dump({'data': train_wav_list}, f, indent=1)

    with open('./data/datafiles/esc_eval_data_' + str(fold) + '.json', 'w') as f:
        json.dump({'data': eval_wav_list}, f, indent=1)


{'dog': '0', 'rooster': '1', 'pig': '2', 'cow': '3', 'frog': '4', 'cat': '5', 'hen': '6', 'insects': '7', 'sheep': '8', 'crow': '9', 'rain': '10', 'sea_waves': '11', 'crackling_fire': '12', 'crickets': '13', 'chirping_birds': '14', 'water_drops': '15', 'wind': '16', 'pouring_water': '17', 'toilet_flush': '18', 'thunderstorm': '19', 'crying_baby': '20', 'sneezing': '21', 'clapping': '22', 'breathing': '23', 'coughing': '24', 'footsteps': '25', 'laughing': '26', 'brushing_teeth': '27', 'snoring': '28', 'drinking_sipping': '29', 'door_wood_knock': '30', 'mouse_click': '31', 'keyboard_typing': '32', 'door_wood_creaks': '33', 'can_opening': '34', 'washing_machine': '35', 'vacuum_cleaner': '36', 'clock_alarm': '37', 'clock_tick': '38', 'glass_breaking': '39', 'helicopter': '40', 'chainsaw': '41', 'siren': '42', 'car_horn': '43', 'engine': '44', 'train': '45', 'church_bells': '46', 'airplane': '47', 'fireworks': '48', 'hand_saw': '49'}
fold 1: 1600 training samples, 400 test samples
fold 2: 1

In [5]:
# Final status line marking the end of the ESC-50 preparation steps.
print("Finished ESC-50 Preparation")

Finished ESC-50 Preparation
