In [1]:
import os
import glob
import shutil
import librosa
import soundfile as sf
from tqdm import tqdm
import numpy as np

In [2]:
# Root directory; each sub-directory in it is treated as one subject's set of slice recordings.
root_dir = 'data/wav'

In [3]:
# Walk every subject directory under root_dir, merge its slice recordings into
# one 16 kHz file written directly under root_dir, then remove the slices.
# NOTE: this cell is destructive - the slice files are deleted as soon as the
# merged file has been written, so it cannot be re-run on the same data.
SOURCE_SR = 48000  # rate every slice is loaded at before concatenation
TARGET_SR = 16000  # rate of the merged output file

for subject_dir in tqdm(sorted(os.listdir(root_dir))):
    full_subject_path = os.path.join(root_dir, subject_dir)
    if not os.path.isdir(full_subject_path):
        continue

    # Collect the slice files whose names start with the subject id. The fixed
    # parts of the pattern are escaped so glob metacharacters in them
    # ('[', ']', '*', '?') are matched literally; only the trailing '*' is a
    # wildcard.
    slice_pattern = os.path.join(
        glob.escape(full_subject_path), f'{glob.escape(subject_dir)}*.wav'
    )
    # Sorted order is the concatenation order.
    # NOTE(review): assumes slice indices are zero-padded (e.g. "[001]") so
    # that lexicographic order equals recording order - confirm for new data.
    wav_files = sorted(glob.glob(slice_pattern))
    if not wav_files:
        continue
    print('下面是切片wav文件')
    print(wav_files)

    # Load every slice at SOURCE_SR and join them end to end. librosa.load
    # returns (samples, sample_rate); only the samples are needed here.
    audio_concat = [librosa.load(wav_path, sr=SOURCE_SR)[0] for wav_path in wav_files]
    full_audio = np.concatenate(audio_concat)

    # Downsample the joined signal once, rather than slice by slice.
    audio_resampled = librosa.resample(full_audio, orig_sr=SOURCE_SR, target_sr=TARGET_SR)

    # Write the merged recording next to the subject directory.
    output_path = os.path.join(root_dir, f'{subject_dir}.wav')
    sf.write(output_path, audio_resampled, samplerate=TARGET_SR)
    print('已保存到')
    print(output_path)

    # Remove the original slices. This is only reached if the write above did
    # not raise.
    for wav_path in wav_files:
        os.remove(wav_path)

    # Best-effort removal of the subject directory. It is kept if anything
    # else is still inside it; the reason is reported instead of being
    # silently dropped.
    try:
        os.rmdir(full_subject_path)
    except OSError as exc:
        print(f'未删除目录 {full_subject_path}: {exc}')

  0%|          | 0/10 [00:00<?, ?it/s]

下面是切片wav文件
['data/wav\\00000995-100507\\00000995-100507[001].wav', 'data/wav\\00000995-100507\\00000995-100507[002].wav', 'data/wav\\00000995-100507\\00000995-100507[003].wav', 'data/wav\\00000995-100507\\00000995-100507[004].wav', 'data/wav\\00000995-100507\\00000995-100507[005].wav']


 10%|█         | 1/10 [00:16<02:32, 16.99s/it]

已保存到
data/wav\00000995-100507.wav
下面是切片wav文件
['data/wav\\00000999-100507\\00000999-100507[001].wav', 'data/wav\\00000999-100507\\00000999-100507[002].wav', 'data/wav\\00000999-100507\\00000999-100507[003].wav', 'data/wav\\00000999-100507\\00000999-100507[004].wav', 'data/wav\\00000999-100507\\00000999-100507[005].wav']


 20%|██        | 2/10 [00:30<01:59, 14.99s/it]

已保存到
data/wav\00000999-100507.wav
下面是切片wav文件
['data/wav\\00001000-100507\\00001000-100507[001].wav', 'data/wav\\00001000-100507\\00001000-100507[002].wav', 'data/wav\\00001000-100507\\00001000-100507[003].wav', 'data/wav\\00001000-100507\\00001000-100507[004].wav', 'data/wav\\00001000-100507\\00001000-100507[005].wav']


 30%|███       | 3/10 [00:41<01:32, 13.20s/it]

已保存到
data/wav\00001000-100507.wav
下面是切片wav文件
['data/wav\\00001006-100507\\00001006-100507[001].wav', 'data/wav\\00001006-100507\\00001006-100507[002].wav', 'data/wav\\00001006-100507\\00001006-100507[003].wav', 'data/wav\\00001006-100507\\00001006-100507[004].wav']


 40%|████      | 4/10 [00:51<01:11, 11.97s/it]

已保存到
data/wav\00001006-100507.wav
下面是切片wav文件
['data/wav\\00001008-100507\\00001008-100507[001].wav', 'data/wav\\00001008-100507\\00001008-100507[002].wav', 'data/wav\\00001008-100507\\00001008-100507[003].wav', 'data/wav\\00001008-100507\\00001008-100507[004].wav', 'data/wav\\00001008-100507\\00001008-100507[005].wav']


 50%|█████     | 5/10 [01:02<00:58, 11.63s/it]

已保存到
data/wav\00001008-100507.wav
下面是切片wav文件
['data/wav\\00001010-100507\\00001010-100507[001].wav', 'data/wav\\00001010-100507\\00001010-100507[002].wav', 'data/wav\\00001010-100507\\00001010-100507[003].wav', 'data/wav\\00001010-100507\\00001010-100507[004].wav']


 60%|██████    | 6/10 [01:11<00:43, 10.77s/it]

已保存到
data/wav\00001010-100507.wav
下面是切片wav文件
['data/wav\\00001014-100507\\00001014-100507[001].wav', 'data/wav\\00001014-100507\\00001014-100507[002].wav', 'data/wav\\00001014-100507\\00001014-100507[003].wav', 'data/wav\\00001014-100507\\00001014-100507[004].wav']


 70%|███████   | 7/10 [01:22<00:31, 10.57s/it]

已保存到
data/wav\00001014-100507.wav
下面是切片wav文件
['data/wav\\00001016-100507\\00001016-100507[001].wav', 'data/wav\\00001016-100507\\00001016-100507[002].wav', 'data/wav\\00001016-100507\\00001016-100507[003].wav', 'data/wav\\00001016-100507\\00001016-100507[004].wav', 'data/wav\\00001016-100507\\00001016-100507[005].wav', 'data/wav\\00001016-100507\\00001016-100507[006].wav', 'data/wav\\00001016-100507\\00001016-100507[007].wav']


 80%|████████  | 8/10 [01:38<00:24, 12.31s/it]

已保存到
data/wav\00001016-100507.wav
下面是切片wav文件
['data/wav\\00001018-100507\\00001018-100507[001].wav', 'data/wav\\00001018-100507\\00001018-100507[002].wav', 'data/wav\\00001018-100507\\00001018-100507[003].wav', 'data/wav\\00001018-100507\\00001018-100507[004].wav', 'data/wav\\00001018-100507\\00001018-100507[005].wav']


 90%|█████████ | 9/10 [01:50<00:12, 12.47s/it]

已保存到
data/wav\00001018-100507.wav
下面是切片wav文件
['data/wav\\00001020-100507\\00001020-100507[001].wav', 'data/wav\\00001020-100507\\00001020-100507[002].wav', 'data/wav\\00001020-100507\\00001020-100507[003].wav', 'data/wav\\00001020-100507\\00001020-100507[004].wav', 'data/wav\\00001020-100507\\00001020-100507[005].wav']


100%|██████████| 10/10 [02:03<00:00, 12.54s/it]

已保存到
data/wav\00001020-100507.wav


100%|██████████| 10/10 [02:03<00:00, 12.36s/it]
