项目、环境有关 Env

In [None]:
# @title 克隆或更新存储库 Clone or Update the repository 

# Make sure to pull the latest changes from the repository
# NOTE(review): the %cd below is disabled; presumably the notebook is expected
# to already be running inside the repository folder - confirm.
# %cd ./AudioLabeling
# Stash local modifications before pulling (presumably so that the pull is not
# blocked by uncommitted changes); they stay in the stash and are not re-applied here.
!git stash
!git pull https://github.com/X-T-E-R/AudioLabeling.git 

In [None]:
# @title 安装依赖项 Install dependencies 
# Install torch / torchvision / torchaudio from the PyTorch "cu118" wheel index,
# then the packages listed in requirements.txt.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
%pip install -r requirements.txt

前置处理 PreProcessing

In [None]:
# @title 下载数据集(音频) Download the dataset (Audio) 

In [None]:
# @title 分离人声 Separate Vocals

# 这个部分还没有写好，请使用 uvr 5 或 MVSEP-MDX23 手动分离
# This step is not implemented yet; please separate the vocals manually with UVR5 or MVSEP-MDX23.

生成字幕、切分音频 SRT and Slice

In [None]:
# @title 生成srt字幕 Generate srt subtitles
# Writes an .srt file next to every audio file found at source_path, using the
# project's Audio2Srt recognizer. source_path may be a folder or a single file.
import os

from tools.my_utils import scan_audios_walk
from src.srt_generator.audio2srt import Audio2Srt

source_path = "Input/audios/test/" # Audio file path, can be a folder or a file 音频文件路径， 可以是文件夹或文件 @param {type:"string"}
# Only create the folder when source_path is not an existing file:
# os.makedirs(..., exist_ok=True) raises FileExistsError on a regular file,
# which made the documented single-file mode crash.
if not os.path.isfile(source_path):
    os.makedirs(source_path, exist_ok=True)

if os.path.isdir(source_path):
    # The scan results are joined onto source_path, i.e. they are treated as
    # paths relative to it.
    audio_list = [os.path.join(source_path, audio) for audio in scan_audios_walk(source_path)]
else:
    audio_list = [source_path]

models_path = 'models/iic' # 设定模型路径，留空或路径不存在则 modelscope 会自动下载模型 @param {type:"string"}

with Audio2Srt(models_path=models_path) as a2s:
    for audio_path in audio_list:
        # splitext only looks at the final path component, so a dot in a
        # folder name cannot truncate the path of an extension-less file.
        srt_path = os.path.splitext(audio_path)[0] + '.srt'
        srt_content = a2s.generate_srt(audio_path)
        try:
            with open(srt_path, 'w') as f:
                f.write(srt_content)
        except Exception as e:
            # Best effort: report the failure with its cause and continue
            # with the remaining files (Ctrl-C is no longer swallowed).
            print(f"生成字幕文件失败：{srt_path}")
            print(f"    {type(e).__name__}: {e}")
        else:
            print(f"生成字幕文件：{srt_path}")

In [None]:
# @title 切分音频 Split Audio
# Pairs each audio file under source_path with an .srt file, merges/filters the
# subtitles, slices the audio into one folder per pair under output_path, and
# optionally merges those folders into output_path itself.
import os
import shutil

from tools.my_utils import scan_audios_walk, scan_ext_walk
from src.srt_slicer.srt_utils import merge_subtitles_with_lib, slice_audio_with_lib, parse_srt_with_lib, generate_srt_with_lib, filter_subtitles

# Reuse source_path when an earlier cell defined it in this session; otherwise
# fall back to the default input folder.
try:
    print(f"source_path: {source_path}")
except NameError:
    source_path = "Input/audios/test/" 
    os.makedirs(source_path, exist_ok=True)

charactor = "test" # Character name 角色名 (或者说文件夹名) @param {type:"string"}
output_path = "Output/sliced_audio/test/" # @param {type:"string"}

# output_path is wiped below, so refuse to run when the input lives inside it.
# This can happen when a post-processing cell has rebound source_path to
# output_path and this cell is then re-run.
_source_abs = os.path.abspath(source_path)
_output_abs = os.path.abspath(output_path)
if _source_abs == _output_abs or _source_abs.startswith(_output_abs + os.sep):
    raise ValueError(
        f"source_path ({source_path}) is inside output_path ({output_path}); "
        "refusing to delete the input files"
    )

# Clear stale results first, then recreate the (now empty) output folder.
print(f"Remove old files in {output_path}")
shutil.rmtree(output_path, ignore_errors=True)
os.makedirs(output_path, exist_ok=True)

# scan srt files and audio files
print(f"scan srt files and audio files in {source_path}")
items = []

if os.path.isdir(source_path):
    srt_list = scan_ext_walk(source_path, '.srt')
    audio_list = scan_audios_walk(source_path)
else:
    # Single-file mode: keep the audio entry relative to its folder, because
    # every path below is re-joined onto source_path.
    audio_list = [os.path.basename(source_path)]
    source_path = os.path.dirname(source_path) or "."
    srt_list = scan_ext_walk(source_path, '.srt')

print(f"audio_list: {audio_list}")
print(f"srt_list: {srt_list}")
for audio_file in audio_list:
    audio_stem = os.path.splitext(audio_file)[0]
    audio_file_name = os.path.basename(audio_stem)
    # Prefer the subtitle with exactly the same relative path stem; only fall
    # back to the looser "name appears in the srt path" rule when there is
    # none, so that "a.wav" is not paired with "ba.srt" while "a.srt" exists.
    srt_file = next((s for s in srt_list if os.path.splitext(s)[0] == audio_stem), None)
    if srt_file is None:
        srt_file = next((s for s in srt_list if audio_file_name in s), None)
    if srt_file is None:
        print(f"未找到配对的字幕文件，跳过：{audio_file}")
        continue
    items.append((audio_file, srt_file))
    print(f"找到配对的音频文件：{audio_file} 和字幕文件：{srt_file}")

# srt合并设置
merge_zero_interval = True # 是否合并相当短的间隔 @param {type:"boolean"} 
short_interval = 0.05 # 短间隔时间 @param {type:"number"}
max_interval = 0.8 # 最大间隔时间 @param {type:"number"}
max_text_length = 100 # 最大文本长度 @param {type:"number"}
add_period = True # 是否添加句号 @param {type:"boolean"}

min_text_len = 5 # 合并后允许的最小字幕长度 @param {type:"number"}
language = 'ZH' # 语言 @param {type:"string"}

merge_folder = True # 是否合并切分好的文件夹和list @param {type:"boolean"}

save_paths = []
for index, (audio_file, srt_file) in enumerate(items):
    print(f"开始切分音频文件：{audio_file} 从字幕文件：{srt_file}")
    # One output folder per pair; the index prefix keeps folders unique when
    # two subtitle files share a base name.
    srt_stem = os.path.splitext(os.path.basename(srt_file))[0]
    save_path = os.path.join(output_path, f"{index}_{srt_stem}")
    save_paths.append(save_path)
    audio_file_full_path = os.path.join(source_path, audio_file)
    srt_file_full_path = os.path.join(source_path, srt_file)

    try:
        with open(srt_file_full_path, 'r') as f:
            srt_content = f.read()
        subtitles = parse_srt_with_lib(srt_content)
        merged_subtitles = merge_subtitles_with_lib(subtitles, short_interval, max_interval, max_text_length, add_period, merge_zero_interval)
        merged_subtitles = filter_subtitles(merged_subtitles, min_text_len)
    except Exception:
        print(f"打开字幕文件失败：{srt_file_full_path}")
        raise  # re-raise unchanged so the original traceback is kept
    print("合并字幕完成，开始切分音频")

    try:
        slice_audio_with_lib(audio_file_full_path, save_folder=save_path, format="wav", subtitles=merged_subtitles, language=language)
    except Exception:
        print(f"切分音频文件失败：{audio_file_full_path}")
        raise


if merge_folder and len(save_paths) > 1:
    print("开始合并文件夹")
    from src.list_merger.list_utils import merge_list_folders

    # Fold every other folder into the first one.
    # NOTE(review): assumes merge_list_folders(..., None, first, second) merges
    # the second folder/list into the first - confirm against list_utils.
    first_folder = save_paths[0]
    first_list_file = os.path.join(first_folder, 'datamapping.list')
    for second_folder in save_paths[1:]:
        second_list_file = os.path.join(second_folder, 'datamapping.list')
        merge_list_folders(first_list_file, second_list_file, None, first_folder, second_folder)

    print("合并文件夹完成，开始清理")
    # Strip every trailing separator so the temporary folder is a sibling of
    # output_path; otherwise it would sit inside the folder removed below.
    output_path = output_path.rstrip('/\\')
    tmp_path = output_path + "_tmp"
    shutil.rmtree(tmp_path, ignore_errors=True)
    # Park the merged folder outside output_path, drop the leftovers, then
    # move the merged folder back so that it becomes output_path itself.
    shutil.move(first_folder, tmp_path)
    shutil.rmtree(output_path, ignore_errors=True)
    shutil.move(tmp_path, output_path)

后处理 Postprocessing

In [None]:
# @title 响度标准化 Loudness Normalization
%pip install tqdm
from tqdm import tqdm

try:
    print(f"source_path: {output_path}")
    source_path = output_path
except:
    source_path = "Output/sliced_audio/test/"
    
target_loudness = -16.0 # 目标响度 @param {type:"number"}
from src.audio_normalizer.my_utils import normalize_loudness

audio_list = scan_audios_walk(source_path)
for audio_file in tqdm(audio_list):
    audio_file_full_path = os.path.join(source_path, audio_file)
    normalize_loudness(audio_file_full_path, target_loudness=target_loudness, target_path=audio_file_full_path)

In [None]:
# @title (可选) 进行音频中文情绪分类 (Optional) Chinese Emotion Classification in Audio
# Warning: the current version only supports Chinese audio and does not update
# the audio file names recorded in the list file; use with caution.
# Warning: based on emotion2vec; results may be inaccurate (and are certainly
# inaccurate for singing material).
#
# Renames every audio file under the sliced-audio folder to
# "<emotion>#<original file name>".
# Import everything this cell uses so it also runs on its own, without the
# earlier cells having been executed in this session.
import os

from tools.my_utils import scan_audios_walk
from src.emotion_recognition.audio2emotion import Audio2Emotion

# Reuse models_path from the subtitle cell when available.
try:
    print(f"models_path: {models_path}")
except NameError:
    models_path = ""

# Use the output folder of the split cell when it ran in this session,
# otherwise the default sliced-audio folder.
try:
    print(f"source_path: {output_path}")
    source_path = output_path
except NameError:
    source_path = "Output/sliced_audio/test/"

audio_list = scan_audios_walk(source_path)
with Audio2Emotion(models_path=models_path) as a2e:
    for audio_file in audio_list:
        audio_file_full_path = os.path.join(source_path, audio_file)
        emotion = a2e.get_emotion(audio_file_full_path)
        # Keep only the part of the label before the first '/'.
        emotion = emotion.split('/')[0]
        # Prefix the complete file name (extension included). This also works
        # for files without an extension, where splitting on '.' used to fail.
        folder, file_name = os.path.split(audio_file_full_path)
        new_file_full_path = os.path.join(folder, f"{emotion}#{file_name}")
        os.rename(audio_file_full_path, new_file_full_path)