# WhisperX48_local: 用于本地运行WhisperX48的Jupyter Notebook脚本


In [None]:
import os
import ffmpeg
import subprocess
import torch
import whisperx
import time
from pathlib import Path
import sys


# --- User configuration -----------------------------------------------------
# Local directory that already holds the faster-whisper model, so the model
# does not have to be downloaded on every run.
model_path = ""
language = "ja"  # @param ["ja","zh","en","fr", "de","es","it","pt","ru"]

# Formatting settings; they are collected into the `options` dict that is
# handed to the subtitle writers further down.
max_line_width = 1000 # @param {type:"number"}
max_line_count = 1 # @param {type:"number"}
highlight_words = False # @param ["False","True"]

# Default directory for the input audio file and the generated subtitle files.
output_dir = "./files/"
# Put the name of the audio file here.
file_name = output_dir + "sample1.wav"
audio_file = file_name
# Strip only the final extension. Splitting on the first "." would cut the
# path at the leading "./" of output_dir and leave an empty base name, so the
# output files would lose both their directory and their stem.
file_basename = os.path.splitext(file_name)[0]

# --- Model loading and transcription -----------------------------------------
# The script targets the CUDA device; the CUDA cache is emptied before the
# model is loaded.
device = "cuda"
torch.cuda.empty_cache()

print('加载模型 Loading model...')
model = whisperx.load_model(model_path, device, language=language)
audio = whisperx.load_audio(audio_file)

# Plain Whisper transcription. `tic` is the start timestamp of the elapsed
# time that is printed once alignment has finished.
tic = time.time()
print('识别中 Transcribe in progress...')
result = model.transcribe(audio, batch_size=8)
print('语音转录完成 Transcribing completed')

# --- Write the unaligned transcription to "<basename>_transcribe.srt" --------
from whisperx.utils import WriteSRT

# Writer options built from the configuration values; reused by every
# writer call later in the script.
options = {
    "max_line_width": max_line_width,
    "max_line_count": max_line_count,
    "highlight_words": highlight_words,
}

filename_srt = f"{file_basename}_transcribe.srt"
with open(filename_srt, "w", encoding="utf-8") as srt_file:
    # The result is written straight into the handle opened above.
    WriteSRT(filename_srt).write_result(result, srt_file, options)

# --- Alignment ---------------------------------------------------------------
# Fetch the alignment model and its metadata for the configured language.
print('加载调整模型 Load alignment model...')
# model_id = "jonatasgrosman/wav2vec2-large-xlsr-53-japanese"
alignment_model, metadata = whisperx.load_align_model(
    language_code=language,
    device=device,
)

# Align the transcribed segments against the audio.
print('调整识别结果 Align whisper output...')
result_aligned = whisperx.align(
    result["segments"],
    alignment_model,
    metadata,
    audio,
    device,
)

# Elapsed time is measured from `tic`, which was taken just before
# transcription started, so it covers everything run since then.
toc = time.time()
print('调整完毕 Alignment done')
print(f'Time consumpution {toc-tic} s')

# The alignment model is no longer needed: drop the reference and empty the
# CUDA cache.
del alignment_model
torch.cuda.empty_cache()

# --- Write the aligned result to "<basename>_align.srt" ----------------------
filename_srt = f"{file_basename}_align.srt"
with open(filename_srt, "w", encoding="utf-8") as srt_file:
    # Same writer options as for the unaligned SRT file.
    WriteSRT(filename_srt).write_result(result_aligned, srt_file, options)

# --- Write the aligned result to "<basename>_align.json" ---------------------
from whisperx.utils import WriteJSON

filename_json = file_basename + "_align.json"
# The handle is called json_file (not json) so that it cannot shadow the
# standard-library module name.
with open(filename_json, "w", encoding="utf-8") as json_file:
    json_writer = WriteJSON(filename_json)
    # Dump the aligned result: the file is named "_align.json" and sits next
    # to "_align.srt", so it must carry the aligned data, not the raw
    # transcription.
    json_writer.write_result(result_aligned, json_file, options)

print('字幕生成完毕 Subtitle generated!')

<font size="2">  

Last modified 2023-05-08  
This version has not been tested yet  
  
Author: ifeimi ♢ Email me: yfwu0202 AT gmail dot com  
</font>