In [None]:
import whisper
import os
import openai
import paddlehub as hub
import time
import csv
from datetime import datetime
from pydub import AudioSegment
from opencc import OpenCC
from docx import Document

In [None]:
# Whisper speech-recognition wrapper
def my_whisper(audio, model_type):
    """Transcribe ``audio`` with a Whisper model, forcing Chinese as the language.

    Args:
        audio: Passed unchanged to ``model.transcribe`` (this notebook passes
            the path of a WAV file).
        model_type: Model name handed to ``whisper.load_model`` (e.g. "large").

    Returns:
        Whatever ``model.transcribe`` returns; the notebook reads its
        ``"text"`` entry.
    """
    print("開始進行中文語音辦識，請稍等")
    # The model is loaded on every call, so repeated calls pay the load cost again.
    transcription = whisper.load_model(model_type).transcribe(audio, language='zh')
    print("中文語音辦識完成")
    return transcription

# Media -> WAV conversion
def convert_wav(b4cover_file_name, b4cover_file_path,  b4cover_type ):
    """Convert a media file to a mono, 44.1 kHz WAV file in the same directory.

    Args:
        b4cover_file_name: Source file name without its extension.
        b4cover_file_path: Directory holding the source file. A trailing
            path separator is optional.
        b4cover_type: Source file extension without the leading dot (e.g. "mp4").

    Returns:
        Path of the WAV file that was written.
    """
    # os.path.join only inserts a separator when the directory lacks one, so
    # both "C:/recording/" and "C:/recording" resolve to the same file.
    b4cover_file = os.path.join(
        b4cover_file_path, f"{b4cover_file_name}.{b4cover_type}"
    )
    conver_output_file = os.path.join(
        b4cover_file_path, f"{b4cover_file_name}.wav"
    )

    # NOTE: when b4cover_type is "wav" the output path equals the input path,
    # so the export below overwrites the source file.
    audio = AudioSegment.from_file(b4cover_file)

    # Down-mix to one channel and resample to 44.1 kHz before exporting.
    wav_audio = audio.set_channels(1).set_frame_rate(44100)
    wav_audio.export(conver_output_file, format="wav")

    print("轉為wav檔成功：\n" + conver_output_file + "\n")
    return conver_output_file

In [None]:
# Traditional/Simplified Chinese conversion function (t2s or s2t).
# Mainly for use with the model.
def Ch_Convert(transcript, method):
    """Convert ``transcript`` with the OpenCC configuration named by ``method``.

    Args:
        transcript: Text to convert.
        method: Configuration name passed to ``OpenCC`` (the notebook uses
            'tw2s' and 's2tw').

    Returns:
        The value returned by the converter's ``convert`` call.
    """
    # A new converter is built on every call.
    return OpenCC(method).convert(transcript)


## Punctuation restoration using the PaddleHub model
def Add_punc(raw_script, chunk_size=300):
    """Add punctuation to an unpunctuated transcript with PaddleHub ``auto_punc``.

    The transcript is cut into fixed-size chunks, the list of chunks is passed
    to the model's ``add_puncs`` in one call, and the returned pieces are
    joined back together.

    Args:
        raw_script: Transcript text without punctuation.
        chunk_size: Maximum number of characters per chunk. Defaults to 300.

    Returns:
        The punctuated transcript, or ``""`` when ``raw_script`` is empty.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Split the long string into consecutive chunks of at most chunk_size characters.
    chunks = [
        raw_script[start:start + chunk_size]
        for start in range(0, len(raw_script), chunk_size)
    ]

    if chunks:
        model = hub.Module(name='auto_punc')
        processed_transcript = "".join(model.add_puncs(chunks))
    else:
        # An empty transcript (e.g. silent audio) produces no chunks; skip
        # loading the model rather than handing it an empty batch.
        processed_transcript = ""

    print('標點符號加入完成\n')
    return processed_transcript

# Typo-correction function
def fix_wording(fix_txt, csv_file, encoding=None):
    """Apply a CSV-defined replacement dictionary to ``fix_txt``.

    Each usable CSV row maps column 0 (text to find) to column 1 (replacement).
    Replacements run in file order; when a key appears more than once the last
    row wins. After all replacements, every remaining "-" is turned into a
    newline.

    Args:
        fix_txt: Text to correct.
        csv_file: Path of the CSV file holding the replacement pairs.
        encoding: Encoding used to read the CSV. ``None`` keeps the platform
            default.

    Returns:
        The corrected text.
    """
    replace_dict = {}
    # Load the CSV into a dictionary. newline='' is the setting the csv module
    # asks for so it can handle line endings inside quoted fields itself.
    with open(csv_file, mode='r', encoding=encoding, newline='') as file:
        for row in csv.reader(file):
            # Blank lines are read as [] and short rows have no replacement
            # column; an empty search string would make str.replace insert the
            # replacement between every character. Skip all of these.
            if len(row) < 2 or not row[0]:
                continue
            replace_dict[row[0]] = row[1]

    replaced_text = fix_txt
    for old_word, new_word in replace_dict.items():
        replaced_text = replaced_text.replace(old_word, new_word)

    # "-" acts as a line-break marker, including any "-" that the
    # replacements themselves introduced.
    myfix_result = replaced_text.replace("-", "\n")

    print('文字更正置換完成\n')

    return myfix_result

In [None]:
# Export to a plain-text file and a Word file
def convert_text(input_text, file_name , output_file_path):
    """Write ``input_text`` to ``<output_file_path>/<file_name>.txt`` as UTF-8.

    Args:
        input_text: Text to write.
        file_name: Output file name without extension.
        output_file_path: Destination directory. A trailing path separator is
            optional.
    """
    # os.path.join only adds a separator when the directory lacks one, so the
    # file lands inside the directory either way.
    output_text = os.path.join(output_file_path, file_name + ".txt")

    with open(output_text, 'w', encoding='utf-8') as file:
        file.write(input_text)

    print(f"文本已输出到 {output_text} 純文字檔中")
    
def convert_word(input_text, file_name , output_file_path):
    """Save ``input_text`` to ``<output_file_path>/<file_name>.docx``.

    The whole text is added to a new document through a single
    ``add_paragraph`` call.

    Args:
        input_text: Text to store in the document.
        file_name: Output file name without extension.
        output_file_path: Destination directory. A trailing path separator is
            optional.
    """
    doc = Document()
    doc.add_paragraph(input_text)

    # os.path.join only adds a separator when the directory lacks one, so the
    # file lands inside the directory either way.
    output_word = os.path.join(output_file_path, file_name + ".docx")
    doc.save(output_word)

    print(f"文本已输出到 {output_word} 文件中。")


In [None]:
# Confirm the input file and the Whisper model size

# Source media: <input_file_path><file_name>.<input_type>
file_name = "Video_file"
input_type = 'mp4'
input_file_path = "C:/recording/"
input_file = f"{input_file_path}{file_name}.{input_type}"

# Name passed to whisper.load_model
model_type = "large"

# "Y" enables the dictionary-based typo correction step; correct_csv is the
# CSV file holding the replacement pairs.
is_word_correcting = "Y"
correct_csv = "ReplaceWord_v1.0.csv"

# Echo the settings so they can be checked before the long-running cell starts.
print(f"=== 初始音檔確認：\n{input_file}\n")
print(f"=== 即將使用 Whisper 模型：{model_type}\n")

print(
    f"=== 確認是要改錯字，並且依字典檔改錯字：{correct_csv}\n"
    if is_word_correcting == "Y"
    else f"=== 確認不需要要改錯字\n"
)


In [None]:
import warnings

# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Record the start time
start_time = time.time()
start_datetime = datetime.fromtimestamp(start_time)

print("===================================== ")
print("開始時間：", start_datetime)
print("===================================== \n")

# Convert the source media to WAV, then run Whisper on the WAV file
wav_file = convert_wav(file_name, input_file_path, input_type)
whisper_result = my_whisper(wav_file, model_type)

# Add punctuation: convert with 'tw2s' first, punctuate, then convert the
# result back with 's2tw'.
simplified_text = Ch_Convert(whisper_result["text"], 'tw2s')
punctuated_text = Add_punc(simplified_text)
processed_transcript = Ch_Convert(punctuated_text, 's2tw')

# Run the typo-correction step only when it was requested
final_result = (
    fix_wording(processed_transcript, correct_csv)
    if is_word_correcting == "Y"
    else processed_transcript
)

# Write the result to a plain-text file and a Word file next to the input
convert_text(final_result, file_name, input_file_path)
convert_word(final_result, file_name, input_file_path)

# Record the end time
end_time = time.time()
end_datetime = datetime.fromtimestamp(end_time)

execution_time = end_time - start_time

print("===================================== ")
print("結束時間：", end_datetime)
# Elapsed time is reported in minutes
print("程式執行時間：", execution_time/60, "分")
print("===================================== \n")


In [None]:
# The text is long, so view it in segments
def print_seg(text, word_len):
    """Print ``text`` in consecutive slices of ``word_len`` characters.

    Each slice is followed by a blank line and a divider line that embeds the
    slice's starting offset.

    Args:
        text: String to display.
        word_len: Number of characters per slice.
    """
    divider_head = '=' * 5
    divider_tail = '=' * 100
    for start in range(0, len(text), word_len):
        segment = text[start:start + word_len]
        print(segment + '\n')
        print(divider_head + str(start) + divider_tail + '\n')

# Show the final transcript 500 characters at a time
print_seg(text=final_result, word_len=500)