# stable-whisper48: 用于云端运行stable-whisper的Jupyter Notebook脚本


In [None]:
'''
This script is a lightly modified version of the N46Whisper project by Ayanaminn, which is released under the MIT license; see the copyright notice below.

MIT License

Copyright (c) 2022 Ayanaminn

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''

#@markdown **挂载你的谷歌网盘/Mount Google Drive** 
#@markdown **<br/>【重要】:** 务必在"修改"->"笔记本设置"->"硬件加速器"中选择GPU！否则处理速度会非常慢。在开始运行脚本之前，请先将您要转录的音频文件上传到谷歌云盘中。
#@markdown **<br/>【IMPORTANT】:** Make sure you select GPU as hardware accelerator in notebook settings, otherwise the processing speed will be very slow. Please upload the audio file you want to transcribe before using this script. 
# Install geemap with pip before it is imported below.
# NOTE(review): presumably this also pulls in ipytree, which the file-browser
# cell imports — confirm before removing.
! pip install geemap
from google.colab import drive
from google.colab import files
import os
from IPython.display import clear_output
import geemap

# Wipe the pip installation log from the cell output.
clear_output()
# Mount the user's Google Drive at /drive.
drive.mount('/drive')
print('Google Drive mounted, please execute next cell')
print('谷歌云盘挂载完毕，请执行下个单元格')

In [None]:
#@markdown **配置stable-whisper/Setup stable-whisper**

# Install (or upgrade to) the latest stable-ts release; no version is pinned.
! pip install -U stable-ts
# Wipe the pip installation log from the cell output.
clear_output()
print('Stable-whisper installed, please execute next cell')
print('语音识别库配置完毕，请执行下个单元格')

In [None]:
#@markdown **从谷歌网盘选择文件/Select File From Google Drive**

# @markdown <font size="2">Navigate to the file you want to transcribe, left-click to highlight the file, then click 'Select' button to confirm.
# @markdown <br/>从网盘目录中选择要转换的文件(视频/音频），单击选中文件，点击'Select'按钮以确认。</font><br/>
# @markdown <br/><font size="2">If use local file, ignore this cell and move to the next.
# @markdown <br/>若希望从本地上传文件，则跳过此步执行下一单元格。</font><br/>
# @markdown <br/><font size="2">If file uploaded to drive after execution, execute this cell again to refresh.
# @markdown <br/>若到这一步才上传文件到谷歌盘，则重复执行本单元格以刷新文件列表。</font>
from ipytree import Tree, Node
import ipywidgets as widgets
from ipywidgets import interactive
import os
from google.colab import output 
# Enable Colab's custom widget manager before the tree widget is displayed.
# NOTE(review): presumably required for third-party widgets such as ipytree
# to render in Colab — confirm.
output.enable_custom_widget_manager()
# Flag: the input file comes from Google Drive (the upload cell sets it to False).
use_drive = True
# NOTE: a ``global`` statement at module level has no effect; the name is
# already module-global. It is kept here unchanged.
global drive_dir
# Path of the file chosen in the browser; written by the 'Select' button callback.
drive_dir = ''

def file_tree():
    """Build a simple Google Drive media-file browser widget.

    Walks ``/drive/MyDrive`` and adds every non-hidden folder and every media
    file (matched by extension, case-insensitively) to an ``ipytree`` tree.
    The tree is shown next to a text box holding the highlighted path and a
    'Select' button.  Clicking 'Select' stores the path from the text box in
    the module-level ``drive_dir`` variable and clears the cell output.

    Returns:
        The ``widgets.HBox`` that contains the whole browser.
    """
    # Media extensions, including the leading dot so that names which merely
    # end in the same letters (e.g. "contacts" vs ".ts") are not matched.
    media_extensions = ('.mp3', '.m4a', '.flac', '.aac', '.wav',
                        '.mp4', '.mkv', '.ts', '.flv')

    # create widgets as a simple file browser
    full_widget = widgets.HBox()
    left_widget = widgets.VBox()
    right_widget = widgets.VBox()

    path_widget = widgets.Text()
    path_widget.layout.min_width = '300px'
    select_widget = widgets.Button(
        description='Select',
        button_style='primary',
        tooltip='Select current media file.',
    )

    right_widget.children = [select_widget]
    full_widget.children = [left_widget]

    tree_widget = widgets.Output()
    tree_widget.layout.max_width = '300px'
    # ``overflow`` is a Layout trait; setting it on the widget itself would
    # only create an unused Python attribute.
    tree_widget.layout.overflow = 'auto'

    left_widget.children = [path_widget, tree_widget]

    # init file tree
    my_tree = Tree(multiple_selection=False)
    my_tree_dict = {}  # absolute path -> tree Node

    def select_file(b):
        """Store the highlighted path in ``drive_dir`` and confirm to the user."""
        global drive_dir
        drive_dir = path_widget.value
        clear_output()
        print('File selected, please execute next cell')
        print('已选择文件，请执行下个单元格')

    select_widget.on_click(select_file)

    def handle_file_click(event):
        """Show the clicked file's path and reveal the 'Select' button."""
        if not event['new']:
            return
        cur_node = event['owner']
        for key, node in my_tree_dict.items():
            if node is not cur_node or not os.path.isfile(key):
                continue
            try:
                # Probe that the file can actually be opened before offering it.
                with open(key, 'rb'):
                    pass
            except OSError:
                path_widget.value = key
                path_widget.disabled = True
                select_widget.disabled = True
                return
            path_widget.value = key
            path_widget.disabled = False
            select_widget.disabled = False
            full_widget.children = [left_widget, right_widget]
            # Each node is registered under exactly one path.
            return

    def handle_folder_click(event):
        """Hide the 'Select' button while a folder is highlighted."""
        if event['new']:
            full_widget.children = [left_widget]

    # redirect cwd to default drive root path and add nodes
    my_dir = '/drive/MyDrive'
    my_root_node = Node(os.path.basename(my_dir))
    my_tree_dict[my_dir] = my_root_node
    my_tree.add_node(my_root_node)
    my_root_node.observe(handle_folder_click, 'selected')

    for root, d_names, f_names in os.walk(my_dir):
        # Prune hidden folders in place (slice assignment) so os.walk never
        # descends into them; removing items while iterating the same list
        # would skip entries.  Sorting fixes the traversal order.
        d_names[:] = sorted(d for d in d_names if not d.startswith('.'))

        # only add media files
        media_names = sorted(
            f for f in f_names if f.lower().endswith(media_extensions)
        )

        if root not in my_tree_dict:
            # os.walk is top-down, so the parent folder is already registered.
            parent_node = my_tree_dict[os.path.dirname(root)]
            node = Node(os.path.basename(root))
            my_tree_dict[root] = node
            parent_node.add_node(node)
            node.observe(handle_folder_click, 'selected')

        if media_names:
            parent_node = my_tree_dict[root]
            # Folders that contain media start collapsed.
            parent_node.opened = False
            for f_name in media_names:
                node = Node(f_name)
                node.icon = 'file'
                my_tree_dict[os.path.join(root, f_name)] = node
                parent_node.add_node(node)
                node.observe(handle_file_click, 'selected')

    with tree_widget:
        tree_widget.clear_output()
        display(my_tree)

    return full_widget


# Build the browser widget; leaving it as the cell's last expression makes the
# notebook display it.
tree= file_tree()
tree


In [None]:
#@markdown **从本地上传文件/Upload Local File**
# @markdown <br/><font size="2">If use file in google drive, ignore this cell and move to the next.
# @markdown <br/>若已选择谷歌盘中的文件，则跳过此步执行下一单元格。</font>

from google.colab import files

# Open Colab's upload dialog; the result maps each uploaded file name to its content.
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of an IndexError, and leave
    # ``use_drive`` untouched so a cancelled upload does not discard an
    # earlier Google Drive selection.
    raise ValueError(
        'No file was uploaded, please run this cell again. '
        '未上传任何文件，请重新执行本单元格。'
    )
# Only the first uploaded file is transcribed.
file_name = next(iter(uploaded))
# Switch to the uploaded file only once the upload has succeeded.
use_drive = False

In [None]:
#@markdown **进行参数设置/Parameter setup**
# @markdown <br/><font size="2">Choose the Whisper model size and the language spoken in the audio (a language code such as "ja", "en" or "zh"). Do not skip this cell.
# @markdown <br/>选择Whisper模型大小和音频所用语言（语言代码，如 "ja"、"en"、"zh"）。请勿跳过此单元格。</font>

# Name of the Whisper model to load; exposed as a dropdown in the Colab form.
model_size = "large-v2"  # @param ["base","small","medium", "large", "large-v1", "large-v2"]
# Language code of the speech to transcribe; exposed as a free-text form field.
language = "ja"  # @param {type:"string"}
#sub_style = "default"  # @param ["default", "ikedaCN", "kaedeCN","sugawaraCN"]
#compression_ratio_threshold = 2.4 # @param {type:"number"}
#no_speech_threshold = 0.6 # @param {type:"number"}
#logprob_threshold = -1.0 # @param {type:"number"}
#condition_on_previous_text = "True" # @param ["True", "False"]

In [None]:
import os
import ffmpeg
import subprocess
import torch
import stable_whisper
import time
from pathlib import Path
import sys

# Resolve the input file.  ``use_drive`` is True after the Google Drive browser
# cell ran and False after the local-upload cell ran; only in the Drive case
# does ``drive_dir`` replace ``file_name``, so an uploaded file is no longer
# overwritten by a stale or empty Drive selection.
if use_drive:
    file_name = drive_dir
if not file_name:
    # Stop early with a clear message rather than handing an empty path to the model.
    raise ValueError(
        'No input file: select a file from Google Drive or upload a local file first. '
        '未选择文件，请先从谷歌网盘选择文件或从本地上传文件。'
    )
file_basename = os.path.basename(file_name)
output_dir = os.path.dirname(file_name)

audio_file = file_name

# Use the GPU when one is available, otherwise fall back to the CPU instead of
# failing while loading the model.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print('未检测到GPU，将使用CPU，速度会非常慢 No GPU detected, falling back to CPU (very slow)')
torch.cuda.empty_cache()
print('加载Whisper模型 Loading Whisper model...')
model = stable_whisper.load_model(model_size, device)

# stable-whisper transcribe
tic = time.time()
print('识别中 Transcribe in progress...')
results = model.transcribe(audio_file, language=language)
print('识别完毕 Done')
# The subtitle file is written next to the input as "<input name>.srt".
# NOTE(review): results_to_sentence_srt looks like the stable-ts 1.x helper,
# while the install cell pulls the latest release unpinned — confirm that it
# still exists in the installed version.
stable_whisper.results_to_sentence_srt(results, f'{file_name}.srt')

toc = time.time()
print('字幕生成完毕 Subtitle generated!')
print(f'Time consumpution {toc-tic}s')

# Release cached GPU memory once transcription is finished.
torch.cuda.empty_cache()

<font size="2">  

Last modified 2023-03-08  
  
Author: ifeimi ♢ Email me: yfwu0202 AT gmail.com

