

> Script made by biscuits. Github: https://github.com/danrynr



# Install Whisper and its dependencies

In [None]:
# Refresh the package index and upgrade installed packages.
# -y answers apt's confirmation prompt so the cell cannot stall under "Run all".
!sudo apt update && sudo apt upgrade -y

In [None]:
# System packages: ffmpeg and libcublas11.
# -y answers apt's confirmation prompt so the install does not wait for input.
!sudo apt update && sudo apt install -y ffmpeg libcublas11
# Python packages, pinned where the notebook pins them. %pip installs into the
# environment of the running kernel.
%pip install git+https://github.com/openai/whisper.git whisper-ctranslate2==0.4.8 ctranslate2==4.4.0 faster-whisper==1.1.0

# Create AI Subtitle

### Command Help

In [None]:
# Print the usage text and option list of the OpenAI `whisper` CLI.
!whisper --help

usage: whisper [-h] [--model MODEL] [--model_dir MODEL_DIR] [--device DEVICE]
               [--output_dir OUTPUT_DIR] [--output_format {txt,vtt,srt,tsv,json,all}]
               [--verbose VERBOSE] [--task {transcribe,translate}]
               [--language {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,yue,zh,Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,Cantonese,Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,Kazakh,Khmer,Korean,Lao,Latin,Latvian,Le

In [None]:
# Print the usage text and option list of `whisper-ctranslate2`,
# the CLI that the transcription cell below invokes.
!whisper-ctranslate2 --help

usage: whisper-ctranslate2 [-h]
                           [--model {tiny,tiny.en,base,base.en,small,small.en,medium,medium.en,large-v1,large-v2,large-v3,large-v3-turbo,turbo,distil-large-v2,distil-large-v3,distil-medium.en,distil-small.en}]
                           [--model_directory MODEL_DIRECTORY] [--model_dir MODEL_DIR]
                           [--local_files_only LOCAL_FILES_ONLY] [--output_dir OUTPUT_DIR]
                           [--output_format {txt,vtt,srt,tsv,json,all}]
                           [--pretty_json PRETTY_JSON] [--print_colors PRINT_COLORS]
                           [--verbose VERBOSE] [--highlight_words HIGHLIGHT_WORDS]
                           [--max_line_width MAX_LINE_WIDTH] [--max_line_count MAX_LINE_COUNT]
                           [--max_words_per_line MAX_WORDS_PER_LINE] [--device {auto,cpu,cuda}]
                           [--threads THREADS] [--device_index DEVICE_INDEX]
                           [--compute_type {default,auto,int8,int8_float

### Transcribe & make subtitles

In [None]:
import os

# Path of one audio file to process; leave empty to process every file in `input_dir`.
single_input = "" #@param {type:"string"}
# Folder scanned for audio files in batch mode.
input_dir = "/content/drive/MyDrive/SUBS/AUDIO" #@param {type:"string"}
# The batch folder is only needed when no single file is given, so do not
# require it to exist in single-file mode.
inputs = [] if single_input else os.listdir(input_dir)
# Folder that receives the generated subtitle files.
output_dir = "/content/drive/MyDrive/SUBS/SUBFILES" #@param {type:"string"}
# On a first run the output folder may not exist yet; treat that as "no subtitles so far"
# instead of failing before any transcription starts.
outputs = os.listdir(output_dir) if os.path.isdir(output_dir) else []

# Settings forwarded as command-line flags to whisper-ctranslate2 by whisper_run().
# Most are kept as strings because they are interpolated into a shell command.
model = "large-v2" #@param ["large-v3-turbo", "large-v3", "large-v2"]
threads = "16" #@param ["0", "2", "4", "8", "16"]
condition_on_previous_text = "False" #@param ["True", "False"]
no_repeat_ngram_size = "3" #@param {type:"string"}
vad_threshold = 0.4 #@param {type:"slider", min:0, max:1, step:0.025}
# The slider yields a fraction between 0 and 1. Converting with int() truncated
# every value below 1 to 0, so keep it as a float.
vad_threshold = float(vad_threshold)
vad_max_speech_duration_s = "7" #@param {type:"string"}
language = "ko" #@param ["ko", "ja"] {allow-input:true}
output_format = "srt" #@param ["txt", "vtt", "srt", "tsv", "json"]
verbose = True #@param {type:"boolean"}
print_colors = False #@param {type:"boolean"}


def whisper_run(file_path, output_dir):
  """Create a subtitle file for one audio file unless it already exists.

  The subtitle name is the base name of `file_path` with its extension
  replaced by the module-level `output_format`. If a file with that name is
  already present in `output_dir`, nothing is run. Otherwise
  `whisper-ctranslate2` is invoked through the shell with `--task translate`,
  the VAD filter enabled, and the remaining options taken from the
  module-level settings (`model`, `threads`, `language`, ...).

  Args:
    file_path: Path of the audio file to process.
    output_dir: Folder checked for an existing subtitle and passed to the
      CLI as `--output_dir`.
  """
  stem, _ = os.path.splitext(os.path.basename(file_path))
  sub_file_name = f"{stem}.{output_format}"
  print(f"Creating Subtitle: {sub_file_name}")

  # Guard clause: skip files whose subtitle was produced by an earlier run.
  already_done = os.path.isfile(os.path.join(output_dir, sub_file_name))
  if already_done:
    print(f"Subtitle for {file_path} already exists!")
    return

  # IPython expands each $name from the Python namespace before running the command.
  !whisper-ctranslate2 \
  --model "$model" \
  --threads "$threads" \
  --language "$language" \
  --condition_on_previous_text "$condition_on_previous_text" \
  --no_repeat_ngram_size "$no_repeat_ngram_size" \
  --vad_filter true \
  --vad_threshold "$vad_threshold" \
  --vad_max_speech_duration_s "$vad_max_speech_duration_s" \
  --output_format "$output_format" \
  --task translate "$file_path" \
  --output_dir "$output_dir" \
  --verbose "$verbose" \
  --print_colors "$print_colors"

# Single-file mode when `single_input` is set, otherwise batch over `input_dir`.
# A truthiness test covers both "" and None; the previous len() call ran before
# the None check and would have raised on None.
if single_input:
  whisper_run(single_input, output_dir)
else:
  # Keep only visible regular files, so the printed count matches what is
  # actually processed and sub-folders (e.g. .ipynb_checkpoints) are never
  # handed to the transcriber.
  inputs = [
      name for name in inputs
      if not name.startswith('.')
      and os.path.isfile(os.path.join(input_dir, name))
  ]
  outputs = [x for x in outputs if '.ipynb_checkpoints' not in x]

  print(f"Total Audio to subs: {len(inputs)}")
  print(f"Total srt files in folder: {len(outputs)}")

  for input_file in inputs:
    whisper_run(os.path.join(input_dir, input_file), output_dir)

# Download Subtitle

In [None]:
import os
import shutil
import datetime

# Folder holding the generated subtitles, and the folder that receives the archive.
sub_dir = "/content/drive/MyDrive/SUBS/SUBFILES"
zip_dir = "/content/drive/MyDrive/SUBS/ZIP"

# Make sure the archive folder exists before anything is written into it.
os.makedirs(zip_dir, exist_ok=True)

# Archive base name without extension: <yymmdd_HHMM>_subtitles_biscuits
timestamp = datetime.datetime.now().strftime('%y%m%d_%H%M')
zip_name = os.path.join(zip_dir, timestamp + "_subtitles_biscuits")

# Zip the whole subtitle folder; the returned archive path is shown as the cell output.
shutil.make_archive(zip_name, 'zip', sub_dir)