# Whispery: a tool to easily convert an audio/video file to text using **whisper** by *OpenAI*.

In [None]:
# @title 1. **GPU Checker** { vertical-output: true, display-mode: "form" }
import torch

# Ask PyTorch whether a CUDA device is usable in this runtime and report the result.
gpu_ready = torch.cuda.is_available()
status_message = (
    "GPU is available 👍, you can skip to the step 2. 🦄"
    if gpu_ready
    else "GPU is NOT available 👎, follow the instructions written below. 📚"
)
print(status_message)


GPU is available 👍, you can skip to the step 2. 🦄



If the GPU is not available, you can try changing the runtime by following these steps:

1. Click on <ins>Runtime</ins> in the top menu.
2. Choose <ins>Change runtime type</ins> from the dropdown menu.
3. In the <ins>Hardware accelerator</ins> section, select <ins>**T4 GPU**</ins>.
4. Click on <ins>Save</ins> to apply the changes.

After changing the runtime, run the code snippet above again to check whether the GPU is available.

In [None]:
# @title 2. Install **whisper** and **ffmpeg** { display-mode: "form" }
!pip install git+https://github.com/openai/whisper.git
!sudo apt update && sudo apt install ffmpeg

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-67ues9a9
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-67ues9a9
  Resolved https://github.com/openai/whisper.git to commit ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/u

In [None]:
# @title 3. **Run whisper with an audio/video file** { display-mode: "form" }
import ipywidgets as widgets
from IPython.display import display
from google.colab import files


# Language selection dropdown with search
language_dropdown = widgets.Dropdown(
    options=['Autodetect','Afrikaans', 'Albanian', 'Amharic', 'Arabic', 'Armenian', 'Assamese', 'Azerbaijani',
      'Bashkir', 'Basque', 'Belarusian', 'Bengali', 'Bosnian', 'Breton', 'Bulgarian', 'Burmese',
      'Cantonese', 'Castilian', 'Catalan', 'Chinese', 'Croatian', 'Czech', 'Danish', 'Dutch',
      'English', 'Estonian', 'Faroese', 'Finnish', 'Flemish', 'French', 'Galician', 'Georgian',
      'German', 'Greek', 'Gujarati', 'Haitian', 'Haitian Creole', 'Hausa', 'Hawaiian', 'Hebrew',
      'Hindi', 'Hungarian', 'Icelandic', 'Indonesian', 'Italian', 'Japanese', 'Javanese', 'Kannada',
      'Kazakh', 'Khmer', 'Korean', 'Lao', 'Latin', 'Latvian', 'Letzeburgesch', 'Lingala', 'Lithuanian',
      'Luxembourgish', 'Macedonian', 'Malagasy', 'Malay', 'Malayalam', 'Maltese', 'Mandarin', 'Maori',
      'Marathi', 'Moldavian', 'Moldovan', 'Mongolian', 'Myanmar', 'Nepali', 'Norwegian', 'Nynorsk',
      'Occitan', 'Panjabi', 'Pashto', 'Persian', 'Polish', 'Portuguese', 'Punjabi', 'Pushto', 'Romanian',
      'Russian', 'Sanskrit', 'Serbian', 'Shona', 'Sindhi', 'Sinhala', 'Sinhalese', 'Slovak', 'Slovenian',
      'Somali', 'Spanish', 'Sundanese', 'Swahili', 'Swedish', 'Tagalog', 'Tajik', 'Tamil', 'Tatar',
      'Telugu', 'Thai', 'Tibetan', 'Turkish', 'Turkmen', 'Ukrainian', 'Urdu', 'Uzbek', 'Valencian',
      'Vietnamese', 'Welsh', 'Yiddish', 'Yoruba'
    ],
    description='Language:',
    style={'description_width': 'initial'},
    layout={'width': 'max-content'}
)

# Model selection dropdown
model_dropdown = widgets.Dropdown(
    options=['tiny', 'base', 'small', 'medium', 'large'],
    value='medium',
    description='Model:',
    style={'description_width': 'initial'},
    layout={'width': 'max-content'}
)

# Task selection
task_checkbox = widgets.Checkbox(
    value=False,
    description='Add a Translation Task',
    style={'description_width': 'initial'}
)

# Display widgets
display(language_dropdown)
display(model_dropdown)
display(task_checkbox)

# File uploader
file_upload = files.upload()

if file_upload:
    language = '' if language_dropdown.value == 'Autodetect' else '--language ' + language_dropdown.value
    model = model_dropdown.value
    task = '--task translate' if task_checkbox.value else ''
    file_name = list(file_upload.keys())[0]  # Get the uploaded file name
    !whisper "$file_name" --model "$model" $language $task --output_dir outputs
else:
    print("No file uploaded.")

Dropdown(description='Language:', layout=Layout(width='max-content'), options=('Autodetect', 'Afrikaans', 'Alb…

Dropdown(description='Model:', index=3, layout=Layout(width='max-content'), options=('tiny', 'base', 'small', …

Checkbox(value=False, description='Add a Translation Task', style=DescriptionStyle(description_width='initial'…

In [38]:
# @title 4. **Outputs download** { display-mode: "form" }
import os
import shutil

from google.colab import files

# Folder to zip and download.
# NOTE(review): presumably the `outputs` directory written by step 3 — this
# assumes the notebook's working directory is /content; confirm.
folder_to_download = '/content/outputs'

# Name of the zip file offered for download
zip_file_name = 'outputs.zip'

if os.path.isdir(folder_to_download):
    # make_archive appends '.zip' itself, so strip only a trailing '.zip' from
    # the base name (str.replace would also remove it from the middle of a name).
    if zip_file_name.lower().endswith('.zip'):
        archive_base = zip_file_name[:-len('.zip')]
    else:
        archive_base = zip_file_name
    # Create a zip file of the folder; the return value is the archive's path
    archive_path = shutil.make_archive(archive_base, 'zip', folder_to_download)

    # Download the archive that was actually written
    files.download(archive_path)
else:
    # Without this guard a missing folder surfaces as a raw traceback.
    print("No outputs found, run step 3 first.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [45]:
# @title 5. **Remove all files** { display-mode: "form" }
!rm -r *