<a href="https://colab.research.google.com/github/bigjohncodes/audio-transcriber/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import os
import yt_dlp
import whisper
from tqdm import tqdm
from google.colab import files

# Define working folders
folder_audio = "audios"
folder_transcriptions = "transcriptions"
url_file = "vimeo.txt"

# Create folders if they don't exist
os.makedirs(folder_audio, exist_ok=True)
os.makedirs(folder_transcriptions, exist_ok=True)

# Load Whisper model
model = whisper.load_model("base")

# Upload cookies file (run this once)
print("📤 Please upload your cookies.txt file")
uploaded = files.upload()  # This will prompt you to upload cookies.txt

def download_vimeo_audio(url, output_path):
    """Downloads only the audio from the Vimeo video and saves it to the specified location."""
    options = {
        "format": "bestaudio",
        "outtmpl": output_path,
        "cookiefile": "vimeo.com_cookies.txt",  # Use the uploaded cookies file
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192"
            }
        ],
        "quiet": False,
        "no_warnings": False,
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        ydl.download([url])


def transcribe_audio(audio_path, output_text_path):
    """Transcribes the audio and saves the result to a text file."""
    print(f"🎤 Transcribing: {audio_path}")
    result = model.transcribe(audio_path)
    with open(output_text_path, "w", encoding="utf-8") as text_file:
        text_file.write(result["text"])


def process_urls():
    """Reads URLs, downloads audio, transcribes it, and deletes the audio afterwards."""
    with open(url_file, "r") as f:
        urls = [line.strip() for line in f.readlines() if line.strip()]

    for url in tqdm(urls, desc="Processing videos"):
        print(f"\n🔽 Processing: {url}")

        # Determine output file name
        output_audio_path = os.path.join(folder_audio, "%(title)s.%(ext)s")

        try:
            download_vimeo_audio(url, output_audio_path)
        except Exception as e:
            print(f"❌ Error downloading {url}: {e}")
            continue

        # Find the downloaded file
        mp3_files = [f for f in os.listdir(folder_audio) if f.endswith(".mp3")]
        if not mp3_files:
            print(f"⚠️ Downloaded file not found for {url}")
            continue

        audio_file = os.path.join(folder_audio, mp3_files[0])
        output_text_path = os.path.join(folder_transcriptions, f"{os.path.splitext(mp3_files[0])[0]}.txt")

        try:
            # Transcribe and delete the audio
            transcribe_audio(audio_file, output_text_path)
            os.remove(audio_file)
            print(f"✅ Transcription saved and audio deleted: {output_text_path}")
        except Exception as e:
            print(f"❌ Error transcribing {audio_file}: {e}")


if __name__ == "__main__":
    process_urls()

📤 Please upload your cookies.txt file


Saving vimeo.com_cookies.txt to vimeo.com_cookies (3).txt


Processing videos:   0%|          | 0/3 [00:00<?, ?it/s]


🔽 Processing: https://vimeo.com/910802846
[vimeo] Extracting URL: https://vimeo.com/910802846
[vimeo] 910802846: Downloading webpage
[vimeo] Downloading web token info
[vimeo] 910802846: Downloading web API JSON
[vimeo] 910802846: Downloading JSON metadata
[vimeo] 910802846: Downloading akfire_interconnect_quic m3u8 information
[vimeo] 910802846: Downloading fastly_skyfire m3u8 information
[vimeo] 910802846: Downloading akfire_interconnect_quic MPD information




[vimeo] 910802846: Downloading fastly_skyfire MPD information




[vimeo] 910802846: Loading download config JSON
[vimeo] 910802846: Downloading web API JSON
[vimeo] 910802846: Downloading web API JSON
[info] 910802846: Downloading 1 format(s): hls-fastly_skyfire-audio-high-Èdè_Yorùbá
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 12
[download] Destination: audios/Quadratic Equations-Mathematics-JSS3.mp4
[download] 100% of    1.63MiB in 00:00:00 at 3.14MiB/s                   
[FixupM3u8] Fixing MPEG-TS in MP4 container of "audios/Quadratic Equations-Mathematics-JSS3.mp4"
[ExtractAudio] Destination: audios/Quadratic Equations-Mathematics-JSS3.mp3
Deleting original file audios/Quadratic Equations-Mathematics-JSS3.mp4 (pass -k to keep)
🎤 Transcribing: audios/Quadratic Equations-Mathematics-JSS3.mp3


Processing videos:  33%|███▎      | 1/3 [00:23<00:47, 23.88s/it]

✅ Transcription saved and audio deleted: transcriptions/Quadratic Equations-Mathematics-JSS3.txt

🔽 Processing: https://vimeo.com/872924274
[vimeo] Extracting URL: https://vimeo.com/872924274
[vimeo] 872924274: Downloading webpage
[vimeo] Downloading web token info
[vimeo] 872924274: Downloading web API JSON
[vimeo] 872924274: Downloading JSON metadata
[vimeo] 872924274: Downloading akfire_interconnect_quic m3u8 information
[vimeo] 872924274: Downloading fastly_skyfire m3u8 information
[vimeo] 872924274: Downloading akfire_interconnect_quic MPD information




[vimeo] 872924274: Downloading fastly_skyfire MPD information




[vimeo] 872924274: Loading download config JSON
[vimeo] 872924274: Downloading web API JSON
[vimeo] 872924274: Downloading web API JSON
[info] 872924274: Downloading 1 format(s): hls-fastly_skyfire-audio-high-Èdè_Yorùbá
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 20
[download] Destination: audios/Introduction to Chemistry-Chemistry-SS2.mp4
[download] 100% of    2.86MiB in 00:00:00 at 3.22MiB/s                   
[FixupM3u8] Fixing MPEG-TS in MP4 container of "audios/Introduction to Chemistry-Chemistry-SS2.mp4"
[ExtractAudio] Destination: audios/Introduction to Chemistry-Chemistry-SS2.mp3
Deleting original file audios/Introduction to Chemistry-Chemistry-SS2.mp4 (pass -k to keep)
🎤 Transcribing: audios/Introduction to Chemistry-Chemistry-SS2.mp3


Processing videos:  67%|██████▋   | 2/3 [01:09<00:36, 36.94s/it]

✅ Transcription saved and audio deleted: transcriptions/Introduction to Chemistry-Chemistry-SS2.txt

🔽 Processing: https://vimeo.com/1118250162
[vimeo] Extracting URL: https://vimeo.com/1118250162
[vimeo] 1118250162: Downloading webpage
[vimeo] Downloading web token info
[vimeo] 1118250162: Downloading web API JSON
[vimeo] 1118250162: Downloading JSON metadata
[vimeo] 1118250162: Downloading akfire_interconnect_quic m3u8 information
[vimeo] 1118250162: Downloading fastly_skyfire m3u8 information
[vimeo] 1118250162: Downloading akfire_interconnect_quic MPD information




[vimeo] 1118250162: Downloading fastly_skyfire MPD information




[vimeo] 1118250162: Loading download config JSON
[vimeo] 1118250162: Downloading web API JSON
[vimeo] 1118250162: Downloading web API JSON
[info] 1118250162: Downloading 1 format(s): hls-fastly_skyfire-audio-high-Original
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 208
[download] Destination: audios/Galvanometer Conversion-Physics-SS3.mp4
[download] 100% of   29.68MiB in 00:00:09 at 3.07MiB/s                   
[FixupM3u8] Fixing MPEG-TS in MP4 container of "audios/Galvanometer Conversion-Physics-SS3.mp4"
[ExtractAudio] Destination: audios/Galvanometer Conversion-Physics-SS3.mp3
Deleting original file audios/Galvanometer Conversion-Physics-SS3.mp4 (pass -k to keep)
🎤 Transcribing: audios/Galvanometer Conversion-Physics-SS3.mp3


Processing videos: 100%|██████████| 3/3 [06:29<00:00, 129.88s/it]

✅ Transcription saved and audio deleted: transcriptions/Galvanometer Conversion-Physics-SS3.txt



