<a href="https://colab.research.google.com/github/anokhina-rgb/Multilingual-Corpus-for-EU-Studies/blob/main/mp3_to_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install dependencies
!pip install openai-whisper python-docx --quiet

# Imports
import whisper
from docx import Document
from google.colab import files
import os
from datetime import datetime

# --- Helper functions ---
def save_word(text, path):
    doc = Document()
    doc.add_paragraph(text)
    doc.save(path)

def download_file(path):
    files.download(path)

# --- Upload audio files ---
print("Upload your MP3/WAV audio files (multiple selection allowed)")
uploaded = files.upload()  # Opens file picker

audio_files = list(uploaded.keys())
if not audio_files:
    raise SystemExit("No files uploaded.")

# --- Load Whisper model ---
print("Loading Whisper model...")
model = whisper.load_model("small")  # You can change to base/medium/large
print("Model loaded.\n")

# --- Create folder for outputs ---
output_folder = "transcriptions_colab"
os.makedirs(output_folder, exist_ok=True)

# --- Transcribe each file ---
for idx, file in enumerate(audio_files, start=1):
    print(f"[{idx}/{len(audio_files)}] Transcribing: {file}")
    try:
        result = model.transcribe(file)
        text = result['text'].strip()

        # Save TXT
        txt_filename = os.path.join(output_folder, f"{os.path.splitext(file)[0]}.txt")
        with open(txt_filename, "w", encoding="utf-8") as f:
            f.write(text)

        # Save DOCX
        docx_filename = os.path.join(output_folder, f"{os.path.splitext(file)[0]}.docx")
        save_word(text, docx_filename)

        print(f"✅ Completed: {file}")
    except Exception as e:
        print(f"❌ Error transcribing {file}: {e}")

# --- Zip the output folder ---
import shutil
zip_filename = f"transcriptions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
shutil.make_archive(zip_filename.replace('.zip',''), 'zip', output_folder)
print(f"\nAll transcriptions zipped as: {zip_filename}")

# --- Download ZIP ---
files.download(zip_filename)


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/803.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m522.2/803.2 kB[0m [31m15.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
Upload your MP3/WAV audio files (multiple selection allowed)


Saving 09.02.01-student-conversation-directons-and-questions.mp3 to 09.02.01-student-conversation-directons-and-questions.mp3
Saving 09.03.03-student-conversation.mp3 to 09.03.03-student-conversation.mp3
Saving 09.03.04-student-conversation.mp3 to 09.03.04-student-conversation.mp3
Saving 2025-07-17-121245_173642.mp3 to 2025-07-17-121245_173642.mp3
Saving celebrity short answer.mp3 to celebrity short answer.mp3
Saving leaders.mp3 to leaders.mp3
Saving leaders1.mp3 to leaders1.mp3
Saving photo_2025-06-04_12-05-51.jpg to photo_2025-06-04_12-05-51.jpg
Saving photo_2025-06-23_17-29-12.jpg to photo_2025-06-23_17-29-12.jpg
Saving response 100 words 2025-07-17-121534_190519.mp3 to response 100 words 2025-07-17-121534_190519.mp3
Saving social media.mp3 to social media.mp3
Saving speaking.mp3 to speaking.mp3
Saving teachers vs computers.mp3 to teachers vs computers.mp3
Saving voice_file_to_txt_transcriber_by_whisper_in_google_colab.ipynb to voice_file_to_txt_transcriber_by_whisper_in_google_cola

100%|███████████████████████████████████████| 461M/461M [00:08<00:00, 57.0MiB/s]


Model loaded.

[1/17] Transcribing: 09.02.01-student-conversation-directons-and-questions.mp3




✅ Completed: 09.02.01-student-conversation-directons-and-questions.mp3
[2/17] Transcribing: 09.03.03-student-conversation.mp3




✅ Completed: 09.03.03-student-conversation.mp3
[3/17] Transcribing: 09.03.04-student-conversation.mp3




✅ Completed: 09.03.04-student-conversation.mp3
[4/17] Transcribing: 2025-07-17-121245_173642.mp3




✅ Completed: 2025-07-17-121245_173642.mp3
[5/17] Transcribing: celebrity short answer.mp3




✅ Completed: celebrity short answer.mp3
[6/17] Transcribing: leaders.mp3




✅ Completed: leaders.mp3
[7/17] Transcribing: leaders1.mp3




✅ Completed: leaders1.mp3
[8/17] Transcribing: photo_2025-06-04_12-05-51.jpg
❌ Error transcribing photo_2025-06-04_12-05-51.jpg: Failed to load audio: ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr



✅ Completed: response 100 words 2025-07-17-121534_190519.mp3
[11/17] Transcribing: social media.mp3




✅ Completed: social media.mp3
[12/17] Transcribing: speaking.mp3




✅ Completed: speaking.mp3
[13/17] Transcribing: teachers vs computers.mp3




✅ Completed: teachers vs computers.mp3
[14/17] Transcribing: voice_file_to_txt_transcriber_by_whisper_in_google_colab.ipynb
❌ Error transcribing voice_file_to_txt_transcriber_by_whisper_in_google_colab.ipynb: Failed to load audio: ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbi



✅ Completed: writing.mp3
[16/17] Transcribing: Відео WhatsApp, дата_ 2025-08-21 о 18.03.17_a7a28209.mp4




✅ Completed: Відео WhatsApp, дата_ 2025-08-21 о 18.03.17_a7a28209.mp4
[17/17] Transcribing: Відео WhatsApp, дата_ 2025-08-21 о 18.03.18_0863234b.mp4




✅ Completed: Відео WhatsApp, дата_ 2025-08-21 о 18.03.18_0863234b.mp4

All transcriptions zipped as: transcriptions_20250902_071409.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>