<a href="https://colab.research.google.com/github/anokhina-rgb/Google-Colabs/blob/main/mp3_splitting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1️⃣ Install required packages
# -q keeps pip's output quiet. Only pydub is pinned (0.25.1); torch and
# torchaudio are unpinned, and whisper-timestamped is installed from its GitHub
# repository with no tag or commit specified, so a rerun may resolve to
# different versions than an earlier run did.
# NOTE(review): the leading "!" is IPython/Colab shell syntax, so this line only
# works inside a notebook cell, not when the file is run as a plain Python script.
!pip install -q pydub==0.25.1 torch torchaudio git+https://github.com/linto-ai/whisper-timestamped

# 2️⃣ Import libraries
import os
from pydub import AudioSegment
import whisper_timestamped as whisper
import torch
from google.colab import files
import zipfile

# 3️⃣ User configuration
pause_seconds = 10    # seconds of silence inserted between segments
model_size = "base"   # Whisper model name: tiny/base/small/medium/large
pause_ms = pause_seconds * 1000  # same pause expressed in milliseconds

# Run on the GPU when torch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Working directories inside the Colab runtime (created if missing)
input_folder = "/content/mp3_input"
output_folder = "/content/mp3_output"
for folder in (input_folder, output_folder):
    os.makedirs(folder, exist_ok=True)

# 4️⃣ Ask the user for audio files and stage them in the input folder
print("Select MP3 files to upload:")
uploaded = files.upload()  # blocks until the browser file picker is closed

# Each key of the returned mapping is treated as a path relative to the
# current working directory and moved into input_folder under the same name.
for uploaded_name in uploaded:
    staged_path = os.path.join(input_folder, uploaded_name)
    os.rename(uploaded_name, staged_path)

upload_count = len(uploaded)
print(f"\n{upload_count} file(s) uploaded successfully.\n")

# 5️⃣ Load the Whisper model once, on the configured device
loading_banner = f"Loading Whisper model ({model_size}) on device: {device} ..."
print(loading_banner)
model = whisper.load_model(model_size, device=device)
print("Model loaded successfully.\n")

# 6️⃣ Process all uploaded MP3 files
#
# For every ".mp3" in input_folder: transcribe it with Whisper, cut the audio
# at the returned segment timestamps, and write "<name>__pause.mp3" to
# output_folder with `pause_ms` of silence between consecutive segments.
# A failure in one file is reported and recorded, and the batch continues.

# Sort so the processing order (and the idx/total progress counter) is
# reproducible; os.listdir returns entries in arbitrary order.
all_inputs = sorted(os.listdir(input_folder))
files_list = [f for f in all_inputs if f.lower().endswith(".mp3")]

# Only ".mp3" names are processed; say so instead of silently dropping the rest.
for skipped_name in all_inputs:
    if not skipped_name.lower().endswith(".mp3"):
        print(f"⚠️ Skipping non-MP3 file: {skipped_name}")

# The pause is identical for every file, so build it once up front.
silence = AudioSegment.silent(duration=pause_ms)

# Names of inputs whose processing raised; drives the final summary.
failed_files = []

for idx, file_name in enumerate(files_list, 1):
    print(f"Processing {file_name} ({idx}/{len(files_list)})...")

    in_path = os.path.join(input_folder, file_name)
    out_path = os.path.join(output_folder, os.path.splitext(file_name)[0] + "__pause.mp3")

    try:
        # Load the audio twice: pydub's copy is what gets sliced and exported,
        # whisper's copy is what gets transcribed.
        original = AudioSegment.from_file(in_path)
        audio = whisper.load_audio(in_path)

        # language=None lets Whisper auto-detect the spoken language
        result = whisper.transcribe(model, audio, language=None)
        segments = result.get("segments", [])

        if not segments:
            print("⚠️ No segments detected, saving original file.")
            final_audio = original
        else:
            final_audio = AudioSegment.empty()
            last_index = len(segments) - 1
            for i, seg in enumerate(segments):
                # Segment times are in seconds; pydub slices in milliseconds.
                start_ms = int(seg["start"] * 1000)
                end_ms = int(seg["end"] * 1000)
                # Only audio inside [start, end) is copied; anything between
                # segments, before the first, or after the last is not kept.
                final_audio += original[start_ms:end_ms]
                # Pause goes between segments only, never after the last one.
                if i != last_index:
                    final_audio += silence
                print(f"  Segment {i+1}/{len(segments)} processed")

        # Export processed file
        final_audio.export(out_path, format="mp3")
        print(f"✅ Saved: {out_path}\n")

    except Exception as e:
        # Per-file boundary: one bad input must not abort the whole batch,
        # but it must be remembered so the summary below stays truthful.
        print(f"❌ Error processing {file_name}: {e}")
        failed_files.append(file_name)

# Final summary: claim success only when every file actually succeeded.
if not files_list:
    print("⚠️ No MP3 files found to process.\n")
elif failed_files:
    print(
        f"⚠️ Finished with errors: {len(failed_files)} of {len(files_list)} "
        f"file(s) failed: {', '.join(failed_files)}\n"
    )
else:
    print("🎉 All files processed successfully!\n")

# 7️⃣ Bundle every entry of the output folder into a single ZIP archive
zip_path = "/content/processed_mp3_files.zip"
with zipfile.ZipFile(zip_path, 'w') as archive:
    for entry_name in os.listdir(output_folder):
        entry_path = os.path.join(output_folder, entry_name)
        # arcname stores the entry under its bare name, without the folder path
        archive.write(entry_path, arcname=entry_name)

# 8️⃣ Hand the archive to the browser as a download
print("Downloading all processed files as ZIP...")
files.download(zip_path)


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m789.1/789.1 kB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for whisper-timestamped (setup.py) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.

Select MP3 files to upload:


Saving 3_2_fashion_10 sentences_English.mp3 to 3_2_fashion_10 sentences_English.mp3
Saving 4_1__A-new-vaccineSI.mp3 to 4_1__A-new-vaccineSI.mp3
Saving 4_2_A-new-vaccine10 sec.mp3 to 4_2_A-new-vaccine10 sec.mp3
Saving 5_1_освітня реформа_10 речень_українська.mp3 to 5_1_освітня реформа_10 речень_українська.mp3
Saving 5_2_educatrion reform_10sentences_English.mp3 to 5_2_educatrion reform_10sentences_English.mp3
Saving 6_1_Клара _Кідалова_укр.mp3 to 6_1_Клара _Кідалова_укр.mp3
Saving 6_1_плани післявоєнної рекунстукції 5 речень Українська.mp3 to 6_1_плани післявоєнної рекунстукції 5 речень Українська.mp3
Saving 6_3_blogging.mp4 to 6_3_blogging.mp4
Saving 7_1_спорт 5 речень українська.mp3 to 7_1_спорт 5 речень українська.mp3
Saving 7_2_sports in 5 sentences English.mp3 to 7_2_sports in 5 sentences English.mp3
Saving 8_1_Welcome speech.mp3 to 8_1_Welcome speech.mp3
Saving 8_2_Welcome speec+.mp3 to 8_2_Welcome speec+.mp3
Saving 8_2_укр монтессорі.webm to 8_2_укр монтессорі.webm
Saving 8_3_Mon

100%|████████████████████████████████████████| 139M/139M [00:00<00:00, 193MiB/s]


Model loaded successfully.

Processing 5_2_educatrion reform_10sentences_English.mp3 (1/12)...
Detected language: English


100%|██████████| 13208/13208 [00:24<00:00, 546.52frames/s]


  Segment 1/10 processed
  Segment 2/10 processed
  Segment 3/10 processed
  Segment 4/10 processed
  Segment 5/10 processed
  Segment 6/10 processed
  Segment 7/10 processed
  Segment 8/10 processed
  Segment 9/10 processed
  Segment 10/10 processed
✅ Saved: /content/mp3_output/5_2_educatrion reform_10sentences_English__pause.mp3

Processing 7_2_sports in 5 sentences English.mp3 (2/12)...
Detected language: English


100%|██████████| 6616/6616 [00:11<00:00, 570.45frames/s]


  Segment 1/5 processed
  Segment 2/5 processed
  Segment 3/5 processed
  Segment 4/5 processed
  Segment 5/5 processed
✅ Saved: /content/mp3_output/7_2_sports in 5 sentences English__pause.mp3

Processing 6_1_плани післявоєнної рекунстукції 5 речень Українська.mp3 (3/12)...
Detected language: Ukrainian


100%|██████████| 6899/6899 [00:14<00:00, 462.08frames/s]


  Segment 1/5 processed
  Segment 2/5 processed
  Segment 3/5 processed
  Segment 4/5 processed
  Segment 5/5 processed
✅ Saved: /content/mp3_output/6_1_плани післявоєнної рекунстукції 5 речень Українська__pause.mp3

Processing 4_2_A-new-vaccine10 sec.mp3 (4/12)...
Detected language: English


100%|██████████| 5802/5802 [00:11<00:00, 509.18frames/s]


  Segment 1/5 processed
  Segment 2/5 processed
  Segment 3/5 processed
  Segment 4/5 processed
  Segment 5/5 processed
✅ Saved: /content/mp3_output/4_2_A-new-vaccine10 sec__pause.mp3

Processing 8_2_Welcome speec+.mp3 (5/12)...
Detected language: English


100%|██████████| 11795/11795 [00:30<00:00, 385.45frames/s]


  Segment 1/18 processed
  Segment 2/18 processed
  Segment 3/18 processed
  Segment 4/18 processed
  Segment 5/18 processed
  Segment 6/18 processed
  Segment 7/18 processed
  Segment 8/18 processed
  Segment 9/18 processed
  Segment 10/18 processed
  Segment 11/18 processed
  Segment 12/18 processed
  Segment 13/18 processed
  Segment 14/18 processed
  Segment 15/18 processed
  Segment 16/18 processed
  Segment 17/18 processed
  Segment 18/18 processed
✅ Saved: /content/mp3_output/8_2_Welcome speec+__pause.mp3

Processing 7_1_спорт 5 речень українська.mp3 (6/12)...
Detected language: Russian


100%|██████████| 6841/6841 [00:14<00:00, 459.22frames/s]


  Segment 1/8 processed
  Segment 2/8 processed
  Segment 3/8 processed
  Segment 4/8 processed
  Segment 5/8 processed
  Segment 6/8 processed
  Segment 7/8 processed
  Segment 8/8 processed
✅ Saved: /content/mp3_output/7_1_спорт 5 речень українська__pause.mp3

Processing 8_4_Welcome speech_comments.mp3 (7/12)...
Detected language: English


100%|██████████| 11795/11795 [00:29<00:00, 398.38frames/s]


  Segment 1/18 processed
  Segment 2/18 processed
  Segment 3/18 processed
  Segment 4/18 processed
  Segment 5/18 processed
  Segment 6/18 processed
  Segment 7/18 processed
  Segment 8/18 processed
  Segment 9/18 processed
  Segment 10/18 processed
  Segment 11/18 processed
  Segment 12/18 processed
  Segment 13/18 processed
  Segment 14/18 processed
  Segment 15/18 processed
  Segment 16/18 processed
  Segment 17/18 processed
  Segment 18/18 processed
✅ Saved: /content/mp3_output/8_4_Welcome speech_comments__pause.mp3

Processing 4_1__A-new-vaccineSI.mp3 (8/12)...
Detected language: English


100%|██████████| 1802/1802 [00:06<00:00, 274.19frames/s]


  Segment 1/5 processed
  Segment 2/5 processed
  Segment 3/5 processed
  Segment 4/5 processed
  Segment 5/5 processed
✅ Saved: /content/mp3_output/4_1__A-new-vaccineSI__pause.mp3

Processing 6_1_Клара _Кідалова_укр.mp3 (9/12)...
Detected language: Ukrainian


100%|██████████| 15381/15381 [01:17<00:00, 198.46frames/s]


  Segment 1/53 processed
  Segment 2/53 processed
  Segment 3/53 processed
  Segment 4/53 processed
  Segment 5/53 processed
  Segment 6/53 processed
  Segment 7/53 processed
  Segment 8/53 processed
  Segment 9/53 processed
  Segment 10/53 processed
  Segment 11/53 processed
  Segment 12/53 processed
  Segment 13/53 processed
  Segment 14/53 processed
  Segment 15/53 processed
  Segment 16/53 processed
  Segment 17/53 processed
  Segment 18/53 processed
  Segment 19/53 processed
  Segment 20/53 processed
  Segment 21/53 processed
  Segment 22/53 processed
  Segment 23/53 processed
  Segment 24/53 processed
  Segment 25/53 processed
  Segment 26/53 processed
  Segment 27/53 processed
  Segment 28/53 processed
  Segment 29/53 processed
  Segment 30/53 processed
  Segment 31/53 processed
  Segment 32/53 processed
  Segment 33/53 processed
  Segment 34/53 processed
  Segment 35/53 processed
  Segment 36/53 processed
  Segment 37/53 processed
  Segment 38/53 processed
  Segment 39/53 proce

100%|██████████| 13940/13940 [00:32<00:00, 429.48frames/s]


  Segment 1/17 processed
  Segment 2/17 processed
  Segment 3/17 processed
  Segment 4/17 processed
  Segment 5/17 processed
  Segment 6/17 processed
  Segment 7/17 processed
  Segment 8/17 processed
  Segment 9/17 processed
  Segment 10/17 processed
  Segment 11/17 processed
  Segment 12/17 processed
  Segment 13/17 processed
  Segment 14/17 processed
  Segment 15/17 processed
  Segment 16/17 processed
  Segment 17/17 processed
✅ Saved: /content/mp3_output/5_1_освітня реформа_10 речень_українська__pause.mp3

Processing 3_2_fashion_10 sentences_English.mp3 (11/12)...
Detected language: English


100%|██████████| 10811/10811 [00:22<00:00, 486.78frames/s]


  Segment 1/11 processed
  Segment 2/11 processed
  Segment 3/11 processed
  Segment 4/11 processed
  Segment 5/11 processed
  Segment 6/11 processed
  Segment 7/11 processed
  Segment 8/11 processed
  Segment 9/11 processed
  Segment 10/11 processed
  Segment 11/11 processed
✅ Saved: /content/mp3_output/3_2_fashion_10 sentences_English__pause.mp3

Processing 8_1_Welcome speech.mp3 (12/12)...
Detected language: English


100%|██████████| 6296/6296 [00:19<00:00, 320.48frames/s]


  Segment 1/13 processed
  Segment 2/13 processed
  Segment 3/13 processed
  Segment 4/13 processed
  Segment 5/13 processed
  Segment 6/13 processed
  Segment 7/13 processed
  Segment 8/13 processed
  Segment 9/13 processed
  Segment 10/13 processed
  Segment 11/13 processed
  Segment 12/13 processed
  Segment 13/13 processed
✅ Saved: /content/mp3_output/8_1_Welcome speech__pause.mp3

🎉 All files processed successfully!

Downloading all processed files as ZIP...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>