<a href="https://colab.research.google.com/github/ddeepak95/whisper-transcript-w-diarization/blob/main/whisper-diarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1:
# Execute the following tasks in order. You can upload an audio file, or files, to the content directory while the installation tasks run, but wait until every upload has finished before running the audio-splitting task.

In [None]:
# Fetch the whisper-diarization code; later cells use its requirements/constraints files and diarize_parallel.py.
!git clone https://github.com/ddeepak95/whisper-diarization.git

In [None]:
# Install Cython ahead of the main requirements (presumably a build-time dependency of one of them — TODO confirm).
!pip install cython

In [None]:
# Install the cloned repo's requirements (-r), with versions restricted by its constraints file (-c).
!pip install -c whisper-diarization/constraints.txt -r whisper-diarization/requirements.txt

In [None]:
# pydub is imported by the audio-splitting cell below.
# NOTE(review): ffmpeg-python is not imported by any visible cell — confirm it is still needed.
!pip install pydub ffmpeg-python

In [None]:
import math
import os
import shutil

from pydub import AudioSegment

def split_audio(audio_path, output_dir="/content/source_files", max_length_minutes=60):
    """
    Stage an audio file in the output directory, splitting it if it is too long.

    A file longer than ``max_length_minutes`` is cut into consecutive chunks
    that are exported as ``<name>_part<N>.mp3``. A file at or below the limit
    is copied unchanged, so every successfully decoded input ends up in
    ``output_dir`` and the later transcription step only has to look there.

    Any failure (undecodable file, missing path, bad arguments, ...) is
    reported with ``print`` and swallowed, so one bad file does not stop a
    batch run over a whole directory.

    Args:
        audio_path (str): The path to the audio file.
        output_dir (str): Directory where output files will be saved.
        max_length_minutes (int): The maximum length of each chunk in minutes.

    Returns:
        None
    """
    try:
        # Work in whole milliseconds: AudioSegment lengths and slice indices
        # are millisecond based, and integer maths avoids float rounding when
        # counting chunks.
        chunk_length_ms = int(max_length_minutes * 60 * 1000)
        if chunk_length_ms <= 0:
            raise ValueError("max_length_minutes must be a positive number")

        audio = AudioSegment.from_file(audio_path)
        total_ms = len(audio)
        duration_minutes = total_ms / (1000 * 60)

        # Created only after decoding succeeded, so failures leave no empty directory.
        os.makedirs(output_dir, exist_ok=True)

        if total_ms > chunk_length_ms:
            print(f"Audio file {audio_path} is longer than {max_length_minutes} minutes. Splitting...")
            base = os.path.splitext(os.path.basename(audio_path))[0]
            # Ceiling division without going through floats.
            num_chunks = -(-total_ms // chunk_length_ms)

            for i in range(num_chunks):
                start_time = i * chunk_length_ms
                # Slicing past the end is clamped, so the last chunk may be shorter.
                chunk = audio[start_time:start_time + chunk_length_ms]

                output_path = os.path.join(output_dir, f"{base}_part{i+1}.mp3")
                chunk.export(output_path, format="mp3")

                print(f"Exported chunk {i+1} to {output_path}")
        else:
            print(f"Audio file {audio_path} is {duration_minutes:.2f} minutes, no splitting needed.")
            # Short files must still reach output_dir, otherwise the
            # transcription step (which only reads output_dir) never sees them.
            output_path = os.path.join(output_dir, os.path.basename(audio_path))
            if os.path.abspath(output_path) != os.path.abspath(audio_path):
                shutil.copy2(audio_path, output_path)
                print(f"Copied {audio_path} to {output_path}")

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a best-effort batch.
        print(f"Error processing {audio_path}: {e}")

# Run every regular file found directly under /content through split_audio;
# sub-directories (and anything else that is not a file) are skipped.
content_dir = "/content"
output_dir = "/content/source_files"
for filename in os.listdir(content_dir):
    file_path = os.path.join(content_dir, filename)
    if not os.path.isfile(file_path):
        continue
    split_audio(file_path, output_dir=output_dir)

# Step 2:
# ^ !!! WAIT FOR ABOVE TASK TO COMPLETE !!! ^
# ^ !!! BEFORE RESTARTING RUNTIME !!! ^
(you can also skip restarting the runtime when asked)

# Step 3:
# Once the above task has completed and all audio files have successfully been uploaded to the content directory, execute the following task.

In [None]:
import glob, os
audioFiles = glob.glob("/content/source_files/*.*")
os.chdir("/content/whisper-diarization")
for i in range(len(audioFiles)):
  for audioFile in glob.glob(audioFiles[i]):
    baseFile = os.path.splitext(audioFile)[0]
    !python diarize_parallel.py --whisper-model large-v3 -a "$audioFile" --language "en"

# Step 4:
# Download the srt and txt files from the content directory (they are written next to the processed audio files, in /content/source_files).