<a href="https://colab.research.google.com/github/arinadi/colab-discord-transcriber/blob/main/Discord_Transkripsi_Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# @title 🚀 Run Transcription Bot

# ------------------------------------------------------------------------------
# SECTION 1: INSTALLATION & ASYNC SETUP
# ------------------------------------------------------------------------------
# Install the runtime dependencies through the notebook's shell escape.
# Only discord.py is pinned (2.3.2); every other package is left unpinned.
print("⏳ Installing required libraries...")
!pip install -q openai-whisper ffmpeg-python numpy torch discord.py==2.3.2 nest_asyncio requests werkzeug
# NOTE(review): this message is printed unconditionally — a failed pip run is
# presumably not detected here; confirm whether that matters for your setup.
print("✅ Libraries installed successfully.")

# Patch asyncio so that an event loop can be entered re-entrantly.
# NOTE(review): presumably required because the notebook kernel already has a
# running loop when bot.run() is called at the end of this cell — confirm.
import nest_asyncio
nest_asyncio.apply()
print("✅ nest_asyncio applied.")

# ------------------------------------------------------------------------------
# SECTION 2: IMPORTS, CONFIGURATION & CORE UTILITIES
# ------------------------------------------------------------------------------
print("🌀 Importing libraries and setting up initial configuration...")

# --- Core Imports (standard library and installed packages) ---
import discord
from discord.ext import commands
import os
import shutil
import time
import asyncio
import torch
import ffmpeg
import whisper
import requests
import re
import zipfile
from collections import defaultdict
from typing import Optional

# --- Third-party Imports ---
from werkzeug.utils import secure_filename
from google.colab import userdata, runtime

# --- 🔑 Load Secrets from Colab ---
# Read the three Discord secrets from Colab's secret store. Any failure
# (lookup error or an empty value) is reported and stops the cell.
try:
    DISCORD_BOT_TOKEN = userdata.get('DISCORD_BOT_TOKEN')
    DISCORD_WEBHOOK_URL = userdata.get('DISCORD_WEBHOOK_URL')
    DISCORD_CHANNEL_ID = userdata.get('DISCORD_CHANNEL_ID')

    # Every secret must be present and non-empty before the bot may start.
    secrets_complete = DISCORD_BOT_TOKEN and DISCORD_WEBHOOK_URL and DISCORD_CHANNEL_ID
    if not secrets_complete:
        raise ValueError("Please ensure all secrets (DISCORD_BOT_TOKEN, DISCORD_WEBHOOK_URL, DISCORD_CHANNEL_ID) are set in Colab.")
except Exception as secrets_error:
    print(f"❌ ERROR loading secrets: {secrets_error}")
    raise SystemExit("Execution stopped due to missing secrets.")
else:
    print("✅ Secrets loaded successfully.")

# --- 🛠️ General Configuration ---
# Editable parameters
# The trailing "#@param" markers are notebook form annotations; keep them on the same line.
# model_size: name passed to whisper.load_model() further down.
model_size = 'large-v2' #@param ['large-v2', 'medium', 'small', 'base']
# pause_threshold_input: gap in seconds between two transcript segments above
# which the formatter starts a new paragraph.
pause_threshold_input = 0.3 #@param {type:"number"}
# Upper bound on the probed media duration; files longer than this are rejected
# during validation. The check is skipped when the value is not greater than 0.
MAX_AUDIO_DURATION_SECONDS = 5400 # 90 minutes. Set to 0 to disable.

# Static paths and constants
# Working directories (relative to the current working directory): incoming
# media goes to UPLOAD_FOLDER, finished transcripts to TRANSCRIPT_FOLDER.
UPLOAD_FOLDER = 'uploads'
TRANSCRIPT_FOLDER = 'transcripts'
# Create both directories up front; exist_ok makes re-running the cell harmless.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(TRANSCRIPT_FOLDER, exist_ok=True)

# --- 🤖 Hardware & Model Loading ---
# Use the GPU when torch reports CUDA support; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("✅ GPU (CUDA) detected!" if device == "cuda" else "⚠️ GPU (CUDA) not detected. Using CPU.")

# `model` stays None when loading fails; the run section at the bottom of the
# cell checks it before starting the bot.
model = None
try:
    print(f"⏳ Loading Whisper model '{model_size}' onto {device.upper()}...")
    model = whisper.load_model(model_size, device=device)
    print(f"✅ Whisper model '{model_size}' loaded successfully.")
except Exception as e:
    error_msg = f"❌ FAILED to load Whisper model: {e}"
    print(error_msg)
    # Best-effort alert through the webhook. It is guarded and time-limited so
    # that a network problem cannot hang the cell or bury the real error above
    # under a second traceback.
    try:
        requests.post(
            DISCORD_WEBHOOK_URL,
            json={'content': f"❌ **ERROR:** Failed to load Whisper model. Bot cannot start.\n`{e}`"},
            timeout=10,
        )
    except Exception as notify_error:
        print(f"⚠️ Could not send model-failure notification: {notify_error}")

# --- ⚙️ Core Utility Functions ---
def send_startup_notification(webhook_url, message):
    """Sends a notification message to a Discord Webhook (best effort).

    Args:
        webhook_url: Full URL of the Discord webhook to post to.
        message: Text sent as the ``content`` field of the webhook payload.

    Returns:
        None. Failures (network errors, timeouts, non-2xx responses) are
        printed as a warning and never raised, so callers can treat the
        notification as optional.
    """
    try:
        # A timeout keeps a stalled connection from blocking bot start-up forever.
        response = requests.post(webhook_url, json={'content': message}, timeout=10)
        # Surface rejected requests (bad URL, rate limit, ...) as a warning too.
        response.raise_for_status()
    except Exception as e:
        print(f"⚠️ Could not send startup notification: {e}")

def format_duration(seconds: float) -> str:
    """Formats seconds into a user-friendly XXm YYs string.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        A string such as ``"2m 03s"``; fractional seconds are truncated.
        ``"N/A"`` is returned for anything that is not a usable duration:
        non-numeric values, negative numbers, NaN and infinity.
    """
    import math  # local import: the notebook's import cell does not bring in math

    if not isinstance(seconds, (int, float)):
        return "N/A"
    # NaN / inf cannot be converted to int; report them as unknown instead of raising.
    if isinstance(seconds, float) and not math.isfinite(seconds):
        return "N/A"
    if seconds < 0:
        return "N/A"

    minutes, remaining_seconds = divmod(int(seconds), 60)
    return f"{minutes}m {remaining_seconds:02d}s"

def format_transcription_with_pauses(result: dict, pause_threshold: float = 0.3) -> str:
    """Formats the transcription result, adding newlines for significant pauses.

    Args:
        result: Transcription result holding a ``"segments"`` list; each segment
            is read for its ``"start"``, ``"end"`` and ``"text"`` entries.
            A missing or empty segment list yields an empty string.
        pause_threshold: Silence in seconds between the end of one segment and
            the start of the next above which a new paragraph is started.

    Returns:
        The segment texts joined by single spaces, with paragraphs separated
        by a blank line. No whitespace is left dangling before a paragraph
        break, and segments whose text is empty are skipped.
    """
    paragraphs: list[list[str]] = [[]]
    previous_end = 0.0
    for segment in result.get("segments") or []:
        gap = segment["start"] - previous_end
        # Open a new paragraph only if the current one has content, so leading
        # silence or repeated pauses never produce empty paragraphs.
        if gap > pause_threshold and paragraphs[-1]:
            paragraphs.append([])
        text = segment["text"].strip()
        if text:
            paragraphs[-1].append(text)
        previous_end = segment["end"]
    return "\n\n".join(" ".join(words) for words in paragraphs if words)

# ------------------------------------------------------------------------------
# SECTION 3: CORE CLASSES FOR JOB MANAGEMENT
# ------------------------------------------------------------------------------
print("🏛️ Defining core architecture classes...")

class TranscriptionJob:
    """Plain record describing one queued transcription request."""
    def __init__(self, message: discord.Message, original_filename: str, local_filepath: str, audio_duration: float):
        # --- Where the request came from ---
        self.message: discord.Message = message
        self.author: discord.Member = message.author

        # --- What has to be transcribed ---
        self.original_filename: str = original_filename  # name as uploaded by the user
        self.local_filepath: str = local_filepath        # where the media was saved locally
        self.audio_duration: float = audio_duration      # seconds, as passed in by the caller

        # --- Bookkeeping ---
        # The id is the creation time in milliseconds since the epoch.
        created_at_ms = time.time() * 1000
        self.job_id: int = int(created_at_ms)
        self.status: str = "queued"

        print(f"[JOB: {self.job_id}] New job object created for '{self.original_filename}'.")

class JobManager:
    """Owns the FIFO queue of pending jobs and the registry of jobs in flight."""
    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # job_id -> TranscriptionJob for every job that has been queued but not completed.
        self.active_jobs = {}
        # Jobs waiting for the worker, in arrival order.
        self.job_queue = asyncio.Queue()
        print("✅ JobManager initialized.")

    async def add_job(self, job: TranscriptionJob):
        """Registers ``job``, enqueues it and confirms the queue position in the channel."""
        self.active_jobs[job.job_id] = job
        await self.job_queue.put(job)

        # The queue size right after the put is reported as this job's position.
        position = self.job_queue.qsize()
        confirmation = (
            f"✅ `[ID: {job.job_id}]` Your file `{job.original_filename}` "
            f"has been added to the queue (Position: **#{position}**)."
        )
        await job.message.channel.send(confirmation)
        print(f"[JOB: {job.job_id}] Added to queue at position {position}.")

    def complete_job(self, job_id: int):
        """Drops ``job_id`` from the registry; unknown ids are ignored silently."""
        try:
            del self.active_jobs[job_id]
        except KeyError:
            return
        print(f"[JOB: {job_id}] Job completed and removed from active list.")


# ------------------------------------------------------------------------------
# SECTION 4: FILE HANDLING & PRE-FLIGHT VALIDATION
# ------------------------------------------------------------------------------
print("📁 Setting up file handling and validation logic...")

class FilesHandler:
    """Handles all incoming file attachments, including validation and queueing.

    Attachments are saved into ``upload_folder``. ZIP archives (single files or
    archives split into ``.zip.NNN`` parts) are unpacked first, and every
    resulting file is probed with ffmpeg before a TranscriptionJob is handed
    to the JobManager.
    """
    def __init__(self, job_manager: JobManager, upload_folder: str):
        self.job_manager = job_manager
        self.upload_folder = upload_folder
        # Matches the numbered parts of a split archive, e.g. "audio.zip.001".
        self.chunk_regex = re.compile(r'\.zip\.\d{3}$')
        print("✅ FilesHandler initialized.")

    def _unique_upload_path(self, filename: str) -> str:
        """Returns a not-yet-existing path inside the upload folder for ``filename``.

        The name is sanitized with secure_filename and prefixed with a
        nanosecond timestamp, so files that share a name or arrive within the
        same second cannot overwrite each other.
        """
        safe_name = secure_filename(filename)
        while True:
            candidate = os.path.join(self.upload_folder, f"{time.time_ns()}_{safe_name}")
            if not os.path.exists(candidate):
                return candidate

    @staticmethod
    def _remove_if_exists(path: str) -> None:
        """Deletes ``path`` when it is present; a missing file is not an error."""
        if os.path.exists(path):
            os.remove(path)

    async def _validate_and_queue_file(self, local_path: str, original_filename: str, message: discord.Message):
        """Performs pre-flight checks and, if valid, creates and queues a job.

        Args:
            local_path: Location of the already-downloaded file.
            original_filename: Name shown to the user in replies.
            message: The Discord message the file came from; replies go to it.

        The local file is deleted whenever it is rejected. Once a job has been
        queued, the file belongs to the worker and is left in place.
        """
        temp_job_id = int(time.time() * 1000)
        print(f"[TEMP_JOB: {temp_job_id}] Starting validation for '{original_filename}'...")

        try:
            # Check 1: Can ffmpeg probe the file? (Is it valid media?)
            probe = await asyncio.to_thread(ffmpeg.probe, local_path)
            duration = float(probe['format']['duration'])
            print(f"[TEMP_JOB: {temp_job_id}] Validation: ffmpeg probe successful. Duration: {duration:.2f}s.")

            # Check 2: Is the duration within limits?
            if MAX_AUDIO_DURATION_SECONDS > 0 and duration > MAX_AUDIO_DURATION_SECONDS:
                error_msg = f"File duration ({format_duration(duration)}) exceeds the maximum allowed ({format_duration(MAX_AUDIO_DURATION_SECONDS)})."
                print(f"[TEMP_JOB: {temp_job_id}] Validation FAILED: {error_msg}")
                # Delete before replying so a failed reply cannot leak the file.
                self._remove_if_exists(local_path)
                await message.reply(f"❌ Could not process `{original_filename}`. **Reason:** {error_msg}")
                return

        except ffmpeg.Error as e:
            print(f"[TEMP_JOB: {temp_job_id}] Validation FAILED: ffmpeg could not probe the file. Error: {e.stderr.decode('utf-8') if e.stderr else 'Unknown ffmpeg error'}")
            self._remove_if_exists(local_path)
            await message.reply(f"❌ Could not process `{original_filename}`. It appears to be a corrupted or unsupported media file.")
            return
        except Exception as e:
            print(f"[TEMP_JOB: {temp_job_id}] Validation FAILED: An unexpected error occurred. Error: {e}")
            self._remove_if_exists(local_path)
            await message.reply(f"❌ An unexpected error occurred while validating `{original_filename}`.")
            return

        # All checks passed, create and add the job
        job = None
        try:
            job = TranscriptionJob(message, original_filename, local_path, duration)
            await self.job_manager.add_job(job)
        except Exception as e:
            if job is not None and job.job_id in self.job_manager.active_jobs:
                # The job is already registered and only the confirmation
                # message failed. The worker still needs the file, so it must
                # not be deleted here.
                print(f"[JOB: {job.job_id}] Queued, but the confirmation message could not be sent. Error: {e}")
                return
            print(f"[TEMP_JOB: {temp_job_id}] Validation FAILED: An unexpected error occurred. Error: {e}")
            self._remove_if_exists(local_path)
            await message.reply(f"❌ An unexpected error occurred while validating `{original_filename}`.")

    async def handle_attachments(self, message: discord.Message):
        """Entry point for processing attachments from a message.

        Split-archive parts are grouped by their base name and merged; every
        other attachment is processed on its own.
        """
        chunks = defaultdict(list)
        other_files = []
        for att in message.attachments:
            if self.chunk_regex.search(att.filename):
                base_name = att.filename.rsplit('.zip.', 1)[0]
                chunks[base_name].append(att)
            else:
                other_files.append(att)

        for base_name, chunk_list in chunks.items():
            await self._process_chunk_group(f"{base_name}.zip", chunk_list, message)

        for att in other_files:
            await self._process_single_attachment(att, message)

    async def _process_chunk_group(self, final_zip_name: str, chunk_list: list, message: discord.Message):
        """Merges split zip files and processes the result.

        Parts are concatenated in filename order into one archive, which is
        then extracted. The merged archive and every temporary part file are
        removed afterwards, whether or not the merge succeeded.
        """
        ordered_chunks = sorted(chunk_list, key=lambda x: x.filename)
        status_msg = await message.channel.send(f"🧩 Merging **{len(ordered_chunks)}** parts for `{final_zip_name}`...")
        combined_zip_path = self._unique_upload_path(final_zip_name)

        try:
            with open(combined_zip_path, 'wb') as dest_file:
                for chunk_att in ordered_chunks:
                    # Sanitized, unique temp name: never trust the raw attachment filename.
                    temp_chunk_path = self._unique_upload_path(chunk_att.filename)
                    try:
                        await chunk_att.save(temp_chunk_path)
                        with open(temp_chunk_path, 'rb') as src_file:
                            shutil.copyfileobj(src_file, dest_file)
                    finally:
                        self._remove_if_exists(temp_chunk_path)

            await status_msg.edit(content=f"🗜️ Extracting files from merged `{final_zip_name}`...")
            await self._extract_and_queue_zip(combined_zip_path, final_zip_name, message)

        except Exception as e:
            await message.channel.send(f"❌ Failed to merge chunks for `{final_zip_name}`: `{e}`")
        finally:
            self._remove_if_exists(combined_zip_path)

    async def _process_single_attachment(self, attachment: discord.Attachment, message: discord.Message):
        """Processes a single attachment, either a zip or a regular file."""
        local_path = self._unique_upload_path(attachment.filename)
        await attachment.save(local_path)

        if attachment.filename.lower().endswith('.zip'):
            try:
                await message.channel.send(f"🗜️ Extracting files from `{attachment.filename}`...")
                await self._extract_and_queue_zip(local_path, attachment.filename, message)
            finally:
                # Clean up original zip even when the steps above fail.
                self._remove_if_exists(local_path)
        else:
            await self._validate_and_queue_file(local_path, attachment.filename, message)

    async def _extract_and_queue_zip(self, zip_path: str, original_zip_name: str, message: discord.Message):
        """Extracts a zip file and validates/queues each contained file.

        Files whose name starts with ``.`` or ``__MACOSX`` are skipped, as is
        everything inside a ``__MACOSX`` directory. The temporary extraction
        directory is always removed at the end.
        """
        # NOTE(review): the archive is extracted without any size limit —
        # consider bounding the uncompressed size if uploads are untrusted.
        extract_dir = os.path.join(self.upload_folder, f"extract_{time.time_ns()}")
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                await asyncio.to_thread(zip_ref.extractall, extract_dir)

            for root, dirs, files in os.walk(extract_dir):
                # Prune in place so os.walk never descends into the metadata folder.
                dirs[:] = [d for d in dirs if d != '__MACOSX']
                for filename in files:
                    if filename.startswith(('__MACOSX', '.')):
                        continue
                    source_path = os.path.join(root, filename)
                    dest_path = self._unique_upload_path(filename)
                    shutil.move(source_path, dest_path)
                    await self._validate_and_queue_file(dest_path, filename, message)

        except zipfile.BadZipFile:
            await message.reply(f"❌ Failed to extract `{original_zip_name}`: The file appears to be a corrupted or invalid ZIP archive.")
        except Exception as e:
            await message.reply(f"❌ An unexpected error occurred while extracting `{original_zip_name}`: `{e}`")
        finally:
            if os.path.exists(extract_dir):
                shutil.rmtree(extract_dir)

# ------------------------------------------------------------------------------
# SECTION 5: DISCORD BOT LOGIC & WORKER
# ------------------------------------------------------------------------------
print("🤖 Initializing Discord bot and background worker...")

# --- Bot Instantiation ---
# Start from the library's default intents and additionally switch on message content.
# NOTE(review): presumably needed so the "!" prefix commands can be read from
# message text — confirm against the discord.py intents documentation.
intents = discord.Intents.default()
intents.message_content = True
bot = commands.Bot(command_prefix="!", intents=intents)

# --- Central Managers Instantiation ---
# One shared JobManager (queue + registry) and one FilesHandler feeding it;
# the event handlers and the worker below use these module-level instances.
job_manager = JobManager(bot)
files_handler = FilesHandler(job_manager, UPLOAD_FOLDER)


def run_transcription_process(job: TranscriptionJob) -> tuple[str, str]:
    """Transcribes the job's media file and writes the transcript to disk.

    Runs synchronously, using the module-level Whisper ``model``.

    Returns:
        ``(transcript_path, detected_language)`` — the path of the written
        ``.txt`` file and the language reported in the result ('N/A' if absent).
    """
    print(f"[JOB: {job.job_id}] Starting transcription for '{job.original_filename}'...")

    # Step 1: transcribe, then lay the segments out as paragraphs.
    whisper_output = model.transcribe(job.local_filepath, word_timestamps=True)
    transcript_text = format_transcription_with_pauses(whisper_output, pause_threshold_input)

    # Step 2: derive the output name: TS_(<duration>)_<sanitized stem, max 50 chars>.txt
    stem, _extension = os.path.splitext(job.original_filename)
    compact_duration = format_duration(job.audio_duration).replace(" ", "")
    output_filepath = os.path.join(
        TRANSCRIPT_FOLDER,
        f"TS_({compact_duration})_{secure_filename(stem)[:50]}.txt",
    )

    # Step 3: persist the transcript.
    with open(output_filepath, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(transcript_text)

    detected_language = whisper_output.get('language', 'N/A')
    print(f"[JOB: {job.job_id}] Transcription complete. Output: '{output_filepath}'. Language: {detected_language.upper()}.")
    return output_filepath, detected_language


async def queue_processor(manager: JobManager):
    """The main worker coroutine that processes jobs from the queue.

    Waits until the bot is ready, then handles one job at a time: announces
    the start, runs the transcription in a worker thread, replies with the
    transcript file, and finally deletes the job's local files.

    A failing job is reported to the user, but it never terminates the
    worker: problems while sending the error report or while cleaning up are
    logged and the loop moves on to the next job.

    Args:
        manager: The JobManager whose ``job_queue`` is consumed.
    """
    await bot.wait_until_ready()
    while not bot.is_closed():
        job: TranscriptionJob = await manager.job_queue.get()
        local_transcript_path = None

        try:
            # Send a NEW message to indicate processing has started.
            job.status = "processing"
            duration_str = format_duration(job.audio_duration)
            processing_message = f"▶️ `[ID: {job.job_id}]` Now processing `{job.original_filename}` (Duration: **{duration_str}**)..."
            await job.message.channel.send(processing_message)
            print(f"[JOB: {job.job_id}] Status updated to 'processing'. Sent new message.")

            # Run the heavy transcription task in a separate thread.
            local_transcript_path, detected_lang = await asyncio.to_thread(
                run_transcription_process, job
            )

            # Reply to the ORIGINAL user message with the final result.
            embed = discord.Embed(
                title="🎉 Transcription Complete!",
                color=discord.Color.green()
            )
            embed.add_field(name="Original File", value=f"`{job.original_filename}`", inline=False)
            embed.add_field(name="Audio Duration", value=format_duration(job.audio_duration), inline=True)
            embed.add_field(name="Detected Language", value=detected_lang.upper(), inline=True)
            embed.set_footer(text=f"Processed for {job.author.display_name} | Job ID: {job.job_id}")

            await job.message.reply(embed=embed, file=discord.File(local_transcript_path))

        except Exception as e:
            print(f"[JOB: {job.job_id}] FATAL ERROR during processing: {e}")

            # Discord rejects oversized embeds (title max 256 characters,
            # description max 4096), so both texts are bounded here.
            error_text = str(e)
            if len(error_text) > 1000:
                error_text = error_text[:1000] + "…"
            error_embed = discord.Embed(
                title=f"❌ Failed to Process: {job.original_filename}"[:256],
                description=f"An unexpected error occurred during transcription.\n```\n{error_text}\n```",
                color=discord.Color.red()
            )
            error_embed.set_footer(text=f"Job ID: {job.job_id}")

            # The reply can fail too (for example when the original message is
            # gone). That must not escape, or the worker would stop for good.
            try:
                await job.message.reply(embed=error_embed)
            except Exception as notify_error:
                print(f"[JOB: {job.job_id}] Could not deliver the error report: {notify_error}")

        finally:
            # Remove the job's media and transcript. A cleanup problem is
            # logged but must not prevent the queue bookkeeping below.
            for stale_path in (job.local_filepath, local_transcript_path):
                try:
                    if stale_path and os.path.exists(stale_path):
                        os.remove(stale_path)
                except OSError as cleanup_error:
                    print(f"[JOB: {job.job_id}] Could not remove '{stale_path}': {cleanup_error}")

            manager.job_queue.task_done()
            manager.complete_job(job.job_id)


# --- Bot Events and Commands ---

@bot.event
async def on_ready():
    """Event triggered when the bot is connected and ready.

    discord.py may fire this event more than once (for example after a
    reconnect), so the queue worker is started only if none is running.
    Otherwise every reconnect would add another worker and jobs would be
    transcribed concurrently on the same model.
    """
    print('----------------------------------------------------')
    print(f'✅ Bot has logged in as {bot.user}')
    print(f'🚀 Worker queue started. Bot active in Channel ID: {DISCORD_CHANNEL_ID}')
    print('----------------------------------------------------')

    running_worker = getattr(bot, "_queue_worker_task", None)
    if running_worker is not None and not running_worker.done():
        return

    # Keeping the task on the bot object both marks the worker as started and
    # holds a strong reference to it for the lifetime of the bot.
    bot._queue_worker_task = bot.loop.create_task(queue_processor(job_manager))

@bot.event
async def on_message(message: discord.Message):
    """Routes messages from the configured channel to commands and the file handler."""
    # Only human-authored messages in the configured channel are handled.
    is_relevant = not message.author.bot and str(message.channel.id) == DISCORD_CHANNEL_ID
    if not is_relevant:
        return

    # Commands (e.g. !ping) are dispatched before any attachment handling.
    await bot.process_commands(message)

    if not message.attachments:
        return

    # Attachments are handled in a separate task so this handler returns
    # immediately and the bot remains responsive.
    bot.loop.create_task(files_handler.handle_attachments(message))

@bot.command(name="ping", help="Checks bot latency and queue status.")
async def ping(ctx: commands.Context):
    """Displays bot latency and the number of jobs in the queue.

    The latency is shown as ``N/A`` while no finite measurement is available
    (``bot.latency`` can be NaN or infinite, and ``round()`` would raise on
    such a value).
    """
    import math  # local import: the notebook's import cell does not bring in math

    raw_latency = bot.latency
    latency = round(raw_latency * 1000) if math.isfinite(raw_latency) else "N/A"
    queue_size = job_manager.job_queue.qsize()
    # Append the unit only when there is a number to attach it to.
    latency_text = f"{latency}ms" if latency != "N/A" else latency
    await ctx.send(f"🏓 Pong! Latency: **{latency_text}**. Jobs in queue: **{queue_size}**.")

@bot.command(name="shutdown", help="Shuts down the bot and the Colab runtime.")
async def shutdown(ctx: commands.Context):
    """Safely shuts down the bot and terminates the Colab runtime.

    Steps: acknowledge the command, delete the temporary folders, close the
    Discord connection, then release the Colab runtime. Cleanup is best
    effort, and the runtime is released even if closing the bot fails, so a
    stray error cannot leave the runtime running.
    """
    # NOTE(review): no permission check is applied here — any non-bot user who
    # can post in the configured channel can trigger a shutdown. Consider
    # restricting the command if that is not intended.
    await ctx.send("🔴 Shutdown command received. Bot is shutting down...")

    print("🧹 Cleaning up temporary folders...")
    # ignore_errors covers both a missing folder and files that disappear or
    # are in use while a job is still being processed.
    for temp_folder in (UPLOAD_FOLDER, TRANSCRIPT_FOLDER):
        shutil.rmtree(temp_folder, ignore_errors=True)
    print("✅ Cleanup complete.")

    try:
        await bot.close()
    finally:
        runtime.unassign()

# ------------------------------------------------------------------------------
# SECTION 6: RUN THE BOT
# ------------------------------------------------------------------------------
# Start the bot only when the Whisper model was loaded; otherwise explain why nothing runs.
if model:
    # A limit that is not greater than 0 disables the duration check, so it is
    # reported as "Unlimited" rather than as a misleading "0m 00s".
    max_duration_label = (
        format_duration(MAX_AUDIO_DURATION_SECONDS)
        if MAX_AUDIO_DURATION_SECONDS > 0
        else "Unlimited"
    )
    startup_message = (
        f"✅ **Colab Runtime Ready!**\n"
        f"Model: **{model_size}** on **{device.upper()}** | "
        f"Pause Threshold: **{pause_threshold_input}s** | "
        f"Max Duration: **{max_duration_label}**.\n"
        f"Bot is starting..."
    )
    send_startup_notification(DISCORD_WEBHOOK_URL, startup_message)
    print("\n▶️ Running bot...")
    try:
        # Blocks until the bot is closed (e.g. through the shutdown command).
        bot.run(DISCORD_BOT_TOKEN)
    except Exception as e:
        # Report start-up/runtime failures both locally and through the webhook.
        error_message = f"❌ FAILED to run bot: {e}"
        print(error_message)
        send_startup_notification(DISCORD_WEBHOOK_URL, error_message)
else:
    print("\n❌ Bot cannot run because the Whisper model failed to load.")

⏳ Installing required libraries...
✅ Libraries installed successfully.
✅ nest_asyncio applied.
🌀 Importing libraries and setting up initial configuration...
✅ Secrets loaded successfully.
✅ GPU (CUDA) detected!
⏳ Loading Whisper model 'large-v2' onto CUDA...


[30;1m2025-06-20 15:23:54[0m [34;1mINFO    [0m [35mdiscord.client[0m logging in using static token
[30;1m2025-06-20 15:23:54[0m [34;1mINFO    [0m [35mdiscord.client[0m logging in using static token
INFO:discord.client:logging in using static token


✅ Whisper model 'large-v2' loaded successfully.
🏛️ Defining core architecture classes...
📁 Setting up file handling and validation logic...
🤖 Initializing Discord bot and background worker...
✅ JobManager initialized.
✅ FilesHandler initialized.

▶️ Running bot...


[30;1m2025-06-20 15:23:54[0m [34;1mINFO    [0m [35mdiscord.gateway[0m Shard ID None has connected to Gateway (Session ID: 0f54d6f3b29a5d3836e720706e5175f7).
[30;1m2025-06-20 15:23:54[0m [34;1mINFO    [0m [35mdiscord.gateway[0m Shard ID None has connected to Gateway (Session ID: 0f54d6f3b29a5d3836e720706e5175f7).
INFO:discord.gateway:Shard ID None has connected to Gateway (Session ID: 0f54d6f3b29a5d3836e720706e5175f7).


----------------------------------------------------
✅ Bot has logged in as transcription#2723
🚀 Worker queue started. Bot active in Channel ID: 1384485054570299392
----------------------------------------------------
[TEMP_JOB: 1750433187616] Starting validation for 'WhatsApp Video 2025-06-14 at 10.45.38_d4c20774.mp4'...
[TEMP_JOB: 1750433187616] Validation: ffmpeg probe successful. Duration: 123.33s.
[JOB: 1750433188815] New job object created for 'WhatsApp Video 2025-06-14 at 10.45.38_d4c20774.mp4'.
[JOB: 1750433188815] Added to queue at position 1.
[JOB: 1750433188815] Status updated to 'processing'. Sent new message.
[JOB: 1750433188815] Starting transcription for 'WhatsApp Video 2025-06-14 at 10.45.38_d4c20774.mp4'...
[TEMP_JOB: 1750433189348] Starting validation for 'WhatsApp_Audio_2025-06-11_at_09.44.46_6b636627.mp3'...
[TEMP_JOB: 1750433189348] Validation: ffmpeg probe successful. Duration: 187.53s.
[JOB: 1750433189517] New job object created for 'WhatsApp_Audio_2025-06-11_at_