<a href="https://colab.research.google.com/github/arinadi/Transcript-AI/blob/main/Transcript_AI_Web.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 📌 Installing Required Libraries
# NOTE(review): the PyPI package named `ffmpeg` is presumably not the FFmpeg
# command-line tool that Whisper relies on to decode audio — confirm whether it
# is actually needed here.
# NOTE(review): noisereduce, scipy and pydub are not imported by any active line
# in the visible part of this notebook (only commented-out imports mention
# them) — confirm they are still required.
!pip install -q openai-whisper ffmpeg numpy scipy noisereduce pydub flask pyngrok

# 📌 Importing Libraries
import whisper
import numpy as np
# import noisereduce as nr
# import scipy.io.wavfile as wav
# from pydub import AudioSegment
import os
import shutil
import threading
import time
import signal # Added for shutdown

from flask import Flask, request, render_template_string, send_from_directory, redirect, url_for, jsonify, make_response
from werkzeug.utils import secure_filename
from pyngrok import ngrok, conf

from google.colab import userdata

# ------------------------------------------------------------------------------
# NGROK CONFIGURATION WITH AUTHTOKEN (IMPORTANT!)
# ------------------------------------------------------------------------------
# The ngrok authtoken is read from the Colab "Secrets" panel under the key
# 'MyNGROK'; store the token there instead of pasting it into the notebook.
try:
    NGROK_AUTH_TOKEN = userdata.get('MyNGROK')
except Exception as e:
    # NOTE(review): userdata.get appears to raise (rather than return None)
    # when the secret is missing or notebook access was not granted — confirm
    # against the Colab documentation. Falling back to None keeps the cell
    # running so the warning below is shown instead of a bare traceback.
    print(f"⚠️ Could not read Colab secret 'MyNGROK': {e}")
    NGROK_AUTH_TOKEN = None

# The placeholder comparison is kept because the start-up code further down
# performs the same check.
if not NGROK_AUTH_TOKEN or NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN_HERE":
    print("⚠️ WARNING: ngrok token is not set. Add your ngrok authtoken as a Colab secret named 'MyNGROK'.")
else:
    try:
        conf.get_default().auth_token = NGROK_AUTH_TOKEN
        print("✅ Ngrok token configured successfully.")
    except Exception as e:
        print(f"⚠️ Failed to configure ngrok token: {e}")

# ------------------------------------------------------------------------------
# Core Transcription Function Section
# ------------------------------------------------------------------------------
def format_transcription_with_pauses(result, pause_threshold=0.7):
    """Join transcription segments into text, breaking paragraphs at long pauses.

    Args:
        result: Mapping with a "segments" list; each segment provides
            "start", "end" and "text". A missing or empty list yields "".
        pause_threshold: Gap (same unit as the segment timestamps) between the
            end of one segment and the start of the next above which a new
            paragraph is started.

    Returns:
        The segment texts separated by single spaces within a paragraph and
        by a blank line ("\\n\\n") between paragraphs, with no leading or
        trailing whitespace on any paragraph.
    """
    paragraphs = []
    current_words = []
    previous_end = 0
    for segment in result.get("segments") or []:
        text = segment["text"].strip()
        # A long silence closes the current paragraph. Nothing is emitted when
        # the paragraph is still empty, so leading pauses and runs of empty
        # segments never produce blank paragraphs.
        if segment["start"] - previous_end > pause_threshold and current_words:
            paragraphs.append(" ".join(current_words))
            current_words = []
        if text:
            current_words.append(text)
        previous_end = segment["end"]
    if current_words:
        paragraphs.append(" ".join(current_words))
    return "\n\n".join(paragraphs)

def transcribe_audio_web(audio_path, model, output_folder, language_code=None):
    """Transcribe one audio file and write the formatted text to disk.

    Args:
        audio_path: Path of the audio file to transcribe.
        model: Loaded Whisper model; its ``transcribe`` method is called.
        output_folder: Directory the transcript is written to (created if
            it does not exist yet).
        language_code: Language code passed to Whisper. ``None`` or an empty
            string requests auto-detection.

    Returns:
        Tuple ``(output_filename, detected_language)`` where
        ``output_filename`` is the transcript's file name inside
        ``output_folder`` and ``detected_language`` is the ``language`` entry
        of Whisper's result (``'N/A'`` when absent).
    """
    # NOTE(review): Whisper appears to auto-detect only when language is None,
    # so an empty string is normalised here instead of being forwarded as a
    # (bogus) language code — confirm against the Whisper transcribe() docs.
    language_code = language_code or None
    print(f"📢 Running transcription for: {audio_path} (Language specified: {language_code or 'Auto-Detect'})")
    result = model.transcribe(audio_path, language=language_code, word_timestamps=True)

    formatted_text = format_transcription_with_pauses(result)
    base_filename = os.path.splitext(os.path.basename(audio_path))[0]
    # secure_filename may strip a name down to an empty string; fall back to a
    # fixed stem so the output is never the nameless "transcription_final_.txt".
    safe_base = secure_filename(base_filename) or "audio"
    output_filename = f"transcription_final_{safe_base}.txt"
    os.makedirs(output_folder, exist_ok=True)
    output_filepath = os.path.join(output_folder, output_filename)
    with open(output_filepath, "w", encoding="utf-8") as f:
        f.write(formatted_text)

    detected_language = result.get('language', 'N/A')  # Language reported by Whisper
    print(f"✅ Transcription finished! Output saved to '{output_filepath}'. Detected language by Whisper: {detected_language}")
    return output_filename, detected_language

# ------------------------------------------------------------------------------
# Flask Configuration
# ------------------------------------------------------------------------------
# Uploaded audio and finished transcripts are kept in separate folders, both
# created up front so the routes can write to them immediately.
UPLOAD_FOLDER, TRANSCRIPT_FOLDER = 'uploads', 'transcripts'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(TRANSCRIPT_FOLDER, exist_ok=True)

app = Flask(__name__)
app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    TRANSCRIPT_FOLDER=TRANSCRIPT_FOLDER,
    MAX_CONTENT_LENGTH=50 * 1024 * 1024,  # 50MB
)

# 📌 Load Whisper model
# `model` is left as None when loading fails; the routes check for that and
# report the failure instead of attempting a transcription.
model = None
print("⏳ Loading Whisper model 'medium'...")
try:
    model = whisper.load_model("medium")
except Exception as e:
    print(f"❌ Failed to load Whisper model: {e}")
else:
    print("✅ Whisper model successfully loaded.")

# Global variables for server status and ngrok URL
public_url_ngrok = flask_thread = None
server_running = False

# ------------------------------------------------------------------------------
# HTML Template with JavaScript for one-by-one file processing and stop button
# ------------------------------------------------------------------------------
HTML_TEMPLATE = """
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <title>Interactive Audio Transcription</title>
    <style>
        body { font-family: sans-serif; margin: 20px; background-color: #f4f4f4; color: #333; }
        .container { background-color: #fff; padding: 20px; border-radius: 8px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
        h1, h2 { color: #333; }
        input[type="file"] { margin-bottom: 10px; }
        input[type="button"], button { background-color: #007bff; color: white; padding: 10px 15px; border: none; border-radius: 4px; cursor: pointer; margin-right: 5px; }
        input[type="button"]:hover, button:hover { background-color: #0056b3; }
        button.stop-button { background-color: #dc3545; }
        button.stop-button:hover { background-color: #c82333; }
        .result { margin-top: 20px; padding: 15px; background-color: #e9ecef; border-radius: 4px; }
        a { color: #007bff; text-decoration: none; }
        a:hover { text-decoration: underline; }
        .loader-container { text-align: center; }
        .loader {
            border: 5px solid #f3f3f3; border-top: 5px solid #3498db; border-radius: 50%;
            width: 30px; height: 30px; animation: spin 1s linear infinite;
            display: none; margin: 0 auto 5px auto;
        }
        #currentFileTimer { font-size: 0.9em; color: #555; display: none; margin-bottom: 10px; }
        #totalProcessingTime { font-size: 0.9em; color: #555; display: none; margin-top: 5px;}
        @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
        #statusContainer ul, #historyContainer ul { list-style-type: none; padding-left: 0; }
        #statusContainer li, #historyContainer li { margin-bottom: 5px; padding: 8px; border-radius: 4px; border: 1px solid #ddd; font-size: 0.9em; }
        #statusContainer .success, #historyContainer .success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
        #statusContainer .error, #historyContainer .error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
        #statusContainer .processing { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
        .file-info { font-weight: bold; }
        .duration-info { color: #337ab7; }
        .time-info { color: #5cb85c; }
        .detected-lang-info { color: #800080; /* Purple */ font-style: italic; }
        .live-timer { color: #e67e22; font-weight: bold; }
        #serviceControlArea { text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; }
        #serviceStatus { margin-bottom: 10px; padding: 10px; border-radius: 4px; }
        #serviceStatus.running { background-color: #d4edda; color: #155724; }
        #serviceStatus.stopped { background-color: #f8d7da; color: #721c24; }
        .transcription-options { margin-bottom: 15px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Upload Audio File for Transcription</h1>
        <p>Select one or more audio files. The language will be auto-detected.</p>
        <div class="transcription-options">
            <!-- You can add a language selector here if you want to override auto-detection -->
            <!-- Example:
            <label for="transcriptionLanguage">Target Language (Optional, leave for Auto-Detect):</label>
            <select id="transcriptionLanguage">
                <option value="">Auto-Detect</option>
                <option value="en">English</option>
                <option value="id">Indonesian</option>
                <option value="ja">Japanese</option>
                 Add more languages supported by Whisper
            </select>
            -->
        </div>
        <form id="uploadForm">
            <input type="file" id="audioFilesInput" name="audio_file_input" multiple required>
            <br>
            <input type="button" value="Transcribe" onclick="startProcessingQueue()">
        </form>

        <div class="loader-container">
            <div id="loader" class="loader"></div>
            <div id="currentFileTimer">Processing file: <span class="live-timer">0s</span></div>
            <div id="totalProcessingTime"></div>
        </div>

        <div id="statusContainer" class="result" style="display:none;">
            <h2>Processing Status:</h2>
            <ul id="statusList"></ul>
        </div>

        <div id="historyContainer" class="result">
            <h2>Transcription History (Latest First):</h2>
            <ul id="historyList">
                {% if initial_processed_files %}
                    {% for file_info in initial_processed_files|reverse %}
                        <li>
                            <strong>{{ file_info.original_name }}</strong>
                            {% if file_info.detected_language %}
                                (<span class="detected-lang-info">Lang: {{ file_info.detected_language }}</span>)
                            {% endif %}
                            ->
                            <a href="{{ url_for('view_transcript', filename=file_info.transcript_file) }}" target="_blank">View</a> |
                            <a href="{{ url_for('download_transcript', filename=file_info.transcript_file) }}">Download</a>
                        </li>
                    {% endfor %}
                {% else %}
                    <li id="noHistoryMsg">No history yet.</li>
                {% endif %}
            </ul>
        </div>

        <div id="serviceControlArea">
             <div id="serviceStatus">Service is loading...</div>
             <button type="button" class="stop-button" onclick="stopService()">Stop Service</button>
        </div>

    </div>

    <script>
        let fileProcessingQueue = [];
        let clientSideHistory = [];
        const MAX_CLIENT_HISTORY = 20;
        let globalStartTime;
        let currentFileTimerInterval = null;
        let currentFileElapsedTime = 0;

        {% if initial_processed_files %}
            clientSideHistory = {{ initial_processed_files|tojson|safe }};
            clientSideHistory.reverse();
        {% endif %}

        /**
         * Show whether the service is usable and enable or disable the controls.
         *
         * @param {boolean} isRunning - true enables the upload and stop controls,
         *     false disables them.
         * @param {string} [message] - text for the status box; a default
         *     sentence for the given state is used when empty.
         */
        function updateServiceStatus(isRunning, message = "") {
            const statusBox = document.getElementById('serviceStatus');
            const controls = [
                document.querySelector('button.stop-button'),
                document.querySelector('input[value="Transcribe"]'),
                document.getElementById('audioFilesInput'),
            ];
            const defaultText = isRunning
                ? 'Service is running normally.'
                : 'Service stopped. You may need to stop the Colab cell manually.';

            statusBox.className = isRunning ? 'running' : 'stopped';
            statusBox.textContent = message || defaultText;
            controls.forEach((control) => {
                // Any of the controls may be absent from the page; skip those.
                if (control) control.disabled = !isRunning;
            });
        }

        /**
         * Format a number of seconds as "Xh Ym Zs".
         *
         * Hours appear only when non-zero; minutes appear when non-zero, when
         * hours are shown, or when showZeroMinutes is set. Fractions of a
         * second are dropped. Returns "N/A" for null, undefined or NaN.
         */
        function formatDuration(seconds, showZeroMinutes = false) {
            if (seconds === null || typeof seconds === 'undefined' || isNaN(seconds)) {
                return "N/A";
            }
            const hours = Math.floor(seconds / 3600);
            const minutes = Math.floor((seconds % 3600) / 60);
            const wholeSeconds = Math.floor(seconds % 60);

            const parts = [];
            if (hours > 0) parts.push(`${hours}h`);
            if (minutes > 0 || hours > 0 || showZeroMinutes) parts.push(`${minutes}m`);
            parts.push(`${wholeSeconds}s`);
            return parts.join(" ");
        }

        /**
         * Pair a selected file with its media duration.
         *
         * Resolves (never rejects) with an object holding the file, its
         * duration and its name. The duration is null when the file's MIME type
         * does not start with "audio/", when the metadata fails to load, or when
         * nothing has loaded after 5 seconds.
         */
        async function getFileWithDuration(file) {
            return new Promise((resolve) => {
                const settle = (duration) => {
                    resolve({ file: file, duration: duration, name: file.name });
                };

                if (!file.type.startsWith('audio/')) {
                    settle(null);
                    return;
                }

                const probe = new Audio();
                probe.preload = 'metadata';
                const blobUrl = URL.createObjectURL(file);
                probe.src = blobUrl;

                // Resolve first, then release the temporary object URL.
                const settleAndRelease = (duration) => {
                    settle(duration);
                    URL.revokeObjectURL(blobUrl);
                };

                probe.onloadedmetadata = () => {
                    settleAndRelease(probe.duration);
                };
                probe.onerror = (e) => {
                    console.error(`Error loading metadata for ${file.name}:`, e);
                    settleAndRelease(null);
                };
                setTimeout(() => {
                    // readyState 0 means no metadata has been loaded at all.
                    if (probe.readyState === 0) {
                        console.warn(`Timeout getting duration for ${file.name}`);
                        settleAndRelease(null);
                    }
                }, 5000);
            });
        }

        // True while a batch started by startProcessingQueue() is still running.
        let queueRunInProgress = false;

        /**
         * Start transcribing the files currently selected in the file input.
         *
         * Resets the status area, collects each file's duration, fills
         * fileProcessingQueue and then works through it one file at a time.
         * A second call while a batch is still running is rejected; it would
         * otherwise replace the queue and the status list underneath the
         * upload that is in flight.
         */
        async function startProcessingQueue() {
            const fileInput = document.getElementById('audioFilesInput');
            if (fileInput.files.length === 0) { alert("Please select audio file(s) first."); return; }
            if (queueRunInProgress) {
                alert("Files are still being processed. Please wait for the current batch to finish.");
                return;
            }

            queueRunInProgress = true;
            try {
                document.getElementById('loader').style.display = 'block';
                document.getElementById('currentFileTimer').style.display = 'block';
                document.getElementById('totalProcessingTime').style.display = 'none';
                document.getElementById('totalProcessingTime').textContent = '';
                document.getElementById('statusContainer').style.display = 'block';
                document.getElementById('statusList').innerHTML = '';
                globalStartTime = performance.now();
                const filePromises = Array.from(fileInput.files).map(file => getFileWithDuration(file));
                fileProcessingQueue = await Promise.all(filePromises);
                // processNextFileFromQueue() awaits its own follow-up call, so
                // this promise settles only once the whole queue is drained.
                await processNextFileFromQueue();
            } finally {
                queueRunInProgress = false;
            }
        }

        /** Write the elapsed time of the file in progress into the live timer. */
        function updateLiveTimerDisplay() {
            const liveTimer = document.querySelector('#currentFileTimer .live-timer');
            if (!liveTimer) return;
            liveTimer.textContent = formatDuration(currentFileElapsedTime, true);
        }

        /**
         * Upload and transcribe the next queued file, then continue with the
         * rest of the queue.
         *
         * When the queue is empty the loader is hidden and the total batch time
         * is shown. Otherwise one file is POSTed to the transcription endpoint
         * and its status line is updated with the outcome (success, server-side
         * failure, or client/network error) before the next file is processed.
         */
        async function processNextFileFromQueue() {
            // File names and server messages are untrusted text. They are escaped
            // before being interpolated into innerHTML so that markup inside a
            // name is displayed literally instead of being interpreted.
            const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
            const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
            const stopFileTimer = () => {
                if (currentFileTimerInterval) clearInterval(currentFileTimerInterval);
                currentFileTimerInterval = null;
            };

            stopFileTimer();
            currentFileElapsedTime = 0;
            updateLiveTimerDisplay();

            if (fileProcessingQueue.length === 0) {
                // Queue drained: hide the per-file indicators, show the batch total.
                document.getElementById('loader').style.display = 'none';
                document.getElementById('currentFileTimer').style.display = 'none';
                const totalProcessingTimeSeconds = (performance.now() - globalStartTime) / 1000;
                const totalTimeEl = document.getElementById('totalProcessingTime');
                totalTimeEl.textContent = `Total time all files: ${formatDuration(totalProcessingTimeSeconds, true)}`;
                totalTimeEl.style.display = 'block';
                console.log("All files in queue have been processed.");
                return;
            }

            const fileData = fileProcessingQueue.shift();
            const formData = new FormData();
            formData.append('audio_file', fileData.file);

            // If you add a language selector:
            // const langSelector = document.getElementById('transcriptionLanguage');
            // if (langSelector && langSelector.value) {
            //    formData.append('transcription_language', langSelector.value);
            // }
            // Otherwise, no language is sent, relying on server-side default (None for auto-detect)

            const safeFileName = escapeHtml(fileData.name);
            const hasDuration = fileData.duration !== null;
            const durationHtml = hasDuration
                ? `<span class="duration-info">Duration: ${formatDuration(fileData.duration)}</span>`
                : '';
            // Used in the result lines, where the duration is followed by the timing.
            const durationPrefix = hasDuration ? `${durationHtml}, ` : '';

            const statusList = document.getElementById('statusList');
            const listItem = document.createElement('li');
            listItem.id = "status-" + CSS.escape(fileData.name);
            listItem.className = 'processing';
            let initialStatusHtml = `<span class="file-info">${safeFileName}</span>`;
            if (hasDuration) { initialStatusHtml += ` (${durationHtml})`; }
            listItem.innerHTML = `${initialStatusHtml} - Processing...`;
            statusList.prepend(listItem);

            const fileProcessingStartTime = performance.now();
            const elapsedText = () => formatDuration((performance.now() - fileProcessingStartTime) / 1000, true);
            currentFileTimerInterval = setInterval(() => { currentFileElapsedTime++; updateLiveTimerDisplay(); }, 1000);

            try {
                const response = await fetch("{{ url_for('transcribe_single_file') }}", { method: 'POST', body: formData });

                // The route itself always answers with JSON, but an error page
                // produced elsewhere (presumably e.g. when the upload exceeds the
                // configured size limit) may not be JSON. Such a response is a
                // failed file, not a lost connection.
                let result = null;
                try {
                    result = await response.json();
                } catch (parseError) {
                    console.warn('Response was not valid JSON:', parseError);
                }
                stopFileTimer();
                const elapsed = elapsedText();

                if (response.ok && result && result.status === 'success') {
                    const langHtml = result.detected_language
                        ? ` <span class="detected-lang-info">(Lang: ${escapeHtml(result.detected_language)})</span>`
                        : '';
                    listItem.innerHTML = `
                        <span class="file-info">${escapeHtml(result.original_name)}</span>
                        (${durationPrefix}<span class="time-info">Completed in: ${elapsed}</span>${langHtml})
                        - Success: ${escapeHtml(result.transcript_filename)}`;
                    listItem.className = 'success';
                    updateClientHistory(result);
                } else {
                    const serverMessage = result && result.message ? result.message : '';
                    const failureText = serverMessage
                        || (result ? 'Unknown error' : `Server returned HTTP ${response.status} ${response.statusText}`);
                    listItem.innerHTML = `
                        <span class="file-info">${safeFileName}</span>
                        (${durationPrefix}<span class="time-info">Failed after: ${elapsed}</span>)
                        - Failed: ${escapeHtml(failureText)}`;
                    listItem.className = 'error';
                    if (response.status === 503 || serverMessage.includes("Whisper model failed to load")) {
                        updateServiceStatus(false, serverMessage || "Service stopped or unavailable. Check Colab logs.");
                    }
                }
            } catch (error) {
                // fetch() itself failed: the request never produced a response.
                stopFileTimer();
                const elapsed = elapsedText();
                console.error('Error:', error);
                listItem.innerHTML = `
                    <span class="file-info">${safeFileName}</span>
                    (${durationPrefix}<span class="time-info">Error after: ${elapsed}</span>)
                    - Client/Network Error: ${escapeHtml(error.message)}`;
                listItem.className = 'error';
                updateServiceStatus(false, "Connection to server failed. Service may have stopped.");
            }
            await processNextFileFromQueue();
        }

        /**
         * Put a finished transcription at the front of the client-side history
         * and redraw the list.
         *
         * The history is capped at MAX_CLIENT_HISTORY entries; the oldest entry
         * is dropped when the cap is exceeded.
         */
        function updateClientHistory(processedFileResult) {
            const placeholder = document.getElementById('noHistoryMsg');
            if (placeholder) placeholder.style.display = 'none';

            const {
                original_name,
                transcript_filename,
                view_url,
                download_url,
                detected_language,
            } = processedFileResult;

            // History entries store the transcript name under "transcript_file".
            clientSideHistory.unshift({
                original_name,
                transcript_file: transcript_filename,
                view_url,
                download_url,
                detected_language,
            });
            if (clientSideHistory.length > MAX_CLIENT_HISTORY) {
                clientSideHistory.pop();
            }
            renderHistoryList();
        }

        /**
         * Redraw the history list from clientSideHistory.
         *
         * Entries are built as DOM nodes with textContent, so an uploaded file
         * name containing markup is displayed literally rather than interpreted
         * (the history is visible to everyone who opens the page). Entries that
         * carry no view_url/download_url get links derived from the transcript
         * file name.
         */
        function renderHistoryList() {
            const historyList = document.getElementById('historyList');
            historyList.innerHTML = '';
            if (clientSideHistory.length === 0) {
                const emptyItem = document.createElement('li');
                emptyItem.id = "noHistoryMsg";
                emptyItem.textContent = "No history yet.";
                historyList.appendChild(emptyItem);
                return;
            }

            const makeLink = (href, label, openInNewTab) => {
                const link = document.createElement('a');
                link.href = href;
                link.textContent = label;
                if (openInNewTab) link.target = '_blank';
                return link;
            };

            clientSideHistory.forEach(item => {
                const li = document.createElement('li');

                const nameEl = document.createElement('strong');
                nameEl.textContent = item.original_name;
                li.appendChild(nameEl);

                if (item.detected_language) {
                    const langEl = document.createElement('span');
                    langEl.className = 'detected-lang-info';
                    langEl.textContent = `Lang: ${item.detected_language}`;
                    li.append(' (', langEl, ')');
                }

                // Fallback paths must match the Flask routes /view/<filename>
                // and /download/<filename>.
                const encodedName = encodeURIComponent(item.transcript_file);
                const viewHref = item.view_url || `/view/${encodedName}`;
                const downloadHref = item.download_url || `/download/${encodedName}`;
                li.append(
                    ' -> ',
                    makeLink(viewHref, 'View', true),
                    ' | ',
                    makeLink(downloadHref, 'Download', false)
                );
                historyList.appendChild(li);
            });
        }

        /**
         * Ask the server to shut down after the user confirms.
         *
         * The controls are disabled as soon as the request is sent; the status
         * box then shows the server's message, or an error text when the
         * request fails.
         */
        async function stopService() {
            const confirmed = confirm("Are you sure you want to stop the service? This will shut down the Flask server and ngrok tunnel.");
            if (!confirmed) {
                return;
            }
            try {
                updateServiceStatus(false, "Sending request to stop service...");
                const data = await fetch("{{ url_for('shutdown') }}", { method: 'POST' })
                    .then((response) => response.json());
                const fallback = "Service is shutting down. Please stop the Colab cell if it doesn't stop automatically.";
                updateServiceStatus(false, data.message || fallback);
            } catch (error) {
                updateServiceStatus(false, "Error sending shutdown command. Server might already be stopped or unreachable.");
                console.error('Shutdown error:', error);
            }
        }

        // On page load: draw the history and derive the initial service state
        // from the values the server rendered into the page.
        document.addEventListener('DOMContentLoaded', () => {
            renderHistoryList();
            const publicUrl = "{{ public_url_ngrok }}";
            const modelLoaded = "{{ model_loaded }}" === "True";
            // The server renders the string "None" while no tunnel URL is known.
            const urlReady = publicUrl && publicUrl !== "None" && publicUrl.startsWith("http");

            if (!modelLoaded) {
                updateServiceStatus(false, "Service inactive: Whisper model failed to load.");
                return;
            }
            if (urlReady) {
                updateServiceStatus(true, "Service is running. Public URL: " + publicUrl);
                return;
            }
            updateServiceStatus(false, "Service inactive or ngrok URL not ready yet.");
        });
    </script>
</body>
</html>
"""

# ------------------------------------------------------------------------------
# Flask Routes
# ------------------------------------------------------------------------------
# Most recent transcription results, newest first (transcribe_single_file
# inserts at index 0), capped at MAX_SERVER_HISTORY entries.
server_side_history = []
MAX_SERVER_HISTORY = 20

@app.route('/', methods=['GET'])
def index():
    """Render the upload page with the current history and service state.

    The template receives:
      * ``initial_processed_files`` - one dict per history entry. The view and
        download URLs are included because the page's script rebuilds the
        history list from this data and needs them for its links.
      * ``public_url_ngrok`` - the current tunnel URL (or None).
      * ``model_loaded`` - the string "True" or "False".
    """
    model_loaded_status = "True" if model else "False"
    history_keys = (
        'original_name',
        'transcript_file',
        'detected_language',
        'view_url',
        'download_url',
    )
    # .get() keeps every key present (as None) even for incomplete entries.
    processed_history_for_template = [
        {key: item.get(key) for key in history_keys}
        for item in server_side_history
    ]

    return render_template_string(HTML_TEMPLATE,
                                  initial_processed_files=processed_history_for_template,
                                  public_url_ngrok=public_url_ngrok,
                                  model_loaded=model_loaded_status)

@app.route('/transcribe_single_file', methods=['POST'])
def transcribe_single_file():
    """Transcribe one uploaded audio file and record it in the history.

    Expects a multipart form with the file under ``audio_file`` and an
    optional ``transcription_language`` field (empty or missing means
    auto-detect).

    Returns:
        JSON with ``status: 'success'`` plus the original name, transcript
        file name, detected language and view/download URLs (HTTP 200); or
        JSON with ``status: 'error'`` and a ``message`` (HTTP 400 for a bad
        request, 500 when the model is missing or transcription fails, 503
        when the server is shutting down).
    """
    if not server_running:
        return jsonify({'status': 'error', 'message': 'Server is shutting down or not active.'}), 503
    if model is None:
        return jsonify({'status': 'error', 'message': 'Whisper model failed to load. Service may need a restart.'}), 500
    if 'audio_file' not in request.files:
        return jsonify({'status': 'error', 'message': "Request does not contain 'audio_file'."}), 400
    file = request.files['audio_file']
    if file.filename == '':
        return jsonify({'status': 'error', 'message': 'Filename is empty.'}), 400
    if not file:
        return jsonify({'status': 'error', 'message': 'Invalid or no file.'}), 400

    original_filename = file.filename
    filename_secure = secure_filename(original_filename)
    # secure_filename drops characters it cannot represent in ASCII. A name
    # made up only of such characters comes back empty (saving would then
    # target the upload folder itself), or is reduced to just its extension so
    # that unrelated uploads overwrite each other. Generate a name instead.
    original_stem, original_ext = os.path.splitext(original_filename)
    if not filename_secure or not secure_filename(original_stem):
        filename_secure = f"audio_{int(time.time() * 1000)}"
        safe_ext = secure_filename(original_ext)
        if safe_ext:
            filename_secure += f".{safe_ext}"
    audio_path = os.path.join(app.config['UPLOAD_FOLDER'], filename_secure)

    try:
        file.save(audio_path)
        # An empty value (e.g. the "Auto-Detect" option of a selector) and a
        # missing field both mean auto-detect.
        transcription_language_from_client = request.form.get('transcription_language') or None

        transcript_filename, detected_lang = transcribe_audio_web(
            audio_path, model, app.config['TRANSCRIPT_FOLDER'],
            language_code=transcription_language_from_client
        )
        file_info = {
            "original_name": original_filename,
            "transcript_file": transcript_filename,
            "detected_language": detected_lang,
            "view_url": url_for('view_transcript', filename=transcript_filename, _external=True),
            "download_url": url_for('download_transcript', filename=transcript_filename, _external=True)
        }
        # Newest first; drop the oldest entry once the cap is exceeded.
        server_side_history.insert(0, file_info)
        if len(server_side_history) > MAX_SERVER_HISTORY:
            server_side_history.pop()
        return jsonify({
            'status': 'success',
            'original_name': original_filename,
            'transcript_filename': transcript_filename,
            'detected_language': detected_lang,
            'view_url': file_info['view_url'],
            'download_url': file_info['download_url']
        }), 200
    except Exception as e:
        # Route boundary: report the failure to the client as JSON.
        print(f"Error during transcription: {e}")
        return jsonify({'status': 'error', 'message': f"Internal error during transcription: {str(e)}"}), 500

@app.route('/download/<filename>')
def download_transcript(filename):
    """Send a transcript as a file download with caching disabled.

    Args:
        filename: Name of a file inside the transcript folder.

    Returns:
        The file as an attachment; or a plain-text message with status 404
        when no such file exists, 503 when the server is not active, or 500
        on an unexpected error.
    """
    if not server_running:
        return "Server not active. Please ensure the service is running.", 503
    try:
        print(f"DEBUG: Download request received for filename: '{filename}'")
        # Resolve the folder once and use the same absolute path for both the
        # existence check and send_from_directory, so the two cannot disagree
        # about which directory a relative folder name refers to.
        abs_directory_path = os.path.abspath(app.config['TRANSCRIPT_FOLDER'])
        print(f"DEBUG: Attempting to send file from directory: '{abs_directory_path}'")

        file_path = os.path.join(abs_directory_path, filename)
        print(f"DEBUG: Constructed full file path for download: '{file_path}'")

        # isfile (not exists): a name that resolves to a directory must be a
        # 404 rather than an error further down.
        if not os.path.isfile(file_path):
            print(f"ERROR: File not found at the specified path: '{file_path}' for download.")
            # List the folder to help diagnose name mismatches.
            try:
                files_in_dir = os.listdir(abs_directory_path)
                print(f"DEBUG: Contents of transcript directory '{abs_directory_path}': {files_in_dir}")
            except OSError as list_e:
                print(f"DEBUG: Could not list files in the transcript directory: {list_e}")
            return "Requested file not found for download.", 404

        print(f"DEBUG: File '{filename}' located. Preparing to send.")

        # send_from_directory sets Content-Type and, with as_attachment=True,
        # Content-Disposition.
        response = make_response(send_from_directory(abs_directory_path, filename, as_attachment=True))

        # Disable caching so every click fetches the current file. "public"
        # is deliberately not included: it contradicts "no-store".
        response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate, max-age=0"
        response.headers["Pragma"] = "no-cache"  # HTTP/1.0 backward compatibility
        response.headers["Expires"] = "0"  # Proxies

        print(f"DEBUG: Sending file '{filename}' with cache-control headers.")
        return response

    except FileNotFoundError:
        # The file may disappear between the check above and the send.
        print(f"ERROR: FileNotFoundError specifically caught for {filename} in {app.config['TRANSCRIPT_FOLDER']}")
        return "File not found in the transcript directory (FileNotFound).", 404
    except Exception as e:
        print(f"ERROR: An unexpected error occurred during the download of {filename}: {e}")
        import traceback
        traceback.print_exc()  # Full stack trace on the server console for debugging
        return "An error occurred while attempting to download the file.", 500

@app.route('/view/<filename>')
def view_transcript(filename):
    """Serve a transcript inline as plain text.

    Returns the file from the transcript folder with mimetype ``text/plain``,
    or a plain-text message with status 503 when the server is not active and
    404 when a FileNotFoundError is raised while sending.
    """
    if server_running:
        transcript_dir = app.config['TRANSCRIPT_FOLDER']
        try:
            return send_from_directory(transcript_dir, filename, mimetype='text/plain')
        except FileNotFoundError:
            return "File not found.", 404
    return "Server not active.", 503

@app.route('/shutdown', methods=['POST'])
def shutdown():
    """Stop accepting work and tear down the ngrok tunnel and Flask server.

    ``server_running`` is cleared immediately so other routes start answering
    503. The actual teardown runs about one second later on a timer thread:
    the request normally arrives through the ngrok tunnel, so disconnecting
    the tunnel before returning would prevent this response from ever
    reaching the client.

    Returns:
        JSON ``{"message": ...}`` with status 200.
    """
    global server_running
    print("🛑 Received shutdown request from client...")
    server_running = False

    # Must be read while the request is active; the callable is used later.
    # NOTE(review): recent Werkzeug releases no longer appear to provide this
    # environ key, in which case this is None and the server has to be stopped
    # manually — confirm against the installed Werkzeug version.
    shutdown_func = request.environ.get('werkzeug.server.shutdown')

    def _teardown():
        """Disconnect the tunnel, kill ngrok, then signal the server to stop."""
        global public_url_ngrok
        if public_url_ngrok:
            try:
                print("🔌 Disconnecting ngrok tunnel...")
                ngrok.disconnect(public_url_ngrok)
                public_url_ngrok = None
                print("✅ Ngrok tunnel disconnected successfully.")
            except Exception as e:
                print(f"⚠️ Warning while disconnecting ngrok tunnel: {e}")
        try:
            print("🔪 Killing ngrok process...")
            ngrok.kill()
            print("✅ Ngrok process killed successfully.")
        except Exception as e:
            print(f"⚠️ Warning while killing ngrok process: {e}")

        if shutdown_func is None:
            print("⚠️ Cannot find Werkzeug shutdown function. Server might need manual stop.")
        else:
            print("🛑 Shutting down Flask server...")
            shutdown_func()
            print("✅ Flask server has been signaled to stop.")

    # Delay the teardown so the response below is delivered first.
    threading.Timer(1.0, _teardown).start()
    return jsonify(message="Server is shutting down. Ngrok tunnel and Flask server will be stopped."), 200

# ------------------------------------------------------------------------------
# Run Flask Application and ngrok
# ------------------------------------------------------------------------------
if __name__ == '__main__':
    # Optional: Clear port 5000 before starting if you often face "Address already in use"
    # print("Attempting to clear port 5000...")
    # !lsof -t -iTCP:5000 -sTCP:LISTEN | xargs -r kill -15
    # !sleep 0.5
    # !lsof -t -iTCP:5000 -sTCP:LISTEN | xargs -r kill -9
    # print("Port clearing attempt finished.")
    # time.sleep(1)


    if (NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN_HERE" or not NGROK_AUTH_TOKEN):
        print("\n🔴 IMPORTANT: Flask application will not run because ngrok token is not set.")
    elif model:
        print("🚀 Starting Flask server...")
        server_running = True
        flask_thread = None

        def run_flask_app():
            global server_running
            try:
                app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)
            except OSError as e:
                if e.errno == 98:
                    print(f"🚫 Flask Error: Port 5000 is already in use. {e}")
                    print("   Consider running the port clearing commands in a separate cell or restarting the Colab runtime.")
                else:
                    print(f"🚫 Flask server encountered an OS error: {e}")
                server_running = False
            except Exception as e_flask:
                print(f"🚫 Flask server stopped with an unexpected error: {e_flask}")
                server_running = False
            finally:
                print("ℹ️ Flask thread has stopped.")

        flask_thread = threading.Thread(target=run_flask_app)
        flask_thread.daemon = True
        flask_thread.start()

        print("⏳ Waiting for Flask server to be ready...")
        time.sleep(3)

        if not flask_thread.is_alive() or not server_running:
            print("⚠️ Flask server did not start successfully (possibly port in use). Ngrok will not be started.")
            server_running = False
        else:
            try:
                print("🔌 Creating ngrok tunnel to port 5000...")
                ngrok_tunnel = ngrok.connect(5000, proto="http")
                public_url_ngrok = ngrok_tunnel.public_url
                print(f"✅ Your application is accessible at: {public_url_ngrok}")
                print("ℹ️  Server is running. Use the 'Stop Service' button on the web or stop this Colab cell to shut down.")
                while server_running and flask_thread.is_alive(): time.sleep(0.5)
                if not server_running: print("ℹ️ Server was stopped.")
                elif not flask_thread.is_alive(): print("ℹ️ Flask server stopped unexpectedly.")
            except KeyboardInterrupt: print("\n🛑 Keyboard Interrupt received, initiating shutdown..."); server_running = False
            except Exception as e_ngrok_runtime: print(f"❌ An error occurred during ngrok startup or main runtime: {e_ngrok_runtime}"); server_running = False
        try:
            if not server_running: print("🧹 Performing cleanup as server is marked to stop or failed to start...")
        finally:
            print("🧹 Starting final cleanup procedure (main `finally` block)...")
            server_running = False
            print("   Shutting down ngrok (if active)...")
            current_tunnels = ngrok.get_tunnels()
            if current_tunnels:
                print(f"   Active ngrok tunnels found: {[t.public_url for t in current_tunnels]}. Disconnecting...")
                for tunnel in current_tunnels:
                    try: ngrok.disconnect(tunnel.public_url); print(f"   - Tunnel {tunnel.public_url} disconnected.")
                    except Exception as e_d: print(f"   - Warning while disconnecting tunnel {tunnel.name}: {e_d}")
            else: print("   No active ngrok tunnels found to disconnect.")
            try: ngrok.kill(); print("   Ngrok process kill attempted successfully (or was not running).")
            except Exception as e_k: print(f"   - Warning while killing ngrok process (it might have already stopped): {e_k}")
            if flask_thread and flask_thread.is_alive():
                print("   Flask thread is still alive. Waiting for it to stop...")
                flask_thread.join(timeout=5)
                if flask_thread.is_alive(): print("   ⚠️ Flask thread did not stop within 5 seconds.")
                else: print("   Flask thread has now stopped.")
            elif flask_thread: print("   Flask thread was already stopped.")
            else: print("   Flask thread was not initialized.")
            print("🏁 Cleanup finished. Colab cell execution will now end.")
            # Commands to clear port after everything is done (for next run)
            PORT_TO_CLEAR = 5000
            print(f"\nAttempting to clear port {PORT_TO_CLEAR} for the next run (if any)...")
            !lsof -t -iTCP:{PORT_TO_CLEAR} -sTCP:LISTEN | xargs -r kill -9 > /dev/null 2>&1
            print(f"Port {PORT_TO_CLEAR} clearing attempt finished.")

    elif not model: print("❌ Flask application cannot run because the Whisper model failed to load.")
    else: print("❌ Flask application cannot run for other reasons.")

✅ Ngrok token configured successfully.
⏳ Loading Whisper model 'medium'...
✅ Whisper model successfully loaded.
🚀 Starting Flask server...
⏳ Waiting for Flask server to be ready...
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


🔌 Creating ngrok tunnel to port 5000...
✅ Your application is accessible at: https://a868-34-125-176-134.ngrok-free.app
ℹ️  Server is running. Use the 'Stop Service' button on the web or stop this Colab cell to shut down.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:06:58] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:06:59] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:07:11] "GET / HTTP/1.1" 200 -


📢 Running transcription for: uploads/AUD-20250521-WA0004.m4a (Language specified: Auto-Detect)


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:08:13] "POST /transcribe_single_file HTTP/1.1" 200 -


✅ Transcription finished! Output saved to 'transcripts/transcription_final_AUD-20250521-WA0004.txt'. Detected language by Whisper: id


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:08:23] "GET /download/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -


DEBUG: Download request received for filename: 'transcription_final_AUD-20250521-WA0004.txt'
DEBUG: Attempting to send file from directory: '/content/transcripts'
DEBUG: Constructed full file path for download: 'transcripts/transcription_final_AUD-20250521-WA0004.txt'
DEBUG: File 'transcription_final_AUD-20250521-WA0004.txt' located. Preparing to send.
DEBUG: Sending file 'transcription_final_AUD-20250521-WA0004.txt' with cache-control headers.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:08:40] "GET /view/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:09:57] "GET /download/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -


DEBUG: Download request received for filename: 'transcription_final_AUD-20250521-WA0004.txt'
DEBUG: Attempting to send file from directory: '/content/transcripts'
DEBUG: Constructed full file path for download: 'transcripts/transcription_final_AUD-20250521-WA0004.txt'
DEBUG: File 'transcription_final_AUD-20250521-WA0004.txt' located. Preparing to send.
DEBUG: Sending file 'transcription_final_AUD-20250521-WA0004.txt' with cache-control headers.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:10:03] "GET /download/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -


DEBUG: Download request received for filename: 'transcription_final_AUD-20250521-WA0004.txt'
DEBUG: Attempting to send file from directory: '/content/transcripts'
DEBUG: Constructed full file path for download: 'transcripts/transcription_final_AUD-20250521-WA0004.txt'
DEBUG: File 'transcription_final_AUD-20250521-WA0004.txt' located. Preparing to send.
DEBUG: Sending file 'transcription_final_AUD-20250521-WA0004.txt' with cache-control headers.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:10:24] "GET /download/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -


DEBUG: Download request received for filename: 'transcription_final_AUD-20250521-WA0004.txt'
DEBUG: Attempting to send file from directory: '/content/transcripts'
DEBUG: Constructed full file path for download: 'transcripts/transcription_final_AUD-20250521-WA0004.txt'
DEBUG: File 'transcription_final_AUD-20250521-WA0004.txt' located. Preparing to send.
DEBUG: Sending file 'transcription_final_AUD-20250521-WA0004.txt' with cache-control headers.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:10:31] "GET /download/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -


DEBUG: Download request received for filename: 'transcription_final_AUD-20250521-WA0004.txt'
DEBUG: Attempting to send file from directory: '/content/transcripts'
DEBUG: Constructed full file path for download: 'transcripts/transcription_final_AUD-20250521-WA0004.txt'
DEBUG: File 'transcription_final_AUD-20250521-WA0004.txt' located. Preparing to send.
DEBUG: Sending file 'transcription_final_AUD-20250521-WA0004.txt' with cache-control headers.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:10:59] "GET /download/transcription_final_AUD-20250521-WA0004.txt HTTP/1.1" 200 -


DEBUG: Download request received for filename: 'transcription_final_AUD-20250521-WA0004.txt'
DEBUG: Attempting to send file from directory: '/content/transcripts'
DEBUG: Constructed full file path for download: 'transcripts/transcription_final_AUD-20250521-WA0004.txt'
DEBUG: File 'transcription_final_AUD-20250521-WA0004.txt' located. Preparing to send.
DEBUG: Sending file 'transcription_final_AUD-20250521-WA0004.txt' with cache-control headers.


INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:13:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:13:14] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:14:45] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:14:55] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:15:21] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [21/May/2025 07:22:57] "POST /shutdown HTTP/1.1" 200 -


🛑 Received shutdown request from client...
🔌 Disconnecting ngrok tunnel...
✅ Ngrok tunnel disconnected successfully.
🔪 Killing ngrok process...
✅ Ngrok process killed successfully.
⚠️ Cannot find Werkzeug shutdown function. Server might need manual stop.
ℹ️ Server was stopped.
🧹 Performing cleanup as server is marked to stop or failed to start...
🧹 Starting final cleanup procedure (main `finally` block)...
   Shutting down ngrok (if active)...
   No active ngrok tunnels found to disconnect.
   Ngrok process kill attempted successfully (or was not running).
   Flask thread is still alive. Waiting for it to stop...
