# Install the required modules and import them

In [1]:
import importlib
import subprocess
import sys

# Function to check and install modules if not already installed
def install_and_import(package):
    """Import ``package``, installing it with pip first if the import fails.

    The imported module is bound to the name ``package`` in this module's
    globals, so it can be used afterwards without a separate ``import``.

    Args:
        package: Name used both as the import name and as the pip requirement.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with an error.
        ImportError: If the module still cannot be imported after installing.
    """
    try:
        module = importlib.import_module(package)
    except ImportError:
        print(f"Installing {package}...")
        # A pip failure propagates as-is; the retry import below only runs
        # after a successful install, so it can no longer mask the pip error.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Let the import system notice the freshly installed distribution.
        importlib.invalidate_caches()
        module = importlib.import_module(package)
    globals()[package] = module

# Packages this notebook depends on. Each entry is handed to
# install_and_import, which uses the same string as the import name and as
# the pip requirement, so every name here has to work for both.
required_packages = [
    "yt_dlp", "jiwer", "pyngrok", "flask_cors",
    "nltk", "pydub", "torch", "transformers",
    "requests", "gdown", "pyperclip",
]

# Import each package, installing whatever is missing along the way.
for package in required_packages:
    install_and_import(package)

# NLTK ships its data packages separately from the pip distribution, so the
# "punkt" package is downloaded here.
# NOTE(review): the only NLTK import below is the BLEU scorer -- confirm that
# "punkt" is actually needed by this notebook.
import nltk
nltk.download('punkt')  # Downloads the data package; the log reports when it is already up to date

# Explicit imports, so the rest of the cell does not rely on the names that
# install_and_import placed into globals().
import yt_dlp
import time
import jiwer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from pydub import AudioSegment
import threading
import requests
import gdown
import os

# Banner marking the end of the setup cell.
print("<<<<<<<<<<<<<<<<<<<<<           All required modules are installed and imported successfully!          >>>>>>>>>>>>>>>>>")


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
<<<<<<<<<<<<<<<<<<<<<           All required modules are installed and imported successfully!          >>>>>>>>>>>>>>>>>


# Helper functions and initialization of the model pipeline

In [2]:
import os
import shutil
import threading
import time
import requests
import gdown
from flask import Flask, request, jsonify, render_template_string
from pyngrok import ngrok
from flask_cors import CORS
import yt_dlp
from pydub import AudioSegment
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Flask application with CORS enabled (flask_cors defaults).
# NOTE: the server cell further down creates `app` again and registers the
# routes on that second instance, so this one only exists for the helpers
# defined in this cell.
app = Flask(__name__)
CORS(app)

# Public ngrok URL of the server; stays None until the tunnel is opened in
# the server cell.
public_url = None

def log_error(error_message):
    """Print ``error_message`` to stdout with an ``Error: `` prefix."""
    formatted = f"Error: {error_message}"
    print(formatted)

def delete(working_dir="/kaggle/working/"):
    """Remove every file, symlink and sub-directory inside ``working_dir``.

    The directory itself is kept. Failures are logged through ``log_error``
    and never raised. Each entry is handled on its own, so one entry that
    cannot be removed does not stop the remaining ones from being deleted.

    Args:
        working_dir: Directory to empty. Defaults to the Kaggle working
            directory used throughout this notebook.
    """
    try:
        entry_names = os.listdir(working_dir)
    except Exception as e:
        log_error(f"Error deleting files: {e}")
        return

    for file_name in entry_names:
        file_path = os.path.join(working_dir, file_name)
        try:
            # Symlinks are unlinked (never followed), even when they point
            # at a directory.
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            log_error(f"Error deleting files: {e}")

def get_youtube_video_details(url):
    """Look up title, duration and thumbnail of a video without downloading it.

    Args:
        url: Video URL handed to yt-dlp.

    Returns:
        A dict with the keys ``"title"``, ``"duration"`` (converted with
        ``int``) and ``"thumbnail"``, or ``None`` when anything goes wrong.
        Errors are logged through ``log_error``, including a duration value
        that ``int`` cannot convert.
    """
    options = {'quiet': True, 'noplaylist': True}
    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            metadata = downloader.extract_info(url, download=False)

        # Missing keys fall back to placeholder values.
        details = {}
        details["title"] = metadata.get("title", "Unknown Title")
        details["duration"] = int(metadata.get("duration", 0))
        details["thumbnail"] = metadata.get("thumbnail", "")
        return details
    except Exception as e:
        log_error(f"Error fetching video details: {e}")
        return None

def download_youtube_audio(url, output_file):
    """Download a video's audio track and write it as a 16 kHz mono WAV file.

    yt-dlp first extracts the audio to an intermediate WAV in the working
    directory; pydub then resamples it to 16 kHz mono and exports it to
    ``output_file``. The intermediate file is removed afterwards, whether or
    not the conversion succeeded, so it neither accumulates on disk nor can
    be mistaken for the result of a later download.

    Args:
        url: Video URL handed to yt-dlp.
        output_file: Destination path of the converted WAV file.

    Returns:
        ``True`` on success, ``False`` if downloading or converting failed
        (the error is logged through ``log_error``).
    """
    temp_file_path = "/kaggle/working/temp_audio.wav"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '/kaggle/working/temp_audio',  # No extension here; the audio extractor writes temp_audio.wav
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
        'ffmpeg_location': '/usr/bin/ffmpeg',  # Ensure ffmpeg location is set if necessary
    }

    try:
        # Download the audio
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Check if the file was created
        if not os.path.exists(temp_file_path):
            raise FileNotFoundError(f"File not found: {temp_file_path}")

        # Convert to 16kHz, mono WAV
        audio = AudioSegment.from_file(temp_file_path)
        audio = audio.set_frame_rate(16000).set_channels(1)
        audio.export(output_file, format="wav")
        print(f"Downloaded and converted to {output_file}")
        return True
    except Exception as e:
        log_error(f"Error downloading or converting audio: {e}")
        return False
    finally:
        # Drop the intermediate WAV, unless the caller asked for the result
        # to be written to that very path.
        if temp_file_path != output_file and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                log_error(f"Error removing temporary audio file: {cleanup_error}")




def transcribe(id, output_file, ngrok_url):
    """Transcribe and translate an audio file and post both results to the API.

    The module-level ``pipe`` is run twice on ``output_file``: once for the
    transcription in the original language and once with ``task="translate"``
    for the English version. Each result is posted through ``return_api`` to
    ``<ngrok_url>/api/v1/videos/addTranscript`` as soon as it is available.
    If anything fails, the error text is posted to
    ``<ngrok_url>/api/v1/videos/error`` instead.

    Args:
        id: Identifier of the video, echoed back in every payload.
        output_file: Path of the audio file to transcribe.
        ngrok_url: Base URL of the backend that receives the results.

    Returns:
        None. A missing audio file is only logged.
    """
    if not os.path.exists(output_file):
        log_error(f"Audio file not found: {output_file}")
        return

    transcript_endpoint = ngrok_url + "/api/v1/videos/addTranscript"

    # (payload "type", generate kwargs for the pipeline, message printed on success)
    passes = (
        ("original", {"return_timestamps": True}, "Transcription successful."),
        ("english", {"task": "translate", "return_timestamps": True}, "Translation successful."),
    )

    try:
        for payload_type, generate_kwargs, success_message in passes:
            result = pipe(output_file, generate_kwargs=generate_kwargs)
            print(success_message)

            payload = {
                "id": id,
                "type": payload_type,
                "chunks": result["chunks"],
                "text": result.get("text", "")
            }
            return_api(payload, transcript_endpoint)
    except Exception as e:
        log_error(f"Error during transcription: {e}")
        # Report the failure to the backend's error endpoint.
        error_api = ngrok_url + "/api/v1/videos/error"
        error_payload = {"id": id, "error": str(e)}
        try:
            # The timeout keeps an unresponsive backend from blocking this
            # worker thread forever.
            response = requests.post(error_api, json=error_payload, timeout=30)
            if response.status_code == 200:
                print("Error details sent to the API.")
            else:
                log_error(f"Failed to send error details: {response.status_code}")
        except requests.exceptions.RequestException as api_error:
            log_error(f"Error during API call for error details: {api_error}")

def return_api(payload, ngrok_url, timeout=30):
    """POST ``payload`` as JSON to ``ngrok_url`` and log the outcome.

    Network errors and non-200 responses are logged through ``log_error``;
    nothing is raised and nothing is returned.

    Args:
        payload: JSON-serialisable body of the request.
        ngrok_url: Full URL of the endpoint to post to (callers pass the
            complete endpoint, not just the base URL).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive server would block the calling thread
            indefinitely.
    """
    try:
        response = requests.post(ngrok_url, json=payload, timeout=timeout)
        if response.status_code == 200:
            print("API call successful.")
        else:
            log_error(f"API call failed with status code {response.status_code}")
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses and end up here as well.
        log_error(f"Error during the API call: {e}")

def transcription_api(id, video_url, api_url, max_duration_s=1200):
    """Fetch video details, download the audio and transcribe it.

    Intended to run in a background thread: every failure is logged through
    ``log_error`` and nothing is raised or returned.

    Args:
        id: Identifier of the video, passed on to ``transcribe``.
        video_url: URL of the video to process.
        api_url: Base URL of the backend that receives the results.
        max_duration_s: Longest accepted video duration in seconds; longer
            videos are skipped.
    """
    try:
        video_info = get_youtube_video_details(video_url)
        if not video_info:
            raise Exception("Failed to fetch video details.")

        if video_info["duration"] > max_duration_s:
            # Notifying the backend (/api/v1/videos/limitIssue) is currently
            # disabled; the video is only skipped.
            print("Video duration exceeds the limit. Transcription aborted.")
            return

        # Posting the video details (/api/v1/videos/addVideoDetails) is
        # currently disabled as well.

        output_file = "/kaggle/working/output_audio.wav"
        if not download_youtube_audio(video_url, output_file):
            raise Exception("Failed to download or convert audio.")

        transcribe(id, output_file, api_url)
    except Exception as e:
        log_error(f"Error in transcription_api: {e}")

# Whisper model setup: use the GPU with half precision when CUDA is
# available, otherwise the CPU with full precision.
device_use = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype_use = torch.float16 if torch.cuda.is_available() else torch.float32
model_id_use = "openai/whisper-large-v3"
try:
    trans_model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id_use, torch_dtype=torch_dtype_use, low_cpu_mem_usage=True, use_safetensors=True
    )
    trans_model.to(device_use)
    processor = AutoProcessor.from_pretrained(model_id_use)
    # No generate_kwargs are set here: `input_features` is the model's audio
    # input, which the pipeline supplies itself, not a generation option, and
    # every call in this notebook passes its own generate_kwargs anyway.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=trans_model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=16,
        return_timestamps=True,  # Sentence-wise timestamps
        torch_dtype=torch_dtype_use,
        device=device_use,
    )
except Exception as e:
    log_error(f"Error setting up Whisper model: {e}")
else:
    # Only announce success when the pipeline was really created.
    print("<<<<<<<<<<<<<<<<<<<<<<<<           Done with configuring the model              >>>>>>>>>>>>>>>>>>>>>>>>>>")

Device set to use cuda:0


<<<<<<<<<<<<<<<<<<<<<<<<           Done with configuring the model              >>>>>>>>>>>>>>>>>>>>>>>>>>




# Main cell: run the Flask server and listen for requests

In [None]:
import os
import shutil
import threading
import time
import requests
import gdown
from flask import Flask, request, jsonify, render_template_string
from pyngrok import ngrok
from flask_cors import CORS
import yt_dlp
from pydub import AudioSegment
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from IPython.display import display, HTML, Javascript


# Flask application for this cell, with CORS enabled (flask_cors defaults).
# This replaces the `app` object created in the previous cell; the routes
# below are registered on this instance.
app = Flask(__name__)
CORS(app)

# Public ngrok URL of the server; set further down once the tunnel is open
# and read by the index route.
public_url = None

def log_error(error_message):
    """Print ``error_message`` to stdout with an ``Error: `` prefix.

    Same behaviour as the definition in the previous cell; this one replaces
    it when the cell runs.
    """
    formatted = f"Error: {error_message}"
    print(formatted)

@app.route('/')
def index():
    """Render a small page showing the ngrok URL with a copy-to-clipboard button.

    Returns a plain-text notice instead while no public URL is known.
    """
    if not public_url:
        return "ngrok URL is not available."

    page_template = '''
            <h1>ngrok URL</h1>
            <p id="ngrok-url">{{ public_url }}</p>
            <button onclick="copyToClipboard()">Copy ngrok URL</button>
            <script>
                function copyToClipboard() {
                    const url = document.getElementById('ngrok-url').innerText;
                    navigator.clipboard.writeText(url).then(() => {
                        alert('URL copied to clipboard!');
                    }).catch(err => {
                        alert('Failed to copy URL: ' + err);
                    });
                }
            </script>
        '''
    return render_template_string(page_template, public_url=public_url)


@app.route('/translate', methods=['POST'])
def translate():
    """Accept a YouTube transcription request and process it in the background.

    Expects a JSON object with ``videoId``, ``videoUrl`` and ``serverUrl``.
    The video details are fetched synchronously so that over-long videos can
    be rejected right away; downloading and transcription then run in a
    separate thread.

    Responses:
        200: request accepted; includes the video details and the id.
        400: body is not a JSON object, or ``videoUrl`` / ``serverUrl`` is missing.
        409: the video is longer than 1200 seconds (20 minutes).
        500: the video details could not be fetched, or an unexpected error occurred.
    """
    try:
        # silent=True yields None instead of raising on a missing or invalid
        # JSON body, so bad requests get a 400 rather than a generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object.'}), 400

        id = data.get('videoId')
        url = data.get('videoUrl')
        ngrokurl = data.get('serverUrl')
        if not url or not ngrokurl:
            return jsonify({'error': 'videoUrl and serverUrl are required.'}), 400

        # Fetch video details BEFORE starting the thread
        video_info = get_youtube_video_details(url)
        if not video_info:
            return jsonify({'error': 'Failed to fetch video details'}), 500

        # Check duration limit (1200 seconds = 20 minutes). Notifying the
        # backend's limitIssue endpoint is currently disabled.
        if video_info["duration"] > 1200:
            return jsonify({'message': 'Video duration exceeds the limit'}), 409

        # Start the thread only if the duration is valid
        thread = threading.Thread(target=transcription_api, args=(id, url, ngrokurl))
        thread.start()
        print("Thread started, processing in background.")

        return jsonify({'result': 'Successfully Received and processing started', 'videoInfo': video_info, 'id': id})

    except Exception as e:
        log_error(f"Error in /translate endpoint: {str(e)}")
        return jsonify({'error': 'An error occurred during translation.'}), 500




@app.route('/translate-video', methods=['POST'])
def translate_video():
    """Download a video with gdown, extract its audio and transcribe it.

    Expects a JSON object with ``videoId``, ``videoUrl`` and ``serverUrl``.
    The working directory is emptied first, then the video is downloaded and
    converted to a 16 kHz mono WAV within the request; transcription runs in
    a background thread.

    Responses:
        200: download and conversion succeeded, transcription started.
        400: body is not a JSON object, or ``videoUrl`` / ``serverUrl`` is missing.
        500: download, conversion or anything else failed.
    """
    # Local import: this cell's import block does not include subprocess.
    import subprocess

    try:
        # silent=True yields None instead of raising on a missing or invalid
        # JSON body, so bad requests get a 400 rather than a generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object.'}), 400

        id = data.get('videoId')
        url = data.get('videoUrl')
        ngrokurl = data.get('serverUrl')
        if not url or not ngrokurl:
            return jsonify({'error': 'videoUrl and serverUrl are required.'}), 400

        delete()
        output_video_path = "/kaggle/working/temp_video.mp4"
        output_audio_path = "/kaggle/working/temp_audio.wav"

        # Download the video from Google Drive; stop here if nothing arrived
        # instead of handing a missing file to ffmpeg.
        downloaded = gdown.download(url, output_video_path, quiet=True)
        if not downloaded or not os.path.exists(output_video_path):
            raise RuntimeError("Failed to download the video.")

        # Convert video to 16 kHz mono WAV. An argument list avoids the shell,
        # and check=True turns an ffmpeg failure into an exception instead of
        # silently starting transcription without an audio file.
        subprocess.run(
            ["ffmpeg", "-y", "-i", output_video_path, "-ac", "1", "-ar", "16000", output_audio_path],
            check=True,
        )

        thread = threading.Thread(target=transcribe, args=(id, output_audio_path, ngrokurl))
        thread.start()
        print("Thread started, main program continues.")
        return jsonify({'result': 'Successfully Received'})
    except Exception as e:
        log_error(f"Error in /translate-video endpoint: {str(e)}")
        return jsonify({'error': 'An error occurred during video translation.'}), 500

# Set up ngrok: open a tunnel to local port 5000 (the port the Flask server
# is started on at the end of this cell) and show the public URL.
try:
    # The token comes from the NGROK_AUTHTOKEN environment variable so that
    # the secret never has to be pasted into the notebook.
    ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN", "")
    if ngrok_auth_token:
        ngrok.set_auth_token(ngrok_auth_token)
    else:
        print("NGROK_AUTHTOKEN is not set; relying on any existing ngrok configuration.")

    public_url = ngrok.connect(5000).public_url
    print(f"{public_url}")

    # Display the ngrok URL in an input box with a copy button
    display(HTML(f"""
        <p><strong>Copy The ngrok URL:</strong></p>
        <input type="text" id="ngrok-url" value="{public_url}" style="width: 400px; padding: 5px;" readonly>
        <button onclick="copyUrl()" style="margin-left: 10px; padding: 5px;">Copy URL</button>
        <script>
            function copyUrl() {{
                const inputBox = document.getElementById('ngrok-url');
                inputBox.select();
                document.execCommand('copy');
            }}
        </script>
    """))

except Exception as e:
    # public_url keeps its previous value (None) and the index route reports
    # that no URL is available.
    log_error(f"Failed to set up ngrok: {str(e)}")

import warnings
import os

# Silence every FutureWarning from here on. The filter is installed after the
# ngrok setup above has already run, so it only affects warnings raised
# later, e.g. while the server below handles requests.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Repeated imports of names already imported at the top of this cell; harmless.
import shutil
from IPython.display import display, HTML, Javascript

# Start the Flask development server on port 5000, the port the ngrok tunnel
# was opened for. app.run blocks, so this cell keeps running while the server
# is up.
if __name__ == "__main__":
    try:
        app.run(port=5000)
    except Exception as e:
        startup_failure = f"Failed to start Flask app: {str(e)}"
        log_error(startup_failure)

https://2bcd-34-118-202-217.ngrok-free.app


 * Serving Flask app '__main__'
 * Debug mode: off
Thread started, processing in background.
[youtube] Extracting URL: https://www.youtube.com/watch?v=cGmzBNH2wUo&pp=ygUYand0IGV4cGxhaW5lY3ggaW4gMzAgbWlu
[youtube] cGmzBNH2wUo: Downloading webpage
[youtube] cGmzBNH2wUo: Downloading tv client config
[youtube] cGmzBNH2wUo: Downloading player 179bab65-main
[youtube] cGmzBNH2wUo: Downloading tv player API JSON
[youtube] cGmzBNH2wUo: Downloading ios player API JSON
[youtube] cGmzBNH2wUo: Downloading m3u8 information
[info] cGmzBNH2wUo: Downloading 1 format(s): 251
[download] Destination: /kaggle/working/temp_audio
[download] 100% of   16.30MiB in 00:00:06 at 2.49MiB/s     
[ExtractAudio] Destination: /kaggle/working/temp_audio.wav
Deleting original file /kaggle/working/temp_audio (pass -k to keep)


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Downloaded and converted to /kaggle/working/output_audio.wav
Transcription successful.
API call successful.
Translation successful.
API call successful.


# End of the Notebook