In [None]:
# ===============================
# Install dependencies
# ===============================
!pip install SpeechRecognition googletrans==4.0.0-rc1 gTTS pydub moviepy soundfile openai-whisper flask flask-ngrok pyngrok -q

# ===============================
# Imports
# ===============================
import os
import io
import base64
from flask import Flask, request, jsonify, send_file
from pyngrok import ngrok
from gtts import gTTS
from googletrans import Translator
from moviepy.editor import VideoFileClip
import soundfile as sf
import whisper

# ===============================
# Set your ngrok authtoken here
# ===============================
# Prefer the NGROK_AUTH_TOKEN environment variable so a real token does not
# have to be saved inside the notebook; the inline placeholder is kept as the
# fallback so editing this line by hand still works.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "YOUR_TOKEN")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# ===============================
# Whisper model
# ===============================
# Model size handed to whisper.load_model; swap for "medium" or "small"
# for faster results.
_WHISPER_MODEL_NAME = "large"

print("⏳ Loading Whisper model...")
whisper_model = whisper.load_model(_WHISPER_MODEL_NAME)
print(" Whisper model loaded!")

# ===============================
# Translator & language map
# ===============================
# One shared googletrans client, reused by translate_text for every request.
translator = Translator()
# Maps the language names used by the frontend dropdown to the language codes
# passed to the translator and to gTTS.
# NOTE(review): confirm every code here is supported by both backends.
lang_code_map = {
    "english": "en",
    "hindi": "hi",
    "bengali": "bn",
    "tamil": "ta",
    "telugu": "te",
    "kannada": "kn",
    "malayalam": "ml",
    "marathi": "mr",
    "gujarati": "gu",
    "punjabi": "pa",
    "nepali": "ne",
    "assamese": "as",
}

# ===============================
# Core functions
# ===============================
def transcribe_audio(file_path):
    """Transcribe the media file at *file_path* with the shared Whisper model.

    Returns the ``"text"`` field of the transcription result. Any exception
    is swallowed and reported as a ``[Transcription failed: ...]`` string
    instead of being raised, so callers always receive a string.
    """
    try:
        # The lookup stays inside the try so a malformed result is reported
        # the same way as a failed transcription.
        return whisper_model.transcribe(file_path, fp16=False)["text"]
    except Exception as exc:
        return f"[Transcription failed: {exc}]"

def translate_text(text, target_lang):
    """Translate *text* into the language named by *target_lang*.

    *target_lang* is a key of ``lang_code_map``; unknown names fall back to
    English (``"en"``). Any exception is swallowed and reported as a
    ``[Translation failed: ...]`` string instead of being raised.
    """
    try:
        dest_code = lang_code_map.get(target_lang, "en")
        translated = translator.translate(text, dest=dest_code)
        return translated.text
    except Exception as exc:
        return f"[Translation failed: {exc}]"

def generate_tts(text, target_lang):
    """Synthesise *text* as speech and return the MP3 as a base64 string.

    *target_lang* is a key of ``lang_code_map``; unknown names fall back to
    English (``"en"``).

    Returns the base64-encoded MP3 data as ``str``, or ``None`` if synthesis
    fails for any reason.
    """
    t_code = lang_code_map.get(target_lang, "en")
    # Synthesise into memory instead of a fixed "tts_output.mp3": a shared
    # file name lets concurrent requests overwrite each other's audio and
    # leaves a stray file behind after every call.
    audio_buffer = io.BytesIO()
    try:
        gTTS(text=text, lang=t_code).write_to_fp(audio_buffer)
    except Exception as e:
        # Still best-effort (the caller treats None as "no audio"), but the
        # reason is reported instead of being silently dropped.
        print(f"[TTS failed: {e}]")
        return None
    return base64.b64encode(audio_buffer.getvalue()).decode()

def extract_audio_from_video(video_path):
    """Write the audio track of *video_path* to a WAV file and return its path.

    The output is placed next to the video, named ``<video stem>_audio.wav``,
    and written with ``fps=16000``.

    Raises:
        ValueError: if the video has no audio track.
    """
    # splitext only strips an extension from the final path component, so a
    # dot in a directory name cannot truncate the path.
    audio_path = os.path.splitext(video_path)[0] + "_audio.wav"
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        video.audio.write_audiofile(audio_path, fps=16000)
    finally:
        # Always release the clip's readers, even when extraction fails.
        video.close()
    return audio_path

# ===============================
# HTML frontend
# ===============================
# Single-page frontend (HTML + CSS + JS) served at "/". It uploads an audio
# file, a video file, or a microphone recording to POST /process together with
# the selected target language, then shows the transcription, the translation
# and, when present, the synthesised speech.
# All text coming from the server or from exceptions is inserted with
# textContent / text nodes, never via innerHTML, so it cannot inject markup.
html_code = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>🎧 Whisper Translator</title>
  <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
  <style>
    * { box-sizing: border-box; transition: all 0.3s ease; }
    body {
      font-family: "Poppins", sans-serif;
      background: linear-gradient(135deg, #89f7fe 0%, #66a6ff 100%);
      display: flex;
      justify-content: center;
      align-items: center;
      min-height: 100vh;
      margin: 0;
    }
    h1 {
      color: #2563eb;
      text-shadow: 1px 2px 5px rgba(0,0,0,0.2);
      margin-bottom: 10px;
    }
    .card {
      background: rgba(255, 255, 255, 0.25);
      backdrop-filter: blur(12px);
      -webkit-backdrop-filter: blur(12px);
      border-radius: 20px;
      box-shadow: 0 4px 30px rgba(0,0,0,0.1);
      padding: 40px;
      width: 500px;
      text-align: center;
      color: #1f2937;
      animation: fadeIn 0.6s ease-in-out;
    }
    @keyframes fadeIn {
      from { opacity: 0; transform: scale(0.95); }
      to { opacity: 1; transform: scale(1); }
    }
    .upload-box {
      border: 2px dashed #93c5fd;
      border-radius: 12px;
      padding: 18px;
      margin: 20px 0;
      cursor: pointer;
      font-weight: 600;
      color: #2563eb;
      background: rgba(255,255,255,0.4);
    }
    .upload-box:hover {
      background: #2563eb;
      color: white;
      transform: scale(1.02);
    }
    select, button {
      width: 100%;
      padding: 12px;
      border-radius: 10px;
      border: none;
      font-size: 16px;
      margin-top: 12px;
      cursor: pointer;
      font-weight: 600;
    }
    select {
      background-color: rgba(255,255,255,0.7);
      color: #1f2937;
    }
    button {
      background-color: #2563eb;
      color: white;
    }
    button:hover:not(:disabled) {
      background-color: #1d4ed8;
      transform: scale(1.05);
    }
    button:disabled {
      background-color: #a5b4fc;
      cursor: not-allowed;
    }
    hr {
      border: none;
      border-top: 1px solid rgba(255,255,255,0.3);
      margin: 20px 0;
    }
    audio {
      width: 100%;
      margin-top: 15px;
      border-radius: 8px;
    }
    .output {
      background: rgba(255,255,255,0.5);
      padding: 15px;
      border-radius: 10px;
      margin-top: 20px;
      text-align: left;
      font-size: 15px;
      color: #111827;
      max-height: 250px;
      overflow-y: auto;
    }
    .filename {
      font-weight: bold;
      margin-top: 10px;
      color: #2563eb;
    }
    footer {
      margin-top: 15px;
      font-size: 13px;
      color: rgba(255,255,255,0.7);
    }
    /* Microphone Button */
    .mic-btn {
      background-color: #ef4444;
      color: white;
      font-size: 18px;
      padding: 12px 20px;
      border-radius: 50px;
      margin-top: 10px;
      width: 100%;
      border: none;
      font-weight: 600;
      cursor: pointer;
    }
    .mic-btn:hover {
      background-color: #dc2626;
      transform: scale(1.05);
    }
    .mic-btn.recording {
      background-color: #22c55e;
      animation: pulse 1s infinite;
    }
    @keyframes pulse {
      0% { box-shadow: 0 0 0 0 rgba(34,197,94,0.7); }
      70% { box-shadow: 0 0 0 12px rgba(34,197,94,0); }
      100% { box-shadow: 0 0 0 0 rgba(34,197,94,0); }
    }
  </style>
</head>
<body>
  <div class="card">
    <h1>🎧 Whisper Translator</h1>

    <!-- Audio Upload -->
    <label class="upload-box">
      <input type="file" id="audioInput" hidden accept="audio/*">
      Upload Audio File
    </label>
    <div id="audioName" class="filename"></div>
    <button id="processAudioBtn" disabled>🚀 Process Audio</button>

    <hr>

    <!-- Video Upload -->
    <label class="upload-box">
      <input type="file" id="videoInput" hidden accept="video/*">
      Upload Video File
    </label>
    <div id="videoName" class="filename"></div>
    <button id="processVideoBtn" disabled>🚀 Process Video</button>

    <hr>

    <!-- Mic Recording -->
    <button id="recordBtn" class="mic-btn">🎙 Start Recording</button>
    <audio id="recordedAudio" controls hidden></audio>

    <hr>

    <!-- Language Dropdown -->
    <select id="langSelect">
      <option value="english">English</option>
      <option value="hindi">Hindi</option>
      <option value="bengali">Bengali</option>
      <option value="tamil">Tamil</option>
      <option value="telugu">Telugu</option>
      <option value="kannada">Kannada</option>
      <option value="malayalam">Malayalam</option>
      <option value="marathi">Marathi</option>
      <option value="gujarati">Gujarati</option>
      <option value="punjabi">Punjabi</option>
      <option value="nepali">Nepali</option>
      <option value="assamese">Assamese</option>
    </select>

    <div class="output" id="outputBox"></div>

    <footer>Made with  using Whisper + Flask</footer>
  </div>

  <script>
    const audioInput = document.getElementById('audioInput');
    const videoInput = document.getElementById('videoInput');
    const processAudioBtn = document.getElementById('processAudioBtn');
    const processVideoBtn = document.getElementById('processVideoBtn');
    const recordBtn = document.getElementById('recordBtn');
    const recordedAudio = document.getElementById('recordedAudio');
    const langSelect = document.getElementById('langSelect');
    const outputBox = document.getElementById('outputBox');
    const audioName = document.getElementById('audioName');
    const videoName = document.getElementById('videoName');

    let selectedAudio = null;
    let selectedVideo = null;
    let mediaRecorder, audioChunks = [];

    audioInput.addEventListener('change', e => {
      selectedAudio = e.target.files[0];
      audioName.innerText = selectedAudio ? selectedAudio.name : "";
      processAudioBtn.disabled = !selectedAudio;
    });

    videoInput.addEventListener('change', e => {
      selectedVideo = e.target.files[0];
      videoName.innerText = selectedVideo ? selectedVideo.name : "";
      processVideoBtn.disabled = !selectedVideo;
    });

    // Replace the output box contents with one status/error paragraph.
    // The text is set via textContent so it is never parsed as HTML.
    function showMessage(text, color) {
      outputBox.innerHTML = "";
      const p = document.createElement("p");
      if (color) p.style.color = color;
      p.textContent = text;
      outputBox.appendChild(p);
    }

    // Append a "<strong>label</strong> value" paragraph; value stays plain text.
    function appendResult(label, value) {
      const p = document.createElement("p");
      const strong = document.createElement("strong");
      strong.textContent = label;
      p.appendChild(strong);
      p.appendChild(document.createTextNode(" " + value));
      outputBox.appendChild(p);
    }

    async function processFile(file) {
      if (!file) return alert("Please select or record a file first!");
      showMessage("⏳ Processing... please wait");
      const formData = new FormData();
      formData.append("file", file);
      formData.append("target_lang", langSelect.value);
      try {
        const res = await fetch(`/process`, { method: "POST", body: formData });
        const data = await res.json();
        if (data.error) { showMessage(data.error, "red"); return; }
        outputBox.innerHTML = "";
        appendResult("🗣️ Transcription:", data.transcription);
        appendResult(`🌐 Translation (${langSelect.value}):`, data.translation);
        if (data.tts_base64) {
          const audioEl = document.createElement("audio");
          audioEl.controls = true;
          audioEl.src = "data:audio/mp3;base64," + data.tts_base64;
          outputBox.appendChild(audioEl);
        }
      } catch (err) {
        console.error(err);
        showMessage(`⚠️ Error: ${err}`, "red");
      }
    }

    processAudioBtn.addEventListener('click', () => processFile(selectedAudio));
    processVideoBtn.addEventListener('click', () => processFile(selectedVideo));

    // 🎙 Mic recording logic
    recordBtn.addEventListener('click', async () => {
      if (recordBtn.textContent.includes("Start")) {
        let stream;
        try {
          stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        } catch (err) {
          // Permission denied, no device, or mediaDevices unavailable.
          console.error(err);
          showMessage(`⚠️ Microphone unavailable: ${err}`, "red");
          return;
        }
        mediaRecorder = new MediaRecorder(stream);
        audioChunks = [];
        mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
        mediaRecorder.onstop = () => {
          // Release the microphone once the recording is finished.
          stream.getTracks().forEach(track => track.stop());
          // Label the blob with the recorder's real container type rather
          // than claiming WAV.
          const blob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
          recordedAudio.src = URL.createObjectURL(blob);
          recordedAudio.hidden = false;
          // Upload name is unchanged so the server treats it as audio.
          const file = new File([blob], "mic_recording.wav");
          processFile(file);
        };
        mediaRecorder.start();
        recordBtn.textContent = "⏹ Stop Recording";
        recordBtn.classList.add("recording");
      } else {
        mediaRecorder.stop();
        recordBtn.textContent = "🎙 Start Recording";
        recordBtn.classList.remove("recording");
      }
    });
  </script>
</body>
</html>
"""



# Write the page as UTF-8 explicitly: it contains emoji and declares
# <meta charset="UTF-8">, so relying on the platform default encoding could
# raise UnicodeEncodeError or produce a file the browser mis-decodes.
with open("index.html", "w", encoding="utf-8") as f:
    f.write(html_code)

# ===============================
# Flask app
# ===============================
app = Flask(__name__)


@app.route('/')
def home():
    """Serve the frontend page that was written to ``index.html``."""
    page_path = "index.html"
    return send_file(page_path)

@app.route('/process', methods=['POST'])
def process_file():
    """Transcribe, translate and synthesise an uploaded audio/video file.

    Form fields:
        file: the uploaded media file.
        target_lang: language name (a key of ``lang_code_map``); defaults to
            ``"english"``.

    Returns a JSON object with ``transcription``, ``translation`` and
    ``tts_base64`` (``None`` when speech synthesis failed), or a JSON object
    with a single ``error`` key when no file was uploaded or the audio could
    not be extracted from a video.
    """
    import uuid

    file = request.files.get("file")
    target_lang = request.form.get("target_lang", "english")
    if not file:
        return jsonify({"error": "No file uploaded"})

    # The client-supplied name is untrusted: keep only its last path component
    # (treating backslashes as separators too) so it cannot escape "uploads/",
    # and add a random prefix so same-named uploads cannot overwrite each other.
    original_name = os.path.basename(file.filename.replace("\\", "/"))
    os.makedirs("uploads", exist_ok=True)
    upload_path = os.path.join("uploads", f"{uuid.uuid4().hex}_{original_name}")
    file.save(upload_path)

    temp_paths = [upload_path]
    try:
        audio_path = upload_path
        # If video, extract audio
        if original_name.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
            try:
                audio_path = extract_audio_from_video(upload_path)
            except Exception as e:
                # Reply with JSON so the frontend can show the reason instead
                # of failing to parse an HTML 500 page.
                return jsonify({"error": f"Audio extraction failed: {e}"})
            temp_paths.append(audio_path)

        transcription = transcribe_audio(audio_path)
        translation = translate_text(transcription, target_lang)
        tts_base64 = generate_tts(translation, target_lang)

        return jsonify({
            "transcription": transcription,
            "translation": translation,
            "tts_base64": tts_base64,
        })
    finally:
        # Uploaded and extracted files are only needed for this request.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass

# ===============================
# Start ngrok + Flask
# ===============================
# Open an ngrok tunnel to the local port, print its public address, then start
# Flask on that same port.
FLASK_PORT = 5000
tunnel = ngrok.connect(FLASK_PORT)
public_url = tunnel.public_url
print(" Public URL:", public_url)

app.run(port=FLASK_PORT)
