<a href="https://colab.research.google.com/github/ayussri64/AI-Powered-Multi-Language-Translator/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
# === CORRECTED AND ENHANCED IMPORTS ===
!pip install transformers torch gradio langdetect gtts pygame pandas

import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import gradio as gr
import time
import pandas as pd
from datetime import datetime
import pygame
# Enhanced language detection imports
from langdetect import detect as langdetect_detect, LangDetectException

from gtts import gTTS
import io
import base64
import warnings
from functools import lru_cache
warnings.filterwarnings('ignore')

print("✅ Enhanced dependencies installed with dual language detection!")

✅ Enhanced dependencies installed with dual language detection!


In [20]:
# === CELL 2: COMPLETE HybridAdvancedTranslator Class (Fixed Methods) ===
class HybridAdvancedTranslator:
    """Multilingual translator built on an M2M100 checkpoint.

    Bundles model/tokenizer loading, language detection (langdetect),
    translation with a result cache, a rolling history of recent
    translations, CSV export of that history, and text-to-speech (gTTS).
    """

    DEFAULT_LANG = "en"            # fallback code when detection/lookup fails
    MAX_TOKENS = 1024              # tokenizer truncation and generation limit
    HISTORY_LIMIT = 20             # number of history entries kept
    PREVIEW_CHARS = 100            # characters of text stored per history entry
    DETECT_CACHE_SIZE = 128        # max cached language detections
    TRANSLATION_CACHE_SIZE = 256   # max cached translation results

    def __init__(self, model_size="418M"):
        """Load the model and build the language tables.

        Args:
            model_size: Size suffix of the checkpoint; it is lower-cased and
                appended to ``facebook/m2m100_`` to form the model name.
        """
        self.model_size = model_size
        self.tokenizer, self.model = self._load_model()
        # code -> display name for every code the tokenizer knows
        self.supported_languages = self._get_enhanced_language_mapping()
        # (text, source_code, target_code) -> result dict
        self.translation_cache = {}
        # text -> detected code; kept per instance rather than in an
        # lru_cache on the method, which would hold a reference to `self`
        self._detect_cache = {}
        # newest-first list of history entry dicts
        self.history = []

        print("🚀 Hybrid Advanced Translator Initialized:")
        print(f"   Model: M2M100-{model_size}")
        print(f"   Languages: {len(self.supported_languages)} supported")
        print(f"   Device: {'GPU' if torch.cuda.is_available() else 'CPU'}")

    def _load_model(self):
        """Load the tokenizer and model for ``self.model_size``.

        Uses float16 weights and ``device_map="auto"`` when CUDA is
        available, float32 with no device map otherwise.

        Returns:
            A ``(tokenizer, model)`` tuple.
        """
        model_name = f"facebook/m2m100_{self.model_size.lower()}"

        print(f"📦 Loading {model_name}...")
        start_time = time.time()

        use_cuda = torch.cuda.is_available()
        tokenizer = M2M100Tokenizer.from_pretrained(model_name)
        model = M2M100ForConditionalGeneration.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
        )

        print(f"✅ Model loaded in {time.time() - start_time:.2f}s")
        return tokenizer, model

    def _get_enhanced_language_mapping(self):
        """Build the code<->name lookup tables from the tokenizer's codes.

        Sets ``self.code_to_name`` and ``self.name_to_code``. Codes without a
        friendly name below are displayed as the code itself.

        Returns:
            The ``code -> name`` dict (same object as ``self.code_to_name``).
        """
        lang_codes = list(self.tokenizer.lang_code_to_id.keys())

        friendly_names = {
            'en': 'English', 'es': 'Spanish', 'fr': 'French', 'de': 'German',
            'zh': 'Chinese', 'hi': 'Hindi', 'ar': 'Arabic', 'ru': 'Russian',
            'pt': 'Portuguese', 'ja': 'Japanese', 'ko': 'Korean', 'it': 'Italian',
            'nl': 'Dutch', 'tr': 'Turkish', 'pl': 'Polish', 'uk': 'Ukrainian',
            'th': 'Thai', 'vi': 'Vietnamese', 'id': 'Indonesian', 'ms': 'Malay',
            'fil': 'Filipino', 'my': 'Burmese', 'km': 'Khmer', 'lo': 'Lao',
            'da': 'Danish', 'fi': 'Finnish', 'no': 'Norwegian', 'sv': 'Swedish',
            'cs': 'Czech', 'sk': 'Slovak', 'hu': 'Hungarian', 'ro': 'Romanian',
            'bg': 'Bulgarian', 'el': 'Greek', 'hr': 'Croatian', 'sr': 'Serbian',
            'sl': 'Slovenian', 'et': 'Estonian', 'lv': 'Latvian', 'lt': 'Lithuanian',
            'ga': 'Irish', 'mt': 'Maltese'
        }

        self.code_to_name = {code: friendly_names.get(code, code) for code in lang_codes}
        self.name_to_code = {name: code for code, name in self.code_to_name.items()}

        return self.code_to_name

    @staticmethod
    def _remember(cache, key, value, limit):
        """Store ``key -> value``, evicting the oldest entry once ``limit`` is hit."""
        if key not in cache and len(cache) >= limit:
            # dicts preserve insertion order, so the first key is the oldest
            cache.pop(next(iter(cache)))
        cache[key] = value

    def _resolve_lang_code(self, lang, default="en"):
        """Map a language code or display name to a code the model supports.

        Accepts an exact code, a display name from ``self.name_to_code``, a
        differently-cased code, or a regional variant such as ``zh-cn``
        (reduced to its base code).

        Args:
            lang: Language code or display name; may be ``None``.
            default: Value returned when ``lang`` cannot be resolved.

        Returns:
            A key of ``self.code_to_name``, or ``default``.
        """
        if not lang:
            return default
        candidate = str(lang).strip()
        if candidate in self.code_to_name:
            return candidate
        if candidate in self.name_to_code:
            return self.name_to_code[candidate]
        lowered = candidate.lower()
        if lowered in self.code_to_name:
            return lowered
        base = lowered.replace("_", "-").split("-", 1)[0]
        if base in self.code_to_name:
            return base
        return default

    def detect_language(self, text):
        """Detect the language of ``text`` with langdetect.

        Args:
            text: Text to analyse.

        Returns:
            The code reported by langdetect, or ``"en"`` when the text is
            empty, shorter than three characters after stripping, or
            detection raises ``LangDetectException``. Results (including the
            fallback) are cached per instance.
        """
        if not text or len(text.strip()) < 3:
            return self.DEFAULT_LANG  # Default for short texts

        cache = self.__dict__.setdefault("_detect_cache", {})
        if text in cache:
            return cache[text]

        try:
            detected = langdetect_detect(text)
        except LangDetectException:
            detected = self.DEFAULT_LANG  # Fallback to English
        self._remember(cache, text, detected, self.DETECT_CACHE_SIZE)
        return detected

    def translate(self, text, target_lang, source_lang="auto"):
        """Translate ``text`` into ``target_lang``.

        Args:
            text: Text to translate.
            target_lang: Target language code or display name. An unknown
                value falls back to English.
            source_lang: ``"auto"`` (or ``None``) to detect the language, or
                a language code / display name.

        Returns:
            On success, a dict with ``success=True``, ``translated_text``,
            ``source_lang``/``target_lang`` (display names),
            ``translation_time`` (seconds), ``word_count``, ``confidence``,
            ``truncated`` and ``token_count``. On failure, a dict with
            ``success=False`` and a ``message``.
        """
        if not text or not text.strip():
            return {"success": False, "message": "❌ Please enter text to translate"}

        start_time = time.time()

        if source_lang is None or source_lang == "auto":
            # Detection can report codes the model does not know (for
            # example regional variants); normalise and fall back to English.
            source_code = self._resolve_lang_code(self.detect_language(text))
        else:
            source_code = self._resolve_lang_code(source_lang, default=None)
            if source_code is None:
                return {
                    "success": False,
                    "message": f"❌ Unsupported source language: {source_lang}",
                }
        target_code = self._resolve_lang_code(target_lang)

        cache_key = (text, source_code, target_code)
        if cache_key in self.translation_cache:
            return self.translation_cache[cache_key]

        try:
            self.tokenizer.src_lang = source_code
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=self.MAX_TOKENS,
            )
            # The tokenizer returns CPU tensors; put them where the model is.
            inputs = inputs.to(self.model.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    forced_bos_token_id=self.tokenizer.get_lang_id(target_code),
                    max_length=self.MAX_TOKENS,
                    num_beams=5,
                    early_stopping=True
                )

            translated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            token_count = len(inputs['input_ids'][0])

            result = {
                "success": True,
                "translated_text": translated_text,
                "source_lang": self.code_to_name.get(source_code, source_code),
                "target_lang": self.code_to_name.get(target_code, target_code),
                "translation_time": time.time() - start_time,
                "word_count": len(text.split()),
                # Heuristic based on input length only, not a model score.
                "confidence": "High" if len(text) > 10 else "Medium",
                "truncated": token_count >= self.MAX_TOKENS,
                "token_count": token_count
            }

            self._add_to_history(text, result)
            self._remember(
                self.translation_cache, cache_key, result, self.TRANSLATION_CACHE_SIZE
            )
            return result

        except Exception as e:
            # UI boundary: report the failure instead of raising.
            return {"success": False, "message": f"❌ Translation error: {str(e)}"}

    def _preview(self, value):
        """Return ``value`` shortened to ``PREVIEW_CHARS`` characters plus ``...``."""
        if len(value) > self.PREVIEW_CHARS:
            return value[:self.PREVIEW_CHARS] + "..."
        return value

    def _add_to_history(self, original_text, result):
        """Prepend a summary of a successful translation to ``self.history``.

        Args:
            original_text: The untranslated input text.
            result: A success dict as produced by ``translate``.
        """
        entry = {
            'timestamp': datetime.now().strftime("%H:%M:%S"),
            'original': self._preview(original_text),
            'translated': self._preview(result['translated_text']),
            'source_lang': result['source_lang'],
            'target_lang': result['target_lang'],
            'time_taken': f"{result['translation_time']:.3f}s",
            'word_count': result['word_count'],
            'confidence': result['confidence']
        }
        self.history.insert(0, entry)
        del self.history[self.HISTORY_LIMIT:]  # Keep last 20 entries

    def get_history_dataframe(self):
        """Return the history as a DataFrame (empty when there is no history)."""
        if not self.history:
            return pd.DataFrame()
        return pd.DataFrame(self.history)

    def export_history(self):
        """Write the history to a timestamped CSV file in the working directory.

        Returns:
            The file name written, or ``None`` when the history is empty.
        """
        if not self.history:
            return None
        filename = f"translation_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        self.get_history_dataframe().to_csv(filename, index=False)
        return filename

    def text_to_speech(self, text, target_lang):
        """Synthesize ``text`` with gTTS.

        Args:
            text: Text to speak.
            target_lang: Language code or display name; unknown values fall
                back to English.

        Returns:
            An ``io.BytesIO`` positioned at the start of the audio data, or
            ``None`` when synthesis fails (the error is printed).
        """
        try:
            lang_code = self._resolve_lang_code(target_lang)
            tts = gTTS(text=text, lang=lang_code, slow=False)
            audio_buffer = io.BytesIO()
            tts.write_to_fp(audio_buffer)
            audio_buffer.seek(0)
            return audio_buffer
        except Exception as e:
            print(f"TTS Error: {e}")
            return None

# Build the single module-level translator instance; the interface code below
# reads it as the global `translator`. Constructing it loads the M2M100 model.
translator = HybridAdvancedTranslator(model_size="418M")
print("✅ Hybrid Advanced Translator initialized successfully!")

📦 Loading facebook/m2m100_418m...
✅ Model loaded in 5.21s
🚀 Hybrid Advanced Translator Initialized:
   Model: M2M100-418M
   Languages: 100 supported
   Device: GPU
✅ Hybrid Advanced Translator initialized successfully!


In [26]:
# === CELL 3: CORRECTED Professional Interface (Fixed Dataframe height issue) ===
def create_interface():
    """Build the Gradio Blocks UI around the module-level ``translator``.

    The interface has a "Translate" tab (settings, input/output, metrics,
    truncation warning, text-to-speech) and a "History" tab (history table,
    usage statistics, refresh/export/clear buttons).

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """

    custom_css = """
    .gradio-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        font-family: 'Segoe UI', sans-serif;
    }
    .metric-card {
        background: rgba(255,255,255,0.95);
        padding: 15px;
        border-radius: 10px;
        margin: 5px;
        text-align: center;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    }
    .warning-box {
        background: #fff3cd; padding: 10px; border-radius: 5px;
        border-left: 4px solid #ffc107; margin: 10px 0;
    }
    """

    # Columns of the translator's history entries that the table displays.
    history_columns = ['timestamp', 'source_lang', 'target_lang', 'word_count', 'time_taken', 'confidence']

    def _empty_history_frame():
        """Return an empty DataFrame with the history table's columns."""
        return pd.DataFrame(columns=history_columns)

    def _render_audio_player(audio_buffer):
        """Embed the audio bytes in an inline HTML5 ``<audio>`` element."""
        encoded = base64.b64encode(audio_buffer.getvalue()).decode("ascii")
        return (
            '<audio controls autoplay>'
            f'<source src="data:audio/mpeg;base64,{encoded}" type="audio/mpeg">'
            'Your browser does not support the audio element.'
            '</audio>'
        )

    with gr.Blocks(css=custom_css, title="🚀 Advanced AI Translator") as demo:
        # Header
        gr.Markdown("""
        <div style="text-align: center; color: white;">
            <h1>🌍 Advanced AI Translator</h1>
            <h3>Powered by M2M100 • True Auto-Detection • Professional Features</h3>
            <p><em>No external APIs • Your data processed locally • Enterprise-grade translation</em></p>
        </div>
        """)

        with gr.Tabs():
            # Tab 1: Main Translation
            with gr.TabItem("💬 Translate"):
                with gr.Row():
                    # Left Panel - Controls
                    with gr.Column(scale=1):
                        gr.Markdown("### ⚙️ Settings")

                        auto_detect = gr.Checkbox(
                            value=True,
                            label="🔍 Enable Auto Language Detection",
                            info="Automatically detect source language"
                        )

                        source_lang = gr.Dropdown(
                            choices=["Auto-Detect"] + list(translator.supported_languages.values()),
                            value="Auto-Detect",
                            label="🔤 Source Language"
                        )

                        target_lang = gr.Dropdown(
                            choices=list(translator.supported_languages.values()),
                            value="Spanish",
                            label="🎯 Target Language"
                        )

                        # Static system info cards
                        gr.Markdown("### 📊 System Info")
                        with gr.Row():
                            gr.Markdown(f"""
                            <div class="metric-card">
                                <h3>🚀 Model</h3>
                                <p>M2M100-{translator.model_size}</p>
                            </div>
                            """)
                            gr.Markdown(f"""
                            <div class="metric-card">
                                <h3>🌍 Languages</h3>
                                <p>{len(translator.supported_languages)}+</p>
                            </div>
                            """)
                            gr.Markdown(f"""
                            <div class="metric-card">
                                <h3>⚡ Device</h3>
                                <p>{'GPU' if torch.cuda.is_available() else 'CPU'}</p>
                            </div>
                            """)

                    # Right Panel - Translation Area
                    with gr.Column(scale=2):
                        gr.Markdown("### 💬 Translation Engine")

                        input_text = gr.Textbox(
                            lines=4,
                            placeholder="Enter text to translate... (Supports 500+ characters)",
                            label="📝 Input Text",
                            max_length=1000
                        )

                        with gr.Row():
                            translate_btn = gr.Button("🔄 Translate", variant="primary", size="lg")
                            clear_btn = gr.Button("🗑️ Clear All", size="lg")

                        # Results display
                        output_text = gr.Textbox(
                            lines=4,
                            label="💫 Translation Result",
                            interactive=False
                        )

                        # Performance metrics
                        with gr.Row():
                            time_metric = gr.Textbox(label="⏱️ Translation Time", interactive=False, value="Ready")
                            lang_metric = gr.Textbox(label="🌐 Detected Language", interactive=False, value="Waiting")
                            conf_metric = gr.Textbox(label="🎯 Confidence Level", interactive=False, value="Waiting")

                        # Warnings
                        warning_html = gr.HTML(visible=False)

                        # Audio output
                        with gr.Row():
                            tts_btn = gr.Button("🔊 Listen to Translation")
                            audio_display = gr.HTML()

            # Tab 2: History & Analytics
            with gr.TabItem("📜 History"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### 📊 Translation History")
                        history_df = gr.Dataframe(
                            headers=["Time", "Source Lang", "Target Lang", "Words", "Duration", "Confidence"],
                            interactive=False,
                            row_count=8,
                            col_count=(6, "fixed"),
                            wrap=True,
                            datatype="str"
                        )

                        with gr.Row():
                            refresh_btn = gr.Button("🔄 Refresh History")
                            export_btn = gr.Button("💾 Export to CSV")
                            clear_history_btn = gr.Button("🗑️ Clear History")

                    with gr.Column():
                        gr.Markdown("### 📈 Usage Analytics")
                        stats_html = gr.HTML()

        # Event handlers
        def process_translation(text, src_lang, tgt_lang, auto_detect_flag):
            """Translate the input and return one value per output component.

            Returns a 5-tuple: translation text, time, detected language,
            confidence, and an update for the warning box.
            """
            hidden_warning = gr.update(value="", visible=False)

            if not text or not text.strip():
                return "", "Ready", "Waiting", "Waiting", hidden_warning

            if not tgt_lang:
                return "❌ Please select a target language", "Error", "Failed", "Failed", hidden_warning

            if auto_detect_flag or src_lang == "Auto-Detect":
                source_param = "auto"
            else:
                # The dropdown yields display names, but translate() assigns
                # the source straight to tokenizer.src_lang, which needs a code.
                source_param = translator.name_to_code.get(src_lang, src_lang)

            result = translator.translate(text, tgt_lang, source_param)

            if not result['success']:
                return result['message'], "Error", "Failed", "Failed", hidden_warning

            warning_update = hidden_warning
            if result.get('truncated', False):
                token_count = result.get('token_count', 0)
                warning_markup = f"""
                <div class="warning-box">
                    ⚠️ <b>Text truncated:</b> Your input exceeded the {token_count}-token limit. Only the first {token_count} tokens were translated.
                </div>
                """
                warning_update = gr.update(value=warning_markup, visible=True)

            return (
                result['translated_text'],
                f"{result['translation_time']:.3f}s",
                result['source_lang'],
                result['confidence'],
                warning_update,
            )

        # Connect main translation button; each output component appears once.
        translate_btn.click(
            fn=process_translation,
            inputs=[input_text, source_lang, target_lang, auto_detect],
            outputs=[output_text, time_metric, lang_metric, conf_metric, warning_html]
        )

        # Text-to-speech
        def generate_audio(text, lang):
            """Return an HTML audio player for the translation, or a status message."""
            # Error results start with "❌"; do not read those aloud.
            if not text or not text.strip() or text.startswith("❌"):
                return "🔇 No valid translation to speak"
            audio_buffer = translator.text_to_speech(text, lang)
            if not audio_buffer:
                return "🔇 Audio generation failed"
            return _render_audio_player(audio_buffer)

        tts_btn.click(
            fn=generate_audio,
            inputs=[output_text, target_lang],
            outputs=audio_display
        )

        # History management
        def update_history():
            """Return the history table data and the usage statistics HTML."""
            df = translator.get_history_dataframe()
            if df.empty:
                return _empty_history_frame(), "<p>No translation history yet.</p>"

            # Ensure all displayed columns exist
            for col in history_columns:
                if col not in df.columns:
                    df[col] = 'N/A'

            display_df = df[history_columns]

            total_translations = len(df)
            try:
                # time_taken is stored as e.g. "0.123s"; strip the suffix
                time_series = df['time_taken'].astype(str).str.replace('s', '', regex=False)
                avg_time = pd.to_numeric(time_series, errors='coerce').mean()
                avg_time_str = f"{avg_time:.3f}s" if not pd.isna(avg_time) else "N/A"
            except Exception as e:
                print(f"Stats calculation error: {e}")
                avg_time_str = "N/A"

            source_count = df['source_lang'].nunique()
            target_count = df['target_lang'].nunique()
            stats = f"""
                <div style="background: white; padding: 20px; border-radius: 10px;">
                    <h3>📈 Usage Statistics</h3>
                    <p><b>Total Translations:</b> {total_translations}</p>
                    <p><b>Average Time:</b> {avg_time_str}</p>
                    <p><b>Languages Used:</b> {source_count} source, {target_count} target</p>
                </div>
                """
            return display_df, stats

        def export_history_func():
            """Export the history to CSV and show the outcome as a toast."""
            if not translator.history:
                gr.Warning("❌ No history to export")
                return
            try:
                filename = translator.export_history()
            except OSError as e:
                gr.Warning(f"❌ Failed to export history: {e}")
                return
            if filename:
                gr.Info(f"✅ History exported to {filename}")
            else:
                gr.Warning("❌ Failed to export history")

        def clear_history_func():
            """Empty the translator's history and reset the history tab."""
            translator.history.clear()
            return _empty_history_frame(), "<p>History cleared.</p>"

        # Initialize history on load
        demo.load(fn=update_history, outputs=[history_df, stats_html])

        refresh_btn.click(fn=update_history, outputs=[history_df, stats_html])
        export_btn.click(fn=export_history_func)
        clear_history_btn.click(fn=clear_history_func, outputs=[history_df, stats_html])

        # Clear button
        def clear_all():
            """Reset every component of the Translate tab to its initial state.

            The returned values line up one-to-one with the outputs list of
            ``clear_btn.click`` below.
            """
            return (
                "",                                    # input_text
                "",                                    # output_text
                "Ready",                               # time_metric
                "Waiting",                             # lang_metric
                "Waiting",                             # conf_metric
                gr.update(value="", visible=False),    # warning_html
                "",                                    # audio_display
            )

        clear_btn.click(
            fn=clear_all,
            outputs=[input_text, output_text, time_metric, lang_metric, conf_metric, warning_html, audio_display]
        )

    return demo

# Launch the interface
# Build and launch the interface.
# share=True requests a public Gradio share URL; debug=True keeps the cell
# running so that errors and logs stay visible (see the launch output below).
print("🎯 Launching Advanced AI Translator...")
print("✅ All methods are now properly defined!")
interface = create_interface()
interface.launch(share=True, debug=True)

🎯 Launching Advanced AI Translator...
✅ All methods are now properly defined!
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://59414c08a11ef3a0aa.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://59414c08a11ef3a0aa.gradio.live


