<a href="https://colab.research.google.com/github/ayushpratapno1/TTS/blob/main/TTS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Implementation Code***

Cell 1: Environment Setup and GPU Check

In [22]:
"""
🔧 System Environment Setup
- Check GPU availability and specifications
- Configure compute device for optimal performance
"""

import torch
import os

# Report CUDA availability, then select the compute device used by later cells.
print("🔍 Detecting compute environment...")
print(f"CUDA available: {torch.cuda.is_available()}")

# Pick the device up front; the branch below only reports on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print(f"🎮 GPU device: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; 1024**3 converts it to GiB for display.
    total_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"💾 GPU memory: {total_gib:.1f} GB")
    print("✅ Using GPU acceleration")
else:
    print("⚠️ No GPU available - using CPU")

print(f"🎯 Active device: {device}")

🔍 Detecting compute environment...
CUDA available: True
🎮 GPU device: Tesla T4
💾 GPU memory: 14.7 GB
✅ Using GPU acceleration
🎯 Active device: cuda


Cell 2: Mount Google Drive (Optional for saving models)

In [23]:
"""
📁 Google Drive Mount
- Mount Google Drive for model persistence
- Create directory structure for saved models
"""

from google.colab import drive

# Attach Google Drive at /content/drive (used for model persistence).
print("📁 Mounting Google Drive...")
drive.mount('/content/drive')

# Folder layout for saved models, generated outputs and logs.
project_dirs = [
    '/content/drive/MyDrive/TTS_Models',
    '/content/drive/MyDrive/TTS_Models/outputs',
    '/content/drive/MyDrive/TTS_Models/logs'
]

# exist_ok=True makes re-running this cell harmless.
for directory in project_dirs:
    os.makedirs(directory, exist_ok=True)

# Emit the confirmation banner followed by one bullet per directory.
summary_lines = [
    "✅ Google Drive mounted and directories created!",
    "📂 Available directories:",
]
summary_lines.extend(f"   - {directory}" for directory in project_dirs)
print("\n".join(summary_lines))

📁 Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Google Drive mounted and directories created!
📂 Available directories:
   - /content/drive/MyDrive/TTS_Models
   - /content/drive/MyDrive/TTS_Models/outputs
   - /content/drive/MyDrive/TTS_Models/logs


Cell 3: Install Dependencies

In [3]:
# Install minimal required packages.
# Every version specifier is quoted: left unquoted, the shell treats the `>` in
# `pkg>=X` as an output redirection, so pip receives only `pkg` (no minimum
# version is enforced) and its output is written to a stray file named `=X`.
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q "numpy==1.24.4"
!pip install -q "transformers>=4.35.0"
!pip install -q "soundfile>=0.12.1"
!pip install -q "gradio>=4.0.0"
!pip install -q "accelerate>=0.24.0"

print("Lightweight packages installed successfully!")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m875.6/875.6 kB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m115.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m663.9/663.9 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.9/417.9 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.4/168.4 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.1/58.1 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.2/128.2 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━

In [8]:
# Unpinned NumPy install.
# NOTE(review): the dependency cell pins numpy==1.24.4, while the import cell's
# recorded output reports NumPy 2.0.2 — confirm which version is intended.
!pip install -q numpy

Cell 4: Import Libraries

In [9]:
import torch
import soundfile as sf
import numpy as np
import time
from typing import Dict, List, Tuple, Optional

# Report the versions of the numeric stack before touching optional packages
print(f"✅ PyTorch version: {torch.__version__}")
print(f"✅ NumPy version: {np.__version__}")

try:
    # Hugging Face classes; an ImportError here is reported below, not raised
    from transformers import AutoTokenizer, AutoModel, AutoProcessor, set_seed
    print("✅ Transformers imported successfully")

    # UI toolkit
    import gradio as gr
    print("✅ Gradio imported successfully")

    # Seed is only set when both imports above succeeded
    set_seed(42)
    print("✅ All core libraries loaded!")

except ImportError as import_error:
    print(f"⚠️ Import warning: {import_error}")
    print("Continuing with available packages...")

# Smoke test: build a small random tensor and print its shape
test_tensor = torch.randn(2, 3)
print(f"✅ PyTorch working: tensor shape {test_tensor.shape}")

✅ PyTorch version: 2.6.0+cu124
✅ NumPy version: 2.0.2
Continuing with available packages...
✅ PyTorch working: tensor shape torch.Size([2, 3])


In [12]:
# Install gTTS, which the model cell imports via `from gtts import gTTS`.
# NOTE(review): unpinned; the recorded run resolved to gTTS 2.5.4 and replaced
# click 8.2.1 with click 8.1.8 — pin the version if that matters.
!pip install gtts

Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting click<8.2,>=7.1 (from gtts)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Downloading click-8.1.8-py3-none-any.whl (98 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/98.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: click, gtts
  Attempting uninstall: click
    Found existing installation: click 8.2.1
    Uninstalling click-8.2.1:
      Successfully uninstalled click-8.2.1
Successfully installed click-8.1.8 gtts-2.5.4


Cell 5: Model Class Definition

In [19]:
# Install translation library. The distribution is named `googletrans-py`,
# but the import below uses the module name `googletrans`.
!pip install -q googletrans-py

import torch
import soundfile as sf
import numpy as np
import gradio as gr
from gtts import gTTS
import io
import time
from googletrans import Translator # Use googletrans-py which is imported as googletrans

class TranslatingTTSModel:
    """Translate input text with googletrans, then synthesize speech with gTTS.

    Attributes:
        translator: ``Translator`` instance used for detection and translation.
        supported_languages: Display name -> language code.
        language_names: Language code -> display name (inverse of the above).
    """

    def __init__(self):
        """Initialize TTS with translation capability"""

        # Initialize translator
        self.translator = Translator()

        # Language mapping for gTTS
        self.supported_languages = {
            'English': 'en',
            'Hindi': 'hi',
            'Marathi': 'mr',
            'Kannada': 'kn',
            'Telugu': 'te',
            'Punjabi': 'pa',
            'Tamil': 'ta',
            'Bengali': 'bn',
            'Gujarati': 'gu'
        }

        # Language names for translation (code -> display name). Derived from
        # supported_languages so the two maps cannot drift apart.
        self.language_names = {
            code: name for name, code in self.supported_languages.items()
        }

        print("✅ Translation + TTS model initialized!")

    @staticmethod
    def _first_item(value):
        """Return the first element of a list/tuple (None if empty), else the value itself."""
        if isinstance(value, (list, tuple)):
            return value[0] if value else None
        return value

    def detect_language(self, text: str) -> str:
        """Detect the language of input text.

        Args:
            text: Text whose language should be detected.

        Returns:
            The detected language code, or ``'en'`` when detection fails or
            yields no language.
        """
        try:
            detection = self.translator.detect(text)

            # Detection results are not always a plain (str, float) pair:
            # `lang` / `confidence` may come back as lists or None
            # (TODO confirm for the installed googletrans-py build).
            # Normalise them so a successful detection is never thrown away
            # just because the confidence cannot be formatted.
            detected_lang = self._first_item(detection.lang)
            confidence = self._first_item(detection.confidence)

            if not detected_lang:
                print("⚠️ Language detection failed: no language returned")
                return 'en'  # Default to English

            if isinstance(confidence, (int, float)):
                print(f"🔍 Detected language: {detected_lang} (confidence: {confidence:.2f})")
            else:
                print(f"🔍 Detected language: {detected_lang} (confidence: n/a)")
            return detected_lang
        except Exception as e:
            print(f"⚠️ Language detection failed: {e}")
            return 'en'  # Default to English

    def translate_text(self, text: str, target_language: str) -> str:
        """Translate text to target language.

        Args:
            text: Text to translate; its source language is auto-detected.
            target_language: Destination language code (e.g. ``'hi'``).

        Returns:
            The translated text, or ``text`` unchanged when the detected source
            already equals ``target_language`` or when translation fails.
        """
        try:
            # Detect source language
            source_lang = self.detect_language(text)

            # If source and target are the same, no translation needed
            if source_lang == target_language:
                print(f"✅ No translation needed - both languages are {self.language_names.get(target_language, target_language)}")
                return text

            # Translate text
            print(f"🔄 Translating from {self.language_names.get(source_lang, source_lang)} to {self.language_names.get(target_language, target_language)}")

            translation = self.translator.translate(text, src=source_lang, dest=target_language)
            translated_text = translation.text

            print(f"📝 Original: {text}")
            print(f"🔤 Translated: {translated_text}")

            return translated_text

        except Exception as e:
            # Best effort: fall back to speaking the untranslated input.
            print(f"❌ Translation failed: {e}")
            print("📢 Using original text")
            return text

    def generate_speech(self, text: str, target_language: str = "en") -> tuple:
        """Generate speech with translation.

        Args:
            text: Input text in any language.
            target_language: Language code to translate into and speak in.

        Returns:
            ``(output_filename, translated_text)`` on success, where
            ``output_filename`` is an MP3 written to the working directory;
            ``(None, text)`` when the input is blank or synthesis fails.
        """
        # Nothing to translate or speak: skip the network calls entirely.
        if not text or not text.strip():
            print("❌ Error generating speech: no text to speak")
            return None, text

        try:
            # Step 1: Translate text to target language
            translated_text = self.translate_text(text, target_language)

            # Step 2: Generate speech in target language
            print(f"🎵 Generating {self.language_names.get(target_language, target_language)} speech...")

            tts = gTTS(text=translated_text, lang=target_language, slow=False)

            # Nanosecond timestamp: with whole-second resolution two requests
            # arriving in the same second would overwrite each other's file.
            output_filename = f"translated_tts_{time.time_ns()}.mp3"
            tts.save(output_filename)

            print(f"✅ Speech generated: {output_filename}")
            return output_filename, translated_text

        except Exception as e:
            print(f"❌ Error generating speech: {e}")
            return None, text

# Initialize translation + TTS model.
# `translating_tts` is a module-level instance; the Gradio interface cell reads
# it directly (language maps, generate_speech, detect_language).
print("Initializing Translation + TTS model...")
translating_tts = TranslatingTTSModel()

Initializing Translation + TTS model...
✅ Translation + TTS model initialized!


Cell 6: Gradio Interface and Launch

In [20]:
def create_translation_tts_interface():
    """Build the Gradio interface that translates input text and speaks it.

    Relies on the module-level ``translating_tts`` instance for the language
    maps, language detection and speech generation.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """

    def translation_tts_demo(input_text, target_language):
        """Gradio handler: returns (audio path or None, status text, translated text)."""

        # Reject blank input before doing any work.
        if not input_text.strip():
            return None, "⚠️ Please enter some text", ""

        # Dropdown label -> language code; unknown labels fall back to English.
        lang_code = translating_tts.supported_languages.get(target_language, 'en')

        try:
            started_at = time.time()

            # Translate and synthesize in one call.
            audio_path, spoken_text = translating_tts.generate_speech(input_text, lang_code)

            if not audio_path:
                return None, "❌ Failed to generate speech", ""

            # Measured before the extra detection call below, so that call is
            # not counted in the reported time.
            elapsed = time.time() - started_at

            # Detect the source language again, purely for the status panel.
            detected_code = translating_tts.detect_language(input_text)
            detected_name = translating_tts.language_names.get(detected_code, detected_code)

            status = f"""
                ✅ **Translation + Speech Generated Successfully!**

                **Process:**
                1. 🔍 **Detected Input:** {detected_name}
                2. 🔄 **Translated to:** {target_language}
                3. 🎵 **Generated Speech:** {target_language} audio

                **Details:**
                - **Generation Time:** {elapsed:.2f}s
                - **Method:** Google Translate + Google TTS
                - **Status:** Working perfectly!

                **Original Text:** {input_text}

                **Translated Text:** {spoken_text}
                """

            return audio_path, status, spoken_text

        except Exception as err:
            return None, f"❌ Error: {str(err)}", ""

    # Input widgets: free text plus the target-language picker.
    text_input = gr.Textbox(
        label="📝 Input Text (Any Language)",
        placeholder="Type in English, Hindi, or any language...",
        lines=3,
        info="The system will automatically detect your language and translate to the target language"
    )
    language_input = gr.Dropdown(
        label="🎯 Target Language for Speech",
        choices=list(translating_tts.supported_languages.keys()),
        value="Hindi",
        info="Select the language you want to hear the speech in"
    )

    # Output widgets, in the order the handler returns its values.
    audio_output = gr.Audio(label="🔊 Generated Speech", type="filepath")
    details_output = gr.Markdown(label="📊 Process Details")
    translated_output = gr.Textbox(label="🔤 Translated Text", lines=2, interactive=False)

    description_md = """
        **Now with REAL translation capability!** 🎉

        ✅ **Auto-detects** your input language
        ✅ **Translates** to your target language
        ✅ **Generates speech** in the target language
        ✅ **Perfect for storytelling** in multiple languages

        **Example:** Type "Tell me about Akbar" in English → Get Hindi audio story!
        """

    # Each example row is [input text, target language label].
    example_rows = [
        ["Tell me about the great king Akbar and his wise minister Birbal", "Hindi"],
        ["Hello, how are you today?", "Hindi"],
        ["What is the story of Maharana Pratap?", "Hindi"],
        ["Good morning, I want to hear a story", "Marathi"],
        ["Tell me about Indian history", "Telugu"],
        ["नमस्कार", "English"],  # Hindi to English
    ]

    return gr.Interface(
        fn=translation_tts_demo,
        inputs=[text_input, language_input],
        outputs=[audio_output, details_output, translated_output],
        title="🌐 Multilingual Translator + TTS System",
        description=description_md,
        examples=example_rows,
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            font-family: 'Segoe UI', sans-serif;
        }
        """
    )

# Launch the translation + TTS interface.
# share=True requests a public share link (the recorded output shows a
# *.gradio.live URL), so anyone with the link can use the app while it runs.
# NOTE(review): show_error=True presumably surfaces handler exceptions in the
# UI — confirm against the Gradio docs.
print("🚀 Launching Translation + TTS interface...")
demo = create_translation_tts_interface()
demo.launch(share=True, debug=False, show_error=True)


🚀 Launching Translation + TTS interface...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6b0df4da4d26276d9f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [16]:
# Pin httpx/httpcore. The httpx pin matches what pip's resolver message in this
# notebook says googletrans 4.0.0rc1 requires (httpx==0.13.3).
# NOTE(review): the execution counter (In [16]) shows this ran before the model
# cell (In [19]); confirm it is still needed and move it up with the other installs.
!pip install -q httpx==0.13.3 httpcore==0.9.1

In [18]:
# Same googletrans-py install that also appears at the top of the model cell.
# NOTE(review): the recorded output reports an httpx conflict with
# googletrans 4.0.0rc1 — consider keeping a single install cell near the top.
!pip install -q googletrans-py

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.5/73.5 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for googletrans-py (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
googletrans 4.0.0rc1 requires httpx==0.13.3, but you have httpx 0.28.1 which is incompatible.[0m[31m
[0m