# Real-Time English-Spanish Speech Translator
This notebook loads pretrained translation models and runs a speech translation pipeline that:
- Recognizes voice input (English/Spanish)
- Translates text using a pretrained model
- Reads out the translated text using TTS


In [None]:
!pip install transformers sentencepiece speechrecognition pyttsx3 pyaudio langdetect gtts

In [None]:
from transformers import MarianMTModel, MarianTokenizer
import speech_recognition as sr
import pyttsx3
from gtts import gTTS
import os
from langdetect import detect

In [None]:
# Pretrained MarianMT checkpoints from the Helsinki-NLP hub namespace,
# one checkpoint per translation direction.
en_es_model_name = "Helsinki-NLP/opus-mt-en-es"
es_en_model_name = "Helsinki-NLP/opus-mt-es-en"

# English -> Spanish: tokenizer first, then the model weights.
en_es_tokenizer, en_es_model = (
    MarianTokenizer.from_pretrained(en_es_model_name),
    MarianMTModel.from_pretrained(en_es_model_name),
)

# Spanish -> English: same loading order as above.
es_en_tokenizer, es_en_model = (
    MarianTokenizer.from_pretrained(es_en_model_name),
    MarianMTModel.from_pretrained(es_en_model_name),
)

In [None]:
def translate_text(text, source_lang):
    """Translate text between English and Spanish.

    Args:
        text: The sentence to translate.
        source_lang: Language code of ``text``. ``"en"`` selects the
            English-to-Spanish model; any other value selects the
            Spanish-to-English model.

    Returns:
        The translated string, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    # Nothing to translate: skip tokenization and generation entirely so the
    # model is never asked to produce output for an empty prompt.
    if not text or not text.strip():
        return ""

    if source_lang == "en":
        tokenizer, model = en_es_tokenizer, en_es_model
    else:
        tokenizer, model = es_en_tokenizer, es_en_model

    # truncation=True caps the input at the tokenizer's maximum length so an
    # unusually long utterance cannot exceed what the model accepts.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

In [None]:
def speak_text(text, lang):
    """Speak text aloud by synthesizing it with gTTS and playing it with mpg321.

    The synthesized audio is written to ``output.mp3`` in the current working
    directory (overwriting any previous file) and then played.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is skipped.
        lang: Language code passed to gTTS (for example ``"en"`` or ``"es"``).

    Returns:
        None.
    """
    import subprocess

    # There is nothing to synthesize for empty text, so do not call gTTS.
    if not text or not text.strip():
        return

    audio_path = "output.mp3"
    gTTS(text=text, lang=lang).save(audio_path)

    # Launch the player with an argument list instead of a shell string, and
    # report failures rather than ignoring them.
    try:
        result = subprocess.run(["mpg321", audio_path], check=False)
    except FileNotFoundError:
        print("Audio player 'mpg321' was not found; cannot play the audio.")
        return
    if result.returncode != 0:
        print(f"mpg321 exited with status {result.returncode}.")

In [None]:
def speech_to_text(language="en-US", timeout=None, phrase_time_limit=None):
    """Capture speech from the microphone and convert it to text.

    Args:
        language: Language tag forwarded to the Google recognizer, for
            example ``"en-US"`` or ``"es-ES"``.
        timeout: Maximum number of seconds to wait for speech to start, or
            ``None`` to wait indefinitely.
        phrase_time_limit: Maximum number of seconds to record once speech
            has started, or ``None`` for no limit.

    Returns:
        The recognized text, or ``None`` when no speech was captured, the
        audio could not be understood, or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        try:
            audio = recognizer.listen(
                source, timeout=timeout, phrase_time_limit=phrase_time_limit
            )
        except sr.WaitTimeoutError:
            # Only reachable when a timeout was requested by the caller.
            print("No speech detected before the timeout.")
            return None

    try:
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError as e:
        print(f"Speech recognition error: {e}")
        return None

In [None]:
def _translate_and_speak(text):
    """Detect the language of ``text``, then translate, print and speak it."""
    from langdetect.lang_detect_exception import LangDetectException

    try:
        detected = detect(text)
    except LangDetectException:
        # Language detection failed for this input; skip it rather than
        # letting the exception end the interactive loop.
        print("Could not detect the language of the input.")
        return

    # Only two translation directions exist. Anything not detected as English
    # is translated with the Spanish-to-English model, so label it as Spanish
    # to keep the printed direction consistent with the model actually used.
    source_lang = "en" if detected == "en" else "es"
    target_lang = "es" if source_lang == "en" else "en"

    translated_text = translate_text(text, source_lang)
    print(f"{source_lang} → {target_lang}: {translated_text}")
    speak_text(translated_text, target_lang)


def main():
    """Run the interactive listen -> translate -> speak loop.

    Each iteration captures one utterance, translates it to the other
    language, prints and speaks the result, then asks whether to continue.
    The loop ends when the user types ``exit`` at the prompt.
    """
    while True:
        text = speech_to_text()
        if text:
            _translate_and_speak(text)
        else:
            print("No valid input detected.")

        answer = input("Press Enter to continue or type 'exit' to stop: ")
        # Tolerate surrounding whitespace and different letter case.
        if answer.strip().lower() == "exit":
            break

In [None]:
# Start the interactive translation loop only when this code is executed as
# the main module, not when it is imported.
if __name__ == "__main__":
    main()