In [1]:
# ==============================================================================
# PART 1: INSTALL REQUIRED LIBRARIES
# ==============================================================================
print("⏳ Installing required libraries...")
!pip install gradio groq huggingface_hub -q
print("✅ Libraries installed.")

# ==============================================================================
# PART 2: IMPORTS, API KEY SETUP, AND AUTHENTICATION
# ==============================================================================
import gradio as gr
import os
from groq import AsyncGroq, Groq
from huggingface_hub import InferenceClient, HfFolder
from google.colab import userdata

print("🔑 Setting up API keys and authentication...")

# Pre-bind every module-level name that later cells/functions read. If a secret
# lookup or a client constructor fails below, the name then holds ``None``
# instead of being unbound, so later failures are reported against the real
# cause rather than as a confusing NameError.
GROQ_API_KEY = None
HF_TOKEN = None
groq_async_client = None
groq_sync_client = None
inference_client = None

# Securely get the Groq API key and export it so the Groq clients, which are
# constructed without arguments below, can pick it up from the environment.
try:
    GROQ_API_KEY = userdata.get('GROQ_API_KEY')
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
    print("✅ Groq API Key loaded.")
except Exception as e:
    print(f"⚠️ Could not load Groq API Key. Please add it to Colab Secrets. Error: {e}")

# Securely get the Hugging Face token (only for image generation)
try:
    HF_TOKEN = userdata.get('HUGGING_FACE_API_KEY')
    HfFolder.save_token(HF_TOKEN)
    print("✅ Hugging Face Token loaded and authenticated.")
except Exception as e:
    print(f"⚠️ Could not load Hugging Face Token. Please add it to Colab Secrets. Error: {e}")


# ==============================================================================
# PART 3: INITIALIZE API CLIENTS
# ==============================================================================
print("☁️ Initializing API clients...")

# 1. Groq Clients (Async for chatbot, Sync for audio transcription)
try:
    groq_async_client = AsyncGroq()
    groq_sync_client = Groq()
    print("✅ Groq clients initialized (async and sync).")
except Exception as e:
    print(f"🔥 Error initializing Groq clients: {e}")

# 2. Hugging Face Inference Client (only for Image Generation)
# NOTE(review): when the token lookup above failed, HF_TOKEN is None here;
# presumably InferenceClient then falls back to its own default credentials —
# confirm against the huggingface_hub documentation.
try:
    inference_client = InferenceClient(token=HF_TOKEN)
    print("✅ Hugging Face Inference client initialized.")
except Exception as e:
    print(f"🔥 Error initializing Hugging Face client: {e}")

# Define the image model
IMAGE_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"


# ==============================================================================
# PART 4: DEFINE CORE FUNCTIONS
# ==============================================================================

# --- Function for Tab 1: Audio Processing (Using Groq for transcription & translation) ---
def _transcribe_audio_file(audio_path, source_language):
    """Transcribe ``audio_path`` with Groq Whisper (blocking helper).

    Args:
        audio_path: Path of the audio file to upload.
        source_language: One of the dropdown values. "Auto-detect" requests a
            ``verbose_json`` response and reads the language from it; "Tamil"
            maps to language code ``ta``; anything else maps to ``en``.

    Returns:
        A ``(text, language)`` tuple of strings. ``language`` is the language
        reported by the API in auto-detect mode (empty string if the response
        carries none), otherwise ``source_language`` unchanged.
    """
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    request = {
        "file": (audio_path, audio_bytes),
        "model": "whisper-large-v3",
    }
    if source_language == "Auto-detect":
        result = groq_sync_client.audio.transcriptions.create(
            response_format="verbose_json", **request
        )
        # Guard against a response without a language field so the caller
        # never has to deal with None.
        language = getattr(result, "language", None) or ""
        print(f"🔍 Detected language: {language}")
    else:
        lang_code = "ta" if source_language == "Tamil" else "en"
        result = groq_sync_client.audio.transcriptions.create(
            language=lang_code, response_format="json", **request
        )
        language = source_language

    return result.text or "", language


async def process_audio_and_generate(audio_path, source_language, generate_image_checkbox):
    """Transcribe an audio file, translate it to English and optionally draw it.

    Args:
        audio_path: Filesystem path of the uploaded audio, or a falsy value
            when nothing was uploaded.
        source_language: "Auto-detect", "Tamil" or "English".
        generate_image_checkbox: When truthy, an image is generated from the
            English text.

    Returns:
        A ``(transcription, english_text, image)`` tuple. On failure the
        affected text slot holds "Transcription Failed" / "Translation Failed"
        and the remaining slots keep whatever was produced before the failure
        (``image`` is ``None`` when no image was generated).
    """
    # Local import keeps this block self-contained; asyncio is only needed to
    # move the blocking SDK calls off the event loop.
    import asyncio

    if not audio_path:
        gr.Warning("No audio file provided. Please upload an audio file.")
        return "", "", None

    transcription, english_text, image_output = "", "", None

    # 1. Transcribe audio to text (Using Groq Whisper)
    try:
        print("🎤 Calling Groq API for transcription...")
        # The sync Groq client blocks; run it in a worker thread so other
        # requests served by the same event loop are not stalled.
        transcription, detected_language = await asyncio.to_thread(
            _transcribe_audio_file, audio_path, source_language
        )
        print(f"✅ Transcription: {transcription}")
    except Exception as e:
        # gr.Error is an exception class: constructing it without ``raise``
        # shows nothing. gr.Warning displays the message while still letting
        # this handler return its placeholder outputs.
        gr.Warning(f"Groq transcription failed! Error: {e}")
        return "Transcription Failed", "", None

    if not transcription.strip():
        # Nothing to translate or draw; avoid sending empty prompts downstream.
        gr.Warning("No speech was recognized in the audio.")
        return transcription, "", None

    # 2. Translate text to English (Using Groq) - Only if not already in English
    try:
        if detected_language.lower() in ("english", "en"):
            english_text = transcription
            print("✅ Audio is already in English, skipping translation.")
        else:
            print("🌐 Calling Groq API for translation...")
            translation_prompt = f"Translate the following text to English. Provide only the English translation and nothing else. Text: '{transcription}'"

            chat_completion = await groq_async_client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a helpful translation assistant."},
                    {"role": "user", "content": translation_prompt}
                ],
                model="moonshotai/kimi-k2-instruct",
            )
            # The message content may be None; normalize before stripping.
            english_text = (chat_completion.choices[0].message.content or "").strip()
            print(f"✅ Translation: {english_text}")

    except Exception as e:
        gr.Warning(f"Groq translation failed! Error: {e}")
        return transcription, "Translation Failed", None

    # 3. Generate image (Using Hugging Face) if checkbox is ticked
    if generate_image_checkbox:
        if not english_text:
            gr.Warning("Image generation skipped: there is no English text to draw.")
        else:
            try:
                print("🎨 Calling HF API for image generation...")
                # text_to_image is a blocking call as well; keep the loop free.
                image_output = await asyncio.to_thread(
                    inference_client.text_to_image, english_text, model=IMAGE_MODEL_ID
                )
                print("✅ Image generated successfully!")
            except Exception as e:
                gr.Warning(f"Image generation failed! The model might be loading. Error: {e}")

    return transcription, english_text, image_output

# --- Function for Tab 2: Prompt to Image (Using Hugging Face) ---
def generate_image_from_prompt(prompt):
    """Generate an image for ``prompt`` with the Hugging Face inference client.

    Args:
        prompt: Text prompt entered by the user.

    Returns:
        The object returned by ``inference_client.text_to_image`` on success,
        or ``None`` when the prompt is empty/whitespace or generation fails
        (a warning is shown to the user in both cases).
    """
    if not prompt or not prompt.strip():
        gr.Warning("Prompt is empty. Please enter some text.")
        return None

    try:
        print(f"🎨 Calling HF API for image generation from prompt: '{prompt}'")
        return inference_client.text_to_image(prompt, model=IMAGE_MODEL_ID)
    except Exception as e:
        # gr.Error is an exception class: merely constructing it (as the code
        # did before) displays nothing. gr.Warning surfaces the message while
        # keeping the "return None on failure" contract.
        gr.Warning(f"Image generation failed! The model might be loading. Please try again. Error: {e}")
        return None

# --- Function for Tab 3: Groq Chatbot ---
def _history_text(content):
    """Return the plain text of one history entry, or "" if it has none.

    Strings are returned as-is. For a list of parts, the ``"text"`` values of
    dict parts are joined with newlines (some Gradio versions appear to deliver
    structured content this way — TODO confirm). Anything else (``None``,
    file payloads, ...) yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "\n".join(texts)
    return ""


async def chatbot_response(message, history):
    """Answer ``message`` with the Groq chat model, given the prior ``history``.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            dicts with ``"role"`` and ``"content"`` keys. ``None`` or empty
            means no prior turns.

    Returns:
        The assistant's reply text, or an apology string containing the error
        when the API call fails.
    """
    print("💬 Generating chatbot response with Groq...")
    history_groq_format = [{"role": "system", "content": "You are a helpful assistant."}]

    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content ("messages") format.
            turn_pairs = [(turn.get("role"), turn.get("content"))]
        else:
            # Legacy pair ("tuples") format: (user_text, assistant_text).
            human, assistant = turn
            turn_pairs = [("user", human), ("assistant", assistant)]

        for role, content in turn_pairs:
            text = _history_text(content)
            # Skip empty/None/non-text entries and unknown roles: forwarding
            # them would make the API request invalid.
            if role in ("user", "assistant") and text:
                history_groq_format.append({"role": role, "content": text})

    history_groq_format.append({"role": "user", "content": message})

    try:
        chat_completion = await groq_async_client.chat.completions.create(
            messages=history_groq_format,
            model="moonshotai/kimi-k2-instruct"
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # gr.Error is an exception class and shows nothing unless raised;
        # gr.Warning displays the message while the reply below is still
        # returned to the chat window.
        gr.Warning(f"Chatbot failed! Error: {e}")
        return f"Sorry, I encountered an error: {e}"


# ==============================================================================
# PART 5: BUILD THE GRADIO USER INTERFACE
# ==============================================================================
print("🚀 Building the Gradio interface...")

# Three-tab layout: audio pipeline, prompt-to-image, and chatbot. Components
# are created inside the Blocks context; the click handlers are wired up after
# the tabs so that all referenced components already exist.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 All-in-One AI Suite (Groq Audio + HF Images)")
    gr.Markdown("**Tab 1**: Audio transcription & translation powered by Groq | **Tab 2**: Image generation by Hugging Face | **Tab 3**: Chatbot by Groq")

    with gr.Tabs():
        # --- Tab 1: Audio Processing (Groq) ---
        with gr.TabItem("🎙️ Audio to Text & Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    # type="filepath" hands the handler a path on disk, which
                    # process_audio_and_generate opens and uploads.
                    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
                    language_dropdown = gr.Dropdown(
                        choices=["Auto-detect", "Tamil", "English"],
                        value="Auto-detect",
                        label="Source Language"
                    )
                    audio_image_checkbox = gr.Checkbox(label="Generate Image from Translation?", value=True)
                    audio_button = gr.Button("Process Audio", variant="primary")
                with gr.Column(scale=2):
                    transcription_output = gr.Textbox(label="Transcription (Original Language)", interactive=False, lines=3)
                    translation_output = gr.Textbox(label="English Text", interactive=False, lines=3)
                    audio_image_output = gr.Image(label="Generated Image")

        # --- Tab 2: Prompt to Image (Hugging Face) ---
        with gr.TabItem("🖼️ Prompt to Image"):
            with gr.Row():
                with gr.Column(scale=2):
                    image_prompt_input = gr.Textbox(label="Enter your prompt", lines=4, placeholder="e.g., A majestic lion in the savanna at sunset...")
                    image_button = gr.Button("Generate Image", variant="primary")
                with gr.Column(scale=1):
                    image_output = gr.Image(label="Generated Image")

        # --- Tab 3: Chatbot (Groq) ---
        with gr.TabItem("💬 Groq Chatbot"):
            gr.ChatInterface(
                chatbot_response,
                title="AI Chatbot",
                # The description names the model chatbot_response actually calls.
                description="Ask me anything! Powered by Groq (moonshotai/kimi-k2-instruct).",
                examples=[["Hello!"], ["What is the capital of India?"], ["Explain quantum computing in simple terms"]],
            )

    # --- Define button click actions ---
    # Outputs are listed in the same order as the tuple returned by
    # process_audio_and_generate: (transcription, english_text, image).
    audio_button.click(
        fn=process_audio_and_generate,
        inputs=[audio_input, language_dropdown, audio_image_checkbox],
        outputs=[transcription_output, translation_output, audio_image_output],
    )

    image_button.click(
        fn=generate_image_from_prompt,
        inputs=image_prompt_input,
        outputs=image_output
    )

# ==============================================================================
# PART 6: LAUNCH THE APPLICATION
# ==============================================================================
print("🎉 Launching the application! Click the public URL to open.")
# NOTE(review): share=True publishes a public URL; anyone holding the link can
# drive the handlers above, which spend this notebook's API credentials.
demo.launch(share=True, debug=True)

⏳ Installing required libraries...
✅ Libraries installed.
🔑 Setting up API keys and authentication...
✅ Groq API Key loaded.
✅ Hugging Face Token loaded and authenticated.
☁️ Initializing API clients...
✅ Groq clients initialized (async and sync).
✅ Hugging Face Inference client initialized.
🚀 Building the Gradio interface...


  self.chatbot = Chatbot(


🎉 Launching the application! Click the public URL to open.
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://06d61591268078069d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


💬 Generating chatbot response with Groq...
💬 Generating chatbot response with Groq...
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://06d61591268078069d.gradio.live


