In [7]:
# Core dependencies
!pip install streamlit -q

# Speech recognition + text-to-speech
!pip install SpeechRecognition -q
!pip install gTTS -q

# Audio playback support
!apt-get install -y portaudio19-dev -q
!pip install pyaudio soundfile -q

# Extra: to run streamlit in colab
!pip install pyngrok -q
!pip install -q streamlit google-generativeai PyPDF2 python-docx pillow -q

Reading package lists...
Building dependency tree...
Reading state information...
portaudio19-dev is already the newest version (19.6.0-1.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [16]:
!apt-get update && apt-get install -y flac portaudio19-dev

# Install pyaudio (might need system dependencies)
!pip install pyaudio

0% [Working]            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [Connecting to archive.ubuntu.com (185.125.190.82)] [1 InRelease 14.2 kB/129                                                                               Get:2 https://cli.github.com/packages stable InRelease [3,917 B]
0% [Connecting to archive.ubuntu.com (185.125.190.82)] [1 InRelease 22.9 kB/1290% [Connecting to archive.ubuntu.com (185.125.190.82)] [1 InRelease 41.7 kB/129                                                                               Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
0% [Waiting for headers] [1 InRelease 66.3 kB/129 kB 51%] [3 InRelease 3,632 B/0% [Waiting for headers] [1 InRelease 69.2 kB/129 kB 54%] [Waiting for headers]                                                                               Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
0% 

In [18]:
%%writefile app.py
import streamlit as st
import os
import tempfile
import traceback
import io
import base64
from typing import List
import google.generativeai as genai
from PIL import Image
import PyPDF2
import docx
import requests
import json

# Audio recording imports for Colab
# COLAB_ENV records whether every optional Colab/IPython helper could be imported.
try:
    from google.colab import output
    from IPython.display import Javascript, HTML, display, Audio
    import numpy as np
    import wave
except ImportError:
    # Any one missing module switches the Colab-only features off.
    COLAB_ENV = False
else:
    COLAB_ENV = True

# Page configuration
# Settings are gathered in one mapping and handed to Streamlit in a single call.
PAGE_SETTINGS = {
    "page_title": "Medical Chatbot - Gemini 1.5 Flash",
    "page_icon": "🤖",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)

# Custom CSS for better UI
# The stylesheet lives in a named constant; it is injected once per script run below.
CUSTOM_CSS = """
<style>
    .stApp > header {
        background-color: transparent;
    }
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        text-align: center;
        margin-bottom: 2rem;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
    }
    .chat-message {
        padding: 1rem;
        border-radius: 10px;
        margin: 0.5rem 0;
    }
    .user-message {
        background-color: #e3f2fd;
        border-left: 4px solid #2196f3;
    }
    .assistant-message {
        background-color: #f3e5f5;
        border-left: 4px solid #9c27b0;
    }
    .stButton > button {
        width: 100%;
        border-radius: 10px;
        border: 1px solid #ddd;
        padding: 0.5rem;
        font-weight: 500;
    }
    .audio-recorder {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 8px;
        border: 1px solid #dee2e6;
        margin: 1rem 0;
    }
</style>
"""

# unsafe_allow_html is required for the <style> tag to be emitted as HTML.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Initialize session state
# Each key is seeded only when absent, so existing values are left untouched.
SESSION_DEFAULTS = (
    ("messages", []),        # chat history as {"role", "content"} dicts
    ("gemini_api_key", ""),  # last API key entered in the sidebar
    ("model", None),         # configured Gemini model, or None until a key is set
)
for state_key, default_value in SESSION_DEFAULTS:
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value

def configure_gemini(api_key: str):
    """Register the API key with the Gemini SDK and build the chat model.

    Args:
        api_key: Key forwarded to ``genai.configure``.

    Returns:
        The ``genai.GenerativeModel`` for ``'gemini-1.5-flash'``, or ``None``
        when configuration raised (the error is reported through ``st.error``).
    """
    try:
        genai.configure(api_key=api_key)
        return genai.GenerativeModel('gemini-1.5-flash')
    except Exception as exc:
        st.error(f"Error configuring Gemini: {str(exc)}")
    return None

def extract_text_from_pdf(file_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF on disk.

    Returns:
        The page texts in page order, each followed by a newline. Returns ""
        (after reporting through ``st.error``) when the file cannot be opened
        or parsed.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # `or ""` guards against a page whose extraction yields None/empty
            # (e.g. an image-only page): previously `None + "\n"` raised and the
            # broad except below discarded the text of every other page too.
            page_texts = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
        # Join once instead of growing a string page by page.
        return "".join(page_texts)
    except Exception as e:
        st.error(f"Error reading PDF: {str(e)}")
        return ""

def extract_text_from_docx(file_path: str) -> str:
    """Collect the text of every paragraph in a DOCX file.

    Args:
        file_path: Path of the DOCX document on disk.

    Returns:
        Each paragraph's text followed by a newline, in document order.
        Returns "" (after reporting through ``st.error``) if reading fails.
    """
    try:
        document = docx.Document(file_path)
        return "".join(para.text + "\n" for para in document.paragraphs)
    except Exception as exc:
        st.error(f"Error reading DOCX: {str(exc)}")
        return ""

def process_uploaded_files(uploaded_files) -> str:
    """Extract text from each uploaded file and concatenate the results.

    Each upload is written to a temporary file, its text is extracted according
    to its MIME type (``file.type``), and a section headed
    ``--- Content from <name> ---`` is appended to the result. Images and
    unsupported types contribute a bracketed placeholder instead of content.

    Args:
        uploaded_files: Iterable of upload objects exposing ``name``, ``type``
            and ``getvalue()``; may be empty or ``None``.

    Returns:
        The combined text of all files that were processed successfully. A file
        that fails is reported through ``st.error`` and skipped.
    """
    combined_text = ""

    if not uploaded_files:
        return combined_text

    for file in uploaded_files:
        tmp_file_path = None
        try:
            # Save uploaded file temporarily (the PDF/DOCX extractors take a path)
            suffix = os.path.splitext(file.name)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                # Remember the path before writing so a failed write is cleaned up too.
                tmp_file_path = tmp_file.name
                tmp_file.write(file.getvalue())

            # Extract text based on file type
            if file.type == "application/pdf":
                text = extract_text_from_pdf(tmp_file_path)
            elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                text = extract_text_from_docx(tmp_file_path)
            elif file.type.startswith("text/"):
                with open(tmp_file_path, 'r', encoding='utf-8') as f:
                    text = f.read()
            elif file.type.startswith("image/"):
                # Images are only named here; the caller passes them to the model itself.
                text = f"[Image file: {file.name}]"
            else:
                text = f"[Unsupported file type: {file.name}]"

            combined_text += f"\n--- Content from {file.name} ---\n{text}\n"

        except Exception as e:
            st.error(f"Error processing {file.name}: {str(e)}")

        finally:
            # Clean up on every path: previously a failure during extraction
            # skipped the unlink and left the uploaded document in the temp dir.
            if tmp_file_path is not None:
                try:
                    os.unlink(tmp_file_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file must not abort the batch.
                    pass

    return combined_text

def get_gemini_response(prompt: str, file_content: str = "", images: List = None) -> str:
    """Ask the configured Gemini model to answer a user query.

    Args:
        prompt: The user's question.
        file_content: Text extracted from uploaded files; a fixed placeholder
            sentence is substituted when it is empty.
        images: Optional list of image objects appended after the text prompt.

    Returns:
        The model's reply text, a notice asking for an API key when no model is
        stored in ``st.session_state.model``, or an error message string when
        anything raises.
    """
    try:
        model = st.session_state.model
        if not model:
            return "❌ Please configure your Gemini API key first."

        # Placeholder text keeps the prompt well-formed when nothing was uploaded.
        context_block = file_content or 'No additional files provided.'

        full_prompt = f"""You are a helpful medical chatbot assistant. Please provide accurate, informative, and helpful responses.

Important: Always remind users to consult with healthcare professionals for serious medical concerns.

User Query: {prompt}

Additional Context from uploaded files:
{context_block}

Please provide a comprehensive and helpful response:"""

        # With images the request is a list (prompt first); otherwise the bare prompt string.
        request = ([full_prompt] + images) if images else full_prompt

        return model.generate_content(request).text

    except Exception as exc:
        return f"❌ Error generating response: {str(exc)}"

# JavaScript for audio recording in Colab
def get_audio_recorder_js():
    """Return the HTML + JavaScript snippet for the in-browser voice recorder.

    The snippet renders start/stop buttons, records from the microphone with
    ``MediaRecorder``, plays the take back, and sends it base64-encoded to the
    Python callback registered under the name ``'handle_audio'``.

    Notes:
        * The payload is in the browser's native recording container (reported
          by ``mediaRecorder.mimeType``), not WAV.
        * The chunk buffer is reset when a recording starts, so a take can
          never be assembled from a buffer cleared at the wrong moment.
        * Microphone tracks are stopped after each take so the browser releases
          the device.
        * When ``google.colab.kernel`` is not reachable from the page, the
          status line says so instead of throwing.

    Returns:
        str: Markup and script to embed in an HTML component.
    """
    return """
    <div id="audio-recorder">
        <button id="recordButton" onclick="toggleRecording()">🎤 Start Recording</button>
        <button id="stopButton" onclick="stopRecording()" disabled>⏹️ Stop Recording</button>
        <div id="status">Ready to record</div>
        <audio id="audioPlayback" controls style="display:none;"></audio>
    </div>

    <script>
    let mediaRecorder;
    let audioChunks = [];
    let isRecording = false;

    function setStatus(text) {
        document.getElementById('status').textContent = text;
    }

    async function toggleRecording() {
        if (isRecording) {
            return;
        }
        try {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            mediaRecorder = new MediaRecorder(stream);
            // Every take starts with an empty buffer.
            audioChunks = [];

            mediaRecorder.ondataavailable = event => {
                if (event.data && event.data.size > 0) {
                    audioChunks.push(event.data);
                }
            };

            mediaRecorder.onstop = () => {
                // Release the microphone so the browser's recording indicator turns off.
                stream.getTracks().forEach(track => track.stop());

                // Label the blob with the container the recorder actually produced.
                const mimeType = mediaRecorder.mimeType || 'audio/webm';
                const audioBlob = new Blob(audioChunks, { type: mimeType });
                const audioUrl = URL.createObjectURL(audioBlob);
                const audioElement = document.getElementById('audioPlayback');
                audioElement.src = audioUrl;
                audioElement.style.display = 'block';

                // Convert to base64 and send to Python
                const reader = new FileReader();
                reader.onloadend = function() {
                    const base64Audio = reader.result.split(',')[1];
                    if (typeof google !== 'undefined' && google.colab && google.colab.kernel) {
                        google.colab.kernel.invokeFunction('handle_audio', [base64Audio], {});
                    } else {
                        setStatus('⚠️ Recording captured, but no Colab kernel is reachable from this page');
                    }
                };
                reader.readAsDataURL(audioBlob);
            };

            mediaRecorder.start();
            isRecording = true;
            document.getElementById('recordButton').disabled = true;
            document.getElementById('stopButton').disabled = false;
            setStatus('🔴 Recording...');

        } catch (err) {
            console.error('Error accessing microphone:', err);
            setStatus('❌ Error accessing microphone');
        }
    }

    function stopRecording() {
        if (mediaRecorder && isRecording) {
            mediaRecorder.stop();
            isRecording = false;
            document.getElementById('recordButton').disabled = false;
            document.getElementById('stopButton').disabled = true;
            setStatus('⏹️ Recording stopped');
        }
    }
    </script>
    """

# Main header
st.markdown('<h1 class="main-header">🤖 Medical Chatbot - Gemini 1.5 Flash</h1>', unsafe_allow_html=True)

# Sidebar configuration: API key, file uploads, voice input and chat reset.
with st.sidebar:
    st.header("⚙️ Configuration")

    # Gemini API Key input
    api_key = st.text_input(
        "🔑 Gemini API Key",
        value=st.session_state.gemini_api_key,
        type="password",
        help="Get your API key from https://makersuite.google.com/app/apikey"
    )

    # (Re)configure when the key changed, or when an earlier attempt with the
    # same key failed and left no model. Previously the failed key was stored
    # and never retried until the user edited the field.
    key_changed = api_key != st.session_state.gemini_api_key
    if api_key and (key_changed or st.session_state.model is None):
        st.session_state.gemini_api_key = api_key
        st.session_state.model = configure_gemini(api_key)
        if st.session_state.model:
            st.success("✅ Gemini configured successfully!")
        else:
            st.error("❌ Failed to configure Gemini")

    st.divider()

    # File upload; `uploaded_files` is read again by the chat handler further down.
    st.subheader("📎 Upload Files")
    uploaded_files = st.file_uploader(
        "Upload medical documents, images, or text files",
        accept_multiple_files=True,
        type=['pdf', 'docx', 'txt', 'png', 'jpg', 'jpeg', 'gif'],
        help="Supported: PDF, DOCX, TXT, Images"
    )

    if uploaded_files:
        st.success(f"✅ {len(uploaded_files)} file(s) uploaded")

    st.divider()

    # Voice input section (Colab-specific)
    st.subheader("🎤 Voice Input")
    if COLAB_ENV:
        st.info("Voice input available in Colab environment")
        if st.button("🎤 Enable Voice Recording"):
            st.components.v1.html(get_audio_recorder_js(), height=200)
    else:
        st.warning("Voice input optimized for Google Colab")

    st.divider()

    # Clear chat button: empty the history and rerun so the page redraws without it.
    if st.button("🗑️ Clear Chat History"):
        st.session_state.messages = []
        st.rerun()

# Display chat messages
# Replays the stored conversation, one chat bubble per entry.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Audio handling function for Colab
def handle_audio(base64_audio):
    """Handle audio input from JavaScript in Colab.

    Decodes the base64 payload, writes it to a temporary file and acknowledges
    receipt through ``st.info``. No speech-to-text is performed yet.

    Args:
        base64_audio: Base64-encoded audio bytes sent by the recorder script.

    Returns:
        None. Failures are reported through ``st.error`` rather than raised.
    """
    tmp_file_path = None
    try:
        # Decode base64 audio
        audio_data = base64.b64decode(base64_audio)

        # Save as temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            # Remember the path before writing so a failed write is cleaned up too.
            tmp_file_path = tmp_file.name
            tmp_file.write(audio_data)

        # Here you would typically use speech recognition
        # For now, we'll just acknowledge the audio
        st.info("🎵 Audio received! (Speech-to-text conversion would happen here)")

    except Exception as e:
        st.error(f"Error processing audio: {str(e)}")

    finally:
        # Clean up on every path: previously any failure after the file was
        # created skipped the unlink and left the recording on disk.
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError:
                # Best-effort cleanup; nothing useful to do if removal fails.
                pass

# Register the audio handler for Colab
# The recorder script calls google.colab.kernel.invokeFunction('handle_audio', ...),
# so the name registered here must stay 'handle_audio'.
# NOTE(review): presumably this only has an effect when the script runs inside a
# Colab kernel; confirm it is reachable from the Streamlit server process.
if COLAB_ENV:
    output.register_callback('handle_audio', handle_audio)

# Main chat interface
prompt = st.chat_input("💬 Ask me anything about medical topics...")

if prompt:
    # Store the question in the history, then echo it in the conversation.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Context gathered from the sidebar uploads (empty when nothing was uploaded).
    file_content = ""
    images = []

    if uploaded_files:
        with st.spinner("📂 Processing uploaded files..."):
            file_content = process_uploaded_files(uploaded_files)

            # Images are opened separately and passed to the model as objects.
            for upload in uploaded_files:
                if not upload.type.startswith("image/"):
                    continue
                try:
                    images.append(Image.open(upload))
                except Exception as exc:
                    st.error(f"Error processing image {upload.name}: {str(exc)}")

    # Generate the answer inside the assistant bubble while a spinner is shown.
    with st.chat_message("assistant"), st.spinner("🤔 Thinking..."):
        try:
            reply = get_gemini_response(prompt, file_content, images)
            st.markdown(reply)

            # Keep the answer so it is replayed on later reruns.
            st.session_state.messages.append({"role": "assistant", "content": reply})

        except Exception as exc:
            failure_text = f"⚠ Error generating response: {str(exc)}"
            st.error(failure_text)
            st.session_state.messages.append({"role": "assistant", "content": failure_text})

            # Full traceback is tucked into an expander for debugging.
            with st.expander("🔍 Debug Information"):
                st.code(traceback.format_exc())

Overwriting app.py


In [17]:
# 4: Run Your App (With sharing - requires ngrok token)

import getpass
import os
import socket
import subprocess
import time

from pyngrok import ngrok

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_SECONDS = 30

# Never hardcode the ngrok token in the notebook: read it from the environment
# or prompt for it. The token that used to be committed on this line is exposed
# and should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass.getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)


def run_app():
    """Start the Streamlit server as a child process and return its Popen handle.

    A child process (unlike a thread running a shell magic) can be terminated
    when this cell is run again.
    """
    return subprocess.Popen([
        "streamlit", "run", "app.py",
        "--server.headless", "true",
        "--server.port", str(STREAMLIT_PORT),
    ])


def wait_for_port(port, timeout):
    """Return True once localhost:port accepts a TCP connection, False after `timeout` seconds."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("localhost", port), timeout=1):
                return True
        except OSError:
            time.sleep(0.5)
    return False


# Stop the server left over from a previous run of this cell; otherwise the new
# one fails with "Port 8501 is already in use".
previous_app = globals().get("app_process")
if previous_app is not None and previous_app.poll() is None:
    previous_app.terminate()
    try:
        previous_app.wait(timeout=10)
    except subprocess.TimeoutExpired:
        previous_app.kill()

# Terminate any active ngrok tunnels before starting a new one
ngrok.kill()

app_process = run_app()

# Wait until the server actually listens instead of sleeping a fixed 10 seconds.
if not wait_for_port(STREAMLIT_PORT, STARTUP_TIMEOUT_SECONDS):
    print(f"⚠️ Streamlit is not listening on port {STREAMLIT_PORT} after {STARTUP_TIMEOUT_SECONDS}s; the link below may not work yet.")

# Create a public URL using ngrok and display it
try:
    public_url = ngrok.connect(STREAMLIT_PORT)
    # Print just the URL when the result is a tunnel object exposing `.public_url`.
    share_link = getattr(public_url, "public_url", public_url)
    print("🚀 Your app is live!")
    print(f"🌐 Share this link: {share_link}")
    print("📱 Anyone can access your app with this link!")
except Exception as exc:
    # Report the real cause; a bare `except:` used to blame the token for every failure.
    print(f"⚠️ Could not open the ngrok tunnel ({exc}). App is running locally on port {STREAMLIT_PORT}.")


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
2025-09-11 09:39:38.715 Port 8501 is already in use
🚀 Your app is live!
🌐 Share this link: NgrokTunnel: "https://f7f3e2ad7bc3.ngrok-free.app" -> "http://localhost:8501"
📱 Anyone can access your app with this link!
