In [None]:
!pip install transformers gradio sentencepiece

import re

import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline

# -------------------------------
# 1. Load BLIP for Image Captioning
# -------------------------------
# The processor and the captioning model are both loaded from the same checkpoint.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

def generate_caption(image):
    """Return a BLIP-generated caption for *image*.

    The image is encoded by the module-level ``processor`` into PyTorch
    tensors, ``model.generate`` produces at most 30 new tokens, and the first
    generated sequence is decoded with special tokens stripped.

    Returns the literal string "No image provided" when *image* is ``None``.
    """
    if image is not None:
        encoded = processor(images=image, return_tensors="pt")
        token_ids = model.generate(**encoded, max_new_tokens=30)
        return processor.decode(token_ids[0], skip_special_tokens=True)
    return "No image provided"

# -------------------------------
# 2. Load Whisper for Speech-to-Text
# -------------------------------
# Speech-recognition pipeline backed by the Whisper "small" checkpoint;
# speech_to_text() calls it on the recorded/uploaded audio.
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)

def speech_to_text(audio):
    """Transcribe *audio* with the module-level ``asr`` pipeline.

    Returns the ``"text"`` field of the pipeline result, or an empty string
    when *audio* is ``None``.
    """
    return "" if audio is None else asr(audio)["text"]

# -------------------------------
# 3. Hinglish Normalization (Rule-based for Prototype)
# -------------------------------
# Hinglish / shorthand term -> canonical English keyword used by the
# problem classifier.
_HINGLISH_REPLACEMENTS = {
    "paani": "water",
    "gaddha": "pothole",
    "sadak": "road",
    "bijli": "electricity",
    "light": "streetlight",
    "kooda": "garbage",
    "pipe": "pipeline",
    "leak": "leakage",
}

# Match whole words only (an optional plural "s" is captured separately and
# carried over). Plain substring replacement would re-expand text that is
# already normalized ("streetlight" -> "streetstreetlight",
# "pipeline" -> "pipelineline") and mangle unrelated words ("delightful").
_HINGLISH_PATTERN = re.compile(
    r"\b(" + "|".join(map(re.escape, _HINGLISH_REPLACEMENTS)) + r")(s?)\b"
)


def normalize_text(text):
    """Lower-case *text* and translate known Hinglish terms to English keywords.

    Replacement happens in a single pass over whole words, so a replacement's
    output is never rewritten again and words that merely contain a key are
    left untouched.

    Args:
        text: Free-form complaint text; may be empty or ``None``.

    Returns:
        The lower-cased, normalized text, or "" when *text* is empty or ``None``.
    """
    if not text:
        return ""
    return _HINGLISH_PATTERN.sub(
        lambda match: _HINGLISH_REPLACEMENTS[match.group(1)] + match.group(2),
        text.lower(),
    )

# -------------------------------
# 4. Problem Identification
# -------------------------------
def identify_problem(caption, text):
    """Classify a complaint from its image caption and complaint text.

    Both inputs are lower-cased and joined with a space; categories are then
    tried in a fixed priority order and the first one with any keyword
    occurring as a substring wins. Falls back to "Other Civic Issue".
    """
    haystack = f"{caption.lower()} {text.lower()}"
    # Order matters: earlier categories take precedence over later ones.
    rules = (
        ("Pothole / Road Damage", ("pothole", "road")),
        ("Streetlight / Electrical Issue", ("streetlight", "light", "electricity")),
        ("Water Leakage / Pipeline Issue", ("water", "leak", "pipeline")),
        ("Garbage / Sanitation Issue", ("garbage", "dustbin")),
    )
    for label, keywords in rules:
        if any(keyword in haystack for keyword in keywords):
            return label
    return "Other Civic Issue"

# -------------------------------
# 5. Department Mapping
# -------------------------------
def map_department(problem):
    """Return the department responsible for *problem*.

    Problem labels without an entry in the table are routed to
    "General Complaints Department".
    """
    departments = dict(
        [
            ("Pothole / Road Damage", "Roads Department"),
            ("Streetlight / Electrical Issue", "Electrical Department"),
            ("Water Leakage / Pipeline Issue", "Water Department"),
            ("Garbage / Sanitation Issue", "Sanitation Department"),
            ("Other Civic Issue", "General Complaints Department"),
        ]
    )
    try:
        return departments[problem]
    except KeyError:
        return "General Complaints Department"

# -------------------------------
# 6. Main Pipeline
# -------------------------------
def civic_analyzer(image, text, audio, location):
    """Run the full complaint pipeline and return a report dict.

    The image is always captioned. The complaint text comes from *text* when
    it is non-empty; otherwise *audio* is transcribed, if provided. The
    normalized complaint and the caption are then classified and mapped to a
    department.

    Returns a dict with the caption, complaint text, identified problem,
    department and location; placeholder strings stand in for a missing
    complaint or location.
    """
    caption = generate_caption(image)

    # Typed text takes precedence over a voice recording.
    if text:
        complaint = normalize_text(text)
    elif audio:
        complaint = normalize_text(speech_to_text(audio))
    else:
        complaint = ""

    problem = identify_problem(caption, complaint)

    report = {
        "Image Caption (BLIP)": caption,
        "Complaint Text": complaint or "(no text/voice provided)",
        "Problem Identified": problem,
        "Department": map_department(problem),
        "Location": location or "(no GPS provided)",
    }
    return report

# -------------------------------
# 7. Gradio UI with Auto GPS
# -------------------------------
# Build the UI: image / text / audio inputs plus a location box that the
# browser fills in on page load, and a JSON panel showing the analysis.
with gr.Blocks() as demo:
    gr.Markdown("## 🏙️ AI-Powered Civic Collaboration (Prototype)")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Civic Issue Image")
        text_input = gr.Textbox(lines=2, placeholder="Enter complaint in Hinglish (optional)...")
        audio_input = gr.Audio(type="filepath", label="Or record/upload voice complaint (optional)")
        location_output = gr.Textbox(label="Auto-captured Location")

    output_json = gr.JSON(label="System Output")

    submit_btn = gr.Button("Submit")

    submit_btn.click(
        civic_analyzer,
        inputs=[image_input, text_input, audio_input, location_output],
        outputs=[output_json]
    )

    # Auto-capture location from browser.
    # The promise must settle on every path: without an error callback it
    # stays pending forever when the user denies the permission prompt or the
    # position lookup fails, so failures (and a 10 s timeout) resolve to the
    # same fallback string used when geolocation is unsupported.
    demo.load(
        fn=None,
        inputs=None,
        outputs=location_output,
        js="""
            async () => {
                if (!navigator.geolocation) {
                    return "Location not available";
                }
                return new Promise((resolve) => {
                    navigator.geolocation.getCurrentPosition(
                        (pos) => resolve(pos.coords.latitude + "," + pos.coords.longitude),
                        () => resolve("Location not available"),
                        { timeout: 10000 }
                    );
                });
            }
        """
    )

demo.launch(debug=True)




Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://bf9a727f26ed9a32e0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
