In [None]:
!pip install pytesseract opencv-python requests gradio moviepy pdf2image numpy SpeechRecognition
!apt-get install tesseract-ocr
# Install poppler-utils in Colab
!apt-get install -y poppler-utils
# Install the Python PDF libraries (pdf2image is already installed above; pdfplumber is not imported in this cell)
!pip install pdf2image
!pip install pdfplumber
!pdftotext -v



import functools
import json
import logging
import os
import tempfile

import cv2
import gradio as gr
import moviepy.editor as mp
import numpy as np
import pytesseract
import requests
import speech_recognition as sr
from pdf2image import convert_from_path

# Setup logging for debugging
logging.basicConfig(level=logging.INFO)

# OpenRouter API key.
# SECURITY: credentials must not be embedded in source or notebooks, because
# anyone who can read the file (or its history) can use them. The key is read
# from the environment instead; any key that was previously committed in this
# file should be treated as compromised and rotated.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
if not OPENROUTER_API_KEY:
    logging.warning(
        "OPENROUTER_API_KEY is not set; requests to OpenRouter are expected to fail."
    )
YOUR_SITE_URL = "https://your-site-url.com"  # Optional, replace with your site URL
YOUR_APP_NAME = "My Chatbot App"  # Optional, replace with your app name

# Utility to safely handle errors and add debugging
def debug_logging(func):
    """Decorate *func* so each call is logged and exceptions become a string.

    The wrapper logs the function name and its arguments at INFO level,
    then calls *func*. If *func* raises any ``Exception``, the error is
    logged (with traceback) and the wrapper returns the string
    ``"An error occurred: <message>"`` instead of propagating it.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name, docstring and signature metadata as
        *func* (via ``functools.wraps``).
    """
    # functools.wraps keeps __name__/__doc__/__wrapped__ so that log output
    # and signature introspection still refer to the real function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        logging.info(
            "Running %s with args %s, kwargs %s", func.__name__, args, kwargs
        )
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # logging.exception logs at ERROR level and appends the traceback.
            logging.exception("Error in %s: %s", func.__name__, e)
            return f"An error occurred: {str(e)}"
    return wrapper

# API interaction function
@debug_logging
def query_qwen_api(prompt):
    """Send *prompt* to the Qwen model via OpenRouter and return the reply.

    Args:
        prompt: Text placed in a single ``user`` message.

    Returns:
        The ``content`` of the first choice's message on success. Otherwise
        a string starting with ``"Error:"`` describing the HTTP status or
        the missing response data. Exceptions raised here (for example a
        timeout) are handled by the ``debug_logging`` decorator.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": YOUR_SITE_URL,
        "X-Title": YOUR_APP_NAME,
    }
    payload = {
        "model": "qwen/qwen-2-vl-72b-instruct",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 500,
        "temperature": 0.7,
        "top_p": 1,
    }

    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        data=json.dumps(payload),
        # requests has no default timeout; without one a stalled connection
        # would block the UI handler indefinitely. (connect, read) seconds.
        timeout=(10, 120),
    )
    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    result = response.json()
    choices = result.get("choices") if isinstance(result, dict) else None
    if not choices:
        return "Error: No valid response from the API."

    # Guard each level so a partial payload yields the error string rather
    # than a KeyError/TypeError or a None reply.
    first_choice = choices[0] if isinstance(choices[0], dict) else {}
    content = (first_choice.get("message") or {}).get("content")
    if content is None:
        return "Error: No valid response from the API."
    return content

# Summarize extracted text from image
@debug_logging
def image_to_text(image_file):
    """OCR an image file and return a model-written summary of its text.

    Args:
        image_file: Path of the image to read with ``cv2.imread``.

    Returns:
        The result of ``query_qwen_api`` for the extracted text, or a short
        message when the file cannot be read as an image or contains no text.
    """
    img = cv2.imread(image_file)
    if img is None:
        # cv2.imread reports an unreadable or unsupported file by returning
        # None instead of raising, so check before handing it to Tesseract.
        return "Could not read the uploaded file as an image."

    # Strip so whitespace-only OCR output (e.g. a lone form feed) counts as
    # "no text" instead of being sent to the API.
    text = pytesseract.image_to_string(img).strip()
    if not text:
        return "No text found in the image."
    return query_qwen_api(f"Summarize the following text: {text}")

# Convert PDF to text
@debug_logging
def pdf_to_text(pdf_file):
    """OCR every page of a PDF and return a model-written summary.

    Args:
        pdf_file: Path of the PDF passed to ``convert_from_path``.

    Returns:
        The result of ``query_qwen_api`` for the concatenated page text, or
        ``"No text found in the PDF."`` when OCR yields only whitespace.
    """
    pages = convert_from_path(pdf_file, 300)  # 300 DPI for better quality
    page_texts = [
        # Pages are converted from RGB to BGR channel order, matching the
        # layout used for images loaded through OpenCV elsewhere in the file.
        pytesseract.image_to_string(
            cv2.cvtColor(np.array(page), cv2.COLOR_RGB2BGR)
        )
        for page in pages
    ]
    # Join once instead of repeated +=, and strip so output consisting only
    # of whitespace/form feeds is treated as empty.
    text = "".join(page_texts).strip()
    if not text:
        return "No text found in the PDF."
    return query_qwen_api(f"Summarize the following text: {text}")

# Convert audio to text
@debug_logging
def convert_audio_to_text(audio_file):
    """Transcribe an audio file with the Google recognizer.

    Args:
        audio_file: Audio file handed to ``sr.AudioFile``.

    Returns:
        Whatever ``recognize_google`` returns for the recorded audio.
    """
    speech_engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as audio_source:
        # Read the whole file into an audio-data object, then transcribe it.
        captured = speech_engine.record(audio_source)
        transcript = speech_engine.recognize_google(captured)
        return transcript

# Extract audio from video and transcribe to text
@debug_logging
def video_to_audio(video_file):
    """Extract a video's audio track and return its transcription.

    Args:
        video_file: Either a filesystem path (``str`` / ``os.PathLike``) or
            an object exposing the path as ``.name``.

    Returns:
        The result of ``convert_audio_to_text`` for the extracted audio, or
        a short message when the video has no audio track.
    """
    # Accept a plain path as well as a file-like object with ``.name``;
    # the UI's file input is configured with type="filepath".
    if isinstance(video_file, (str, os.PathLike)):
        video_path = os.fspath(video_file)
    else:
        video_path = video_file.name

    video = mp.VideoFileClip(video_path)
    try:
        if video.audio is None:
            return "No audio track found in the video."
        # A per-call temporary directory avoids clobbering a shared
        # "extracted_audio.wav" between calls and removes the file afterwards.
        with tempfile.TemporaryDirectory() as tmp_dir:
            audio_file = os.path.join(tmp_dir, "extracted_audio.wav")
            video.audio.write_audiofile(audio_file)  # Extract audio from video
            return convert_audio_to_text(audio_file)
    finally:
        # Release the clip's reader resources even if extraction fails.
        video.close()

# Handle all types of media uploads
@debug_logging
def handle_media_upload(uploaded_file, chat_history):
    """Summarize an uploaded PDF, video or image and add it to the chat.

    The file type is chosen from the (lower-cased) file name: ``.pdf`` goes
    to ``pdf_to_text``, known video extensions go to ``video_to_audio``, and
    anything else is treated as an image.

    Args:
        uploaded_file: A filesystem path (``str`` / ``os.PathLike``), an
            object exposing the path as ``.name``, or ``None`` when no file
            is selected.
        chat_history: Current chat history list.

    Returns:
        The value returned by ``chat_with_ai`` for the summary, or the
        unchanged history in the same three-value shape when there is no
        file to process.
    """
    if uploaded_file is None:
        # Nothing to process (e.g. the upload was cleared); leave the
        # conversation as it is.
        history = chat_history if chat_history is not None else []
        return history, history, ""

    # The UI's file input uses type="filepath", which supplies a plain path
    # string; fall back to ``.name`` for file-like objects.
    if isinstance(uploaded_file, (str, os.PathLike)):
        file_path = os.fspath(uploaded_file)
    else:
        file_path = uploaded_file.name

    file_name = file_path.lower()
    if file_name.endswith('.pdf'):
        summary = pdf_to_text(file_path)
    elif file_name.endswith(('.mp4', '.mov', '.avi', '.mkv', '.flv', '.webm', '.wmv', '.mpg', '.mpeg')):
        # video_to_audio receives the upload value itself, as it did before.
        summary = video_to_audio(uploaded_file)
    else:
        summary = image_to_text(file_path)

    # Use OpenRouter API to summarize the text
    return chat_with_ai(summary, chat_history)

# Chat function
@debug_logging
def chat_with_ai(user_input, history=None):
    """Send *user_input* to the model and return the updated conversation.

    Args:
        user_input: The message text to send.
        history: Existing list of ``{"role", "content"}`` message dicts, or
            ``None`` to start a new conversation. The list passed in is not
            modified; an updated copy is returned.

    Returns:
        A 3-tuple ``(history, history, "")``: the updated history twice (for
        the chat display and the stored state) and an empty string used to
        clear the input box.
    """
    # A ``[]`` default would be shared between calls, leaking one
    # conversation into the next; copy so the caller's list stays untouched.
    history = list(history) if history else []

    # Skip the API round-trip for blank messages.
    if not user_input or not str(user_input).strip():
        return history, history, ""

    # Append user input to the history
    history.append({"role": "user", "content": user_input})

    # Get AI response
    ai_response = query_qwen_api(user_input)

    # Append AI response to the history
    history.append({"role": "assistant", "content": ai_response})

    return history, history, ""  # Return updated chat history and clear input box

# Custom CSS for professional appearance.
# This stylesheet text is embedded in a <style> tag by the Gradio UI section.
# It sets colours, borders and fonts for the page container, the chat panel,
# text inputs and buttons (including a hover colour for buttons).
# NOTE(review): the selectors (.gradio-chatbot, .gradio-textbox, .gradio-button)
# assume Gradio renders elements with these class names — confirm against the
# rendered page for the installed Gradio version.
custom_css = """
    .gradio-container {
        background-color: #f4f7fa;
        color: #333;
        font-family: Arial, sans-serif;
    }
    .gradio-chatbot {
        background-color: #ffffff;
        border: 1px solid #ddd;
        border-radius: 10px;
        padding: 10px;
        margin-bottom: 20px;
    }
    .gradio-textbox input {
        background-color: #f0f4f7;
        border: 1px solid #ccc;
        border-radius: 5px;
    }
    .gradio-button {
        background-color: #007bff;
        color: white;
        border-radius: 5px;
        font-weight: bold;
    }
    .gradio-button:hover {
        background-color: #0056b3;
    }
"""

# Gradio UI
# Builds the page: a style tag, per-session chat state, the chat display, a
# text box, a file upload and a send button, then wires the two event handlers.
with gr.Blocks() as demo:
    gr.HTML(f"<style>{custom_css}</style>")  # Inject the custom CSS

    chat_history = gr.State([])  # Persistent chat history
    chatbot = gr.Chatbot(type="messages")  # Chat interface ({"role", "content"} message dicts)
    input_box = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    media_input = gr.File(type="filepath", label="Upload Image, PDF, or Video")  # Accepts file paths
    send_button = gr.Button("Send")

    # Link inputs and outputs
    # Clicking "Send" passes the text and stored history to chat_with_ai; its
    # three return values update the chat display, the state and the text box.
    send_button.click(chat_with_ai,
                      inputs=[input_box, chat_history],
                      outputs=[chatbot, chat_history, input_box])  # Clear input_box after sending
    # A change to the file input runs handle_media_upload with the file and history.
    # NOTE(review): handle_media_upload returns chat_with_ai's three-value
    # result, while only two outputs are listed here — confirm this is intended.
    media_input.change(handle_media_upload,
                       inputs=[media_input, chat_history],
                       outputs=[chatbot, chat_history])

# Launch the Gradio app
demo.launch()


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.5).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
pdftotext version 22.02.0
Copyright 2005-2022 The Poppler Developers - http://poppler.freedesktop.org
Copyright 1996-2011 Glyph & Cog, LLC
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d63fd9e59a4d53dd28.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `

