In [None]:
!pip install torch
!pip install gradio
!pip install pymupdf
!pip install markdown2
!pip install requests
!pip install weasyprint
!pip install huggingface_hub
!pip install pillow
!pip install transformers

In [None]:
import hashlib
import io
import os
import tempfile
from pathlib import Path

import fitz
import gradio as gr
import markdown2
import requests
import torch
from huggingface_hub import login
from PIL import Image
from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, pipeline
from weasyprint import HTML

# === Login and Device Setup ===
# Credentials are taken from the environment when the variables are set; the
# original literal placeholders remain as fallbacks so behaviour is unchanged
# when they are not.
login(token=os.environ.get("HF_TOKEN", "HF_TOKEN"))

# BLIP-2 processor/model pair used by generate_caption() for image captions.
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
blip_model = AutoModelForVisualQuestionAnswering.from_pretrained("Salesforce/blip2-opt-2.7b")

# Load FLAN-T5 model for QA
t5_model_name = "google/flan-t5-large"
t5_qa_model = pipeline("text2text-generation", model=t5_model_name)

# Run BLIP on the GPU when one is available. Both branches of the previous
# expression evaluated to "cpu", so CUDA was never selected.
device = "cuda" if torch.cuda.is_available() else "cpu"
blip_model = blip_model.to(device)

# Google Gemini API
API_KEY = os.environ.get("GEMINI_API_KEY", "GEMINI_API_KEY")
GEMINI_MODEL = "gemini-2.0-flash"
GEMINI_URL = f"https://generativelanguage.googleapis.com/v1/models/{GEMINI_MODEL}:generateContent?key={API_KEY}"

def answer_with_gemini(question, context, timeout=60):
    """Send a question plus context to the Gemini ``generateContent`` endpoint.

    Args:
        question: The question or instruction to put to the model.
        context: Supporting text appended to the prompt after the question.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text of the first part of the first candidate on success.
        On any failure (network error, non-200 status, or a reply that lacks
        the expected structure) a string starting with ``"Hata:"`` is returned
        instead of raising, because callers show the return value directly.
    """
    headers = {"Content-Type": "application/json"}
    payload = {
        "contents": [
            {"parts": [{"text": f"Question: {question}\nContext: {context}"}]}
        ]
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            GEMINI_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Hata: {exc}"

    if response.status_code != 200:
        return f"Hata: {response.status_code}\n{response.text}"

    try:
        return response.json()["candidates"][0]["content"]["parts"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Guard against a 200 reply whose body is not JSON or does not contain
        # the candidates/content/parts structure indexed above.
        return f"Hata: beklenmeyen yanıt\n{response.text}"

def generate_caption(image):
    """Produce a caption for ``image`` with the module-level BLIP model.

    The image is preprocessed by ``processor``, the resulting tensors are moved
    to ``device``, and the first generated sequence is decoded with special
    tokens stripped.

    Args:
        image: The image handed to ``processor`` as ``images=``.

    Returns:
        The decoded caption string.
    """
    model_inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = blip_model.generate(**model_inputs)
    return processor.decode(generated_ids[0], skip_special_tokens=True)

def extract_pdf_content(pdf_path):
    """Extract per-page text, images and image captions from a PDF.

    Args:
        pdf_path: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A tuple ``(pages_text, pages_images, image_captions)`` of three lists
        with one entry per page: the page's plain text, a list of RGB PIL
        images first seen on that page, and the captions generated for those
        images (same order as the images).
    """
    pages_text, pages_images, image_captions = [], [], []
    # MD5 digests of image bytes already processed. The set is shared across
    # pages, so an image repeated later in the document is only kept (and
    # captioned) the first time it appears.
    seen_hashes = set()

    # The context manager closes the document even if extraction or
    # captioning raises; previously the handle was never closed.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pages_text.append(page.get_text("text"))

            images, captions = [], []
            for img in page.get_images(full=True):
                xref = img[0]
                image_bytes = doc.extract_image(xref)["image"]
                image_hash = hashlib.md5(image_bytes).hexdigest()
                if image_hash in seen_hashes:
                    continue
                seen_hashes.add(image_hash)

                # convert() yields a fully loaded copy, so the image stays
                # usable after the document is closed.
                img_pil = Image.open(io.BytesIO(image_bytes)).convert("RGB")
                images.append(img_pil)
                captions.append(generate_caption(img_pil))

            pages_images.append(images)
            image_captions.append(captions)

    return pages_text, pages_images, image_captions

def generate_workflow(pages_text, image_captions):
    """Build a plain Markdown workflow from page texts and image captions.

    Each page becomes a ``### Step N: Text Content`` section containing the
    stripped page text. When the page has a non-empty caption list, an
    ``#### Image Descriptions:`` bullet list follows. Every section ends with
    a blank line.

    Args:
        pages_text: One text string per page.
        image_captions: One list of caption strings per page; may be shorter
            than ``pages_text``.

    Returns:
        The assembled Markdown string, starting with ``## Workflow``.
    """
    pieces = ["## Workflow\n"]
    for step_no, page_text in enumerate(pages_text, start=1):
        pieces.append(f"### Step {step_no}: Text Content\n{page_text.strip()}\n")

        # Pages beyond the end of image_captions simply have no captions.
        page_captions = (
            image_captions[step_no - 1] if step_no <= len(image_captions) else None
        )
        if page_captions:
            pieces.append("#### Image Descriptions:\n")
            pieces.extend(
                f"- Image {number}: {text}\n"
                for number, text in enumerate(page_captions, 1)
            )

        pieces.append("\n")
    return "".join(pieces)

def generate_ai_workflow(pages_text, image_captions):
    """Ask Gemini to write a Markdown workflow for the document.

    The prompt embeds the string form of ``pages_text`` and ``image_captions``
    and is sent as the context of a "Create workflow" request.

    Args:
        pages_text: Page texts, interpolated into the prompt as-is.
        image_captions: Per-page captions, interpolated into the prompt as-is.

    Returns:
        Whatever ``answer_with_gemini`` returns for the request.
    """
    # The leading and trailing empty entries reproduce the newline that opens
    # and closes the prompt.
    prompt_lines = (
        "",
        "I have a PDF document. Each page has the following text and image descriptions.",
        "",
        "Texts:",
        str(pages_text),
        "",
        "Image Captions:",
        str(image_captions),
        "",
        "Generate a clean, professional, and well-structured Markdown workflow.",
        "",
    )
    return answer_with_gemini("Create workflow", "\n".join(prompt_lines))

def summarize_text(pages_text):
    """Request a Gemini summary of the newline-joined page texts.

    Args:
        pages_text: One text string per page.

    Returns:
        Whatever ``answer_with_gemini`` returns for the request.
    """
    return answer_with_gemini("Summarize this document", "\n".join(pages_text))

def generate_section_titles(pages_text):
    """Request Gemini-generated section titles for the pages.

    Args:
        pages_text: One text string per page; joined with newlines to form the
            context.

    Returns:
        Whatever ``answer_with_gemini`` returns for the request.
    """
    joined_pages = "\n".join(pages_text)
    return answer_with_gemini("Generate section titles for each page", joined_pages)

def enrich_image_captions(text, caption):
    """Ask Gemini to refine an image caption using the page text as context.

    Args:
        text: Text of the page the image belongs to.
        caption: The caption to improve.

    Returns:
        Whatever ``answer_with_gemini`` returns for the request.
    """
    prompt = "\n".join(
        (
            f"Context: {text}",
            f"Image Description: {caption}",
            "Improve this image description using context.",
        )
    )
    return answer_with_gemini("Refine image caption", prompt)

def convert_to_pdf(markdown_content, image_paths, output_file):
    """Render Markdown and a list of images into a PDF file.

    The Markdown is converted to HTML with ``markdown2``, wrapped in a small
    styled page, followed by an "Images:" list with one ``<img>`` per entry of
    ``image_paths``, and written to ``output_file`` with WeasyPrint.

    Args:
        markdown_content: Markdown source for the body of the document.
        image_paths: Paths or URLs used as ``<img src>``; relative paths are
            resolved against the current working directory.
        output_file: Destination passed to ``write_pdf``.
    """
    html_content = markdown2.markdown(markdown_content)

    # Skip the section entirely when there are no images, rather than
    # emitting a heading followed by an empty list.
    image_section = ""
    if image_paths:
        items = "".join(
            f'<li><img src="{image_path}" alt="Image {number}"></li>'
            for number, image_path in enumerate(image_paths, 1)
        )
        image_section = f"<h3>Images:</h3><ul>{items}</ul>"

    # The image section is placed directly in the template. Splicing it in
    # with str.replace("</body>", ...) would also hit any "</body>" text that
    # happens to appear in the converted Markdown.
    styled_html = f"""
    <html><head><style>
    body {{ font-family: Arial; margin: 20px; line-height: 1.6; }}
    img {{ max-width: 100%; height: auto; margin: 10px 0; }}
    </style></head><body>{html_content}{image_section}</body></html>
    """

    # HTML(string=...) has no filename to derive a base URL from, so relative
    # image paths could not be resolved and the images were left out of the
    # PDF. An explicit base_url fixes that.
    HTML(string=styled_html, base_url=os.getcwd()).write_pdf(output_file)

# Gradio UI: upload a PDF, show its text/images/captions, build a workflow
# (plain or Gemini-generated), offer it as a PDF download, and answer free-form
# questions about the document.
with gr.Blocks() as demo:
    gr.Markdown("## PDF AI Workflow Analyzer (Gemini Destekli)")
    with gr.Row():
        pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
        use_ai_workflow = gr.Checkbox(label="Gemini ile gelişmiş workflow üret")
        process_btn = gr.Button("PDF'yi İşle")

    text_output = gr.Markdown(label="PDF Metni")
    image_output = gr.Gallery(label="Görseller")
    caption_output = gr.Textbox(label="Açıklamalar", lines=10)
    workflow_output = gr.Markdown(label="Workflow")
    ai_summary_output = gr.Textbox(label="AI Özeti", lines=5)
    section_titles_output = gr.Textbox(label="Sayfa Başlıkları", lines=5)
    enriched_caption_output = gr.Textbox(label="Zenginleştirilmiş Captionlar", lines=10)
    download_pdf = gr.File(label="Workflow PDF")

    question_input = gr.Textbox(label="PDF hakkında soru sor")
    ask_btn = gr.Button("Soru Gönder")
    answer_output = gr.Textbox(label="Cevap")

    def display_pdf(pdf_path, use_ai):
        """Process the uploaded PDF and produce the values for all outputs.

        Args:
            pdf_path: Value of the file input; falsy when nothing is uploaded.
            use_ai: When true the workflow is generated by Gemini, otherwise
                by ``generate_workflow``.

        Returns:
            An 8-tuple matching the ``outputs`` wired to ``process_btn``:
            joined page text, flat list of images, joined captions, workflow
            Markdown, summary, section titles, joined enriched captions, and
            the path of the generated workflow PDF.
        """
        if not pdf_path:
            # Nothing uploaded: show a hint instead of running the pipeline.
            return ("Lütfen önce bir PDF yükleyin.", [], "", "", "", "", "", None)

        pages_text, pages_images, image_captions = extract_pdf_content(pdf_path)
        if use_ai:
            workflow = generate_ai_workflow(pages_text, image_captions)
        else:
            workflow = generate_workflow(pages_text, image_captions)
        summary = summarize_text(pages_text)
        titles = generate_section_titles(pages_text)

        enriched = []
        for text, captions in zip(pages_text, image_captions):
            for caption in captions:
                enriched.append(enrich_image_captions(text, caption))

        output_file = "workflow.pdf"
        # Stage the images in a private temporary directory. It is removed
        # even when PDF conversion raises, and fixed file names in the working
        # directory can no longer collide between runs.
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_uris = []
            for page_idx, images in enumerate(pages_images):
                for img_idx, img in enumerate(images):
                    img_path = Path(tmp_dir) / f"temp_image_{page_idx}_{img_idx}.png"
                    img.save(img_path)
                    # Absolute file:// URIs need no base URL to be resolved
                    # by the HTML renderer.
                    image_uris.append(img_path.as_uri())
            convert_to_pdf(workflow, image_uris, output_file)

        return (
            "\n\n".join(pages_text),
            [img for page in pages_images for img in page],
            "\n".join([cap for caps in image_captions for cap in caps]),
            workflow,
            summary,
            titles,
            "\n".join(enriched),
            output_file
        )

    def handle_question(question, pdf_path):
        """Answer a question about the uploaded PDF using its text as context.

        Args:
            question: The user's question.
            pdf_path: Value of the file input; falsy when nothing is uploaded.

        Returns:
            The answer string from ``answer_with_gemini``, or a hint when the
            PDF or the question is missing.
        """
        if not pdf_path:
            return "Lütfen önce bir PDF yükleyin."
        if not question or not question.strip():
            return "Lütfen bir soru yazın."

        # Only the page text is needed here. Going through
        # extract_pdf_content() would also caption every image again for each
        # question and then discard the result.
        with fitz.open(pdf_path) as doc:
            context = " ".join(page.get_text("text") for page in doc)
        return answer_with_gemini(question, context)

    process_btn.click(fn=display_pdf, inputs=[pdf_input, use_ai_workflow], outputs=[
        text_output, image_output, caption_output, workflow_output,
        ai_summary_output, section_titles_output, enriched_caption_output,
        download_pdf
    ])

    ask_btn.click(fn=handle_question, inputs=[question_input, pdf_input], outputs=answer_output)

demo.launch()