In [1]:
pip install openai python-dotenv gradio

Note: you may need to restart the kernel to use updated packages.


In [2]:
def summarize_documents(client, prompt):
    """
    Summarizes and compares two documents using OpenAI's GPT-4o-mini model.
    
    Args:
        client: OpenAI client instance
        prompt: The prompt containing documents to summarize/compare
    
    Returns:
        str: Summary from GPT or error message
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You summarize and compare two documents."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
        )
        summary = response.choices[0].message.content
        return summary
    except Exception as e:
        return f"❌ Error from GPT: {e}"

# Example usage:
# client = OpenAI(api_key="your-api-key")
# result = summarize_documents(client, "Your document comparison prompt here")
# print(result)

In [3]:
! pip install pdfplumber openai python-docx



In [4]:
# Alternative version using pdfplumber instead of PyMuPDF
# Install with: pip install pdfplumber openai python-docx

import pdfplumber  # Alternative to PyMuPDF
from openai import OpenAI
from docx import Document
import os
from dotenv import load_dotenv
# Initialize the OpenAI client
load_dotenv()  # Load environment variables from .env file
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def extract_text(file_path):
    """
    Extract text from PDF, DOCX, or TXT files using pdfplumber for PDFs.
    
    Args:
        file_path (str): Path to the file
        
    Returns:
        str: Extracted text content
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    
    ext = file_path.split('.')[-1].lower()
    
    if ext == "pdf":
        text = ""
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
                    
    elif ext == "docx":
        try:
            doc = Document(file_path)
            text = "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            raise ValueError(f"Error reading DOCX file: {e}")
            
    elif ext == "txt":
        try:
            encodings = ['utf-8', 'latin-1', 'cp1252']
            text = None
            for encoding in encodings:
                try:
                    with open(file_path, "r", encoding=encoding) as f:
                        text = f.read()
                    break
                except UnicodeDecodeError:
                    continue
            if text is None:
                raise ValueError("Could not decode text file with any supported encoding")
        except Exception as e:
            raise ValueError(f"Error reading TXT file: {e}")
            
    else:
        raise ValueError(f"Unsupported file type: {ext}. Please upload a PDF, DOCX, or TXT file.")
    
    if not text.strip():
        raise ValueError(f"No text content found in {file_path}")
    
    return text

def compare_docs(file1_path, file2_path, max_chars=15000):
    try:
        # ...existing text extraction code...

        comparison_prompt = f"""Compare these two documents and provide analysis in the following format:
        
        1. Brief summary of each document (2-3 sentences each)
        2. Key similarities (bullet points)
        3. Main differences (bullet points)
        4. Overall assessment (2-3 sentences)

        Document A ({file1_name}):
        {text1}

        Document B ({file2_name}):
        {text2}"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system", 
                    "content": "You are an expert document analyst. Provide your analysis in HTML format with appropriate styling."
                },
                {
                    "role": "user", 
                    "content": comparison_prompt
                }
            ],
            temperature=0.3,
            max_tokens=1500
        )

        # Format the response in HTML
        result = response.choices[0].message.content
        formatted_html = f"""
        <div style="padding: 20px; color: #e8eaed;">
            <div style="margin-bottom: 20px;">
                <h3 style="color: #64ffda;">Document Comparison Analysis</h3>
                <div style="height: 2px; background: linear-gradient(90deg, #64ffda, transparent); margin: 10px 0;"></div>
            </div>
            {result}
        </div>
        """
        
        return formatted_html

    except Exception as e:
        return f"""<div style="color: #ff4444; padding: 20px;">
            ❌ Error during comparison: {str(e)}
        </div>"""
# Test the installation
print("All modules imported successfully!")
print("You can now use compare_docs(file1_path, file2_path)")

# Installation command for this alternative:
# !pip install pdfplumber openai python-docx

All modules imported successfully!
You can now use compare_docs(file1_path, file2_path)


In [5]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Initialize the OpenAI client using the environment variable
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Ask GPT to summarize the diff
response = client.chat.completions.create(
    model="gpt-4o-mini",  # or "gpt-4o"
    messages=[
        {"role": "system", "content": "You are an assistant that explains differences between two text documents in simple terms."},
        {"role": "user", "content": f"Please summarize the following differences between two documents:\n\n{diff_text}"}
    ],
    temperature=0.7
)

# Print summary
summary = response.choices[0].message.content
print(summary)

NameError: name 'diff_text' is not defined

In [None]:
import gradio as gr

# Custom CSS for modern dark theme with accent colors
custom_css = """
/* Main container styling */
.gradio-container {
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%) !important;
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
}

/* Header styling */
.gr-interface h1 {
    color: #64ffda !important;
    text-align: center !important;
    font-weight: 700 !important;
    font-size: 2.5rem !important;
    margin-bottom: 0.5rem !important;
    text-shadow: 0 0 20px rgba(100, 255, 218, 0.3) !important;
}

/* Description styling */
.gr-interface p {
    color: #b8bcc8 !important;
    text-align: center !important;
    font-size: 1.1rem !important;
    margin-bottom: 2rem !important;
}

/* Input section styling */
.input-container {
    background: rgba(255, 255, 255, 0.05) !important;
    border-radius: 16px !important;
    padding: 2rem !important;
    margin: 1rem 0 !important;
    border: 1px solid rgba(100, 255, 218, 0.2) !important;
    backdrop-filter: blur(10px) !important;
}

/* File upload styling */
.gr-file {
    background: rgba(255, 255, 255, 0.08) !important;
    border: 2px dashed #64ffda !important;
    border-radius: 12px !important;
    transition: all 0.3s ease !important;
}

.gr-file:hover {
    border-color: #4fd1c7 !important;
    background: rgba(100, 255, 218, 0.1) !important;
    transform: translateY(-2px) !important;
}

/* Button styling */
.gr-button {
    background: linear-gradient(45deg, #64ffda, #4fd1c7) !important;
    color: #1a1a2e !important;
    border: none !important;
    border-radius: 8px !important;
    font-weight: 600 !important;
    padding: 12px 24px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 15px rgba(100, 255, 218, 0.3) !important;
}

.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(100, 255, 218, 0.4) !important;
}

/* Output area styling */
.gr-textbox {
    background: rgba(255, 255, 255, 0.05) !important;
    border: 1px solid rgba(100, 255, 218, 0.3) !important;
    border-radius: 12px !important;
    color: #e8eaed !important;
    font-family: 'Monaco', 'Menlo', monospace !important;
    font-size: 14px !important;
    line-height: 1.6 !important;
}

/* Labels */
.gr-label {
    color: #64ffda !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    margin-bottom: 0.5rem !important;
}

/* Progress bars */
.gr-progress {
    background: rgba(100, 255, 218, 0.2) !important;
    border-radius: 8px !important;
}

.gr-progress .progress-bar {
    background: linear-gradient(90deg, #64ffda, #4fd1c7) !important;
}

/* Footer styling */
.gr-footer {
    background: transparent !important;
    border-top: 1px solid rgba(100, 255, 218, 0.2) !important;
}

/* Animations */
@keyframes fadeIn {
    from { opacity: 0; transform: translateY(20px); }
    to { opacity: 1; transform: translateY(0); }
}

.gradio-container > * {
    animation: fadeIn 0.6s ease-out !important;
}

/* Scrollbar styling */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: rgba(255, 255, 255, 0.1);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(180deg, #64ffda, #4fd1c7);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(180deg, #4fd1c7, #64ffda);
}
"""

# Enhanced Gradio Interface
def create_enhanced_interface():
    with gr.Blocks(css=custom_css, theme=gr.themes.Base(), title="Document Comparator") as iface:
        # Header
        gr.HTML("""
            <div style="text-align: center; margin-bottom: 2rem;">
                <h1 style="color: #64ffda; font-size: 2.5rem; font-weight: 700; margin-bottom: 0.5rem; text-shadow: 0 0 20px rgba(100, 255, 218, 0.3);">
                    📄 Document Comparator
                </h1>
                <p style="color: #b8bcc8; font-size: 1.1rem; margin-bottom: 0;">
                    Upload two documents (PDF, DOCX, or TXT) to get a comprehensive section-wise and overall difference comparison
                </p>
            </div>
        """)
        
        # Input Section
        with gr.Row():
            with gr.Column(scale=1):
                file_a = gr.File(
                    label="📄 Document A",
                    type="filepath",
                    file_types=[".pdf", ".docx", ".txt"],
                    elem_classes=["input-file"]
                )
            
            with gr.Column(scale=1):
                file_b = gr.File(
                    label="📄 Document B", 
                    type="filepath",
                    file_types=[".pdf", ".docx", ".txt"],
                    elem_classes=["input-file"]
                )
        
        # Compare Button
        compare_btn = gr.Button(
            "🔍 Compare Documents", 
            variant="primary",
            size="lg",
            elem_classes=["compare-button"]
        )
        
        # Output Section
        output = gr.Textbox(
            label="📊 Comparison Results",
            lines=20,
            max_lines=30,
            show_copy_button=True,
            placeholder="Upload two documents and click 'Compare Documents' to see the detailed analysis...",
            elem_classes=["output-textbox"]
        )
        
        # Event handler
        compare_btn.click(
            fn=compare_docs,  # Your existing function
            inputs=[file_a, file_b],
            outputs=output,
            show_progress=True
        )
        
        # Footer
        gr.HTML("""
            <div style="text-align: center; margin-top: 2rem; padding-top: 1rem; border-top: 1px solid rgba(100, 255, 218, 0.2);">
                <p style="color: #b8bcc8; font-size: 0.9rem;">
                    ⚡ Powered by AI • Built with Gradio
                </p>
            </div>
        """)
    
    return iface

# Launch the enhanced interface
iface = create_enhanced_interface()
iface.launch(
    share=True,  # Creates a public link
    server_name="0.0.0.0",  # Makes it accessible from other devices
    server_port=7860,  # Custom port
    show_error=True,  # Show detailed errors
    favicon_path=None,  # You can add a custom favicon path here
    app_kwargs={"title": "Document Comparator - AI Powered"}
)

* Running on local URL:  http://0.0.0.0:7861
* Running on public URL: https://7ade5ee53b894e8cfc.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


KeyboardInterrupt: 

In [None]:
%pip install python-docx

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/253.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0
