In [4]:
# Install Gradio with the interpreter that backs this kernel (sys.executable),
# so the package lands in the environment the notebook actually imports from.
import sys
!{sys.executable} -m pip install gradio


Collecting gradio
  Downloading gradio-5.42.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading Brotli-1.1.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.1-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.11.1 (from gradio)
  Downloading gradio_client-1.11.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting huggingface-hub<1.0,>=0.33.5 (from gradio)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.11.1-cp311-cp311-win_amd64.whl.metadata (43 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloadin


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: C:\Users\bhava\AppData\Local\Programs\Python\Python311\python.exe -m pip install --upgrade pip


In [5]:
import sys
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install gradio


Collecting pip
  Using cached pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Using cached pip-25.2-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.1.1
    Uninstalling pip-25.1.1:
      Successfully uninstalled pip-25.1.1
Successfully installed pip-25.2


In [1]:
# Sanity check: confirm Gradio imports in this kernel and print the installed version.
import gradio as gr
print(gr.__version__)


5.42.0


In [5]:
# Gradio inline app (written for the 4.x API; this notebook ran it on Gradio 5.42.0) — ADGM Corporate Agent Docx Review
# Requirements: python-docx, gradio
# Install inside current kernel if missing:
# import sys
# !{sys.executable} -m pip install --upgrade pip
# !{sys.executable} -m pip install gradio python-docx

import io
import os
import re
import json
import tempfile
import zipfile
import pathlib
from datetime import datetime
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
import gradio as gr
import traceback

# -------------------------
# Configuration / Checklist
# -------------------------

# Lower-case names of the documents expected for an incorporation pack.
# process_docx_files reports every entry that is not contained in any detected
# (lower-cased) document-type label as "missing".
INCORPORATION_CHECKLIST = [
    "articles of association",
    "memorandum of association",
    "board resolution",
    "incorporation application",
    "register of members and directors",
    "ubo declaration"
]

# Document-type label -> lower-case keywords. detect_doc_type assigns the label
# when any of its keywords is found in the lower-cased document text.
DOC_TYPE_KEYWORDS = {
    "Articles of Association": ["articles of association", "aoa", "articles"],
    "Memorandum of Association": ["memorandum of association", "moa", "memorandum"],
    "Board Resolution": ["board resolution", "resolution of the board"],
    "Shareholder Resolution": ["shareholder resolution", "resolution of the shareholders"],
    "Incorporation Application": ["incorporation application", "application for incorporation"],
    "UBO Declaration": ["ubo declaration", "ultimate beneficial owner", "ubo"],
    "Register of Members and Directors": ["register of members", "register of directors", "register of members and directors"],
    "Change of Registered Address": ["change of registered address", "registered address notice"]
}

# Regexes searched case-insensitively in each paragraph by find_issues_in_doc;
# a hit is reported as a "High" severity jurisdiction issue.
NON_ADGM_JURISDICTION_PATTERNS = [
    r"UAE Federal Courts",
    r"Federal Courts of the UAE",
    r"Dubai Courts",
    r"Abu Dhabi Courts"
]

# Regexes for non-mandatory wording, searched case-insensitively per paragraph;
# a hit is reported as a "Medium" severity issue.
AMBIGUOUS_PATTERNS = [
    r"\bmay\b",
    r"\bshould\b",
    r"\bendeavour\b",
    r"\bbest efforts\b",
    r"\bendeavor\b"
]

# Regexes searched over the whole document text; if none of them matches,
# find_issues_in_doc adds a document-level "No signature block found." issue.
SIGNATURE_PATTERNS = [
    r"signature\s*:\s*",
    r"signed\s+by",
    r"________________",
    r"authorised signatory",
    r"for and on behalf of"
]

# -------------------------
# Helper functions
# -------------------------

def read_docx_from_bytes(file_bytes):
    """Parse raw .docx bytes into a Document by wrapping them in an in-memory buffer."""
    buffer = io.BytesIO(file_bytes)
    return Document(buffer)

def read_docx_from_path(path):
    """Open the .docx at `path` as a Document; `path` is converted with str() first."""
    location = str(path)
    return Document(location)

def doc_to_text(doc):
    """Return the document's non-blank paragraph texts joined by blank lines.

    Paragraphs whose text is empty or whitespace-only are skipped; the kept
    paragraphs are emitted unmodified (not stripped), separated by "\\n\\n".
    """
    kept = []
    for paragraph in doc.paragraphs:
        content = paragraph.text
        if content.strip():
            kept.append(content)
    return "\n\n".join(kept)

def detect_doc_type(text, keywords=None):
    """Classify a document by the keywords that occur in its text.

    Keywords are matched case-insensitively as whole words/phrases (an optional
    trailing plural "s" is accepted). Plain substring matching is deliberately
    avoided: short keywords such as "ubo" or "moa" would otherwise fire inside
    unrelated words like "subordinate" or "moat".

    Args:
        text: Full document text. ``None`` is treated as an empty string.
        keywords: Optional mapping of type label -> list of keywords. Defaults
            to the module-level ``DOC_TYPE_KEYWORDS``.

    Returns:
        List of matching type labels in mapping order, or
        ``["Unknown Document Type"]`` when nothing matches.
    """
    if keywords is None:
        keywords = DOC_TYPE_KEYWORDS
    text_l = (text or "").lower()
    matches = []
    for dtype, candidates in keywords.items():
        for kw in candidates:
            # (?<!\w) / (?!\w) act as word boundaries that also work for
            # multi-word phrases; "s?" keeps plural forms matching.
            pattern = r"(?<!\w)" + re.escape(kw.lower()) + r"s?(?!\w)"
            if re.search(pattern, text_l):
                matches.append(dtype)
                break  # one hit per type is enough
    # Loose fallback kept from the original heuristic: a text that talks about
    # "article(s)" and a "company" is assumed to be Articles of Association.
    if not matches and "article" in text_l and "company" in text_l:
        matches.append("Articles of Association")
    return matches or ["Unknown Document Type"]

def find_issues_in_doc(doc):
    """Scan a document for red flags and return them as a list of issue dicts.

    Per non-blank paragraph, at most one jurisdiction issue (first matching
    pattern in NON_ADGM_JURISDICTION_PATTERNS, severity "High") and at most one
    ambiguous-language issue (first matching pattern in AMBIGUOUS_PATTERNS,
    severity "Medium") are recorded. If no SIGNATURE_PATTERNS entry matches the
    whole document text, one document-level issue with ``paragraph_index`` None
    is appended.

    Each issue dict has the keys: paragraph_index, section_text_snippet, issue,
    severity, suggestion.
    """
    def _first_hit(patterns, haystack):
        # Return the match object of the first pattern that matches, else None.
        for pattern in patterns:
            hit = re.search(pattern, haystack, flags=re.IGNORECASE)
            if hit:
                return hit
        return None

    findings = []
    for position, paragraph in enumerate(doc.paragraphs):
        body = paragraph.text.strip()
        if not body:
            continue
        excerpt = body[:300]

        # jurisdiction mismatch
        jurisdiction_hit = _first_hit(NON_ADGM_JURISDICTION_PATTERNS, body)
        if jurisdiction_hit is not None:
            findings.append({
                "paragraph_index": position,
                "section_text_snippet": excerpt,
                "issue": f"References non-ADGM jurisdiction: '{jurisdiction_hit.group(0)}'",
                "severity": "High",
                "suggestion": "Replace jurisdiction reference with ADGM-specific jurisdiction."
            })

        # ambiguous language
        ambiguous_hit = _first_hit(AMBIGUOUS_PATTERNS, body)
        if ambiguous_hit is not None:
            findings.append({
                "paragraph_index": position,
                "section_text_snippet": excerpt,
                "issue": f"Uses ambiguous language: contains '{ambiguous_hit.group(0)}'",
                "severity": "Medium",
                "suggestion": "Replace with mandatory language such as 'shall'."
            })

    # signature check (doc-level), run against the lower-cased full text
    lowered_text = doc_to_text(doc).lower()
    if _first_hit(SIGNATURE_PATTERNS, lowered_text) is None:
        if len(lowered_text) > 200:
            doc_excerpt = lowered_text[:200] + '...'
        else:
            doc_excerpt = lowered_text
        findings.append({
            "paragraph_index": None,
            "section_text_snippet": doc_excerpt,
            "issue": "No signature block found.",
            "severity": "High",
            "suggestion": "Add authorized signatory block."
        })
    return findings

def insert_comments_into_doc(original_doc, issues):
    """Build a new Document that mirrors the original's paragraphs with review notes added.

    Each source paragraph is re-created run by run, carrying over run text,
    bold, italic and underline, plus the paragraph alignment (LEFT when the
    source has none). After a paragraph, one note paragraph is added per issue
    whose ``paragraph_index`` equals that paragraph's position. Issues whose
    ``paragraph_index`` is None are listed under a trailing
    "DOCUMENT LEVEL ISSUES" heading.

    Returns the newly built Document; ``original_doc`` is only read.
    """
    def _append_note(document, flagged):
        # Bold headline with severity and issue, then the suggestion on a new line.
        note = document.add_paragraph()
        note.add_run(f"COMMENT (Auto-flag) [{flagged['severity']}]: {flagged['issue']}").bold = True
        note.add_run("\nSuggestion: " + flagged['suggestion'])

    annotated = Document()
    for position, source_para in enumerate(original_doc.paragraphs):
        mirrored = annotated.add_paragraph()
        for source_run in source_para.runs:
            copied_run = mirrored.add_run(source_run.text)
            copied_run.bold = source_run.bold
            copied_run.italic = source_run.italic
            copied_run.underline = source_run.underline
        mirrored.alignment = source_para.alignment or WD_PARAGRAPH_ALIGNMENT.LEFT

        # notes for issues attached to this paragraph
        for entry in issues:
            if entry.get("paragraph_index") == position:
                _append_note(annotated, entry)

    # document-level issues appended at end
    whole_doc_entries = [entry for entry in issues if entry.get("paragraph_index") is None]
    if whole_doc_entries:
        heading = annotated.add_paragraph("\n--- DOCUMENT LEVEL ISSUES ---")
        heading.runs[0].bold = True
        for entry in whole_doc_entries:
            _append_note(annotated, entry)
    return annotated

def process_docx_files(files):
    """
    Review a batch of .docx uploads and assemble the structured report.

    files: iterable of (filename, file_handle) pairs. file_handle may be bytes,
    io.BytesIO, a str / pathlib.Path, or any other object exposing read().

    Returns (processed_paths, report):
      processed_paths - list of (temp_docx_path, original_filename), one entry
                        per document that could be read and reviewed.
      report          - dict with process, document counts, missing documents,
                        per-document entries, flattened issues and a summary.
    Documents that cannot be read are recorded in the report and skipped.
    """
    def _open_document(handle):
        # Dispatch on the handle's type; anything unrecognised is read as file-like.
        if isinstance(handle, bytes):
            return read_docx_from_bytes(handle)
        if isinstance(handle, io.BytesIO):
            return read_docx_from_bytes(handle.getvalue())
        if isinstance(handle, (str, pathlib.Path)):
            return read_docx_from_path(handle)
        return read_docx_from_bytes(handle.read())

    reviewed_outputs = []
    seen_types = []
    report = {
        "process": None,
        "documents_uploaded": 0,
        "required_documents": len(INCORPORATION_CHECKLIST),
        "missing_documents": [],
        "documents": [],
        "issues_found": []
    }

    for original_name, handle in files:
        report["documents_uploaded"] += 1
        try:
            document = _open_document(handle)
        except Exception as exc:
            report["documents"].append({"filename": original_name, "status": "failed to read", "error": str(exc)})
            continue

        doc_types = detect_doc_type(doc_to_text(document))
        seen_types.extend(label.lower() for label in doc_types if label != "Unknown Document Type")
        flagged = find_issues_in_doc(document)
        annotated = insert_comments_into_doc(document, flagged)

        # Reserve a temp file name, then let python-docx write the reviewed copy to it.
        holder = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
        holder.close()
        output_path = holder.name
        annotated.save(output_path)
        reviewed_outputs.append((output_path, original_name))

        report["documents"].append({
            "filename": original_name,
            "detected_types": doc_types,
            "issues_count": len(flagged),
            "reviewed_filename": os.path.basename(output_path)
        })
        report["issues_found"].extend(
            {
                "document": original_name,
                "section": f"paragraph_index:{entry.get('paragraph_index')}",
                "issue": entry["issue"],
                "severity": entry["severity"],
                "suggestion": entry["suggestion"]
            }
            for entry in flagged
        )

    unique_types = set(seen_types)
    process_markers = ["articles", "memorandum", "register", "incorporation", "ubo", "board"]
    looks_like_incorporation = any(
        marker in label for marker in process_markers for label in unique_types
    )
    report["process"] = "Company Incorporation" if looks_like_incorporation else "Unknown / Other"

    lowered_types = [label.lower() for label in unique_types]
    absent = [
        required
        for required in INCORPORATION_CHECKLIST
        if not any(required in label for label in lowered_types)
    ]
    report["missing_documents"] = absent

    if report["process"] != "Company Incorporation":
        summary = "Process could not be determined."
    elif absent:
        summary = f"Missing: {', '.join(absent)}"
    else:
        summary = "All required incorporation documents present."
    report["summary"] = summary

    return reviewed_outputs, report

# -------------------------
# Gradio wrapper
# -------------------------
def gradio_process(gr_inputs):
    """Gradio callback: review the uploaded .docx files.

    Args:
        gr_inputs: Value delivered by the file uploader - a list whose items
            are paths (``str`` / ``pathlib.Path``) or file-like objects.
            ``None`` or an empty list means nothing was uploaded.

    Returns:
        ``(download_path, text)``. ``download_path`` is the reviewed .docx when
        exactly one document was processed, a .zip of all reviewed documents
        when several were, or ``None`` when none could be produced. ``text`` is
        the JSON report, the "No files uploaded." notice, or - because this
        function never raises - a JSON object with "error" and "traceback".
    """
    try:
        if not gr_inputs:
            return None, "No files uploaded."

        files = []
        for item in gr_inputs:
            if isinstance(item, (str, pathlib.Path)):
                files.append((os.path.basename(str(item)), pathlib.Path(item)))
            else:
                name = getattr(item, "name", None)
                if name and os.path.exists(name):
                    # File-like wrapper around a file on disk: read it by path.
                    files.append((os.path.basename(name), pathlib.Path(name)))
                else:
                    b = item.read()
                    files.append((getattr(item, "filename", "uploaded.docx"), io.BytesIO(b)))

        processed, report = process_docx_files(files)
        report_json = json.dumps(report, indent=2)

        if len(processed) == 0:
            return None, report_json
        if len(processed) == 1:
            tmp_path, _ = processed[0]
            return tmp_path, report_json

        zip_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
        zip_tmp.close()
        used_names = set()
        with zipfile.ZipFile(zip_tmp.name, "w") as zf:
            for tmp_path, orig_name in processed:
                # Two uploads can share a basename; give each archive member a
                # unique name so none shadows another on extraction.
                arcname = f"reviewed_{orig_name}"
                stem, ext = os.path.splitext(arcname)
                counter = 1
                while arcname in used_names:
                    arcname = f"{stem}_{counter}{ext}"
                    counter += 1
                used_names.add(arcname)
                zf.write(tmp_path, arcname=arcname)

        # The reviewed copies now live inside the zip; remove the intermediate
        # temp files so they do not accumulate. Best-effort: a failed delete
        # must not hide a successful review.
        for tmp_path, _ in processed:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return zip_tmp.name, report_json
    except Exception as e:
        tb = traceback.format_exc()
        return None, json.dumps({"error": str(e), "traceback": tb}, indent=2)


       
# -------------------------
# Gradio UI
# -------------------------

# Upload -> review -> download UI. Clicking the button passes the uploaded files to
# gradio_process and fills both outputs: the reviewed file (or zip) and the JSON report.
with gr.Blocks() as demo:
    gr.Markdown("## ADGM Corporate Agent — Jupyter Inline Review (Gradio 4.x compatible)")
    # Accept several files at once, restricted to the .docx extension.
    uploader = gr.File(label="Upload .docx files", file_count="multiple", file_types=[".docx"])
    out_file = gr.File(label="Download Reviewed File(s)")
    out_report = gr.Textbox(label="Structured Report (JSON)", lines=20)
    run_btn = gr.Button("Run Review")
    run_btn.click(fn=gradio_process, inputs=[uploader], outputs=[out_file, out_report])

# Serve locally only (no public share link) and render the app inline in the notebook.
demo.launch(share=False, inline=True)


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


