# Multi-Agent MoM Generator — with Final GPT Polish (LangGraph + LangChain)

This notebook generates Minutes of Meeting (MoM) from a meeting transcript. It extends the four-agent pipeline with a **final GPT pass** that polishes the aggregated draft:
**Extract → Summarize → Critique → Aggregate → GPT Polish**

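Configure the pipeline via environment variables (`LLM_BACKEND`, `LLM_FINAL_BACKEND`, and the per-backend model names). The main agents run on **Ollama**, **Hugging Face (HF)**, or **OpenAI**; the final polish step has its own backend setting and falls back to the main backend when it is set to `openai` but no `OPENAI_API_KEY` is available.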


In [None]:

# Optional: install the dependencies (uncomment the next line).
# %pip install langchain langgraph pydantic python-docx pymupdf reportlab transformers accelerate torch ollama openai tiktoken python-dotenv ipywidgets

import os, json, re
from typing import Dict, Any

# ------------------ Config ------------------
# Backend and model names for the main agents, each overridable through an
# environment variable of the same name.
LLM_BACKEND = os.environ.get("LLM_BACKEND", "ollama")  # ollama | hf | openai
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3")
HF_MODEL = os.environ.get("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Backend and model for the final polish pass.
LLM_FINAL_BACKEND = os.environ.get("LLM_FINAL_BACKEND", "openai")  # openai | ollama | hf | same
FINAL_MODEL = os.environ.get("FINAL_MODEL", "gpt-4o-mini")

def try_json_loads(s: str) -> Any:
    """Best-effort JSON parse of an LLM reply.

    Attempts, in order:
      1. strict ``json.loads`` on the input as given;
      2. the input with Markdown code fences removed and trailing commas
         before ``}`` / ``]`` dropped;
      3. the outermost ``{...}`` (then ``[...]``) span found inside the
         cleaned text, for replies that wrap the JSON in prose.

    Args:
        s: Raw model output. Non-text values (e.g. ``None``) are tolerated.

    Returns:
        The parsed JSON value, or ``{"raw": s}`` when nothing parses.
        This function never raises for unparsable input.
    """
    # A model may return no content at all; fall back instead of crashing.
    if not isinstance(s, (str, bytes, bytearray)):
        return {"raw": s}

    parse_errors = (ValueError, RecursionError)  # JSONDecodeError is a ValueError
    try:
        return json.loads(s)
    except parse_errors:
        pass

    text = s.decode("utf-8", errors="replace") if isinstance(s, (bytes, bytearray)) else s
    # Drop ``` / ```json fences (any letter case), then trailing commas.
    cleaned = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).strip()
    cleaned = re.sub(r",\s*([}\]])", r"\1", cleaned)
    try:
        return json.loads(cleaned)
    except parse_errors:
        pass

    # Last resort: the reply may surround the JSON with explanatory prose.
    # Objects are tried first because every prompt in this file asks for one.
    for opener, closer in (("{", "}"), ("[", "]")):
        start = cleaned.find(opener)
        end = cleaned.rfind(closer)
        if 0 <= start < end:
            try:
                return json.loads(cleaned[start:end + 1])
            except parse_errors:
                continue

    return {"raw": s}

# ------------------ LLM Adapters ------------------
class LLMAdapter:
    def __init__(self, backend: str, model_overrides: Dict[str,str]|None=None):
        self.backend = backend
        self.model_overrides = model_overrides or {}
        if backend == "ollama":
            from ollama import Client as OllamaClient  # type: ignore
            self.client = OllamaClient(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
        elif backend == "hf":
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline  # type: ignore
            model_name = self.model_overrides.get("hf", os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2"))
            tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")
            self.pipe = pipeline("text-generation", model=model, tokenizer=tok, max_new_tokens=1024)
        elif backend == "openai":
            from openai import OpenAI  # type: ignore
            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        elif backend == "same":
            self.client = None
        else:
            raise ValueError(f"Unknown backend: {backend}")

    def generate(self, prompt: str) -> str:
        if self.backend == "ollama":
            model = self.model_overrides.get("ollama", os.getenv("OLLAMA_MODEL", "llama3"))
            out = self.client.generate(model=model, prompt=prompt, stream=False)
            return out.get("response", "").strip()
        elif self.backend == "hf":
            gen = self.pipe(prompt)[0]["generated_text"]
            return gen[len(prompt):].strip() if gen.startswith(prompt) else gen
        elif self.backend == "openai":
            model = self.model_overrides.get("openai", os.getenv("OPENAI_MODEL", "gpt-4o-mini"))
            chat = self.client.chat.completions.create(
                model=model,
                messages=[
                    {"role":"system","content":"You are a precise, structured writing assistant."},
                    {"role":"user","content":prompt}
                ],
                temperature=0.2
            )
            return chat.choices[0].message.content
        elif self.backend == "same":
            raise RuntimeError("Backend 'same' is a placeholder; set a real adapter.")
        else:
            raise ValueError("Unsupported backend")

# ------------------ Ingestion ------------------
import fitz  # PyMuPDF
from docx import Document
from docx.shared import Pt
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas

def extract_text_from_pdf(path: str) -> str:
    """Return the text of every page of the PDF at ``path``.

    Page texts are joined with newlines and the combined result is
    stripped of leading/trailing whitespace.
    """
    with fitz.open(path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return "\n".join(page_texts).strip()

def extract_text_from_docx(path: str) -> str:
    """Return the paragraphs of the DOCX at ``path``, one per line."""
    paragraphs = Document(path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def extract_text_from_txt(path: str) -> str:
    """Read the file at ``path`` as UTF-8 text, skipping undecodable bytes."""
    handle = open(path, mode="r", encoding="utf-8", errors="ignore")
    with handle:
        contents = handle.read()
    return contents

def ingest_any(path: str) -> str:
    """Load a transcript from a PDF, DOCX or TXT file.

    The reader is chosen from the file extension, compared
    case-insensitively.

    Raises:
        ValueError: if the extension is not ``.pdf``, ``.docx`` or ``.txt``.
    """
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        reader = extract_text_from_pdf
    elif lowered.endswith(".docx"):
        reader = extract_text_from_docx
    elif lowered.endswith(".txt"):
        reader = extract_text_from_txt
    else:
        raise ValueError("Unsupported file type. Use PDF, DOCX, or TXT.")
    return reader(path)

# ------------------ Prompts ------------------
# Prompt for the extraction agent. It is rendered with ``str.format`` and has
# a single ``{transcript}`` placeholder, so the literal braces in the JSON
# schema examples are doubled; left single, ``format`` would treat them as
# replacement fields and raise KeyError.
EXTRACT_PROMPT = """
You are an information extraction agent for meeting transcripts.
From the text below, return strict JSON with keys:
- participants: [ {{ "name": "...", "role": "..." }} ]
- meeting_title: string
- meeting_datetime: string (ISO or as stated)
- agenda: [string]
- key_points: [string]
- decisions: [string]
- risks: [string]
- action_items: [ {{ "description": "...", "owner": "...", "due_date": "...", "priority": "High/Med/Low" }} ]

Return only JSON.
TEXT:
{transcript}
"""

# Prompt for the summarization agent; rendered with ``str.format`` and the
# single placeholder ``{extracted_json}``.
SUMMARY_PROMPT = """
You are a summarization agent. Convert this extracted JSON into a
clear, detailed Minutes of Meeting (MoM) with sections:
- Meeting Title
- Date/Time
- Participants (Name - Role)
- Agenda
- Summary (bulleted, but rich and specific)
- Decisions
- Action Items (Owner - Description - Due Date - Priority)
- Risks / Open Questions
- Next Steps (with owners and target dates)

Keep it client-ready, concise but comprehensive.
EXTRACTED_JSON:
{extracted_json}
"""

# Prompt for the critique agent; placeholder ``{mom_draft}``. The reply is
# expected to be JSON with the four list-valued keys named below.
CRITIQUE_PROMPT = """
You are a critical reviewer. Read the MoM draft and identify missing details,
ambiguities, tone/style issues, and any inconsistencies with typical MoM standards.
Return JSON with keys:
- missing_info: [string]
- suggested_edits: [string]
- tone_issues: [string]
- formatting_changes: [string]

Return only JSON.
MOM_DRAFT:
{mom_draft}
"""

# Prompt for the aggregation agent; placeholders ``{mom_draft}`` and
# ``{critique_json}``.
# NOTE(review): the text refers to "the transcript", but this template has no
# transcript placeholder, so the model only sees the draft and the critique.
AGGREGATE_PROMPT = """
You are a senior editor. Merge the critique into the MoM draft and produce
the final detailed, polished MoM with the same sections and consistent formatting.
Apply suggested edits and fill reasonable gaps if evidence exists in the transcript.
Return final MoM as plain text (no JSON).

MOM_DRAFT:
{mom_draft}

CRITIQUE_JSON:
{critique_json}
"""

# Prompt for the final polish pass; placeholder ``{mom_final_draft}``.
FINAL_GPT_PROMPT = """
You are a senior communications editor. Lightly polish the following Minutes of Meeting (MoM) for clarity,
professional tone, grammar, and formatting without changing factual content. Keep the same sections and structure.
Return the final MoM as plain text.

MOM_FINAL_DRAFT:
{mom_final_draft}
"""

# ------------------ Agents ------------------
# Shared adapter used by the extract / summarize / critique / aggregate agents.
# It is constructed when this cell runs, so the selected backend's client
# library is imported here (and, for "hf", the model is loaded here).
llm_main = LLMAdapter(LLM_BACKEND)

def extract_agent(transcript: str, max_chars: int = 150000) -> Dict[str, Any]:
    """Extract structured meeting facts from a raw transcript.

    Args:
        transcript: Full transcript text.
        max_chars: Only the first ``max_chars`` characters are sent to the
            model, which bounds the prompt size.

    Returns:
        A dict that always has the keys ``participants``, ``meeting_title``,
        ``meeting_datetime``, ``agenda``, ``key_points``, ``decisions``,
        ``risks`` and ``action_items``. A key the model omitted, or returned
        as ``null``/empty, gets an empty list or empty string.
    """
    prompt = EXTRACT_PROMPT.format(transcript=transcript[:max_chars])
    data = try_json_loads(llm_main.generate(prompt))
    # The model may answer with a JSON list or scalar instead of an object;
    # treat that as "nothing extracted" rather than failing on ``.get``.
    if not isinstance(data, dict):
        data = {}

    list_fields = ("participants", "agenda", "key_points", "decisions", "risks", "action_items")
    text_fields = ("meeting_title", "meeting_datetime")
    extracted: Dict[str, Any] = {}
    # ``or`` also replaces explicit nulls, so downstream code sees one type per key.
    for field in list_fields:
        extracted[field] = data.get(field) or []
    for field in text_fields:
        extracted[field] = data.get(field) or ""
    return extracted

def summarize_agent(extracted: Dict[str, Any]) -> str:
    """Turn the extracted facts into a first MoM draft via the main LLM."""
    extracted_json = json.dumps(extracted, ensure_ascii=False)
    return llm_main.generate(SUMMARY_PROMPT.format(extracted_json=extracted_json))

def critique_agent(mom_draft: str) -> Dict[str, Any]:
    """Ask the main LLM to review the draft and parse its JSON critique."""
    reply = llm_main.generate(CRITIQUE_PROMPT.format(mom_draft=mom_draft))
    return try_json_loads(reply)

def aggregate_agent(mom_draft: str, critique: Dict[str, Any]) -> str:
    """Merge the critique into the draft and return the revised MoM text."""
    critique_json = json.dumps(critique, ensure_ascii=False)
    filled_prompt = AGGREGATE_PROMPT.format(
        mom_draft=mom_draft,
        critique_json=critique_json,
    )
    return llm_main.generate(filled_prompt)

def final_gpt_polish(mom_final_draft: str) -> str:
    """Run the final polish pass over the aggregated MoM.

    The backend is ``LLM_FINAL_BACKEND``. It is swapped for ``LLM_BACKEND``
    when it is ``"same"``, or when it is ``"openai"`` but no
    ``OPENAI_API_KEY`` is set. A fresh adapter is built on every call, with
    models taken from ``FINAL_MODEL`` / ``FINAL_OLLAMA_MODEL`` /
    ``FINAL_HF_MODEL`` (the latter two defaulting to the main models).
    """
    chosen = LLM_FINAL_BACKEND
    if chosen == "same" or (chosen == "openai" and not os.getenv("OPENAI_API_KEY", "")):
        chosen = LLM_BACKEND

    main_ollama = os.getenv("OLLAMA_MODEL", "llama3")
    main_hf = os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
    overrides = {
        "openai": os.getenv("FINAL_MODEL", "gpt-4o-mini"),
        "ollama": os.getenv("FINAL_OLLAMA_MODEL", main_ollama),
        "hf": os.getenv("FINAL_HF_MODEL", main_hf),
    }

    polisher = LLMAdapter(chosen, model_overrides=overrides)
    return polisher.generate(FINAL_GPT_PROMPT.format(mom_final_draft=mom_final_draft))

# ------------------ LangGraph Orchestration ------------------
from typing import TypedDict
from langgraph.graph import StateGraph, END

class MomState(TypedDict):
    """State dict passed between the LangGraph nodes of the MoM pipeline."""

    # Raw meeting transcript text (pipeline input).
    transcript: str
    # Structured facts written by node_extract.
    extracted: Dict[str, Any]
    # First MoM draft written by node_summarize.
    mom_draft: str
    # Review of the draft written by node_critique.
    critique: Dict[str, Any]
    # Draft revised with the critique, written by node_aggregate.
    mom_final: str
    # Output of the final polish pass, written by node_polish.
    mom_final_polished: str

def node_extract(state: MomState) -> MomState:
    """Graph node: fill ``state["extracted"]`` from the transcript."""
    extracted = extract_agent(state["transcript"])
    state["extracted"] = extracted
    return state

def node_summarize(state: MomState) -> MomState:
    """Graph node: fill ``state["mom_draft"]`` from the extracted facts."""
    draft = summarize_agent(state["extracted"])
    state["mom_draft"] = draft
    return state

def node_critique(state: MomState) -> MomState:
    """Graph node: fill ``state["critique"]`` with a review of the draft."""
    review = critique_agent(state["mom_draft"])
    state["critique"] = review
    return state

def node_aggregate(state: MomState) -> MomState:
    """Graph node: fill ``state["mom_final"]`` from the draft and critique."""
    draft, review = state["mom_draft"], state["critique"]
    state["mom_final"] = aggregate_agent(draft, review)
    return state

def node_polish(state: MomState) -> MomState:
    """Graph node: fill ``state["mom_final_polished"]`` via the polish pass."""
    polished = final_gpt_polish(state["mom_final"])
    state["mom_final_polished"] = polished
    return state

# Build the linear graph extract -> summarize -> critique -> aggregate ->
# polish -> END from one ordered table of (node name -> handler).
workflow = StateGraph(MomState)

_stages = {
    "extract": node_extract,
    "summarize": node_summarize,
    "critique": node_critique,
    "aggregate": node_aggregate,
    "polish": node_polish,
}
for _stage, _handler in _stages.items():
    workflow.add_node(_stage, _handler)

_order = list(_stages)
workflow.set_entry_point(_order[0])
# Pair each stage with its successor; the last stage is wired to END.
for _src, _dst in zip(_order, _order[1:] + [END]):
    workflow.add_edge(_src, _dst)

app = workflow.compile()

# ------------------ Export Helpers ------------------
def save_mom_docx(mom_text: str, out_path: str) -> str:
    """Write ``mom_text`` to a DOCX file, one paragraph per input line.

    The document's 'Normal' style is set to Calibri 11 pt.

    Returns:
        ``out_path``, unchanged.
    """
    document = Document()
    normal_font = document.styles['Normal'].font
    normal_font.name = 'Calibri'
    normal_font.size = Pt(11)
    for text_line in mom_text.splitlines():
        document.add_paragraph(text_line)
    document.save(out_path)
    return out_path

def save_mom_pdf(mom_text: str, out_path: str) -> str:
    """Write ``mom_text`` to an A4 PDF, starting a new page when one fills.

    Lines longer than 110 characters are wrapped onto following lines
    rather than cut off, so no text is lost. Wrapped continuation lines
    keep the indentation of the line they came from. Lines of 110
    characters or fewer are drawn exactly as given.

    Returns:
        ``out_path``, unchanged.
    """
    import textwrap  # local import keeps this function self-contained

    max_chars = 110   # characters drawn per PDF line
    line_height = 14  # vertical step between lines, in points
    margin = 50

    pdf = canvas.Canvas(out_path, pagesize=A4)
    _, height = A4
    y = height - margin
    for line in mom_text.splitlines():
        if len(line) <= max_chars:
            segments = [line]
        else:
            indent = line[:len(line) - len(line.lstrip())]
            # Skip the hanging indent when it would leave too little room.
            hanging = indent if len(indent) < max_chars // 2 else ""
            # wrap() returns [] for whitespace-only input; still emit a line.
            segments = textwrap.wrap(line, width=max_chars, subsequent_indent=hanging) or [""]
        for segment in segments:
            if y < margin:
                pdf.showPage()
                y = height - margin
            pdf.drawString(margin, y, segment)
            y -= line_height
    pdf.save()
    return out_path

# ------------------ Run (Demo) ------------------
# Set a path to your transcript (PDF/DOCX/TXT). If empty, a small demo text is used.
path = ""  # e.g., "/path/to/meeting_transcript.pdf"

if not path:
    # No file given: fall back to a small built-in sample transcript.
    demo_text = [
        "Meeting Title: Q3 Launch Planning",
        "Date/Time: 2025-08-10 15:00 IST",
        "Participants: Alice (PM), Bob (Eng Lead), Carol (Design), Dev (Data)",
        "Agenda: Scope, Risks, Timeline, Owners",
        "Discussion:",
        "  - Alice: Need final scope locked by Aug 25.",
        "  - Bob: We can deliver API by Sep 10; risk on auth integration.",
        "  - Carol: Assets by Aug 22; needs product sign-off.",
        "  - Dev: Data pipeline ready; monitoring gaps remain.",
        "Decisions:",
        "  - Move feature X to Q3.1 patch.",
        "Actions:",
        "  - Bob to finalize auth POC by Aug 18.",
        "  - Carol to share final design comps by Aug 21.",
        "  - Dev to add monitoring alerts by Aug 20.",
    ]
    transcript_text = "\n".join(demo_text)
else:
    # Load the transcript from the given PDF/DOCX/TXT file.
    transcript_text = ingest_any(path)

# Initial pipeline state: only the transcript is filled in; each graph node
# populates its own key.
state = {
    "transcript": transcript_text,
    "extracted": {},
    "mom_draft": "",
    "critique": {},
    "mom_final": "",
    "mom_final_polished": ""
}

result = app.invoke(state)
# Prefer the polished text, fall back to the unpolished final draft, and
# default to "" so the exporters never receive None.
final_text = result.get("mom_final_polished") or result.get("mom_final") or ""

# The output location can be overridden with MOM_OUTPUT_DIR; the default is
# the original hard-coded directory.
output_dir = os.getenv("MOM_OUTPUT_DIR", "/mnt/data/output")
os.makedirs(output_dir, exist_ok=True)
docx_file = save_mom_docx(final_text, os.path.join(output_dir, "Minutes_of_Meeting_FINAL.docx"))
pdf_file = save_mom_pdf(final_text, os.path.join(output_dir, "Minutes_of_Meeting_FINAL.pdf"))
print("Saved:", docx_file, pdf_file)
