# In [None]:
import json
from typing import List, Dict

import fitz  # PyMuPDF
import openai

# ---- Step 1: Extract Text from Engineering Drawing ----
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the plain text of every page in the PDF at *pdf_path*.

    Pages are read in document order and their text is concatenated with no
    separator added between pages.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The concatenated text of all pages (an empty string for a document
        with no pages).
    """
    # Open the document as a context manager so the underlying file handle
    # is released even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text("text") for page in doc)

# ---- Step 2: Parse BOM using LLM ----
def parse_bom_with_llm(extracted_text: str) -> Dict:
    """Ask the chat model to convert raw drawing text into a BOM dictionary.

    The extracted text is embedded in a single user prompt, sent with
    ``temperature=0``, and the model's reply is parsed as JSON.

    Args:
        extracted_text: Text previously extracted from the drawing.

    Returns:
        The JSON object returned by the model, decoded into a ``dict``.

    Raises:
        ValueError: If the reply is empty, is not valid JSON, or decodes to
            something other than a JSON object.
    """
    # NOTE(review): the prompt does not pin down a schema, while
    # enrich_bom_with_rag reads bom["bom"] and each item's "material" key.
    # Confirm the model reliably produces that shape.
    prompt = f"""
    Extract a structured Bill of Materials (BOM) in JSON format from the following text:

    {extracted_text}
    """

    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
    # confirm the installed openai version still provides it.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",  # or gpt-5 if available in your setup
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )

    bom_json = response.choices[0].message["content"]
    if not bom_json:
        raise ValueError("LLM returned an empty response; no BOM to parse")

    # Chat models frequently wrap JSON in a Markdown fence (```json ... ```).
    # Drop the opening fence line and everything from the closing fence on.
    payload = bom_json.strip()
    if payload.startswith("```"):
        _, _, payload = payload.partition("\n")
        payload = payload.rsplit("```", 1)[0]

    # Model output is untrusted text: parse it strictly as JSON and never
    # evaluate it as Python. eval() would execute arbitrary code and also
    # fails on the JSON literals true/false/null.
    try:
        bom = json.loads(payload)
    except json.JSONDecodeError as err:
        raise ValueError(f"LLM response is not valid JSON: {err}") from err

    if not isinstance(bom, dict):
        raise ValueError(
            f"Expected a JSON object for the BOM, got {type(bom).__name__}"
        )
    return bom

# ---- Step 3: RAG Enrichment (mock DB lookup) ----
# Mock knowledge base keyed by the exact material name; each value holds the
# extra fields that get merged into a BOM item using that material.
material_db = {
    "EN8 Steel": {
        "standards": "Equivalent to AISI 1040",
        "notes": "Commonly used for automotive gears",
    },
    "Mild Steel": {
        "notes": "Economical, easy to machine, lower tensile strength",
    },
    "Brass": {
        "notes": "High wear resistance, ideal for rotating parts",
    },
    "MS": {
        "notes": "Standard machine key, mild steel",
    },
    "Cast Iron": {
        "notes": "Excellent vibration damping and durability",
    },
}

def enrich_bom_with_rag(bom: Dict) -> Dict:
    """Merge ``material_db`` details into each BOM item with a known material.

    Every entry under ``bom["bom"]`` has its ``"material"`` value looked up in
    ``material_db`` by exact match. On a hit, the stored fields are merged
    into that entry, overwriting same-named keys; entries with a missing or
    unknown material are left as they are.

    The entries are modified in place and the same ``bom`` object is returned.
    """
    for entry in bom["bom"]:
        material_name = entry.get("material")
        try:
            extra_fields = material_db[material_name]
        except KeyError:
            # No record for this material (or no material given): skip it.
            continue
        else:
            entry.update(extra_fields)
    return bom

# ---- Main Pipeline ----
if __name__ == "__main__":
    # Drawing to process -- replace with the path of your own input file.
    pdf_path = "engineering_drawing.pdf"

    # Stage 1: pull the raw text out of the PDF and show its first
    # 300 characters as a quick sanity check.
    extracted_text = extract_text_from_pdf(pdf_path)
    preview = extracted_text[:300]
    print("🔹 Extracted Text:\n", preview, "...\n")

    # Stage 2: have the LLM turn the raw text into a structured BOM.
    bom = parse_bom_with_llm(extracted_text)
    print("🔹 Parsed BOM:\n", bom, "\n")

    # Stage 3: add material details from the mock knowledge base.
    enriched_bom = enrich_bom_with_rag(bom)
    print("🔹 Enriched BOM with RAG:\n", enriched_bom)
