# Installing Necessary Libraries

In [5]:
!pip install openai==1.16.2 pymupdf==1.25.5 langchain==0.1.17 faiss-cpu==1.8.0 langchain-community==0.0.36



# PDF Figure Extraction Setup
### This section sets up the environment to extract figure images from a PDF based on a natural language question

In [10]:
# ------------------- SETUP -------------------
import os
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.prompts import PromptTemplate
import fitz  # PyMuPDF
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from PIL import Image
# Load Azure OpenAI environment variables
from dotenv import load_dotenv
load_dotenv()


base_dir = os.getcwd()

# Define the paths relative to this folder
pdf_path = os.path.join(base_dir, "BEST_PRACTICE_MANUAL_ELECTRIC_MOTORS.pdf")
image_output_dir = os.path.join(base_dir, "Extracted_Images")

vector_store_path = os.path.join(base_dir, "FAISS_Figures")

# Initialize embeddings
embeddings = AzureOpenAIEmbeddings(
    deployment="MV_Agusta",
    api_key = os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

if os.path.exists(vector_store_path):
    print("📂 Vector store already exists. Loading from disk...")
    vector_store = FAISS.load_local(vector_store_path, embeddings, allow_dangerous_deserialization=True)
else:
    print("📄 Creating new vector store from PDF...")
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(pages)

    vector_store = FAISS.from_documents(chunks, embeddings)
    vector_store.save_local(vector_store_path)
    print("✅ FAISS index created and saved successfully.")

# ------------------- CHAIN DEFINITION -------------------
def Figure_Extractor_Chain(vector_store):
    prompt_template = """
    You are an AI assistant that helps locate relevant figures from a PDF based on user queries.
    Based on the user prompt and document context, identify the most relevant figure or image.
    Return the full figure label like:Figure 2.3: Vector diagram
    Important: Just return the exact figure name. Punctuations and Spaces should be exactly same.

    Context: {context}
    Question: {question}

    Answer:
    """
    prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

    llm = AzureChatOpenAI(
    deployment_name="Thruxton_R",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv["OPENAI_API_VERSION"]
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt}
    )
    return chain


def extract_figure_image_from_pdf(user_question, pdf_path, vector_store_path="FAISS_Figures"):
    # Load vector store
    embeddings = AzureOpenAIEmbeddings(
        deployment="MV_Agusta",
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),,
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
    )
    vector_store = FAISS.load_local(vector_store_path, embeddings, allow_dangerous_deserialization=True)

    # Step 1: Get matched figure label using LLM
    chain = Figure_Extractor_Chain(vector_store)
    matched_figure_label = chain.run(user_question).strip()
    print(f"📌 Matched Figure Caption: {matched_figure_label}")

    # Step 2: Try to find and extract the image closest to this caption
    doc = fitz.open(pdf_path)
    # fig_name = matched_figure_label.split(":")[-1].strip().replace(" ", "_")
    fig_name = matched_figure_label.split(':')[-1]
    print(matched_figure_label)
    image_saved = False

    for page_number in range(3, len(doc)):
        print(f"📄 Checking page {page_number + 1}")
        page = doc[page_number]

        text_instances = page.search_for(matched_figure_label)
        if not text_instances:
            continue

        blocks = page.get_text("dict")["blocks"]
        image_blocks = [b for b in blocks if b["type"] == 1]

        for inst in text_instances:
            caption_y = inst.y1
            closest_img = None
            closest_dist = float("inf")

            for b in image_blocks:
                img_rect = fitz.Rect(b["bbox"])
                img_bytes = b.get("image")

                if img_rect.y1 <= caption_y and isinstance(img_bytes, bytes):
                    dist = caption_y - img_rect.y1
                    if dist < closest_dist:
                        closest_dist = dist
                        closest_img = img_bytes

            if closest_img:
                save_path = os.path.join(image_output_dir, f"{fig_name}.jpg")
                with open(save_path, "wb") as f:
                    f.write(closest_img)
                print(f"✅ Saved closest image near caption to: {save_path}")
                image_saved = True
                break

        if image_saved:
            break

    if not image_saved:
        print("❌ Could not find and save the image for the specified caption.")

    return save_path if image_saved else "❌ No image saved"

📂 Vector store already exists. Loading from disk...


# Usage Example: Ask a Question and Extract the Diagram

In [12]:
# ------------------- USAGE EXAMPLE -------------------
image_path = extract_figure_image_from_pdf(
    user_question="Show the diagram of Induction Motor?",
    pdf_path = pdf_path
)
print("Result:", image_path)

📌 Matched Figure Caption: Figure 2.1: Induction motor principle
Figure 2.1: Induction motor principle
📄 Checking page 4
📄 Checking page 5
📄 Checking page 6
✅ Saved closest image near caption to: C:\Users\v-sachinku\Downloads\Image Extraction Code\Extracted_Images\ Induction motor principle.jpg
Result: C:\Users\v-sachinku\Downloads\Image Extraction Code\Extracted_Images\ Induction motor principle.jpg


In [13]:
img = Image.open(image_path)
img.show()  # This opens in default image viewer as a popup

# Sample Questions
### 1. Can you show diagram of Aluminium Rotor?
### 2. Show diagram for Power factor correction
### 3. How is Motor efficiency and power factor correlated?