Import the dependencies


In [35]:
import fitz  # PyMuPDF
import base64
import requests
import os
import glob
import json
from langchain import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os

load_dotenv()

True

Configuration: PDF data directory and OpenAI API key


In [36]:

# Directory containing the PDFs to ingest. Set the PDF_DIRECTORY environment
# variable (for example in the .env file loaded by load_dotenv()) to point at
# your own data without editing the notebook; the previous hard-coded
# location is kept as the fallback so existing setups keep working.
pdf_directory = os.environ.get("PDF_DIRECTORY", r'C:\Users\User\saf\data')
# OpenAI API key taken from the environment; a missing key raises KeyError here.
api_key = os.environ["OPENAI_API_KEY"]


Converting PDF pages to images and summarizing them with GPT-4o


In [37]:
# Read an image file and return its contents as base64 text
def encode_image(image_path):
    """Return the base64 encoding of the file at ``image_path`` as a ``str``.

    The file is opened in binary mode and read in full before encoding.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Function to analyze image using OpenAI API
def analyze_image(image_path, timeout=120):
    """Summarize a technical drawing image via the OpenAI chat completions API.

    The image is base64-encoded with ``encode_image`` and sent inline as a
    PNG data URL together with a fixed instruction prompt.

    Args:
        image_path: Path of the image file to analyze.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (previously this surfaced as a ``KeyError`` on ``'choices'``).
        requests.Timeout: If the request exceeds ``timeout``.
    """
    base64_image = encode_image(image_path)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "You are provided with an image containing technical mechanical drawings. Your task is to analyze and summarize the details presented in the drawing with a focus on key features, dimensions, and any annotations or instructions"},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
                ]
            }
        ],
        "max_tokens": 1000
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to index an error payload.
    response.raise_for_status()
    data = response.json()
    return data['choices'][0]['message']['content']


In [38]:
# Collect the PDFs to ingest. glob returns matches in arbitrary order, so the
# list is sorted to keep the order of the generated summaries stable between runs.
pdf_files = sorted(glob.glob(os.path.join(pdf_directory, "*.pdf")))
# Text accumulator for the per-page summaries built by the processing loop.
all_summaries = ""

In [39]:

# Summarize every page of every PDF.
# The per-page texts are gathered in a list and joined once at the end, so
# re-running this cell rebuilds all_summaries from scratch instead of
# appending a second copy of every summary to the previous run's text.
summary_parts = []
for pdf_path in pdf_files:
    print(f"Processing PDF: {pdf_path}")
    pdf_name = os.path.basename(pdf_path)
    pdf_document = fitz.open(pdf_path)
    try:
        for page_number in range(len(pdf_document)):
            # Render the page to a temporary PNG that analyze_image can read.
            page = pdf_document.load_page(page_number)
            pix = page.get_pixmap()
            image_path = f'temp_page_{page_number + 1}.png'
            pix.save(image_path)

            try:
                content = analyze_image(image_path)
            finally:
                # Remove the temporary image even when the analysis fails.
                os.remove(image_path)

            summary_parts.append(
                f"Summary for {pdf_name} - page {page_number + 1}:\n{content}\n{'='*50}\n"
            )
            print(f"Analyzed {pdf_name} - page {page_number + 1}")
    finally:
        # Always release the document, including when a page fails midway.
        pdf_document.close()

all_summaries = "".join(summary_parts)

Processing PDF: C:\Users\User\saf\data\Coding_challenge_source_1.pdf
Analyzed Coding_challenge_source_1.pdf - page 1
Processing PDF: C:\Users\User\saf\data\Coding_challenge_source_2.pdf
Analyzed Coding_challenge_source_2.pdf - page 1


Chunking and embeddings


In [40]:

# Tunable settings for chunking and embedding, named so they are easy to find.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 20
EMBEDDING_MODEL = "text-embedding-3-large"

# Stop early with a clear message when no summaries were produced, rather
# than handing the vector store an empty document list.
if not all_summaries.strip():
    raise ValueError(
        "No page summaries available to index; check pdf_directory and the PDF processing step."
    )

# Split text into documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
final_documents = text_splitter.create_documents([all_summaries])

# Generate embeddings and index the chunks in an in-memory FAISS store
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
vectors = FAISS.from_documents(final_documents, embeddings)


In [41]:
# Set up the QA chain (it is invoked in the cells that follow)
llm = ChatOpenAI(model="gpt-4o")
# The context section is closed with </context>; the template previously
# repeated the opening tag, leaving the delimiter unbalanced for the model.
qa_prompt = ChatPromptTemplate.from_template("""
    Answer the questions based on the provided context only.
    Please provide the most accurate response based on the question.
    <context>
    {context}
    </context>
    Questions: {input}
""")
# Combine the LLM and prompt into a documents chain, then put the FAISS
# retriever in front of it; the resulting chain is called with an 'input' key
# and its result is read through the 'answer' key.
document_chain = create_stuff_documents_chain(llm, qa_prompt)
retriever = vectors.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [42]:
# Run a fixed sample question through the chain and show only the answer text.
response = retrieval_chain.invoke(dict(input="what are instructions"))
answer = response["answer"]
print(answer)

The instructions for the task include:

1. **Dimensions**: All dimensions are provided in millimeters.
2. **Software**: The 3D modeling task must be completed using CATIA V5.
3. **File Naming and Saving Protocol**: Specific instructions are provided for the file naming conventions and saving protocol.
4. **Details on Features**: The drawings include multiple views, sectional drawings, and auxiliary views to clarify intricate features. 
5. **Annotation Specifics**: Detailed annotations indicate where to drill and tap.
6. **Time Constraint**: There is a time constraint for completing the task.


In [43]:
# Ask the chain an ad-hoc question typed by the user.
# Surrounding whitespace is dropped and a blank entry is rejected up front,
# so an empty question is never sent to the retriever and the LLM.
query = input('enter your query').strip()
if not query:
    raise ValueError("Query must not be empty.")
response = retrieval_chain.invoke({'input': query})
answer = response['answer']
print(answer)

The top view of the first PDF, as summarized, includes the following key features and dimensions:

- **Total Length**: The component has a total length of 323.57 mm.
- **Key Dimensions**: Some critical dimensions provided are 135.00 mm and 87.02 mm.
- **Additional Annotation**: There is an annotation of (332.02) mm, which might be an alternative or cumulative dimension.
- **Features**: The top view includes various holes and slots. These are detailed at specific positions labeled A, B, and C.

This information aids in understanding the layout and essential measurements for accurate 3D modeling and drafting of the component.
