<a href="https://colab.research.google.com/github/aayushdebugging/3D_ROOT_CHATBOT/blob/main/3DRoot_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### YOU NEED TO ADD HUGGINGFACE_TOKEN AND OPENAI_API_KEY IN SECRETS

In [None]:
# Install necessary libraries
!pip install "unstructured[all-docs]" pillow pydantic lxml matplotlib unstructured
!sudo apt-get update
!sudo apt-get install poppler-utils
!sudo apt-get install libleptonica-dev tesseract-ocr libtesseract-dev python3-pil tesseract-ocr-eng tesseract-ocr-script-latn
!pip install unstructured-pytesseract
!pip install tesseract-ocr



In [None]:
# ✅ Make sure the folder that receives the extracted blocks exists
import os

extracted_data_dir = "/content/extracted_data"
os.makedirs(extracted_data_dir, exist_ok=True)

# ✅ Partition the PDF into elements with Unstructured
from unstructured.partition.pdf import partition_pdf

pdf_path = "..........."  # Update this path to your PDF file

# Options are gathered in one place; Image and Table blocks are requested as
# files in `extracted_data_dir` rather than in the element payload.
partition_options = dict(
    strategy="hi_res",
    extract_images_in_pdf=True,
    extract_image_block_types=["Image", "Table"],
    extract_image_block_to_payload=False,
    extract_image_block_output_dir=extracted_data_dir,
)

raw_pdf_elements = partition_pdf(filename=pdf_path, **partition_options)

In [None]:
# ✅ Sort the extracted elements into per-modality lists
Text, Table, Image = [], [], []

# Markers are tried in this order and the first one found in the element's
# type string decides the bucket; elements matching none are dropped.
_bucket_by_marker = (
    ("NarrativeText", Text),
    ("Table", Table),
    ("Image", Image),
)

for element in raw_pdf_elements:
    type_repr = str(type(element))
    for marker, bucket in _bucket_by_marker:
        if marker in type_repr:
            bucket.append(str(element))
            break

In [None]:
import os


# ✅ Install LangChain and related libraries
# Must run before the imports below so the packages are available in the runtime.
!pip install langchain_core langchain_openai langchain chromadb

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# ✅ Setup OpenAI API key
# The key is read from the Colab secret named 'OPENAI_API_KEY' and exported as
# an environment variable of the same name.
# NOTE(review): presumably this is how the ChatOpenAI / OpenAIEmbeddings
# objects created later (without an explicit key) authenticate — confirm.
from google.colab import userdata
OPENAI_API_TOKEN = userdata.get('OPENAI_API_KEY')
os.environ["OPENAI_API_KEY"] = OPENAI_API_TOKEN

# ✅ Summarize Tables
# Chat model shared by the summarization chain.
model = ChatOpenAI(model="gpt-4", temperature=0)

# Prompt with a single `{element}` slot that receives the raw table string.
table_prompt = ChatPromptTemplate.from_template(
    """You are an assistant tasked with summarizing tables for retrieval. \
    These summaries will be embedded and used to retrieve the raw table elements. \
    Give a concise summary of the table that is well optimized for retrieval. Table:{element}"""
)

# Each input is forwarded unchanged into the prompt's `element` variable, and
# the model reply is reduced to a plain string.
_as_element = {"element": lambda x: x}
summarize_chain = _as_element | table_prompt | model | StrOutputParser()

# One summary per entry of `Table`, at most 5 requests in flight.
table_summaries = summarize_chain.batch(Table, config={"max_concurrency": 5})

# ✅ Summarize Text
# Prompt with a single `{element}` slot that receives the raw text passage.
text_prompt = ChatPromptTemplate.from_template(
    """You are an assistant tasked with summarizing text for retrieval. \
    These summaries will be embedded and used to retrieve the raw text elements. \
    Give a concise summary of the text that is well optimized for retrieval. Text:{element}"""
)

# `summarize_chain` is bound to `table_prompt`, so reusing it here would
# summarize narrative text with the table instructions and leave `text_prompt`
# unused. Build a dedicated chain around `text_prompt` instead.
text_summarize_chain = {"element": lambda x: x} | text_prompt | model | StrOutputParser()

# One summary per entry of `Text`, at most 5 requests in flight.
text_summaries = text_summarize_chain.batch(Text, {"max_concurrency": 5})

# ✅ Summarize Images
import base64

def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode and the base64 output is decoded to a
    UTF-8 ``str``.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")

def image_summarize(img_base64, prompt):
    """Ask a chat model to summarize one base64-encoded image.

    Args:
        img_base64: Base64 string of the image; it is embedded in a
            ``data:image/jpeg;base64,...`` URL.
        prompt: Instruction text sent alongside the image.

    Returns:
        The ``content`` of the model's reply message.
    """
    # Imported locally so this function does not depend on a file-level import.
    from langchain_core.messages import HumanMessage

    chat = ChatOpenAI(model="gpt-4-turbo-2024-04-09", max_tokens=1024)

    # The text and image parts must be the *content* of a single human
    # message. Passing the bare part dicts as the message list makes LangChain
    # try to parse each one as a message dict and reject it for lacking
    # 'role'/'content' keys.
    content_parts = [
        {
            "type": "text",
            "text": prompt
        },
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}
        }
    ]
    msg = chat.invoke([HumanMessage(content=content_parts)])
    return msg.content

def generate_img_summaries(path):
    """Encode and summarize every ``.jpg`` file found directly in *path*.

    Files are visited in sorted name order; names that do not end with
    ``.jpg`` are ignored.

    Returns:
        A pair ``(img_base64_list, image_summaries)`` of parallel lists: the
        base64 string of each image and the summary produced for it.
    """
    prompt = "Summarize the image concisely for retrieval."

    jpg_names = [name for name in sorted(os.listdir(path)) if name.endswith(".jpg")]

    img_base64_list = []
    image_summaries = []
    # Encode and summarize one file at a time so both lists grow in lockstep.
    for name in jpg_names:
        encoded = encode_image(os.path.join(path, name))
        img_base64_list.append(encoded)
        image_summaries.append(image_summarize(encoded, prompt))

    return img_base64_list, image_summaries

# ✅ Generate image summaries
# Runs over the directory the PDF extraction step wrote its image files to;
# the two results are parallel lists (base64 strings and their summaries).
img_base64_list, image_summaries = generate_img_summaries(extracted_data_dir)

# ✅ Setup LangChain Retriever
!pip install langchain_community

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
import uuid

# ✅ Create Multi-Vector Retriever
def create_multi_vector_retriever(vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images):
    """Build a MultiVectorRetriever that indexes summaries and stores raw contents.

    For each modality the summaries are added to *vectorstore* as Documents
    tagged with a ``doc_id``, and the raw contents are written to an in-memory
    docstore under the same ids. A modality whose summary list is empty is
    skipped.

    Args:
        vectorstore: Vector store that receives the summary documents.
        text_summaries, texts: Summaries and raw contents for text.
        table_summaries, tables: Summaries and raw contents for tables.
        image_summaries, images: Summaries and raw contents for images.

    Returns:
        The populated MultiVectorRetriever.
    """
    id_key = "doc_id"
    retriever = MultiVectorRetriever(
        vectorstore=vectorstore,
        docstore=InMemoryStore(),
        id_key=id_key,
    )

    def _index(summaries, contents):
        # One fresh id per raw content; summary i is linked to id i.
        doc_ids = [str(uuid.uuid4()) for _ in contents]
        summary_docs = []
        for position, summary in enumerate(summaries):
            summary_docs.append(
                Document(page_content=summary, metadata={id_key: doc_ids[position]})
            )
        retriever.vectorstore.add_documents(summary_docs)
        retriever.docstore.mset(list(zip(doc_ids, contents)))

    modality_pairs = (
        (text_summaries, texts),
        (table_summaries, tables),
        (image_summaries, images),
    )
    for summaries, contents in modality_pairs:
        if summaries:
            _index(summaries, contents)

    return retriever

# Chroma collection that holds the summary embeddings.
vectorstore = Chroma(
    collection_name="mm_rag",
    embedding_function=OpenAIEmbeddings(),
)

# Pair each summary list with the raw contents it was generated from.
retriever = create_multi_vector_retriever(
    vectorstore=vectorstore,
    text_summaries=text_summaries,
    texts=Text,
    table_summaries=table_summaries,
    tables=Table,
    image_summaries=image_summaries,
    images=img_base64_list,
)

# ✅ Create RAG Pipeline
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

def multi_modal_rag_chain(retriever):
    """Build the multimodal RAG chain: retrieve, split by modality, prompt, answer.

    Args:
        retriever: Runnable that maps the question string to the retrieved
            items. Items may be plain strings or objects exposing
            ``page_content``.

    Returns:
        A runnable that takes the user question (a string) and returns the
        model's answer as a string.
    """
    # Imported locally so this function does not depend on a file-level import.
    from langchain_core.messages import HumanMessage

    model = ChatOpenAI(temperature=0, model="gpt-4-turbo-2024-04-09", max_tokens=1024)

    def content_of(doc):
        # Items written to the docstore as raw strings come back as `str`,
        # which has no `.page_content`; fall back to the item itself.
        return getattr(doc, "page_content", doc)

    def is_image(content):
        # Base64 of the JPEG magic bytes FF D8 FF begins with "/9j".
        return content.startswith("/9j")

    def split_by_modality(docs):
        contents = [content_of(doc) for doc in docs]
        return {
            "texts": [c for c in contents if not is_image(c)],
            "images": [c for c in contents if is_image(c)],
        }

    def format_prompt(data_dict):
        parts = [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}}
            for image in data_dict["context"]["images"]
        ]
        parts.append({
            "type": "text",
            "text": f"User question: {data_dict['question']}\n\nText:\n" + "\n".join(data_dict["context"]["texts"])
        })
        # The parts are the content of ONE human message. Handing the bare
        # part dicts to the model makes LangChain treat each as a message dict
        # and reject it for lacking 'role'/'content' keys.
        return [HumanMessage(content=parts)]

    chain = (
        {
            "context": retriever | RunnableLambda(split_by_modality),
            "question": RunnablePassthrough(),
        }
        | RunnableLambda(format_prompt)
        | model
        | StrOutputParser()
    )

    return chain

# Build the question-answering chain on top of the multi-vector retriever.
chain_multimodal_rag = multi_modal_rag_chain(retriever)



In [None]:
# ✅ Query the RAG Pipeline
# The chain takes the question as a plain string; the value it returns is
# printed as the answer.
query = "What information is available about 3D Printing?"
result = chain_multimodal_rag.invoke(query)
print(result)