In [None]:
from typing import Any
import os
from unstructured.partition.pdf import partition_pdf
import shutil

# Start each run from an empty ./figures directory: delete any existing one, then recreate it.
# NOTE(review): nothing else in the visible notebook reads ./figures (extracted images are
# written to img_output_dir) — confirm whether this directory is still needed.
if os.path.exists("./figures"):
    shutil.rmtree("./figures")
os.makedirs("./figures")

# Source PDF and the names derived from it.
pdf_path = '../content/echap07.pdf'
pdf_name = os.path.basename(pdf_path)
# splitext removes whatever extension is present; slicing off a fixed four
# characters only works for three-letter extensions such as ".pdf".
vs_prefix = os.path.splitext(pdf_name)[0]
# Directory name of the FAISS index built for this PDF.
vs_out_path = "faiss_"+vs_prefix

# Directory handed to partition_pdf as the output location for extracted image blocks.
img_output_dir = '../content/'+vs_prefix
os.makedirs(img_output_dir, exist_ok=True)


# Report whether a FAISS index directory for this PDF is already on disk.
if not os.path.exists(vs_out_path):
    print(f"The directory {vs_out_path} does not exist.")
else:
    print(f"The directory {vs_out_path} already exists.")

# Get elements
# Partition the PDF with unstructured and keep the resulting element list.
# NOTE(review): the per-argument notes below follow the argument names; confirm the exact
# semantics against the documentation of the installed unstructured version.
raw_pdf_elements = partition_pdf(
    filename=pdf_path,
    strategy='hi_res',
    extract_images_in_pdf=True,
    extract_image_block_types=["Image", "Table"],  # presumably the block types saved as image files
    infer_table_structure=True,
    chunking_strategy="by_title",
    extract_image_block_output_dir=img_output_dir,  # a later cell reads image files back from this directory
)

In [None]:
import base64

# Buckets for the three kinds of content pulled from the PDF; each starts out empty.
text_elements, table_elements, image_elements = [], [], []

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The file contents, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

# Sort the partitioned elements into text chunks and tables by looking at the
# string form of each element's type; anything else is ignored.
for element in raw_pdf_elements:
    type_name = str(type(element))
    if 'CompositeElement' in type_name:
        text_elements.append(element)
    elif 'Table' in type_name:
        table_elements.append(element)

# From here on both lists hold the plain ``.text`` of each element, not the element objects.
text_elements = [chunk.text for chunk in text_elements]
table_elements = [chunk.text for chunk in table_elements]

In [None]:
# Base64-encode every image file that was written to the extraction directory.
# The list is rebuilt from scratch (not appended to) so re-running this cell
# cannot duplicate images, and file names are sorted so the order is stable
# across runs (os.listdir returns entries in arbitrary order).
image_dir = img_output_dir
image_elements = [
    encode_image(os.path.join(image_dir, image_file))
    for image_file in sorted(os.listdir(image_dir))
    # Case-insensitive match so ".PNG" / ".JPG" files are not skipped.
    if image_file.lower().endswith(('.png', '.jpg', '.jpeg'))
]

In [None]:
# Sanity check: number of base64-encoded images collected in image_elements.
len(image_elements)

In [None]:
from IPython.display import display, Image
# Preview the first collected image, if any: decode its base64 text back to
# bytes and render it inline.
if image_elements:
    image_data = base64.b64decode(image_elements[0])
    display(Image(image_data))

# Summaries

In [None]:
import os
from getpass import getpass

# Never paste the key into the notebook. A key that is already exported in the
# environment is kept as-is; only when it is missing or empty do we prompt for
# it (input is not echoed).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage

# Chat model clients used by the summarize_* helpers below (both with max_tokens=1024).
# NOTE(review): despite the names these are ChatOpenAI models, not chains, and
# chain_gpt_4_vision is configured with the "gpt-4o" model.
chain_gpt_35 = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=1024)  # used for text and table summaries
chain_gpt_4_vision = ChatOpenAI(model="gpt-4o", max_tokens=1024)  # used for image descriptions

def summarize_text(text_element):
    """Ask ``chain_gpt_35`` for a summary of a text chunk.

    Args:
        text_element: The text to summarize; it is interpolated into the prompt.

    Returns:
        The ``content`` of the model's reply.
    """
    request = HumanMessage(
        content=f"Summarize the following text:\n\n{text_element}\n\nSummary:"
    )
    reply = chain_gpt_35.invoke([request])
    return reply.content

def summarize_table(table_element):
    """Ask ``chain_gpt_35`` for a summary of a table.

    Args:
        table_element: The table content to summarize; it is interpolated into the prompt.

    Returns:
        The ``content`` of the model's reply.
    """
    request = HumanMessage(
        content=f"Summarize the following table:\n\n{table_element}\n\nSummary:"
    )
    reply = chain_gpt_35.invoke([request])
    return reply.content

def _guess_image_mime(encoded_image):
    """Return the MIME type implied by the leading bytes of a base64 image payload."""
    # Base64 of the 8-byte PNG signature followed by the zero high byte of the
    # first chunk length always starts with this prefix.
    if encoded_image.startswith("iVBORw0KGgo"):
        return "image/png"
    # Anything else keeps the previous hard-coded default.
    return "image/jpeg"


def summarize_image(encoded_image):
    """Ask ``chain_gpt_4_vision`` to describe a base64-encoded image.

    The instruction is sent as a system message (it was previously sent as an
    assistant turn), and the data URL advertises PNG for PNG payloads instead
    of always claiming JPEG.

    Args:
        encoded_image: Base64 text of the image bytes, as produced by ``encode_image``.

    Returns:
        The ``content`` of the model's reply.
    """
    mime_type = _guess_image_mime(encoded_image)
    prompt = [
        SystemMessage(content="You are a bot that is good at analyzing images."),
        HumanMessage(content=[
            {"type": "text", "text": "Describe the contents of this image."},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{encoded_image}"
                },
            },
        ])
    ]
    response = chain_gpt_4_vision.invoke(prompt)
    return response.content

In [None]:
# One summary per table, in the same order as table_elements.
table_summaries = []
for table_text in table_elements:
    table_summaries.append(summarize_table(table_text))

In [None]:
# One summary per text chunk, in the same order as text_elements.
text_summaries = []
for chunk_text in text_elements:
    text_summaries.append(summarize_text(chunk_text))

In [None]:
import base64
def get_image_size(image_element):
    """Return the size of a base64-encoded image after decoding.

    Args:
        image_element: Base64 text (or bytes) of the image.

    Returns:
        The decoded byte count divided by 1024 * 1024, as a float.
    """
    bytes_per_mb = 1024 * 1024
    decoded = base64.b64decode(image_element)
    return len(decoded) / bytes_per_mb


# One description per image, in the same order as image_elements.
image_summaries = []
for encoded in image_elements:
    image_summaries.append(summarize_image(encoded))

# FAISS vector store

In [None]:
import os
import uuid
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.document import Document
from langchain.vectorstores import FAISS
# Create Documents and Vectorstore
def _build_summary_documents(originals, summaries, content_type):
    """Pair each original item with its summary and wrap the pair in a Document.

    Args:
        originals: Original contents (text, table text, or base64 image).
        summaries: Summaries aligned index-for-index with ``originals``.
        content_type: Value stored under the ``'type'`` metadata key.

    Returns:
        A tuple ``(docs, id_content_pairs)``: the Documents (summary as
        ``page_content``; id, type and original content in ``metadata``) and a
        parallel list of ``(id, original)`` tuples.

    Raises:
        ValueError: If the two sequences differ in length. ``zip`` would
            otherwise silently drop the unmatched items.
    """
    if len(originals) != len(summaries):
        raise ValueError(
            f"{content_type}: {len(originals)} elements but {len(summaries)} summaries; "
            "re-run the summary cell for this content type"
        )
    docs = []
    id_content_pairs = []
    for original, summary in zip(originals, summaries):
        doc_id = str(uuid.uuid4())
        docs.append(Document(
            page_content=summary,
            metadata={'id': doc_id, 'type': content_type, 'original_content': original},
        ))
        id_content_pairs.append((doc_id, original))
    return docs, id_content_pairs


documents = []
retrieve_contents = []

# Same order as before: text first, then tables, then images.
for originals, summaries, content_type in (
    (text_elements, text_summaries, 'text'),
    (table_elements, table_summaries, 'table'),
    (image_elements, image_summaries, 'image'),
):
    new_docs, new_pairs = _build_summary_documents(originals, summaries, content_type)
    documents.extend(new_docs)
    retrieve_contents.extend(new_pairs)

# Create the vector database
vectorstore = FAISS.from_documents(documents=documents, embedding=OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"]))
# Persist the index to the directory that the "load db" cell reads it back from.
vectorstore.save_local("faiss_"+vs_prefix)

In [None]:
import base64
from openai import OpenAI
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from IPython.display import display, Image
from langchain.embeddings.base import Embeddings
from langchain.llms.base import LLM
from pydantic import BaseModel, Field

import llm_init
client = llm_init.openai_client

class MyEmbeddings(Embeddings):
    """Embeddings adapter that delegates to ``client.embeddings.create``.

    Every text is embedded with its own request to the
    ``text-embedding-ada-002`` model.
    """

    def __init__(self, client):
        """Store the client whose ``embeddings.create`` method is called."""
        super().__init__()
        self.client = client

    def embed_query(self, text):
        """Return the embedding vector of a single text."""
        result = self.client.embeddings.create(
            input=text,
            model="text-embedding-ada-002",
            encoding_format="float",
        )
        first_item = result.data[0]
        return first_item.embedding

    def embed_documents(self, texts):
        """Return one embedding per text, in input order (one request each)."""
        vectors = []
        for text in texts:
            vectors.append(self.embed_query(text))
        return vectors

    def __call__(self, text):
        """Allow the instance to be called like a function; same as ``embed_query``."""
        return self.embed_query(text)

class MyChatLLM(LLM):
    """LangChain ``LLM`` wrapper that answers prompts via ``client.chat.completions``.

    Each prompt is sent to ``gpt-3.5-turbo`` as a single user message preceded
    by a fixed system message, and the first choice's text is returned.
    """

    client: OpenAI = Field(...)

    def __init__(self, client, **kwargs):
        """Create the wrapper.

        Args:
            client: Object exposing ``chat.completions.create``.
            **kwargs: Extra fields forwarded unchanged to the ``LLM`` base class.
        """
        # ``client`` is a required model field, so it has to go through the
        # base constructor; calling ``super().__init__()`` without it fails
        # field validation before any attribute can be assigned.
        super().__init__(client=client, **kwargs)

    @property
    def _llm_type(self):
        """Identifier for this LLM implementation (required by the base class)."""
        return "my_chat_llm"

    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
        """Send ``prompt`` to the chat completions endpoint and return the reply text.

        Args:
            prompt: The user message content.
            stop: Optional stop sequences; forwarded to the API only when given.
            run_manager: Accepted for compatibility with the base signature; unused.
            **kwargs: Accepted for compatibility with the base signature; unused.

        Returns:
            ``message.content`` of the first completion choice.
        """
        request = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}],
            "stream": False,
        }
        if stop is not None:
            request["stop"] = stop
        completion = self.client.chat.completions.create(**request)
        return completion.choices[0].message.content


# Instantiate the custom wrappers around the client taken from llm_init.
embeddings = MyEmbeddings(client=client)
mdb_chat_llm = MyChatLLM(client=client)

## Load the vector store

In [None]:

# Load the FAISS index that an earlier cell saved under "faiss_"+vs_prefix,
# using MyEmbeddings to embed queries.
# SECURITY: allow_dangerous_deserialization=True opts in to loading serialized
# (pickle-based, per the LangChain FAISS docs — confirm) data; only load index
# directories that this notebook created itself.
db = FAISS.load_local("faiss_"+vs_prefix, embeddings, allow_dangerous_deserialization=True)

# Define the prompt template for the LLMChain
# The {context} and {question} placeholders are filled in by chat_with_llm_db.
# NOTE(review): the template frames a question-answering task as summarization and
# asks for both a "concise summary" and "as much detail as possible" — confirm the
# intended wording before relying on answer length.
prompt_template = """
You are an assistant tasked with summarizing tables and text.
Give a concise summary of the table or text.
Answer the question based only on the following context, which can include text, images, and tables:
{context}
Question: {question}
Don't answer if you are not sure and decline to answer and say "Sorry, I don't have much information about it."
Just return the helpful answer in as much detail as possible.
Answer:
"""

In [None]:
# Chain that renders prompt_template and sends the result to mdb_chat_llm.
qa_chain = LLMChain(llm=mdb_chat_llm, prompt=PromptTemplate.from_template(prompt_template))

def chat_with_llm_db(question): # gpt3.5
    """Answer ``question`` from the documents retrieved out of ``db``.

    Retrieved documents are concatenated into one context string, each
    prefixed with a ``[text]``, ``[table]`` or ``[image]`` tag. Text and table
    hits contribute their original content; image hits contribute their
    summary. Documents of any other type are skipped.

    Args:
        question: The user's question; also used as the similarity-search query.

    Returns:
        A tuple ``(result, relevant_images, relevant_image_summary)``: the
        chain's answer, the original content of every image hit, and the
        matching image summaries in the same order.
    """
    context_parts = []
    relevant_images = []
    relevant_image_summary = []
    for doc in db.similarity_search(question):
        doc_type = doc.metadata['type']
        if doc_type == 'image':
            context_parts.append('[image]' + doc.page_content)
            relevant_images.append(doc.metadata['original_content'])
            relevant_image_summary.append(doc.page_content)
        elif doc_type == 'text':
            context_parts.append('[text]' + doc.metadata['original_content'])
        elif doc_type == 'table':
            context_parts.append('[table]' + doc.metadata['original_content'])
    context = "".join(context_parts)
    result = qa_chain.run({'context': context, 'question': question})
    return result, relevant_images, relevant_image_summary

In [None]:
# Question to ask the indexed PDF. Another example to try:
#   "Explain the actions of governments in terms of forest cover, carbon storage. Answer in maximum 150 words."
# (Previously both strings were assigned to `query` back to back, so the first
# one was overwritten before it was ever used.)
query = "tell about India's National Missions"
result, relevant_images, relevant_image_summary = chat_with_llm_db(query)
print(result)
print(len(relevant_images))
# Show each retrieved image followed by the summary that matched the query.
for encoded_image, image_summary in zip(relevant_images, relevant_image_summary):
    display(Image(base64.b64decode(encoded_image)))
    print(image_summary)