### Load environments and libraries 

In [9]:
# Import necessary libraries
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import FAISS
import os
import textwrap

# Load environment variables
load_dotenv()

# Initialize LLM and embedding model
llm = ChatGroq(model="deepseek-r1-distill-llama-70b")
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Load PDF document from local file system
file_path = os.path.join(os.getcwd(), "data", "OpenAI_AI_Trends_Report_2025.pdf")
loader = PyPDFLoader(file_path)
documents = loader.load()

# Split the document into manageable chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=150,
    length_function=len
)
docs = text_splitter.split_documents(documents)

# Create FAISS vectorstore from embedded documents
vectorstore = FAISS.from_documents(docs, embedding_model)

# Convert vectorstore to retriever for semantic search
retriever = vectorstore.as_retriever()

# Define prompt template for RAG (Retrieval-Augmented Generation)
prompt_template = """
        Answer the question based on the context provided below. 
        If the context does not contain sufficient information, respond with: 
        "I do not have enough information about this."

        Context: {context}

        Question: {question}

        Answer:"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

# Function to format documents into plain text
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

# Build the RAG chain pipeline
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Invoke the RAG chain with a query and pretty print the result
response = rag_chain.invoke("AI is expanding in which area mostly")
print(textwrap.fill(response, width=100))

<think> Alright, I'm trying to figure out the answer to the question "AI is expanding in which area
mostly." The context provided is a bit repetitive, but I'll parse through it to extract the
necessary information.  First, the context mentions that AI agents are performing complex tasks
autonomously. This suggests that AI is being used in areas where tasks require some level of
independence and complexity. It's not just simple automation but handling intricate operations
without human intervention.  Next, the context points out that OpenAI's AI advancements are growing
exponentially. This indicates that the development and capabilities of AI technologies are rapidly
increasing. OpenAI is a leader in the field, so their advancements would significantly impact the
industry.  Then, the context states that in 2025, the AI market is expected to reach $62.7 billion.
This figure is mentioned multiple times, emphasizing the projected growth and expansion of the AI
market as a whole.  Looking a