In [2]:
import os
import fitz  # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from dotenv import load_dotenv
import google.generativeai as genai
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Pull settings from a local .env file into the process environment,
# then read the Google API key from it (None when the variable is not set).
load_dotenv()
my_api_key = os.environ.get("GOOGLE_API_KEY")

In [5]:
# Open the PDF file. The context manager closes the document even if text
# extraction raises part-way through, so the file handle is never leaked.
with fitz.open("AI and Its Importance in Digital Transformation.pdf") as pdf_document:
    # Extract text from every page, in page order, and join once instead of
    # growing a string with repeated `+=`. The join consumes the generator
    # while the document is still open.
    text = "".join(page.get_text() for page in pdf_document)

# Print or process the extracted text
print(text)

AI and Its Importance in Digital Transformation 
In the rapidly evolving digital landscape, Artificial Intelligence (AI) stands at the forefront of 
technological advancements, playing a pivotal role in digital transformation across various 
industries. Digital transformation refers to the integration of digital technologies into all 
areas of business, fundamentally changing how companies operate and deliver value to 
customers. AI enhances this transformation by enabling organizations to automate 
processes, gain deeper insights from data, and create personalized customer experiences, 
thereby driving innovation, efficiency, and competitiveness. 
1. Automation and Efficiency 
AI enables automation of repetitive tasks, allowing businesses to focus on more strategic 
and creative activities. Automation powered by AI can streamline operations, reduce human 
error, and increase efficiency in processes ranging from customer service to supply chain 
management. For instance, AI-driven chat

In [6]:
# Break the extracted text into overlapping character-based chunks.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,  # separators are plain strings, not regex patterns
    length_function=len,       # chunk sizes are measured with len()
    chunk_overlap=200,         # overlap carried over between neighbouring chunks
    chunk_size=1000,           # maximum chunk length
)
# create_documents takes a list of texts; here the whole PDF is a single text.
chunks = text_splitter.create_documents([text])

In [7]:
# Inspect the splitter output: at this point `chunks` holds the objects
# returned by create_documents (Document instances, per the output below).
chunks

[Document(page_content='AI and Its Importance in Digital Transformation \nIn the rapidly evolving digital landscape, Artificial Intelligence (AI) stands at the forefront of \ntechnological advancements, playing a pivotal role in digital transformation across various \nindustries. Digital transformation refers to the integration of digital technologies into all \nareas of business, fundamentally changing how companies operate and deliver value to \ncustomers. AI enhances this transformation by enabling organizations to automate \nprocesses, gain deeper insights from data, and create personalized customer experiences, \nthereby driving innovation, efficiency, and competitiveness. \n1. Automation and Efficiency \nAI enables automation of repetitive tasks, allowing businesses to focus on more strategic \nand creative activities. Automation powered by AI can streamline operations, reduce human \nerror, and increase efficiency in processes ranging from customer service to supply chain'),
 Do

In [8]:
# Extract text content from Document objects
# Entries that are already plain strings are kept as-is, so re-running this
# cell after `chunks` has been rebound to strings no longer raises
# AttributeError on `str.page_content`.
chunks = [
    doc if isinstance(doc, str) else doc.page_content
    for doc in chunks
]

In [9]:
# Inspect `chunks` again: after the conversion above it is a list of plain
# strings (the page_content of each Document).
chunks

['AI and Its Importance in Digital Transformation \nIn the rapidly evolving digital landscape, Artificial Intelligence (AI) stands at the forefront of \ntechnological advancements, playing a pivotal role in digital transformation across various \nindustries. Digital transformation refers to the integration of digital technologies into all \nareas of business, fundamentally changing how companies operate and deliver value to \ncustomers. AI enhances this transformation by enabling organizations to automate \nprocesses, gain deeper insights from data, and create personalized customer experiences, \nthereby driving innovation, efficiency, and competitiveness. \n1. Automation and Efficiency \nAI enables automation of repetitive tasks, allowing businesses to focus on more strategic \nand creative activities. Automation powered by AI can streamline operations, reduce human \nerror, and increase efficiency in processes ranging from customer service to supply chain',
 'and creative activities.

In [10]:
# Embed each text chunk with the Google embedding model and index the
# resulting vectors in a FAISS vector store.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=my_api_key,
    model="models/embedding-001",
)
db = FAISS.from_texts(chunks, embedding=embeddings)

# Persist the index on disk under "faiss_index".
db.save_local("faiss_index")

In [11]:
# Set up retriever
# as_retriever() is called without arguments, so the vector store's default
# search settings are used when fetching context for a question.
retriever = db.as_retriever()

In [12]:
# Initialize LLM model
# The API key is passed explicitly, matching how the embeddings client is
# configured, instead of relying on the client finding GOOGLE_API_KEY in the
# environment on its own.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
    google_api_key=my_api_key,
)

In [14]:
# Prompt text for the QA chain. It has three placeholders:
#   {context}  - filled inside the <ctx></ctx> section
#   {history}  - filled inside the <hs></hs> section
#   {question} - the question being asked
template = """
<s>
Using the information contained in the context,
provide a comprehensive answer to the question.
Respond only to the question asked, ensuring your response is concise and relevant.
Reference the source document number when applicable.
If the answer cannot be determined from the context, state "unknown context."

Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question:
</s>
------
<ctx>
{context}
</ctx>
------
<hs>
{history}
</hs>
------
{question}
Answer:
"""

# Wrap the text in a PromptTemplate, declaring the placeholders it expects.
prompt = PromptTemplate(
    template=template,
    input_variables=["history", "context", "question"],
)


In [15]:
# Build the RetrievalQA chain: retrieved chunks are "stuffed" into the custom
# prompt, and a ConversationBufferMemory supplies the {history} placeholder,
# keyed on the "question" input.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
    chain_type_kwargs=dict(
        verbose=True,
        prompt=prompt,
        memory=ConversationBufferMemory(
            input_key="question",
            memory_key="history",
        ),
    ),
)

In [21]:
# Run the QA system
# The chain was built with verbose=True, so the chain trace and the fully
# formatted prompt are echoed to the output before the answer is printed.
print(qa.run("Give me the summary"))



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<s>
Using the information contained in the context,
provide a comprehensive answer to the question.
Respond only to the question asked, ensuring your response is concise and relevant.
Reference the source document number when applicable.
If the answer cannot be determined from the context, state "unknown context."

Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question:
</s>
------
<ctx>
term success of digital transformation efforts. 
Conclusion 
AI is not just a tool for automation; it is a catalyst for digital transformation that enables 
businesses to innovate, operate more efficiently, and deliver enhanced customer 
experiences. By harnessing the power of AI, organizations can unlock new levels of 
performance, resilience, and competiti