# Install required dependencies

In [27]:
!pip install langchain
!pip install pypdf
!pip install chromadb



# Import required libraries

In [28]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [29]:
from langchain_community.document_loaders import PyPDFLoader

# Path of the PDF that serves as the knowledge source for the QA pipeline.
pdf_path = "/content/day 1.pdf"
loader = PyPDFLoader(pdf_path)

# Read the PDF into a list of Document objects (text plus source/page metadata).
pages = loader.load_and_split()

In [30]:
# Dump the whole list of loaded Document objects to check what was extracted from the PDF.
print(pages)

[Document(page_content='Data  Science  \nInterview  Questions  \n(30 days of Interview  Preparation)', metadata={'source': '/content/day 1.pdf', 'page': 0}), Document(page_content='INEURON.AI  \n \n Page 2 \n  \nQ1.  What is the difference between AI,  Data  Science , ML, and DL ? \n \nAns 1 :  \n         \n  \n \nArtificial Intelligence : AI is purely math and scientific exercis e, but when it became computa tional , it \nstarted to solve human problems formalized into a subset of computer science. A rtificial intelligence has \nchanged the original computational statistics paradigm to the modern idea that machines could mimic \nactual human capabilities, such as deci sion making and perfo rming more “human” tasks. Modern AI into \ntwo categories  \n1. General AI - Planning, decision making, identifying objects, recognizing sounds, social & \nbusiness transactions  \n2. Applied AI - driverless/ Autonomous car or machine smartly trade st ocks \n \nMachine Learning : Instead of engineer

In [31]:
# Look at the first loaded Document on its own (text and metadata).
pages[0]

Document(page_content='Data  Science  \nInterview  Questions  \n(30 days of Interview  Preparation)', metadata={'source': '/content/day 1.pdf', 'page': 0})

# Split the Extracted Data into Text Chunks

In [32]:
# Splitter that cuts text into chunks of at most 10000 characters,
# with 200 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=200,
)

# Merge the text of every loaded page into one string, pages separated by a blank line.
page_texts = [str(page.page_content) for page in pages]
context = "\n\n".join(page_texts)


In [33]:
# Split the joined document text into chunks; `texts` is the list of strings
# that is later embedded and stored in the vector store.
texts = text_splitter.split_text(context)

In [34]:
# Report how many chunks the splitter produced.
print(len(texts))

2


In [35]:
# Inspect the first chunk.
texts[0]

"Data  Science  \nInterview  Questions  \n(30 days of Interview  Preparation)\n\nINEURON.AI  \n \n Page 2 \n  \nQ1.  What is the difference between AI,  Data  Science , ML, and DL ? \n \nAns 1 :  \n         \n  \n \nArtificial Intelligence : AI is purely math and scientific exercis e, but when it became computa tional , it \nstarted to solve human problems formalized into a subset of computer science. A rtificial intelligence has \nchanged the original computational statistics paradigm to the modern idea that machines could mimic \nactual human capabilities, such as deci sion making and perfo rming more “human” tasks. Modern AI into \ntwo categories  \n1. General AI - Planning, decision making, identifying objects, recognizing sounds, social & \nbusiness transactions  \n2. Applied AI - driverless/ Autonomous car or machine smartly trade st ocks \n \nMachine Learning : Instead of engineers “teaching” or programming computers to have what they need \nto carry out tasks, that perhaps comp

In [36]:
import google.generativeai as genai

In [37]:
# Install or upgrade (quietly) the langchain-google-genai package; the cells
# below import `langchain_google_genai` from it.
%pip install --upgrade --quiet  langchain-google-genai

In [38]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings


In [39]:
import os
from getpass import getpass

# Prompt for the key only when the environment does not already provide one.
# getpass reads the value without echoing it, so the key never appears in the
# notebook source or its output.
google_key_missing = os.environ.get("GOOGLE_API_KEY") is None
if google_key_missing:
    os.environ["GOOGLE_API_KEY"] = getpass("Provide your Google API key here: ")


In [40]:
# import getpass
# import os

# if "GOOGLE_API_KEY" not in os.environ:
#     os.environ["GOOGLE_API_KEY"] = getpass("Provide your Google API key here")

# Initialize the Google Generative AI embedding model

In [41]:
# Embedding model client used to vectorise the text chunks and the queries.
# No API key is passed here; presumably it is read from the GOOGLE_API_KEY
# environment variable set earlier in the notebook (confirm against the library docs).
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [42]:
!pip install chromadb



In [43]:
from langchain_community.vectorstores import Chroma

# Create embeddings for each text chunk and save them in a vector store

In [44]:
# Embed every chunk with the Google embedding model and index the vectors in Chroma.
vector_store = Chroma.from_texts(texts, embeddings)

# Wrap the store in a retriever so it can be queried with a plain question string.
vector_index = vector_store.as_retriever()

In [45]:
# Question used both for retrieval here and for the QA chain further down.
question = "What is the difference between AI,  Data  Science , ML, and DL ?"

# Fetch the chunks most relevant to the question. `invoke` is the retriever's
# Runnable entry point and replaces the deprecated `get_relevant_documents`;
# it returns the same list of Document objects.
docs = vector_index.invoke(question)

In [46]:
# Display the documents the retriever returned for the question.
docs

[Document(page_content="Data  Science  \nInterview  Questions  \n(30 days of Interview  Preparation)\n\nINEURON.AI  \n \n Page 2 \n  \nQ1.  What is the difference between AI,  Data  Science , ML, and DL ? \n \nAns 1 :  \n         \n  \n \nArtificial Intelligence : AI is purely math and scientific exercis e, but when it became computa tional , it \nstarted to solve human problems formalized into a subset of computer science. A rtificial intelligence has \nchanged the original computational statistics paradigm to the modern idea that machines could mimic \nactual human capabilities, such as deci sion making and perfo rming more “human” tasks. Modern AI into \ntwo categories  \n1. General AI - Planning, decision making, identifying objects, recognizing sounds, social & \nbusiness transactions  \n2. Applied AI - driverless/ Autonomous car or machine smartly trade st ocks \n \nMachine Learning : Instead of engineers “teaching” or programming computers to have what they need \nto carry out t

# Create a Prompt Template

In [47]:
# Instruction prompt for the QA chain. `{context}` and `{question}` are the two
# placeholders filled in when the prompt is rendered.
# The `\n` sequences are ordinary escapes (the string is not raw), so they add
# extra blank lines to the rendered prompt.
# Fix: the placeholder used to be written `{context}?`, which appended a stray
# question mark to the supplied context in every prompt.
prompt_template = """
  Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
  provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
  Context:\n {context}\n
  Question: \n{question}\n

  Answer:
"""

# Wrap the raw template string in a PromptTemplate; the declared input
# variables match the two placeholders used in the template text.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [48]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini chat model that writes the final answer; the temperature passed to it is 0.3.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
)

In [49]:
# Build a question-answering chain of type "stuff" around the chat model,
# using the custom prompt defined earlier in the notebook.
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [50]:
# Run the QA chain over the retrieved documents for the given question.
# `invoke` replaces calling the chain object directly (the deprecated
# `Chain.__call__`). return_only_outputs=True keeps the inputs out of the
# result, so `response` holds only the chain's outputs.
response = chain.invoke(
    {"input_documents": docs, "question": question},
    return_only_outputs=True,
)

In [51]:
# Display the chain's result; the generated answer is under the 'output_text' key.
response

{'output_text': '**Artificial Intelligence (AI)** is a broad field that encompasses the study and development of intelligent agents, which are systems that can reason, learn, and act autonomously. AI has been used to create a wide range of applications, including self-driving cars, facial recognition software, and natural language processing systems.\n\n**Data Science** is a field that combines statistics, computer science, and domain knowledge to extract insights from data. Data scientists use a variety of techniques to analyze data, including machine learning, data mining, and visualization. Data science has been used to solve a wide range of problems, including fraud detection, customer segmentation, and product recommendation.\n\n**Machine Learning (ML)** is a subfield of AI that focuses on the development of algorithms that can learn from data. ML algorithms can be used to solve a variety of problems, including classification, regression, and clustering. ML has been used to create