### Install Required Libraries

In [1]:
!pip install langchain \
  langchain_community \
  InstructorEmbedding \
  sentence-transformers==2.2.2 \
  langchain-google-genai \
  faiss-cpu \
  pillow --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.6/974.6 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.8/321.8 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.1/127.1 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m145.0/145.0 kB[0m [31m14.0

### Setup API and Load Gemini-Pro

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [3]:
import os

# Never paste a real key into the notebook: read it from the GOOGLE_API_KEY
# environment variable. The placeholder is kept only as a fallback for when the
# variable is unset or empty, so the cell still runs on a fresh kernel.
api_key = os.environ.get("GOOGLE_API_KEY") or "YOUR_API_KEY_HERE"

In [4]:
# Build the Gemini chat model client, then send one short prompt as a smoke test
# and print the text of the reply.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    google_api_key=api_key,
)
result = llm.invoke("Write a 4 line poem of my love for chai")
print(result.content)

Golden brown, a crispy shell,
Spicy bliss, a savory spell.
With chutney dipped, a perfect bite,
Oh, samosa, my heart's delight! 



### Create Data Loader

In [5]:
# Load the FAQ CSV through a LangChain loader; each CSV row becomes one Document.
# CSVLoader is imported from `langchain_community` (installed above) because the
# `langchain.document_loaders` import path is deprecated.
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader(
    "/content/codebasics_faqs.csv",
    source_column="prompt",  # the "prompt" column is used as each document's `source` metadata
    encoding="latin-1",  # read the file as Latin-1
)
data = loader.load()

### Create Embedding

In [None]:
# Imported from `langchain_community` (installed above); the
# `langchain.embeddings` import path is deprecated.
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# Created with the library's default settings (no model name is passed here).
instructor_embeddings = HuggingFaceInstructEmbeddings()

# Smoke test: embed one sample question to confirm the embedding model works.
e = instructor_embeddings.embed_query("What is your refund policy?")

### Creating FAISS Vector Database

In [7]:
# Imported from `langchain_community` (installed above); the
# `langchain.vectorstores` import path is deprecated.
from langchain_community.vectorstores import FAISS

In [8]:
# Build a FAISS vector database from the documents loaded from the CSV file,
# using the instructor embedding model to vectorize them.
vectordb = FAISS.from_documents(
    documents=data,
    embedding=instructor_embeddings,
)

In [9]:
# Optional: persist the vector database locally under "faiss_index".
vectordb.save_local("faiss_index")

In [10]:
# Expose the vector database through a retriever object used for the lookups below.
retriever = vectordb.as_retriever()

#### Creating a Retriever Object From the Vector Database

* The retriever takes a text query as input
* Converts the query into an embedding vector
* Pulls the most similar vectors from the vector database

In [15]:
# Fetch the documents most relevant to a sample question and display them.
rdocs = retriever.invoke("Do you have placement support")
rdocs

[Document(page_content='prompt: Do you provide any virtual internship?\nresponse: Yes', metadata={'source': 'Do you provide any virtual internship?', 'row': 14}),
 Document(page_content='prompt: Do you provide any job assistance?\nresponse: Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.', metadata={'source': 'Do you provide any job assistance?', 'row': 11}),
 Document(page_content='prompt: Does this bootcamp have lifetime access?\nresponse: Yes', metadata={'source': 'Does this bootcamp have lifetime access?', 'row': 7}),
 Document(page_content='prompt: Do we have an EMI option?\nresponse: No', metadata={'source': 'Do we have an EMI option?', 'row': 13})]

### Making a prompt template and controlling hallucination

In [19]:
from langchain.prompts import PromptTemplate

# Instruction text sent to the model. It limits the answer to the supplied
# context and names an explicit fallback sentence, to discourage made-up answers.
prompt_template = """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
If the answer is not found in the context, kindly state "Sorry, I do not have answer for this question." Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}"""

# {context} and {question} are the two placeholders declared as input variables.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

### Make a prompt with the relevant docs using the RetrievalQA class

In [20]:
from langchain.chains import RetrievalQA

# Combine the LLM, the retriever and the custom prompt into one question-answering chain.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # "stuff" is one of several chain types (another is map-reduce)
    retriever=retriever,
    input_key="query",
    return_source_documents=True,  # also return the CSV-derived documents behind the answer
    chain_type_kwargs={"prompt": prompt},
)

In [21]:
# Ask a two-part question; the returned dict has 'query', 'result' and 'source_documents'.
chain.invoke("Do you guys provide intenrship and also do you offer EMI payment")

{'query': 'Do you guys provide intenrship and also do you offer EMI payment',
 'result': 'Sorry, I do not have answer for this question. \n',
 'source_documents': [Document(page_content='prompt: Do we have an EMI option?\nresponse: No', metadata={'source': 'Do we have an EMI option?', 'row': 13}),
  Document(page_content='prompt: Do you provide any virtual internship?\nresponse: Yes', metadata={'source': 'Do you provide any virtual internship?', 'row': 14}),
  Document(page_content='prompt: Do you provide any job assistance?\nresponse: Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.', metadata={'source': 'Do you provide any job assistance?', 'row': 11}),
  Document(page_content='prompt: Does this bootcamp have lifetime access?\nresponse: Yes', metadata={'source': 'Does this bootcamp have lifetime access?', 'row': 7})]}

In [18]:
# Run a second question through the same chain and display the returned dict.
chain.invoke("Do you have javascript course")

{'query': 'Do you have javascript course',
 'result': "This document doesn't contain the answer to that question. It only talks about the prerequisites and target audience for a data analytics course or bootcamp. \n",
 'source_documents': [Document(page_content='prompt: I have never done programming and belong to a non-technical background. Can I take this course?\nresponse: Yes, this is the perfect course for anyone who has never done coding and wants to build a career in the IT/Data Analytics industry or just wants to perform better in their current job or business using data.', metadata={'source': 'I have never done programming and belong to a non-technical background. Can I take this course?', 'row': 24}),
  Document(page_content='prompt: I have never done programming in my life. Can I take this bootcamp?\nresponse: Yes, this is the perfect bootcamp for anyone who has never done coding and wants to build a career in the IT/Data Analytics industry or just wants to perform better in 