In [1]:
!pip install langchain
!pip install langchain-community
!pip install sentence-transformers
!pip install faiss-gpu

Collecting langchain-community
  Downloading langchain_community-0.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.10 (from langchain-community)
  Downloading langchain-0.3.10-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.22 (from langchain-community)
  Downloading langchain_core-0.3.22-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [14]:
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Initialize a Hugging Face pipeline
hf_pipeline = pipeline("text-generation", model="gpt2", max_new_tokens=200)

# Wrap it for LangChain
model = HuggingFacePipeline(pipeline=hf_pipeline)


In [3]:
from langchain.chains import RetrievalQA

In [4]:
from langchain.document_loaders.csv_loader import CSVLoader

# Specify the encoding as 'latin1' or 'Windows-1252' when initializing CSVLoader
loader = CSVLoader(file_path='/content/codebasics_faqs.csv',
                  source_column="prompt",
                  encoding='latin1')  # Or try 'Windows-1252' if 'latin1' doesn't work

docs = loader.load()

In [6]:
from langchain_community.embeddings import EmbaasEmbeddings
import requests

embaas_api_key = "emb_1416341ad503f5f1da2bb63a9cdce0e458059c97b64fce3c"

# Using default model and instruction
embeddings = EmbaasEmbeddings(
    embaas_api_key=embaas_api_key,
    instruction="Represent the question for retrieval: "
)

e = embeddings.embed_query("What is your refund policy?")


In [7]:
len(e)

1024

In [8]:
e[:5]

[0.0011739731, -0.05053711, 0.04547119, 0.0068588257, -0.04724121]

In [10]:
from langchain.vectorstores import FAISS

vectordb = FAISS.from_documents(documents=docs,
                            embedding=embeddings)

retriever = vectordb.as_retriever(score_threshold = 0.7)

In [11]:
rdocs = retriever.get_relevant_documents("how about job placement support?")
rdocs

  rdocs = retriever.get_relevant_documents("how about job placement support?")


[Document(metadata={'source': 'Do you provide any job assistance?', 'row': 11}, page_content='prompt: Do you provide any job assistance?\nresponse: Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.'),
 Document(metadata={'source': 'Can I add this course to my resume? If Yes, how?', 'row': 34}, page_content='prompt: Can I add this course to my resume? If Yes, how?\nresponse: Absolutely, we have a section in this course explaining how you can add the learnings from this course in your resume that will appeal to the hiring team.'),
 Document(metadata={'source': 'Do you provide any virtual internship?', 'row': 14}, page_content='prompt: Do you provide any virtual internship?\nresponse: Yes'),
 Document(metadata={'source': 'I\x92m not sure if this bootcamp is good enough for me to invest some \nmoney. What can I do?', 'row': 4}, page_content='prompt: I\x92m

In [16]:
from langchain.prompts import PromptTemplate

prompt_template = """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}"""


PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
chain_type_kwargs = {"prompt": PROMPT}


from langchain.chains import RetrievalQA

chain = RetrievalQA.from_chain_type(llm=model,
                            chain_type="stuff",
                            retriever=retriever,
                            input_key="query",
                            return_source_documents=True,
                            chain_type_kwargs=chain_type_kwargs)



In [17]:
chain('Do you provide job assistance and also do you provide job gurantee?')


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'query': 'Do you provide job assistance and also do you provide job gurantee?',
 'result': 'Given the following context and a question, generate an answer based on this context only.\nIn the answer try to provide as much text as possible from "response" section in the source document context without making much changes.\nIf the answer is not found in the context, kindly state "I don\'t know." Don\'t try to make up an answer.\n\nCONTEXT: prompt: Do you provide any job assistance?\nresponse: Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.\n\nprompt: Do you provide any virtual internship?\nresponse: Yes\n\nprompt: Will this course guarantee me a job?\nresponse: We created a much lighter version of this course on YouTube available for free (click this link) and many people gave us feedback that they were able to fetch jobs (see testimonials). Now this p

In [18]:
chain("Do you guys provide internship and also do you offer EMI payments?")


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'query': 'Do you guys provide internship and also do you offer EMI payments?',
 'result': 'Given the following context and a question, generate an answer based on this context only.\nIn the answer try to provide as much text as possible from "response" section in the source document context without making much changes.\nIf the answer is not found in the context, kindly state "I don\'t know." Don\'t try to make up an answer.\n\nCONTEXT: prompt: Do you provide any virtual internship?\nresponse: Yes\n\nprompt: Do we have an EMI option?\nresponse: No\n\nprompt: Do you provide any job assistance?\nresponse: Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.\n\nprompt: I\x92m not sure if this bootcamp is good enough for me to invest some \nmoney. What can I do?\nresponse: We got you covered. Go ahead and watch our youtube videos if you like them and want to 