In [56]:
from langchain_community.document_loaders import WebBaseLoader
# from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI

import os
from dotenv import load_dotenv

In [57]:
## load all the environment variables
load_dotenv()

True

In [58]:
## load the document
loader = WebBaseLoader(web_path='https://docs.smith.langchain.com/')
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x2710f6a3c80>

In [59]:
docs = loader.load()

docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="\n\n\n\n\nGet started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nObservabilityAnalyze tr

In [60]:
## split the document into chunks
text_spliter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents = text_spliter.split_documents(docs)

documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Skip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet star

In [61]:
## Generate the embeddings
embeddings = OpenAIEmbeddings()

In [62]:
## store the embeddings to vector store
vectorstoredb = FAISS.from_documents(documents, embeddings)

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x2710e4d0050>

In [63]:
## semantic search
query = "AI applications involve writing prompts to instruct the LLM on what to do"
result = vectorstoredb.similarity_search(query)

result[0].page_content

'Get started by adding tracing to your application.\nCreate dashboards to view key metrics like RPS, error rates and costs.\n\nEvals\u200b\nThe quality and development speed of AI applications depends on high-quality evaluation datasets and metrics to test and optimize your applications on. The LangSmith SDK and UI make building and running high-quality evaluations easy.\n\nGet started by creating your first evaluation.\nQuickly assess the performance of your application using our off-the-shelf evaluators (Python only) as a starting point.\nAnalyze results of evaluations in the LangSmith UI and compare results over time.\nEasily collect human feedback on your data to improve your application.\n\nPrompt Engineering\u200b\nWhile traditional software applications are built by writing code, AI applications involve writing prompts to instruct the LLM on what to do. LangSmith provides a set of tools designed to enable and facilitate prompt engineering to help you find the perfect prompt for 

In [None]:
## load the llm 
llm  = ChatOpenAI(model='gpt-4o')

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000027127796AB0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002710E4CC770>, root_client=<openai.OpenAI object at 0x000002710C104080>, root_async_client=<openai.AsyncOpenAI object at 0x0000027127796B10>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [67]:
## ask a simple query
testQuery = 'What is the captial of Pakistan.'
result = llm.invoke(testQuery)

result

AIMessage(content='The capital of Pakistan is Islamabad.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 15, 'total_tokens': 23, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f9f4fb6dbf', 'finish_reason': 'stop', 'logprobs': None}, id='run-c650f814-c2ee-477f-b320-18653dfc47ae-0', usage_metadata={'input_tokens': 15, 'output_tokens': 8, 'total_tokens': 23, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [None]:
## Retrieval Chain, Document chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>{context}</context>
"""
)

prompt

ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>{context}</context>\n'), additional_kwargs={})])

In [70]:
document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>{context}</context>\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000027127796AB0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002710E4CC770>, root_client=<openai.OpenAI object at 0x000002710C104080>, root_async_client=<openai.AsyncOpenAI object at 0x0000027127796B10>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_

In [75]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangSmith has two usage limits: total traces and extended",
    "context":[Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph. ")]
})

'LangSmith has two usage limits: total traces and extended traces.'

In [78]:
retriever = vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(retriever, document_chain)

retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002710E4D0050>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>{context}</context>\n'), additional_kwargs={})])
            | ChatOpen

In [None]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)


In [79]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"LangSmith has two usage limits: total traces and extended"})
response['answer']

"What is the purpose of LangSmith?\n\nLangSmith is a platform for building production-grade LLM (Large Language Model) applications. It provides observability features to monitor and evaluate applications, aids in prompt engineering, and facilitates the creation and analysis of evaluation datasets to ensure high-quality AI application development. LangSmith integrates with LangChain's open source frameworks and helps in all stages of application development, from prototyping to production."

In [80]:
response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='eaa0416d-a822-4aed-b001-b50c413979b6', metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="LangSmith + LangChain OSSLangSmith integrates seamlessly with LangChain's open source frameworks langchain and langgraph, with no extra instrumentation needed.If you're already using either of these, see the how-to guide for setting up LangSmith with LangChain or setting up LangSmith with LangGraph.\nObservability\u200b\nObservability is important for any software application, but especially so for LLM applications. LLMs are non-deterministic by nature, meaning they can produce unexpected results. This makes them trickier than normal to debug.\nThis is where LangSmith can help! LangSmith has LLM-native observability, all