#### Simple App

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

## langsmith tracing
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

In [2]:
## Data Ingestion -  From the website to scrape the data

from langchain_community.document_loaders import WebBaseLoader

  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
loader=WebBaseLoader("https://docs.smith.langchain.com/tutorials/Administrators/manage_spend")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1eb20e07700>

In [4]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'ü¶úÔ∏èüõ†Ô∏è LangSmith', 'language': 'en'}, page_content='\n\n\n\n\nü¶úÔ∏èüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentLangSmith docs have moved! Find the LangSmith docs at the new LangChain Docs site.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppPage Not FoundWe could not find what you were looking for.Head back to our main docs page or use the search bar to find the page you need.CommunityLangChain ForumTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright ¬© 2026 LangChain, Inc.\n\n')]

In [None]:
### Load Data ---> Docs ---> Divide our docs into chunks documents ---> text ---> Vectors ---> Vector Embeddings ---> Store in VectorDB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)
splitted_docs = text_splitter.split_documents(docs)
splitted_docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'ü¶úÔ∏èüõ†Ô∏è LangSmith', 'language': 'en'}, page_content='ü¶úÔ∏èüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentLangSmith docs have moved! Find the LangSmith docs at the new LangChain Docs site.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppPage Not FoundWe could not find what you were looking for.Head back to our main docs page or use the search bar to find the page you need.CommunityLangChain ForumTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright ¬© 2026 LangChain, Inc.')]

In [6]:
## Convert These Text into Vectors
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()


In [7]:
## Store these vector into some kind of vector Database
from langchain_community.vectorstores import FAISS
vectorstoredb = FAISS.from_documents(splitted_docs, embeddings)

In [8]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x1eb23cde1d0>

In [9]:
## Query from a vectore store DB
query = "LangSmith has two usage limits: total traces and extended"
result = vectorstoredb.similarity_search(query)
result[0].page_content

'ü¶úÔ∏èüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentLangSmith docs have moved! Find the LangSmith docs at the new LangChain Docs site.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppPage Not FoundWe could not find what you were looking for.Head back to our main docs page or use the search bar to find the page you need.CommunityLangChain ForumTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright ¬© 2026 LangChain, Inc.'

In [15]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o", temperature=0)

In [54]:
## Retrieval Chain, Document Chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
        Answer the following question based only on the provided context:
        <context>
        {context}
        </context>
        Question: {input}
    """
)

document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n        Answer the following question based only on the provided context:\n        <context>\n        {context}\n        </context>\n        Question: {input}\n    '), additional_kwargs={})])
| ChatOpenAI(profile={'max_input_tokens': 128000, 'max_output_tokens': 16384, 'image_inputs': True, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True, 'structured_output': True, 'image_url_inputs': True, 'pdf_inputs': True, 'pdf_tool_message': True, 'image_tool_messa

In [55]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangSmith has two usage limits: total traces and extended",
    "context":[Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph. ")]
})



"traces. These correspond to the two metrics we've been tracking on our usage graph."

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given questions.

In [56]:
## Input --> Retriever --> VectorestoreDB

from langchain_classic.chains.retrieval import create_retrieval_chain
retriever = vectorstoredb.as_retriever() 
retriever_chain = create_retrieval_chain(retriever, document_chain)
retriever_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001EB23CDE1D0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n        Answer the following question based only on the provided context:\n        <context>\n        {context}\n        </contex

In [57]:
## Get the response from the LLM
response = retriever_chain.invoke({
     "input":"LangSmith has two usage limits: total traces and extended",
})

response['answer']

"The provided context does not contain specific information about LangSmith's usage limits regarding total traces and extended usage."

In [58]:
response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='acbab23d-0c98-4da2-bd70-b613417da105', metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'ü¶úÔ∏èüõ†Ô∏è LangSmith', 'language': 'en'}, page_content='ü¶úÔ∏èüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentLangSmith docs have moved! Find the LangSmith docs at the new LangChain Docs site.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppPage Not FoundWe could not find what you were looking for.Head back to our main docs page or use the search bar to find the page you need.CommunityLangChain ForumTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright ¬© 2026 LangChain, Inc.')],
 'answer': "The provided context does not contain specific information about LangSmith's usage limits regarding total traces and extended usage."}

In [46]:
response['context']

[Document(id='acbab23d-0c98-4da2-bd70-b613417da105', metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'ü¶úÔ∏èüõ†Ô∏è LangSmith', 'language': 'en'}, page_content='ü¶úÔ∏èüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentLangSmith docs have moved! Find the LangSmith docs at the new LangChain Docs site.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppPage Not FoundWe could not find what you were looking for.Head back to our main docs page or use the search bar to find the page you need.CommunityLangChain ForumTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright ¬© 2026 LangChain, Inc.')]

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# embeddings
embeddings = OpenAIEmbeddings()

# vectorstore (assuming you already created it)
vectorstore = FAISS.from_documents(docs, embeddings)

retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


In [22]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

prompt = ChatPromptTemplate.from_template(
    """
    Answer the question based only on the context below:

    {context}

    Question: {question}
    """
)

rag_chain = (
    {
        "context": retriever,   # retriever = vectorstore.as_retriever()
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
)

rag_chain.invoke("What is LangSmith?")


AIMessage(content='LangSmith appears to be a documentation platform related to LangChain, providing resources and information about its functionality and usage. The document suggests it includes an API reference and links to various programming language SDKs, such as Python and JavaScript/TypeScript. However, the specific details regarding what LangSmith is or its features are not clearly outlined in the provided context.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 72, 'prompt_tokens': 240, 'total_tokens': 312, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_c4585b5b9c', 'id': 'chatcmpl-CwMAJMJhytTiN4Od1ZYOHIKu1a1H8', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, 

In [19]:
import langchain
print(langchain.__version__)

import sys
print(sys.executable)

import langchain
print(langchain.__version__)
print(langchain.__file__)

1.2.0
c:\Users\amank\.conda\envs\langchain_env\python.exe
1.2.0
c:\Users\amank\.conda\envs\langchain_env\lib\site-packages\langchain\__init__.py
