## Simple Gen AI App Using LangChain

In [1]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty. Assigning None into
            os.environ would otherwise fail with an opaque TypeError.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; add it to your .env file or shell."
        )
    return value


# Re-assigning a value to its own key is effectively a presence check: it fails
# fast here instead of deep inside a later API call.
os.environ["OPENAI_API_KEY"] = _require_env("OPENAI_API_KEY")
# LangSmith tracking
os.environ["LANGCHAIN_API_KEY"] = _require_env("LANGCHAIN_API_KEY")
# LangSmith tracing: the switch is LANGCHAIN_TRACING_V2 (not LANGCHAIN_PROJECT_V2)
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = _require_env("LANGCHAIN_PROJECT")

In [2]:
## Data ingestion -- scrape the data we need from the website
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Page to load; its content is split and indexed in the following cells.
url = "https://docs.smith.langchain.com/administration/tutorials/manage_spend"

# Build the loader for that single page, then fetch it.
loader = WebBaseLoader(web_path=url)
docs = loader.load()


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters: maximum chunk length and the overlap shared by neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
splitted_text = splitter.split_documents(docs)

# Only a cell's last expression is rendered, so show the chunk count here
# instead of a bare `splitted_text` line that never displayed anything.
len(splitted_text)

17

In [4]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model, constructed with the library's default settings.
embedding = OpenAIEmbeddings()

# Embed every chunk and build a FAISS vector store from the result.
vector_db = FAISS.from_documents(
    documents=splitted_text,
    embedding=embedding,
)

In [5]:
# Display the vector store object to confirm it was created.
vector_db

<langchain_community.vectorstores.faiss.FAISS at 0x11cf2ed90>

In [6]:
# Text to look up in the vector store.
query = "LangSmith has two usage limits: total traces and extended"

# Run the similarity search and show the text of the first returned chunk.
result = vector_db.similarity_search(query=query)
first_match = result[0]
first_match.page_content

'Optimization 2: limit usage\u200b\nIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.\nLangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.\nTo set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits per workspace. For each workspace, the two limits appear, along\nwith a cost estimate:\n\nLets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a lim

In [7]:
from langchain_openai import ChatOpenAI

# Name of the chat model used to generate answers.
CHAT_MODEL = "gpt-4o"
llm = ChatOpenAI(model=CHAT_MODEL)

# Display the configured model object.
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x11dd3c290>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x11dd3de10>, root_client=<openai.OpenAI object at 0x11dd3ced0>, root_async_client=<openai.AsyncOpenAI object at 0x11dd3dbd0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [8]:
## Document chain: combines the prompt, the supplied documents and the LLM

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Prompt text; {input} receives the question and {context} the documents.
qa_template = """
Answer the question using only the context. If it's not in the context, say you don't know.

Question:
{input}

Context:
{context}
"""
prompt = ChatPromptTemplate.from_template(qa_template)

# Chain that formats the documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
document_chain



RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="\nAnswer the question using only the context. If it's not in the context, say you don't know.\n\nQuestion:\n{input}\n\nContext:\n{context}\n"), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x11dd3c290>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x11dd3de10>, root_client=<openai.OpenAI object at 0x11dd3ced0>, root_async_client=<openai.AsyncOpenAI object at 0x11dd3dbd0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputPar

In [9]:
from langchain_core.documents import Document

# Hand-written context document, used to exercise the chain without a retriever.
sample_context = Document(
    page_content="LangSmith has two usage limits: total traces and extended. These correspond to the two metrics we've been tracking on our usage graph. We can use these in tandem to have granular control over spend."
)

# NOTE(review): the "input" below is a statement fragment rather than a question, and the
# prompt tells the model to say it doesn't know when the answer is not in the context --
# probably why this call returns "I don't know."; consider passing a real question.
document_chain.invoke({
    "input": "LangSmith has two usage limits: total traces and extended",
    "context": [sample_context],
})

"I don't know."

### However, we want the documents to come from a retriever built on the vector store we just set up. That way, the retriever can dynamically select the most relevant documents and pass them in for a given question.

In [10]:
### Input -> Retriever -> vector store DB

# Display the vector store that the retriever is built on in the next cell.
vector_db

<langchain_community.vectorstores.faiss.FAISS at 0x11cf2ed90>

In [11]:
from langchain.chains import create_retrieval_chain

# Expose the vector store through the retriever interface.
retriever = vector_db.as_retriever()

# Full pipeline: retrieve documents for the input, then pass them to the document chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [12]:
# Display the composed chain to inspect how the retriever and document chain are wired together.
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x11cf2ed90>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="\nAnswer the question using only the context. If it's not in the context, say you don't know.\n\nQuestion:\n{input}\n\nContext:\n{context}

In [13]:
## Run the retrieval chain to get the LLM response
response = retrieval_chain.invoke({"input": "LangSmith usage limits?"})

# For each retrieved document, print its source and the first 200 characters of its text.
for doc in response.get("context", []):
    source = doc.metadata.get("source")
    preview = doc.page_content[:200]
    print(source, preview)

https://docs.smith.langchain.com/administration/tutorials/manage_spend your use case with LangSmith. For example, if you run evals as part of CI/CD in dev or staging, you may
want to be more liberal with your usage limits to avoid test failures.
Now that our limits are s
https://docs.smith.langchain.com/administration/tutorials/manage_spend Understand your current usage​
The first step of any optimization process is to understand current usage. LangSmith gives two ways to do this: Usage Graph
and Invoices.
Usage Graph​
The usage graph le
https://docs.smith.langchain.com/administration/tutorials/manage_spend Optimization 2: limit usage​
In the previous section, we managed data retention settings to optimize existing spend. In this section, we will
use usage limits to prevent future overspend.
LangSmith ha
https://docs.smith.langchain.com/administration/tutorials/manage_spend noteSome of the features mentioned in this guide are not currently available in Enterprise plan due to its
custom

In [14]:
# Display the full result returned by the retrieval chain (includes the 'input' and 'context' keys).
response

{'input': 'LangSmith usage limits?',
 'context': [Document(id='ddaa51eb-bd26-4aad-8cd8-7e503cb59bde', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='your use case with LangSmith. For example, if you run evals as part of CI/CD in dev or staging, you may\nwant to be more liberal with your usage limits to avoid test failures.\nNow that our limits are set, we can see that LangSmith shows a maximum spend estimate across all workspaces:'),
  Document(id='2eff39bc-1786-4874-85e1-9646a36581ac', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'

In [15]:
# Display only the retrieved documents stored under the "context" key.
response["context"]

[Document(id='ddaa51eb-bd26-4aad-8cd8-7e503cb59bde', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='your use case with LangSmith. For example, if you run evals as part of CI/CD in dev or staging, you may\nwant to be more liberal with your usage limits to avoid test failures.\nNow that our limits are set, we can see that LangSmith shows a maximum spend estimate across all workspaces:'),
 Document(id='2eff39bc-1786-4874-85e1-9646a36581ac', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Understand your current usage\u20