### Simple Gen AI APP Using Langchain

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

#Langsmith Tracking
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_PROJECT '] = os.getenv('LANGCHAIN_PROJECT')

In [11]:
## RAG PIPELINE
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

urL = "https://docs.smith.langchain.com/administration/tutorials/manage_spend"
loader = WebBaseLoader(web_path = urL)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
documents = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

vectorstore_db = FAISS.from_documents(documents,embeddings)

In [None]:
# Query From a vector store db
query = "LangSmith Traces usage is measured per workspace, because"
result = vectorstore_db.similarity_search(query)
result[0].page_content

"The first metric tracks all traces that you send to LangSmith. The second tracks all traces that also have our Extended 400 Day Data Retention.\nFor more details, see our data retention conceptual docs. Notice that these graphs look\nidentical, which will come into play later in the tutorial.\nLangSmith Traces usage is measured per workspace, because workspaces often represent development environments (as in our example),\nor teams within an organization. As a LangSmith administrator, we want to understand spend granularly per each of these units. In\nthis case where we just want to cut spend, we can focus on the environment responsible for the majority of costs first for the greatest savings.\nnoteLangSmith's Usage Graph and Invoice use the term tenant_id to refer to a workspace ID. They are interchangeable.\nIn the above image, the vast majority of usage is in the workspace with ID c27dd32c-7c80-4e8c-acde-bfcb67a90ab2. We can"

In [19]:
## Retrival Chain, Document chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model = "gpt-4o")

prompt = ChatPromptTemplate.from_messages([
    ("user", """Answer the following question based only on the provided context:
<context>
{context}
</context>""")
])

document_chain = create_stuff_documents_chain(llm, prompt)

document_chain.invoke({
    "input" : "LangSmith Traces usage is measured per workspace, because",
    "context" : [Document(page_content="LangSmith Traces usage is measured per workspace, because workspaces often represent development environments (as in our example), or teams within an organization.")]
})

'LangSmith Traces usage is measured per workspace. Workspaces typically represent development environments or teams within an organization.'

### Retriver -->
Act as an interface of getting information from vector store db

In [21]:
retriver = vectorstore_db.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriver, document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001D7C8493220>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='Answer the following question based only on the provided context:\n<context>\n{context}\n</context>'), additional_kwargs={})])
            | ChatOpen

In [22]:
#Getting responce from llm
response = retrieval_chain.invoke({"input": "LangSmith Traces usage is measured per workspace, because"})
response['answer']

'According to the provided context, LangSmith usage is measured per workspace, often corresponding to development environments or teams within an organization. The two metrics tracked by LangSmith are:\n\n1. **LangSmith Traces (Base Charge)**\n2. **LangSmith Traces (Extended Data Retention Upgrades)**\n\nThe usage is detailed in the Usage Graph, and the monetary spend related to this usage can be understood via Invoices. Usage and billing are broken down by "tenant_id," which is another term for Workspace ID. This allows administrators to track spending per workspace to help manage costs, especially when certain workspaces, like the one with ID `c27dd32c-7c80-4e8c-acde-bfcb67a90ab2`, contribute significantly to the overall spend.\n\nFor optimization, understanding current usage is the first step, utilizing both the Usage Graph and the invoices to get a clear view of costs and consumption.'

In [23]:
response

{'input': 'LangSmith Traces usage is measured per workspace, because',
 'context': [Document(id='65c21d4e-42a8-4e0c-8622-30e9df93dbee', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content="The first metric tracks all traces that you send to LangSmith. The second tracks all traces that also have our Extended 400 Day Data Retention.\nFor more details, see our data retention conceptual docs. Notice that these graphs look\nidentical, which will come into play later in the tutorial.\nLangSmith Traces usage is measured per workspace, because workspaces often represent development environments (as in our example),\nor teams within an organization. As a LangSmith administrator, we want to understand spend granularly per each of these units. In\nthis case where we j