#### Simple GenAI APP using Langchain

In [45]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [46]:
#data ingestion --from website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

loader=WebBaseLoader("https://www.langchain.com/pricing-langsmith?_gl=1*91275m*_gcl_au*NDYzNjI2NTkyLjE3NTExMzk1MTA.*_ga*NzM2NjExNTExLjE3NTEwNTg2NzI.*_ga_47WX3HKKY2*czE3NTExMzk1MTAkbzQkZzEkdDE3NTExMzk1NDQkajI2JGwwJGgw")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x132759810>

In [47]:
docs=loader.load()
docs

[Document(metadata={'source': 'https://www.langchain.com/pricing-langsmith?_gl=1*91275m*_gcl_au*NDYzNjI2NTkyLjE3NTExMzk1MTA.*_ga*NzM2NjExNTExLjE3NTEwNTg2NzI.*_ga_47WX3HKKY2*czE3NTExMzk1MTAkbzQkZzEkdDE3NTExMzk1NDQkajI2JGwwJGgw', 'title': 'Plans and Pricing - LangChain', 'description': "Pricing for LangChain products for teams of any size. Choose the plan that suits your needs, whether you're an individual developer or enterprise.", 'language': 'en'}, page_content='Plans and Pricing - LangChain\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nProducts\n\nFrameworksLangGraphLangChainPlatformsLangSmithLangGraph PlatformResources\n\nResources HubBlogCustomer StoriesLangChain AcademyCommunityExpertsChangelogDocs\n\nPythonLangGraphLangSmithLangChainJavaScriptLangGraphLangSmithLangChainCompany\n\nAboutCareersPricingGet a demoSign upPricing plans for teams of\xa0any\xa0sizeGet all the LangChain products -- pay for what you useDeveloperFor hobbyist projects by solo devs.Starting at$0 / monththen pay

In [48]:
### Load Data--> Docs-->Divide our Docuemnts into chunks documents
# text-->vectors-->Vector Embeddings
# --->Vector Store DB


from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
documents=text_splitter.split_documents(docs)


In [49]:
documents

[Document(metadata={'source': 'https://www.langchain.com/pricing-langsmith?_gl=1*91275m*_gcl_au*NDYzNjI2NTkyLjE3NTExMzk1MTA.*_ga*NzM2NjExNTExLjE3NTEwNTg2NzI.*_ga_47WX3HKKY2*czE3NTExMzk1MTAkbzQkZzEkdDE3NTExMzk1NDQkajI2JGwwJGgw', 'title': 'Plans and Pricing - LangChain', 'description': "Pricing for LangChain products for teams of any size. Choose the plan that suits your needs, whether you're an individual developer or enterprise.", 'language': 'en'}, page_content='Plans and Pricing - LangChain\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nProducts\n\nFrameworksLangGraphLangChainPlatformsLangSmithLangGraph PlatformResources\n\nResources HubBlogCustomer StoriesLangChain AcademyCommunityExpertsChangelogDocs\n\nPythonLangGraphLangSmithLangChainJavaScriptLangGraphLangSmithLangChainCompany'),
 Document(metadata={'source': 'https://www.langchain.com/pricing-langsmith?_gl=1*91275m*_gcl_au*NDYzNjI2NTkyLjE3NTExMzk1MTA.*_ga*NzM2NjExNTExLjE3NTEwNTg2NzI.*_ga_47WX3HKKY2*czE3NTExMzk1MTAkbzQkZzEkdDE3NTE

In [50]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [51]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x133102520>

In [52]:
## Query From a vector db
query="LangSmith plans for teams of any size"
result=vectorstoredb.similarity_search(query)
result[0].page_content

'Our Developer plan is a great choice for personal projects. You will have 1 free seat with access to LangSmith (5k base traces/month included).\xa0\xa0\u200dThe Plus plan is for teams that want to self-serve with moderate usage and collaboration needs. You can purchase up to 10 seats with access to LangSmith (10k base traces/month included). You will be able to ship agents with our managed LangGraph Platform Cloud service, with 1 free dev-sized deployment included.\xa0\u200dThe Enterprise plan is for teams that'

In [53]:
#till now we imported data ,divided into chunks,converted into vectors ,then stored in vectordb 
# That lets us find similar chunks for a given query — but to get meaningful answers, we need to pass those chunks to a language model. That’s where RAG (Retrieval-Augmented Generation) comes in.

In [54]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [55]:
#Retrival Chain,Documents Chain 
#A document chain in LangChain is any chain designed to:Take a list of documents and process them in a way that’s useful for an LLM.

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x133102b10>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x133102d70>, root_client=<openai.OpenAI object at 0x1333959d0>, root_async_client=<openai.AsyncOpenAI object at 0x133395bf0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_docume

In [62]:
from langchain_core.documents import Document
print(Document)
document_chain.invoke({
    "input":"LangSmith plans for teams of any size",
    "context":[Document(page_content="LangSmith plans for teams of any size Looking for LangGraph Platform pricing?")]
})

<class 'langchain_core.documents.base.Document'>


'The context does not provide specific details about LangGraph Platform pricing. It only mentions that LangSmith plans for teams of any size exist.'

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [57]:
### input--->retriever--->vectorstoredb
retriever=vectorstoredb.as_retriever()



In [58]:
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

In [59]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x133102520>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | ChatOpe

In [60]:
#GET THE RESPONSE FROM THE LLM
response=retrieval_chain.invoke({"input":"LangSmith plans for teams of any size"})
response['answer']

"What does the Enterprise plan offer according to the provided context?\n\nThe Enterprise plan offers advanced deployment, security, and support features. These include alternative LangSmith & LangGraph Platform deployment options, such as hybrid and self-hosted solutions to ensure that data doesn't leave your VPC. It also provides custom SSO and RBAC, access to a deployed engineering team, and support SLA. This plan includes everything in the Plus plan, along with these additional capabilities."

In [61]:
response['context']

[Document(id='c3bc0648-0d07-4a0f-bc53-031553856570', metadata={'source': 'https://www.langchain.com/pricing-langsmith?_gl=1*91275m*_gcl_au*NDYzNjI2NTkyLjE3NTExMzk1MTA.*_ga*NzM2NjExNTExLjE3NTEwNTg2NzI.*_ga_47WX3HKKY2*czE3NTExMzk1MTAkbzQkZzEkdDE3NTExMzk1NDQkajI2JGwwJGgw', 'title': 'Plans and Pricing - LangChain', 'description': "Pricing for LangChain products for teams of any size. Choose the plan that suits your needs, whether you're an individual developer or enterprise.", 'language': 'en'}, page_content='Our Developer plan is a great choice for personal projects. You will have 1 free seat with access to LangSmith (5k base traces/month included).\xa0\xa0\u200dThe Plus plan is for teams that want to self-serve with moderate usage and collaboration needs. You can purchase up to 10 seats with access to LangSmith (10k base traces/month included). You will be able to ship agents with our managed LangGraph Platform Cloud service, with 1 free dev-sized deployment included.\xa0\u200dThe Enterp