#Simple Gen AI App Using Langchain

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT") 
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

In [3]:
##Data Ingestion --> From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs=loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="\n\n\n\n\nGet started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nObservabilityAnalyze

Load Data --> Docs --> Divide our documents into document chunks --> text --> vectors --> Vector Embeddings --> Vector Store DB --> Retriever with Vector

In [14]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Skip to main contentOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet start

In [13]:
#OpenAi Embedding

from langchain_openai import OpenAIEmbeddings
#OpenAIEmbeddings(model="text-embedding-3-large")
embeddings=OpenAIEmbeddings()

In [15]:
#storing the embedding and documents in to FAISS vector store

from langchain_community.vectorstores import FAISS

vectorstoredb=FAISS.from_documents(documents, embeddings)

In [17]:
#query from vector store db

vectorstoredb.similarity_search(query="Observability is important for any software application")[0].page_content

"LangSmith + LangChain OSSLangSmith is framework-agnostic —\xa0it can be used with or without LangChain's open source frameworks langchain and langgraph.If you are using either of these, you can enable LangSmith tracing with a single environment variable.\nFor more see the how-to guide for setting up LangSmith with LangChain or setting up LangSmith with LangGraph.\nObservability\u200b\nObservability is important for any software application, but especially so for LLM applications. LLMs are non-deterministic by nature, meaning they can produce unexpected results. This makes them trickier than normal to debug.\nThis is where LangSmith can help! LangSmith has LLM-native observability, allowing you to get meaningful insights from your application. LangSmith’s observability features have you covered throughout all stages of application development - from prototyping, to beta testing, to production."

In [18]:
#initializing the llm
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o", max_tokens=200)

In [None]:
#Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:
    <context>
    {context}
    </context>

    """
)

document_chain=create_stuff_documents_chain(llm, prompt)
document_chain

##RunnableBinding output
##Input → format_docs (assigns “context”) → ChatPromptTemplate (injects context) → ChatOpenAI (model call) → StrOutputParser (stringify the model’s output)

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000017DD9B8BB60>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000017DDA0446E0>, root_client=<openai.OpenAI object at 0x0000017DD9B89550>, root_async_client=<openai.AsyncOpenAI object at 0x0000017DDA044440>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'), max_tokens=200)
| S

In [11]:
##Importing the documents from Langchain_core
from langchain_core.documents import Document

document_chain.invoke(
    {
        "input":"Observability is important for any software application",
        "context":[Document(page_content="Observability is important for any software application, but especially so for LLM applications.")]
    }
)


'Observability is important for any software application, particularly for LLM (Large Language Model) applications.'

In [16]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x17df7992f90>

In [19]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever, document_chain)

In [20]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000017DF7992F90>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n\n    '), additional_kwargs={}

In [22]:
##Get the response from the LLM
response=retrieval_chain.invoke({"input":"Observability is important for any software application"})
response['answer']

"What is LangSmith, and how does it integrate with LangChain and LangGraph? \n\nLangSmith is a platform designed for building production-grade LLM (Large Language Model) applications. It is framework-agnostic, meaning it can be used independently or in conjunction with LangChain's open-source frameworks, such as langchain and langgraph. If you are using either of these frameworks, you can enable LangSmith tracing by setting a single environment variable. LangSmith provides LLM-native observability to gain insights into applications, tools for high-quality evaluations, and features designed for prompt engineering. The platform allows users to monitor applications, evaluate performance over production traffic, and iterate on models and prompts with tools like a Playground and version control."