In [None]:
# ============================================
# 1. Environment Setup & API Keys
# ============================================

import os
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
os.environ['OPENAI_API_KEY']= os.getenv("OPENAI_API_KEY")

# Langsmith Tracking Configuration
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGSMITH_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]= os.getenv("LANGSMITH_PROJECT")

In [43]:
# ============================================
# 2. Data Ingestion from Web Source
# ============================================
from langchain_community.document_loaders import WebBaseLoader
loader= WebBaseLoader("https://docs.smith.langchain.com/#prompt-engineering")
docs=loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/#prompt-engineering', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="\n\n\n\n\nGet started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith.  Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceChangelogCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet started with LangSmith\nLangSmith is a platform fo

In [44]:
# ============================================
# 3. Text Splitting into Chunks
# ============================================

### Load Data--> Docs --> Divide out text into chunks--> text--> vectors--> Vector Embeddings-->Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter= RecursiveCharacterTextSplitter(chunk_size= 1000, chunk_overlap=200)
documents= text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/#prompt-engineering', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/#prompt-engineering', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Skip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith.  Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions F

In [45]:
# ============================================
# 4. Vector Embeddings & Vector Store Creation
# ============================================

from langchain_openai import OpenAIEmbeddings
embeddings= OpenAIEmbeddings()

In [30]:
from langchain_openai import ChatOpenAI
llm= ChatOpenAI(model="gpt-4o")

In [31]:
from langchain_community.vectorstores import FAISS
vectorstoredb= FAISS.from_documents(documents, embeddings)

In [32]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x1a66ec0cac0>

In [46]:
# ============================================
# 5. Query the Vector Store
# ============================================

## query from a vector store db
query= "LangSmith is a platform for building production-grade LLM applications."
result= vectorstoredb.similarity_search(query)
result[0].page_content

"LangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nObservabilityAnalyze traces in LangSmith and configure metrics, dashboards, alerts based on these.EvalsEvaluate your application over production traffic — score application performance and get human feedback on your data.Prompt EngineeringIterate on prompts, with automatic version control and collaboration features.\nLangSmith + LangChain OSSLangSmith is framework-agnostic —\xa0it can be used with or without LangChain's open source frameworks langchain and langgraph.If you are using either of these, you can enable LangSmith tracing with a single environment variable.\nFor more see the how-to guide for setting up LangSmith with LangChain or setting up LangSmith with LangGraph.\nObservability\u200b"

In [47]:
# ============================================
# 6. Creating Retrieval-Augmented Generation (RAG) Chain
# ============================================

### Retrieval Chain, Documents chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt= ChatPromptTemplate.from_template(
    """Answer the following question based only on the provided context:
    <context>
    {context}
    </context>
    """
)

document_chain= create_stuff_documents_chain(llm,prompt)
document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001A66F4BE1A0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001A66F4BE200>, root_client=<openai.OpenAI object at 0x000001A66F4BD810>, root_async_client=<openai.AsyncOpenAI object at 0x000001A66F4BDFF0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs

In [48]:
# ============================================
# 7. Testing Document Chain Invocation
# ============================================

from langchain_core.documents import Document
document_chain.invoke({
    "input": "LangSmith is a platform for building production-grade LLM applications.",
    "context": [Document(page_content="LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor ")]
})

'LangSmith is a platform designed for building production-grade LLM (Large Language Model) applications. It enables users to closely monitor these applications.'

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.


In [50]:
# 8. Full Retrieval Chain Integration
# ============================================

retriever= vectorstoredb.as_retriever()

from langchain.chains import create_retrieval_chain
retrieval_chain= create_retrieval_chain(retriever, document_chain)
retrieval_chain


RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001A66EC0CAC0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
    

In [51]:
# ============================================
# 9. Get Final Response from LLM
# ============================================

## Get the response from llm
response= retrieval_chain.invoke({"input":"LangSmith is a platform for building production-grade LLM applications."})
response

{'input': 'LangSmith is a platform for building production-grade LLM applications.',
 'context': [Document(id='f658e7c5-bb36-4664-84fe-afd3289ba5b6', metadata={'source': 'https://docs.smith.langchain.com/#prompt-engineering', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="LangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nObservabilityAnalyze traces in LangSmith and configure metrics, dashboards, alerts based on these.EvalsEvaluate your application over production traffic — score application performance and get human feedback on your data.Prompt EngineeringIterate on prompts, with automatic version control and collaboration features.\nLangSmith + LangChain OSSLangSmith is framework-agnostic —\xa0it can be used with or without 

In [52]:
response["answer"]

'What are some key features of LangSmith mentioned in the provided context?'