In [1]:
### Simple Gen AI App using Langchain
### Load the API keys from the .env file into the process environment
import os
from dotenv import load_dotenv

# Read the .env file and export its entries as environment variables.
load_dotenv()

# OPENAI_API_KEY is needed for the OpenAI calls; the LANGCHAIN_* values
# configure Langsmith tracing. Fail early with a readable message instead of
# the opaque "TypeError: str expected, not NoneType" that assigning a missing
# value to os.environ would raise.
REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the .env file or in the shell before running this notebook."
    )

# Langsmith tracing: this flag is set here rather than read from .env
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Confirm the tracing configuration before any LLM invocation
print(f"LANGCHAIN_TRACING_V2: {os.getenv('LANGCHAIN_TRACING_V2')}")
print(f"LANGCHAIN_PROJECT: {os.getenv('LANGCHAIN_PROJECT')}")

LANGCHAIN_TRACING_V2: true
LANGCHAIN_PROJECT: GenAIAPPWithOPENAI


In [7]:
# Data ingestion: the source material is scraped from a web page
from langchain_community.document_loaders import WebBaseLoader

# Initialize the WebBaseLoader for the LangSmith billing guide;
# the documents themselves are loaded later by calling load()
loader = WebBaseLoader(
    "https://docs.smith.langchain.com/administration/how_to_guides/organization_management/set_up_billing"
)
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1812ec86350>

In [8]:
# Calling load() on the loader returns the documents for the configured page
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/how_to_guides/organization_management/set_up_billing', 'title': 'Set up billing for your LangSmith account | 🦜️🛠️ LangSmith', 'description': 'If you are interested in the Enterprise plan, please contact sales. This guide is', 'language': 'en'}, page_content='\n\n\n\n\nSet up billing for your LangSmith account | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationTutorialsOptimize tracing spend on LangSmithHow-to GuidesSetupCreate an account and API keySet up an organizationSet up a workspaceSet up billing for your LangSmith accountUpdate invoice email, tax id and, business informationManage your organization using the APISet up access controlSet up resource tagsSAML SSOConce

In [None]:
### Step 2: divide the loaded content into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured for chunks of up to 1000 characters, with 200 characters
# of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Break the loaded documents (docs) into smaller, manageable chunks.
documents = text_splitter.split_documents(docs)
documents

In [13]:
## Step 3: convert the text chunks into vectors
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model wrapper used to turn text into numerical vectors.
embeddings = OpenAIEmbeddings()

# Core step: embed every chunk and store the vectors in a FAISS vector store.
# In more detail, FAISS.from_documents(documents, embeddings):
#   * iterates through each Document in the `documents` list;
#   * uses the `embeddings` object to call the OpenAI embedding API (or a
#     local embedding model, if configured differently) on the document text;
#   * that call converts the text into a numerical vector, which is stored.
vectorstoredb = FAISS.from_documents(
    documents,
    embeddings,
)

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x1814961bbb0>

In [14]:
# Sanity check: query the vector store with a sentence taken from the website
# and inspect the text of the first chunk that comes back.

query = "Personal organizations are limited to 5000 traces per month"
result = vectorstoredb.similarity_search(query)
result[0].page_content

'2. Add your credit card info\u200b\nAfter this step, you will no longer be rate limited to 5000 traces, and will be charged for any excess\ntraces at rates specified on our pricing page.\nPlus Plan: set up billing on a shared organization\u200b\nIf you have not yet created an organization, please do so by following this guide. This walkthrough assumes you are\nalready in a new organization.\nnoteNew organizations are not usable until a credit card is entered. After you complete the following steps, you will\ngain complete access to LangSmith.\n1. Click Subscribe on the Plus page\u200b\nnoteIf you are a startup building with AI, please instead click Apply Now on our Startup Plan. You may be\neligible for discounted prices and a generous free, monthly trace allotment.\n\n2. Review your existing members\u200b\nBefore subscribing, LangSmith lets you remove any added users that you would not\nlike to be charged for.'

In [None]:
from langchain_openai import ChatOpenAI

# Chat model instance (gpt-4o) used by the chains built in the following cells
llm = ChatOpenAI(
    model="gpt-4o",
)

In [None]:
#  Retrieval Chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# ChatPromptTemplate lets you define structured prompts for chat models. It is
# designed to handle messages from different roles (system, human, AI).
#
# The template needs BOTH placeholders:
#   {context} - filled with the combined text of the supplied documents
#   {input}   - the user's question
# Without {input} the model is told to "answer the following question" but is
# never shown one, so it can only paraphrase the context.
prompt = ChatPromptTemplate.from_template(
    """
Answer the following question based on the provided context:
<context>
{context}
</context>

Question: {input}
"""
)

# create_stuff_documents_chain is a convenience function in LangChain that
# creates a chain designed to combine a list of documents into a single string
# and then "stuff" them into the prompt for the LLM. This is a common strategy
# for providing context to an LLM.
# The line below creates the document-combining and answering chain.
document_chain = create_stuff_documents_chain(llm,prompt)
document_chain

In [19]:
# Run the document chain with a hand-written context instead of the vector store
from langchain_core.documents import Document

# "input"   - the question text.
# "context" - the documents the answer should be based on (the RAG context).
#
# In a full RAG application this context list would not be hardcoded. Instead,
# it would be the result of a similarity search on vectorstoredb (from the
# previous steps) based on the user's input, for instance:
#     relevant_docs = vectorstoredb.similarity_search("Personal organizations are limited")
# and relevant_docs would then be passed as the context.
document_chain.invoke(
    {
        "input": "Personal organizations are limited",
        "context": [
            Document(
                page_content="Personal organizations are limited to 5000 traces per month until a credit card is added. You can add a credit card on the Plans and Billing page as follows:"
            )
        ],
    }
)

'To exceed the limit of 5000 traces per month for personal organizations, you need to add a credit card. This can be done by going to the Plans and Billing page and updating the payment information by adding a credit card.'

In [None]:
# Adding a retriever
from langchain.chains import create_retrieval_chain

# as_retriever() wraps the vector store in a clean, standardized retriever
# object. It abstracts away the details of the vector search, so the lookup no
# longer has to be written by hand for every question.
retriever = vectorstoredb.as_retriever()

# This is where the RAG pipeline comes together: create_retrieval_chain builds
# the complete retrieval-augmented generation chain.
#   retriever      - takes the user's question (input) and finds the most
#                    relevant documents in vectorstoredb.
#   document_chain - the document-combining and answering chain created earlier.
retrieval_chain = create_retrieval_chain(
    retriever,
    document_chain,
)
retrieval_chain

In [None]:
# Ask the LLM through the retrieval chain.
# Earlier the same question went through the document chain with a hand-built
# context; with the retrieval chain only the question has to be supplied,
# which is much simpler to write.

response = retrieval_chain.invoke(
    {"input":"Personal organizations are limited"}
)
response["answer"]