In [2]:
# Simple GenAI app using LangChain

import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# Assigning `os.environ[name] = os.getenv(name)` does nothing when the variable
# is already set and raises an opaque `TypeError: str expected, not NoneType`
# when it is not. Validate explicitly instead and report every missing name at
# once so the fix is obvious.
_REQUIRED_ENV_VARS = ('OPENAI_API_KEY', 'LANGCHAIN_API_KEY', 'LANGCHAIN_PROJECT')
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if os.getenv(name) is None]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in your shell or in a .env file before running this notebook."
    )

# Flag read by LangChain/LangSmith to enable run tracing (see the LangSmith
# environment-variable configuration docs).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'

# Step 3: Setup LangSmith Tracer
from langsmith import traceable
from langchain.callbacks.tracers.langchain import LangChainTracer
tracer = LangChainTracer()
# Step 4: Use LangChain with OpenAI + LangSmith
from langchain_openai import ChatOpenAI

In [3]:
# Data ingestion: scrape the source content from the website
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# Loader pointed at the OpenAI API reference introduction page.
loader = WebBaseLoader(
    web_path="https://platform.openai.com/docs/api-reference/introduction"
)
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1272d0150>

In [5]:
import warnings

# Fetch the page configured on `loader`; the result is a list of Document objects.
docs = loader.load()

# Some sites answer plain HTTP clients with a JavaScript/cookie challenge
# (e.g. "Enable JavaScript and cookies to continue") instead of the real page.
# Indexing such a placeholder leaves the retriever with no useful context, so
# flag suspiciously short pages here instead of discovering the problem from a
# poor final answer. The threshold is a heuristic: a real reference page is
# expected to be far longer than a challenge message.
_MIN_PAGE_CHARS = 200
_suspect_sources = [
    str(doc.metadata.get('source', '<unknown>'))
    for doc in docs
    if len(doc.page_content.strip()) < _MIN_PAGE_CHARS
]
if not docs or _suspect_sources:
    warnings.warn(
        "The loader returned little or no text"
        + (f" for: {', '.join(_suspect_sources)}" if _suspect_sources else "")
        + ". The page may require JavaScript or cookies; downstream retrieval "
        "will have no meaningful context."
    )

docs

[Document(metadata={'source': 'https://platform.openai.com/docs/api-reference/introduction', 'language': 'No language found.'}, page_content='\n\n\n\n\n\n\n\n\n\n\n\nEnable JavaScript and cookies to continue\n\n\n\n')]

In [6]:
# Split the loaded documents into overlapping chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter

# chunk_size / chunk_overlap control how large each chunk is and how much
# neighbouring chunks share.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://platform.openai.com/docs/api-reference/introduction', 'language': 'No language found.'}, page_content='Enable JavaScript and cookies to continue')]

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embedding client; it is handed to the vector store to vectorize the chunks.
embeddings = OpenAIEmbeddings()


In [8]:
from langchain_community.vectorstores import FAISS

# Build a FAISS index from the chunked documents using the embedding client.
vectordb = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)
vectordb

<langchain_community.vectorstores.faiss.FAISS at 0x127ff46d0>

In [31]:
# Sanity-check the index: search with a sentence expected to appear in the page
# and show the text of the top hit.
query = (
    "This API reference describes the RESTful, streaming, and realtime APIs "
    "you can use to interact with the OpenAI platform"
)
result = vectordb.similarity_search(query)
result[0].page_content

'Enable JavaScript and cookies to continue'

In [9]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.callbacks.tracers.langchain import LangChainTracer
from langchain_openai import ChatOpenAI

# Tracer instance; chain invocations receive it through `config["callbacks"]`.
tracer = LangChainTracer()

# Chat model that generates the answer.
llm = ChatOpenAI(model="gpt-4o")

# Prompt with two placeholders: {context} receives the supplied documents and
# {input} receives the user's question.
prompt = ChatPromptTemplate.from_template(
    template="""
    Answer the question based on the provided context:
    <context>
    {context}
    </context>
    
    Question: {input}
    """
)

# Combine the prompt and the model into a chain that takes documents as context.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n    Answer the question based on the provided context:\n    <context>\n    {context}\n    </context>\n\n    Question: {input}\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x131ada550>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x131adb4d0>, root_client=<openai.OpenAI object at 0x131ada290>, root_async_client=<openai.AsyncOpenAI object at 0x131adb110>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), 

In [33]:
from langchain_core.documents import Document

# Call the document chain directly with a hand-written context document
# (no retrieval involved); the tracer is attached as a callback.
document_chain.invoke(
    {
        "input": "This API reference describes the RESTful, streaming",
        "context": [
            Document(
                page_content="This API reference describes the RESTful, streaming, and realtime APIs you can use to interact with the OpenAI platform"
            )
        ],
    },
    config={"callbacks": [tracer]},
)

'and realtime APIs you can use to interact with the OpenAI platform.'

In [34]:
# Inspect the vector store that the retriever is built from
vectordb

<langchain_community.vectorstores.faiss.FAISS at 0x16b6f1010>

In [37]:
from langchain.chains import create_retrieval_chain

# Retriever: expose the vector store through the retriever interface, then
# feed whatever it retrieves into the document (prompt + LLM) chain.
retriever = vectordb.as_retriever()
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [38]:
# Display the composed chain (retriever feeding the document chain) to inspect its structure
retriever_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x16b6f1010>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n    Answer the question based on the provided context:\n    <context>\n    {context}\n    </context>\n\n    Question: {input}\n    '), a

In [40]:
# Run the full retrieval chain on a question and show the generated answer,
# which is stored under the "answer" key of the result.
response = retriever_chain.invoke(
    {"input": "This API reference describes the RESTful, streaming"}
)
response["answer"]


'Unfortunately, the context provided is insufficient to fully describe or answer the question regarding the RESTful streaming API reference. Generally, a RESTful API is a design pattern for interacting with web services where resources are identified by URLs and verbs like GET, POST, PUT, and DELETE are used for operations. Streaming refers to a data transport design where data is delivered in a continuous flow, allowing real-time updates.\n\nPlease provide more detailed context or specify the question further for an accurate response.'