In [3]:
# Simple GenAI App using LangChain
import os
from dotenv import load_dotenv 

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

## Langsmith tracking. 
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

In [4]:
## Data ingestion - From website we need to scrape the data first. 

from langchain_community.document_loaders import WebBaseLoader


In [14]:
loader=WebBaseLoader("https://en.wikipedia.org/wiki/Boeing_777#777-300ER_(B77W)")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x266db943100>

In [15]:
docs = loader.load() 
docs 

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Boeing_777#777-300ER_(B77W)', 'title': 'Boeing 777 - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nBoeing 777 - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload fileSpecial pages\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)

In [None]:
### Load Data --> Docs --> Divide our documents into chunk documents --> Texts --> Vectors --> Create Vector Embeddings --> Store in Vector DB 

from langchain.text_splitter import RecursiveCharacterTextSplitter 
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

texts 



[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Boeing_777#777-300ER_(B77W)', 'title': 'Boeing 777 - Wikipedia', 'language': 'en'}, page_content='Boeing 777 - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload fileSpecial pages\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n

In [17]:
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()



In [18]:
from langchain_community.vectorstores import FAISS 
vectorstoredb = FAISS.from_documents(texts, embeddings)
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x266db51bd90>

In [20]:
query = "Triple Seven" 

result = vectorstoredb.similarity_search(query)
result[0].page_content

'The Triple Seven can accommodate a ten–abreast seating layout and has a typical 3-class capacity of 301 to 368 passengers, with a range of 5,240 to 8,555 nautical miles [nmi] (9,700 to 15,840\xa0km; 6,030 to 9,840\xa0mi). The jetliner is recognizable for its large-diameter turbofan engines, raked wingtips, six wheels on each main landing gear, fully circular fuselage cross-section, and a blade-shaped tail cone. The 777 became the first Boeing airliner to use fly-by-wire controls and to apply a carbon composite structure in the tailplanes.'

In [29]:
## Retrieval Chain, Document chain. 

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate 

prompt=ChatPromptTemplate.from_template(
    """
Answer the following questions based on the provided context: 
<context>
{context}
</context>


"""
)


In [None]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o")

document_chain=create_stuff_documents_chain(llm, prompt)
document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following questions based on the provided context: \n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000266C2DCCD70>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000266C2DCD160>, root_client=<openai.OpenAI object at 0x00000266C297C190>, root_async_client=<openai.AsyncOpenAI object at 0x00000266C297C7D0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'r

In [32]:
from langchain_core.documents import Document

document_chain.invoke({
    "input": "large-diameter turbofan engines", 
    "context": [Document(page_content="The jetliner is recognizable for its large-diameter turbofan engines, raked wingtips, six wheels on each main landing gear")]
    })



'Sure, please go ahead and ask your questions related to the provided context.'

In [35]:
### Retrieval 

retriveal = vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retriveal_chain = create_retrieval_chain(retriveal, document_chain)


In [37]:
response = retriveal_chain.invoke({"input": "What is Triple Seven?"})
response['answer']


"1. What is the seating capacity range for the Boeing 777 in a typical 3-class configuration?\n   - The Boeing 777 can accommodate between 301 to 368 passengers in a typical 3-class configuration.\n\n2. What range does the Boeing 777 cover in nautical miles?\n   - The Boeing 777 has a range of 5,240 to 8,555 nautical miles.\n\n3. What is a distinguishing design feature of the Boeing 777's fuselage?\n   - The Boeing 777 features a fully circular fuselage cross-section.\n\n4. What significant technological advancement did the Boeing 777 introduce in its control system?\n   - The Boeing 777 was the first Boeing airliner to use fly-by-wire controls.\n\n5. Who were the primary users of the Boeing 777 as mentioned in the context?\n   - The primary users mentioned are Emirates, United Airlines, Cathay Pacific, and Air France.\n\n6. How many Boeing 777 aircraft had been built as of August 2025?\n   - As of August 2025, 1,767 Boeing 777 aircraft had been built.\n\n7. When was the Boeing 777 fir