#### Web scraper application
    User question → retrieve relevant chunks → inject into prompt → LLM answers using only that context
- https://www.udemy.com/course/complete-agentic-ai-bootcamp-with-langgraph-and-langchain/learn/lecture/50044441?components=add_to_cart%2Cavailable_coupons%2Cbase_purchase_section%2Cbuy_button%2Cbuy_for_team%2Ccacheable_buy_button%2Ccacheable_deal_badge%2Ccacheable_discount_expiration%2Ccacheable_price_text%2Ccacheable_purchase_text%2Ccurated_for_ufb_notice_context%2Ccurriculum_context%2Cdeal_badge%2Cdiscount_expiration%2Cgift_this_course%2Cincentives%2Cinstructor_links%2Clifetime_access_context%2Cmoney_back_guarantee%2Cprice_text%2Cpurchase_tabs_context%2Cpurchase%2Crecommendation%2Credeem_coupon%2Csidebar_container%2Cpurchase_body_container%2Cone_click_checkout&couponCode=25BBPMXINACTIVE#questions

In [9]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# The OpenAI clients read OPENAI_API_KEY straight from the environment, so there
# is nothing to copy. Just fail fast with a readable message when it is missing
# (assigning None into os.environ would raise an opaque TypeError instead).
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )

# langsmith tracking: only switched on when a LangSmith key is available.
if os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    # LangSmith reads the project from LANGCHAIN_PROJECT; the name for this
    # notebook is kept under LANGCHAIN_PROJECT2_NAME, so map it across.
    langsmith_project = os.environ.get("LANGCHAIN_PROJECT2_NAME")
    if langsmith_project:
        os.environ["LANGCHAIN_PROJECT"] = langsmith_project

##### Ingest documents

In [12]:
from langchain_community.document_loaders import WebBaseLoader

# Page to ingest; kept as a named constant so it is easy to swap out.
QUOTES_URL = "https://quotes.toscrape.com/page/1/"

# Load the page into LangChain Document objects.
loader = WebBaseLoader(web_path=QUOTES_URL)
docs = loader.load()

Document(metadata={'source': 'https://quotes.toscrape.com/page/1/', 'title': 'Quotes to Scrape', 'language': 'en'}, page_content="\n\n\n\nQuotes to Scrape\n\n\n\n\n\n\n\n\nQuotes to Scrape\n\n\n\n\nLogin\n\n\n\n\n\n\n“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”\nby Albert Einstein\n(about)\n\n\n            Tags:\n            \nchange\ndeep-thoughts\nthinking\nworld\n\n\n\n“It is our choices, Harry, that show what we truly are, far more than our abilities.”\nby J.K. Rowling\n(about)\n\n\n            Tags:\n            \nabilities\nchoices\n\n\n\n“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”\nby Albert Einstein\n(about)\n\n\n            Tags:\n            \ninspirational\nlife\nlive\nmiracle\nmiracles\n\n\n\n“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”\nby Jane Austen\n(abou

##### Split documents

In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters passed to the splitter below.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# Break the loaded documents into overlapping chunks for embedding.
texts = text_splitter.split_documents(docs)

##### Create embeddings and store them in a FAISS vector store

In [15]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model used to vectorise each chunk.
embeddings = OpenAIEmbeddings()

# Embed every chunk and index the vectors in a FAISS store.
db = FAISS.from_documents(documents=texts, embedding=embeddings)

##### Perform similarity search

In [20]:
query = "by Albert Einstein"

# Look up the chunks closest to the query and display the text of the top hit.
results = db.similarity_search(query=query)
top_hit = results[0]
top_hit.page_content

'Quotes to Scrape\n\n\n\n\n\n\n\n\nQuotes to Scrape\n\n\n\n\nLogin\n\n\n\n\n\n\n“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”\nby Albert Einstein\n(about)\n\n\n            Tags:\n            \nchange\ndeep-thoughts\nthinking\nworld\n\n\n\n“It is our choices, Harry, that show what we truly are, far more than our abilities.”\nby J.K. Rowling\n(about)\n\n\n            Tags:\n            \nabilities\nchoices\n\n\n\n“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”\nby Albert Einstein\n(about)\n\n\n            Tags:\n            \ninspirational\nlife\nlive\nmiracle\nmiracles\n\n\n\n“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”\nby Jane Austen\n(about)\n\n\n            Tags:\n            \naliteracy\nbooks\nclassic\nhumor'

##### Retrieval Chain, Document Chain
https://stackoverflow.com/questions/79807773/using-create-retrieval-chain-due-to-retrievalqa-deprecation

In [None]:
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Chat model that produces the final answer.
llm = ChatOpenAI(model="gpt-4o-2024-08-06")

# Retriever view over the vector store built earlier.
retriever = db.as_retriever()

# System instructions; {context} is the slot for the retrieved documents.
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Document chain: fills the prompt's context with the documents it is given.
document_chain = create_stuff_documents_chain(llm, prompt)
# Retrieval chain: combines the retriever with the document chain.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

response = retrieval_chain.invoke({"input": query})

# Show the full response payload first, then only the generated answer.
print(response)
print(response["answer"])

{'input': 'by Albert Einstein', 'context': [Document(id='b5d60c4d-3312-466e-b73f-dce25311c8a9', metadata={'source': 'https://quotes.toscrape.com/page/1/', 'title': 'Quotes to Scrape', 'language': 'en'}, page_content='Quotes to Scrape\n\n\n\n\n\n\n\n\nQuotes to Scrape\n\n\n\n\nLogin\n\n\n\n\n\n\n“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”\nby Albert Einstein\n(about)\n\n\n            Tags:\n            \nchange\ndeep-thoughts\nthinking\nworld\n\n\n\n“It is our choices, Harry, that show what we truly are, far more than our abilities.”\nby J.K. Rowling\n(about)\n\n\n            Tags:\n            \nabilities\nchoices\n\n\n\n“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”\nby Albert Einstein\n(about)\n\n\n            Tags:\n            \ninspirational\nlife\nlive\nmiracle\nmiracles\n\n\n\n“The person, be it gentleman or lady, who has no