### SmartScrape Project

In [46]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
load_dotenv()



# Validate the environment variables this notebook relies on.
#
# The earlier form, os.environ[NAME] = os.getenv(NAME), failed with an opaque
# "TypeError: str expected, not NoneType" as soon as any one of the variables
# was unset: os.getenv returns None for a missing name and os.environ only
# stores strings. Values that are set (by the shell or by the load_dotenv()
# call above) are already present in os.environ, so nothing needs to be
# copied -- this cell only has to check what is there.
REQUIRED_ENV_VARS = ("GOOGLE_API_KEY",)
# LangSmith settings. No visible cell reads these directly, so they are
# treated as optional here -- NOTE(review): confirm tracing is not mandatory.
OPTIONAL_ENV_VARS = ("LANGSMITH_TRACING", "LANGSMITH_PROJECT", "LANGSMITH_API_KEY")

# An empty string is as unusable as a missing variable, hence the truthiness test.
missing_required = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_required:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_required)
        + ". Set them in the shell or in the .env file before running this notebook."
    )

# Only the variable names are reported, never their values.
missing_optional = [name for name in OPTIONAL_ENV_VARS if not os.getenv(name)]
if missing_optional:
    print("LangSmith variables not set: " + ", ".join(missing_optional))



In [47]:
# Displays True when the Google API key is present in the process environment.
os.environ.get("GOOGLE_API_KEY") is not None

True

In [48]:
# Page to scrape and index.
page_url = "https://python.langchain.com/docs/how_to/chatbots_memory/"

# The loader is given a list of URLs; only this single page is used here.
loader = WebBaseLoader(
    web_paths=[page_url],
)

In [49]:
# Fetch the configured page through the loader. The recorded output shows the
# result is a list of Document objects carrying metadata and page_content.
docs = loader.load()
# Bare expression: the notebook displays the loaded documents.
docs

[Document(metadata={'source': 'https://python.langchain.com/docs/how_to/chatbots_memory/', 'title': 'How to add memory to chatbots | 🦜️🔗 LangChain', 'description': 'A key feature of chatbots is their ability to use the content of previous conversational turns as context. This state management can take several forms, including:', 'language': 'en'}, page_content='\n\n\n\n\nHow to add memory to chatbots | 🦜️🔗 LangChain\n\n\n\n\n\n\nSkip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1💬SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a simple LLM application with chat models and prompt templatesBuild a ChatbotBuild a Retrieval Augmented Generation (RAG) App: Part 2Build an Extraction ChainBuild an AgentTaggingBuild a Retrieval Augmented Generation (RAG) App: Par

In [50]:
# Break the loaded documents into overlapping chunks for embedding.
# Each chunk is capped at 600 with 100 shared between neighbouring chunks
# (presumably counted in characters -- confirm against the splitter's length function).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)

texts = text_splitter.split_documents(docs)

In [51]:
# Google chat model that will generate the answers.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# Google embedding model that turns text into vectors for the index.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [52]:
# Vector storage: an in-memory FAISS index over the chunked documents.

# The index needs the embedding width up front, so embed a throwaway string
# and measure the length of the resulting vector.
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

# Start from an empty docstore and id mapping; documents are added below.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Last expression of the cell: the returned document ids are displayed.
vector_store.add_documents(documents=texts)

['c8e2de77-aef7-4032-9701-0727df9f2a8d',
 '2d61e485-0a10-4162-a746-133435c15cc7',
 '0d50e3fc-2fec-4fa6-81d8-89de1b1f1f85',
 '9877e301-e572-40b8-a402-7f6a29e3c83f',
 '8b033daa-ebba-48e9-a419-cd8d62cf2bdb',
 '75b60341-cfa2-427d-9947-e118eac99a62',
 'baea4401-cbc1-4a51-ad2c-7a070d2b1d62',
 'b6edcd9f-6382-4d4c-8b7b-d462e84eb949',
 '2d01460c-c9ef-4b58-a9bf-0cdece070962',
 'ff756e9e-cfa2-42b5-ab5a-fb0c171595ec',
 '3447a6e0-85eb-4adc-a687-5cee03e54fa0',
 '07e61aa1-1248-4131-a17b-6c5f9d6ffc69',
 'd37becf6-4522-4cd4-ade2-2349340585f0',
 'cd3631fa-f9f9-476e-8708-0185db13511f',
 'addd7616-271d-4c13-949d-c4cd4a261526',
 '863367d3-6da1-4618-9c07-f7f03680b25e',
 'e3b0473f-95ab-4363-964a-47f108b82c3c',
 '2db9ab80-aaae-4d63-98fe-c932ae219c5c',
 'caa94168-8c89-4972-aea0-99ae6597a62f',
 '40dd65a3-72d5-41ec-b23f-d9fd7295763a',
 '5c420ce3-bdce-40e7-9efb-0c6d2325d758',
 '91c22219-f192-446c-8485-37367f453f78',
 '04d1b2af-1299-40f6-8c45-f3f08452b538',
 '89f06347-afd7-4893-b6c6-36d41c452d75',
 'd0e0c3d5-3cc8-

In [53]:
# Sanity-check retrieval: look up the closest chunk for a heading that is
# expected to appear on the scraped page and print its text.
query = "Automatic history management"

# Only the best match is printed, so ask for a single result instead of
# relying on the method's default result count.
top_hits = vector_store.similarity_search(query, k=1)

# An empty result means nothing was indexed; fail with a clear message rather
# than a bare IndexError from [0].
assert top_hits, "similarity_search returned no documents - was the vector store populated?"
print(top_hits[0].page_content)

Automatic history management​
The previous examples pass messages to the chain (and model) explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also provides a way to build applications that have memory using LangGraph's persistence. You can enable persistence in LangGraph applications by providing a checkpointer when compiling the graph.


In [54]:
# Expose the vector store as a retriever using its default settings.
# NOTE: the name is misspelled ("retriver"); it is kept as-is because the cell
# that builds the retrieval chain refers to it by this exact name.
retriver = vector_store.as_retriever()

In [84]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Prompt with two placeholders: {context} receives the retrieved documents
# and {input} receives the user's question.
prompt = ChatPromptTemplate.from_template("""
You are a helpful assistant. Given the following context, answer the question.

Context:
{context}

Question: 

{input}

""")

# The "stuff" chain formats every supplied document into the prompt and then
# passes that single prompt to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [85]:
from langchain.chains import create_retrieval_chain

# Wire retrieval and answering together: the retriever supplies the context
# documents for the question, and the document chain produces the answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=document_chain,
)

# Display the composed runnable for inspection.
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7ddf5a02d100>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Given the following context, answer the question.\n\nContext:\n{context}\n\nQuestion: \n\n{i

In [92]:
# Ask a question; the chain expects it under the "input" key.
res = retrieval_chain.invoke(
    {"input": "what is Automatic history management?"}
)

In [93]:
# Show only the generated answer text from the chain result.
print(res["answer"])

Automatic history management in LangChain refers to building applications with memory using LangGraph's persistence. This is achieved by providing a checkpointer when compiling the graph, which allows the application to automatically manage and retain conversation history.


In [94]:
# Display the full result dict: the original input, the retrieved context
# documents, and the answer.
res

{'input': 'what is Automatic history management?',
 'context': [Document(id='57e6bf17-30fa-449d-99b9-ac713ead4210', metadata={'source': 'https://python.langchain.com/docs/how_to/chatbots_memory/', 'title': 'How to add memory to chatbots | 🦜️🔗 LangChain', 'description': 'A key feature of chatbots is their ability to use the content of previous conversational turns as context. This state management can take several forms, including:', 'language': 'en'}, page_content="Automatic history management\u200b\nThe previous examples pass messages to the chain (and model) explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also provides a way to build applications that have memory using LangGraph's persistence. You can enable persistence in LangGraph applications by providing a checkpointer when compiling the graph."),
  Document(id='9faf1de3-86bf-47d8-99a0-1add702d12c9', metadata={'source': 'https://python.langchain.com/docs/how