### Loading Documents From Web

In [22]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [23]:
web_paths = [
    'https://python.langchain.com/docs/introduction/',
    'https://docs.smith.langchain.com/',
    'https://www.ibm.com/think/topics/ai-agents'
]

loader = WebBaseLoader(web_paths=web_paths)
documents = loader.load()

In [24]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_documents = text_splitter.split_documents(documents)

In [25]:
final_documents

[Document(metadata={'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en'}, page_content='Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'),
 Document(metadata={'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en'}, page_content='Skip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1\uf8ffüí¨SearchIntroductionTutorialsBuild a Question Answering application over a Graph DatabaseTutorialsBuild a simple LLM application with cha

### Using Ollama Embeddings for Vector Embedding

In [11]:
from langchain_ollama import OllamaEmbeddings

In [42]:
embedding = OllamaEmbeddings(model='nomic-embed-text')

In [43]:
embedding.embed_query('Hello World')

[-0.0068027154,
 -0.0013408619,
 -0.17139174,
 0.0084814895,
 0.0057692737,
 0.06988072,
 -0.00017170164,
 -0.04308937,
 -0.014579962,
 -0.054102205,
 0.00048797898,
 0.039168864,
 0.027731339,
 0.08086464,
 0.045351997,
 -0.06291869,
 0.010284686,
 -0.029609116,
 -0.042803206,
 0.029637048,
 -0.0037316906,
 -0.09433132,
 -0.0076254783,
 0.038094886,
 0.09215726,
 -0.014345816,
 -0.01505932,
 0.061599556,
 0.0065142475,
 -0.022007689,
 -0.0012207448,
 -0.010932164,
 -0.00020625598,
 0.01566597,
 0.0394505,
 0.0027252552,
 0.032584246,
 0.017258106,
 0.01629258,
 0.005911865,
 -0.004685292,
 -0.014823131,
 0.0120996,
 0.010172366,
 0.065933235,
 -0.0015011014,
 -0.004195007,
 0.00031153136,
 0.08684402,
 -0.060564775,
 -0.018206414,
 0.0053304154,
 -0.0009433416,
 0.060220506,
 0.06722448,
 0.035398144,
 0.049684584,
 -0.06165393,
 0.02424063,
 0.03454023,
 0.02173439,
 0.043676823,
 0.032960273,
 0.06524403,
 -0.017497044,
 -0.033589207,
 -0.025221474,
 0.03545324,
 -0.0027342543,
 0.0

In [44]:
from langchain_chroma import Chroma

In [45]:
vectordb = Chroma.from_documents(documents=final_documents, embedding=embedding)

In [46]:
vectordb.similarity_search('what is ai agent')

[Document(metadata={'description': 'An artificial intelligence (AI) agent refers to a system or program that is capable of autonomously performing tasks on behalf of a user or another system.', 'language': 'en', 'source': 'https://www.ibm.com/think/topics/ai-agents', 'title': 'What Are AI Agents? | IBM'}, page_content='What Are AI Agents? | IBM\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                        \n\n\n\n  \n    What are AI agents?\n\n\n\n\n\n\n    \n\n\n                    \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                                    Artificial Intelligence\n                                \n\n\n\n\n\n\n                    \n\n\n\n  \n    3 July 2024\n\n\n\n\n\n\n    \n\n\n                \n\n\n\n\n\n\n\n\n\n\n\n\n                Link copied\n            \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n

### Using Different Embedding model and Different Vector Store

In [28]:
embedding = OllamaEmbeddings(model='mxbai-embed-large:335m')

In [29]:
embedding.embed_query('what is ai')

[0.004859622,
 0.031150693,
 -0.04210219,
 -0.022775881,
 -1.8101558e-05,
 -0.031040987,
 -0.016380563,
 0.0017737326,
 -0.0017338288,
 0.034942616,
 -0.015602057,
 0.026915608,
 -0.053023454,
 -0.028119445,
 -0.013526653,
 -0.0008373663,
 -0.01467309,
 -0.011266194,
 -0.016141256,
 0.0082986755,
 0.0147023145,
 0.055188905,
 -0.013396988,
 -0.021682573,
 -0.020040149,
 0.033651672,
 0.018008025,
 0.006789289,
 0.033399016,
 0.019730225,
 -0.03129471,
 -0.011517168,
 -0.004386699,
 -0.054803535,
 -0.007379547,
 -0.030446047,
 0.004319304,
 -0.04196533,
 -0.047534186,
 -0.07243464,
 0.012549937,
 0.011677595,
 -0.007757888,
 -0.037536424,
 -0.08139277,
 0.047794737,
 0.0019898582,
 -0.025822636,
 0.0041138506,
 -0.012085761,
 -0.023024607,
 0.03454906,
 0.02904057,
 0.01829884,
 -0.052439954,
 -0.0569482,
 0.015555334,
 -0.015957244,
 -0.011525958,
 0.0060397866,
 0.05096146,
 -0.046360675,
 0.024095025,
 -0.026252093,
 -0.011294547,
 -0.0021465456,
 -0.007831822,
 -0.027421976,
 0.0110

In [30]:
from langchain_objectbox import ObjectBox

In [32]:
vectorstore = ObjectBox.from_documents(documents=final_documents, embedding=embedding, embedding_dimensions=768)

In [33]:
vectorstore.similarity_search('what is ai agent')

[Document(metadata={'description': 'An artificial intelligence (AI) agent refers to a system or program that is capable of autonomously performing tasks on behalf of a user or another system.', 'language': 'en', 'source': 'https://www.ibm.com/think/topics/ai-agents', 'title': 'What Are AI Agents? | IBM'}, page_content='What Are AI Agents? | IBM\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                        \n\n\n\n  \n    What are AI agents?\n\n\n\n\n\n\n    \n\n\n                    \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                                    Artificial Intelligence\n                                \n\n\n\n\n\n\n                    \n\n\n\n  \n    3 July 2024\n\n\n\n\n\n\n    \n\n\n                \n\n\n\n\n\n\n\n\n\n\n\n\n                Link copied\n            \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n

In [34]:
retriever = vectorstore.as_retriever(
    search_kwargs = {'k' : 5}
)

In [35]:
retriever.invoke('what is langchain')

[Document(metadata={'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en', 'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'}, page_content='LangChain is a framework for developing applications powered by large language models (LLMs).\nLangChain simplifies every stage of the LLM application lifecycle:'),
 Document(metadata={'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en', 'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'}, page_content="Explore the full list of LangChain tutorials here, and check out other LangGraph tutorials here. To learn more about LangGraph, check out our first LangChain Academy course, Introduction to LangGraph, available here.\nHow-to guides‚Äã\nHere you‚Äôll find short answ

In [36]:
vectorstore.similarity_search_with_score('what is langchain')

[(Document(metadata={'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en', 'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'}, page_content='LangChain is a framework for developing applications powered by large language models (LLMs).\nLangChain simplifies every stage of the LLM application lifecycle:'),
  0.16555869579315186),
 (Document(metadata={'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en', 'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'}, page_content="Explore the full list of LangChain tutorials here, and check out other LangGraph tutorials here. To learn more about LangGraph, check out our first LangChain Academy course, Introduction to LangGraph, available here.\nHow-to guides‚Äã\nHer

In [39]:
vectorstore.similarity_search_by_vector(embedding.embed_query('what is langchain'))

[Document(metadata={'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en', 'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'}, page_content='LangChain is a framework for developing applications powered by large language models (LLMs).\nLangChain simplifies every stage of the LLM application lifecycle:'),
 Document(metadata={'description': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'language': 'en', 'source': 'https://python.langchain.com/docs/introduction/', 'title': 'Introduction | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'}, page_content="Explore the full list of LangChain tutorials here, and check out other LangGraph tutorials here. To learn more about LangGraph, check out our first LangChain Academy course, Introduction to LangGraph, available here.\nHow-to guides‚Äã\nHere you‚Äôll find short answ