In [19]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain_classic.retrievers import MultiQueryRetriever
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
import os
load_dotenv()
api_key = os.getenv("HUGGINGFACE_API_KEY")

In [12]:
documents = [
    Document(page_content="Langchain is easy to use.", metadata={"source": "test1.txt"}),
    # 4 Similar Documents (Topic: Artificial Intelligence)
    Document(page_content="Artificial Intelligence (AI) is the simulation of human intelligence processes by machines.", metadata={"source": "test2.txt"}),
    Document(page_content="AI systems include expert systems, natural language processing, speech recognition phrases.", metadata={"source": "test3.txt"}),
    Document(page_content="Machine learning is a subset of AI that allows systems to learn from data patterns.", metadata={"source": "test4.txt"}),
    Document(page_content="Deep learning is a type of machine learning based on artificial neural networks.", metadata={"source": "test5.txt"}),

    # 4 Different Documents (Topic: Cricket)
    Document(page_content="Cricket is a bat-and-ball game played between two teams of eleven players.", metadata={"source": "test6.txt"}),
    Document(page_content="The game is played on a field at the centre of which is a 22-yard pitch.", metadata={"source": "test7.txt"}),
    Document(page_content="Sachin Tendulkar is often regarded as one of the greatest batsmen in cricket history.", metadata={"source": "test8.txt"}),
    Document(page_content="The ICC World Cup is the most prestigious tournament in international cricket.", metadata={"source": "test9.txt"})
]

In [13]:

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


vectorstore = Chroma.from_documents(documents=documents, embedding=embedding_model, collection_name="sample_collection")



Loading weights: 100%|██████████| 103/103 [00:00<00:00, 213.59it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [23]:
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    huggingfacehub_api_token=api_key,
)

# model = ChatHuggingFace(llm=llm)

In [29]:
multi_retriver = MultiQueryRetriever.from_llm(vectorstore.as_retriever(search= "mmr" ,search_kwargs={"k": 3}), llm= ChatHuggingFace(llm=llm))

In [30]:
query = "Is AI safe for human?"

result = multi_retriver.invoke(query)

print(result)

[Document(metadata={'source': 'test4.txt'}, page_content='Machine learning is a subset of AI that allows systems to learn from data patterns.'), Document(metadata={'source': 'test3.txt'}, page_content='AI systems include expert systems, natural language processing, speech recognition phrases.'), Document(metadata={'source': 'test2.txt'}, page_content='Artificial Intelligence (AI) is the simulation of human intelligence processes by machines.')]


In [31]:
for item in result:
    print(item.page_content)

Machine learning is a subset of AI that allows systems to learn from data patterns.
AI systems include expert systems, natural language processing, speech recognition phrases.
Artificial Intelligence (AI) is the simulation of human intelligence processes by machines.
