In [72]:
#importing needed commands 
from llama_index.core import  SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.vector_stores import VectorStoreQuery
import chromadb


In [73]:
# Load every file found under /RAG into a list of llama_index Document objects.
# NOTE(review): the source-node text printed by the last cell of this notebook
# looks like persisted index JSON rather than document prose, so /RAG may hold
# non-document files; consider restricting what the reader picks up -- confirm.
documents = SimpleDirectoryReader(input_dir='/RAG').load_data()

In [74]:
# Sentence-aware splitter used to cut document text into chunks (chunk_size=512).
text_parser = SentenceSplitter(chunk_size=512)

In [75]:
# Flat list holding every text chunk produced across all documents.
text_chunks = []

# Parallel list: doc_idxs[i] is the position in `documents` of the document
# that text_chunks[i] was cut from.
doc_idxs = []

for doc_idx, doc in enumerate(documents):
    # Record each chunk together with its source index so the two lists
    # always stay the same length and in the same order.
    for chunk in text_parser.split_text(doc.text):
        text_chunks.append(chunk)
        doc_idxs.append(doc_idx)


In [76]:
# Build one TextNode per text chunk, carrying over its source document's metadata.
nodes = []

# text_chunks and doc_idxs are parallel lists, so walk them in lockstep
# instead of indexing doc_idxs by position.
for text_chunk, doc_idx in zip(text_chunks, doc_idxs):
    node = TextNode(text=text_chunk)

    # Give each node its own (shallow) copy of the metadata. Assigning the
    # document's dict directly would make the document and every chunk node
    # share one mutable object, so editing one node's metadata would silently
    # change all of them.
    node.metadata = dict(documents[doc_idx].metadata)

    nodes.append(node)


In [77]:
# Load the BAAI/bge-small-en embedding model through the Hugging Face integration.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en",
)

In [78]:
# The following cell iterates over all nodes and computes an embedding for each node's content.

In [44]:
# Embed each node's content (text plus all metadata) and store the vector on the node.
for node in nodes:
    node_content = node.get_content(metadata_mode="all")
    node.embedding = embed_model.get_text_embedding(node_content)

In [46]:

# Create a Chroma client and obtain the collection that will hold the node embeddings.
chroma_client = chromadb.Client()

# Use get_or_create_collection so this cell can be re-run in the same kernel:
# create_collection raises when a collection named "collection" already exists.
# NOTE(review): on a re-run the existing collection (and whatever was already
# added to it) is reused rather than recreated -- confirm that is acceptable.
chroma_collection = chroma_client.get_or_create_collection(name="collection")

In [47]:
# Wrap the Chroma collection in a llama_index vector store and insert the embedded nodes.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
added_node_ids = vector_store.add(nodes)
added_node_ids  # last expression: shows the value returned by add() as the cell output

['db5404ac-a19c-48de-8373-fa6828394931',
 '8cbc2362-4f53-4ccf-b42d-787691d51ce5',
 '7f9410c6-5749-42f2-906e-589d55f56f1d',
 'd6e98019-9c2e-4c79-b963-123603f7e69f',
 '1ab3978f-b3f8-414b-9d34-92a470652c39',
 '503338bf-409d-41c0-a0c1-f0feb5467674',
 'c37679d3-9641-42a1-975f-9362a9360661',
 'c848447d-be10-44a3-8622-4715298b4413',
 '73d70e5e-5406-4781-852a-b3574627915f',
 '5e592080-a981-4e12-a7b8-27f6b3326b49',
 'd5c6ef00-7a72-442b-b5a8-3500648ccdb6',
 '5526a3d3-78f1-47a3-84f0-4ef70ea4bf2a',
 '224ed91c-72e9-4a40-9179-4edfca8c9c35',
 'bb50622d-239f-4724-bfbe-6863e4ace50b',
 '1318f5f9-e483-4dd8-b522-1cf77c6c3dac',
 '19c8146b-31be-4089-a08e-ecc47b23d54e',
 '3ee03a80-0b40-46b1-b7bf-19878aa470d7',
 '36c14a5d-3034-48b3-9c8c-fed663e0c4bc',
 '3627711e-5e9e-4483-9b62-30d67e996311',
 '55e07df1-db8f-4750-aca3-1bac2d1c2876',
 '7b692ba8-f8ba-4188-9412-a4d23c21a180',
 '8c1642a8-4b68-4f8e-b831-da9fff3c3ca7',
 '752445ec-7d9a-4aca-8873-eb878fb66b10',
 '3587dac3-a216-403c-8434-243691eda32a',
 '65d4b290-11a8-

In [79]:
from llama_index.llms.ollama import Ollama

# Example question used by the manual retrieval cells that follow.
query_str = "Can you tell me about the key concepts for safety finetuning"

# LLM handle for the 'tinyllama' model via Ollama, with a request timeout of 200.0.
llm = Ollama(
    model='tinyllama',
    request_timeout=200.0,
)

In [80]:
# Embed the query string so it can be used for a similarity search.
query_embedding = embed_model.get_query_embedding(
    query_str
)

In [81]:
from llama_index.core.vector_stores import VectorStoreQuery

# Describe the search: the 2 most similar entries to the query embedding,
# using the "default" query mode.
query_mode = "default"
vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding,
    similarity_top_k=2,
    mode=query_mode,
)

In [82]:
# Run the query against the vector store; the result object exposes
# `.nodes` and `.similarities`, which the following cell reads.
query_result = vector_store.query(
    vector_store_query
)


In [83]:
# Optional is imported here for type hints used in this notebook.
from typing import Optional

# NodeWithScore pairs a node with its associated score.
from llama_index.core.schema import NodeWithScore

# Pair every retrieved node with its similarity score; the score stays None
# when the query result carries no similarities.
similarities = query_result.similarities
nodes_with_scores = [
    NodeWithScore(
        node=node,
        score=None if similarities is None else similarities[index],
    )
    for index, node in enumerate(query_result.nodes)
]


In [84]:
# Import necessary classes and modules
from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from typing import Any, List

# Custom retriever: embeds the query, searches the vector store, returns scored nodes.
class VectorDBRetriever(BaseRetriever):
    """Retriever over a Chroma-backed vector store.

    For each query it embeds the query string with ``embed_model``, runs a
    ``VectorStoreQuery`` against ``vector_store`` and wraps every returned
    node in a ``NodeWithScore``.
    """

    def __init__(
        self,
        vector_store: ChromaVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Store the retrieval configuration.

        Args:
            vector_store: Vector store that is queried for similar nodes.
            embed_model: Object providing ``get_query_embedding(str)``.
            query_mode: Value passed as ``mode`` to ``VectorStoreQuery``.
            similarity_top_k: Number of results requested from the store.
        """
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return the nodes most similar to the query, each with its score.

        Args:
            query_bundle: Query whose ``query_str`` is embedded and searched.

        Returns:
            One ``NodeWithScore`` per node in the query result. The score is
            ``None`` when the store returned no similarities. The list is
            empty when the store returned no nodes.
        """
        query_embedding = self._embed_model.get_query_embedding(
            query_bundle.query_str
        )

        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )

        query_result = self._vector_store.query(vector_store_query)

        # `nodes` is optional on the query result; treat a missing value as
        # "no hits" instead of failing inside enumerate().
        retrieved_nodes = query_result.nodes or []
        similarities = query_result.similarities

        return [
            NodeWithScore(
                node=node,
                score=None if similarities is None else similarities[index],
            )
            for index, node in enumerate(retrieved_nodes)
        ]


In [85]:
# Retriever that fetches the 2 most similar nodes from the vector store for a query.
retriever = VectorDBRetriever(
    vector_store=vector_store,
    embed_model=embed_model,
    query_mode="default",
    similarity_top_k=2,
)


In [64]:
from llama_index.core.query_engine import RetrieverQueryEngine

# Build a query engine from the custom retriever and the LLM.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    llm=llm,
)

In [69]:
# Ask a question through the query engine and keep the response object.
query_str = "what is llama2?"
response = query_engine.query(query_str)

In [70]:
# print() applies str() itself, so this shows the response's string form.
print(response)

Lama2 is an expert Q&A system that trusted around the world and recognized for providing accurate and reliable answers to queries using provided context information. The context includes the file path, file name, file type, file size, creation date, last modified date, document ID, and reference doc ID, as well as the ref_doc_id and node types. Lama2's expertise lies in applying a combination of machine learning algorithms and state-of-the-art NLP techniques to answer complex questions and provide accurate and timely responses. It is trusted globally for providing reliable and effective solutions to complex business challenges, as evidenced by its high customer satisfaction rating and numerous accolades received over the years.


In [71]:
# Show the content of the first source node attached to the response.
first_source = response.source_nodes[0]
print(first_source.get_content())

0.0034915206488221884, -0.011344455182552338, -0.05365191772580147, -0.042569030076265335, 0.023822342976927757, -0.007527470588684082, 0.06222056224942207, 0.056021224707365036, 0.02155601978302002]}, "text_id_to_ref_doc_id": {"ffcfe60d-8dd5-4372-ab0f-20fea271d8f1": "9d6c338b-f7fd-492d-87e4-f536c52ae4cc", "0139b271-c0d3-4e21-abe0-99456417bfce": "cb1f651e-c914-45a6-96b9-d99658f23f30", "bdf79504-ba89-43c9-aaa0-845f19b206a5": "7e2c502c-db3f-472e-b587-346eb49fda23", "858ccc84-9d0d-4dc8-9483-1cddbed8842a": "c135b565-0dce-43b3-9276-b3320c3a565b", "1649a23c-23c4-455a-8e9d-92fc08a912bd": "5ecbe3a6-3d04-4eb7-b37e-8686c840e35c", "6c5e36fa-0229-4020-9b69-562b95426bd0": "206af3cd-e676-4fef-b4de-5fa99d9a6429", "9eea4000-2c43-4883-8747-c32aad440337": "9e2daf5d-2c92-4722-be52-95e1b6f60e6b", "5b43a24e-05c7-45bd-872d-15fca7cd0a15": "17475898-d5ac-4353-ae19-ac7d9f3da8c3",
