In [2]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from tqdm import tqdm 

  from .autonotebook import tqdm as notebook_tqdm


In [47]:
from datasets import load_dataset

# Step 1: Load the SQuAD dataset
dataset = load_dataset("squad")

# Step 2: Extract unique contexts from the dataset
data = [item["context"] for item in dataset["train"]]
texts = list(set(data))


In [None]:
len(texts)


In [4]:
def batch_iterate(lst, batch_size):
    for i in range(0, len(lst), batch_size):
        yield lst[i : i + batch_size]

In [3]:
class EmbedData:

    def __init__(self, 
                 embed_model_name="nomic-ai/nomic-embed-text-v1.5",
                 batch_size=32):
        
        self.embed_model_name = embed_model_name
        self.embed_model = self._load_embed_model()
        self.batch_size = batch_size
        self.embeddings = []
        
        
    def _load_embed_model(self):
        embed_model = HuggingFaceEmbedding(model_name=self.embed_model_name,
                                           trust_remote_code=True,
                                           cache_folder='./hf_cache')
        return embed_model
    
    
    def generate_embedding(self, context):
        return self.embed_model.get_text_embedding_batch(context)
    
    def embed(self, contexts):
        self.contexts = contexts
        
        for batch_context in tqdm(batch_iterate(contexts, self.batch_size),
                                  total=len(contexts)//self.batch_size,
                                  desc="Embedding data in batches"):
                                  
            batch_embeddings = self.generate_embedding(batch_context)
            
            self.embeddings.extend(batch_embeddings)

In [None]:
batch_size = 32

embeddata = EmbedData(batch_size=batch_size)

embeddata.embed(texts)

# Above code is for generating embedding for given input file, but it would take much time, so Below I have directly used pickle file for getting embedding 

In [5]:
import pickle
batch_size = 32
embeddata = EmbedData(batch_size=batch_size)

with open('embeddings_and_contexts.pkl', 'rb') as f:
    a, b = pickle.load(f)

embeddata.embeddings = a[:]
embeddata.contexts = b[:]

!!!!!!!!!!!!megablocks not available, using torch.matmul instead
<All keys matched successfully>


# Vector database class

In [16]:
from qdrant_client import models
from qdrant_client import QdrantClient

class QdrantVDB:

    def __init__(self, collection_name, vector_dim=768, batch_size=512):
        self.collection_name = collection_name
        self.batch_size = batch_size
        self.vector_dim = vector_dim
        
    def define_client(self):
        self.client = QdrantClient(url="http://localhost:6333",
                                   prefer_grpc=True)
        
        
    def create_collection(self):
        
        if not self.client.collection_exists(collection_name=self.collection_name):

            self.client.create_collection(collection_name=self.collection_name,
                                          
                                          vectors_config=models.VectorParams(
                                                              size=self.vector_dim,
                                                              distance=models.Distance.DOT,
                                                              on_disk=True),
                                          
                                          optimizers_config=models.OptimizersConfigDiff(
                                                                            default_segment_number=5,
                                                                            indexing_threshold=0)
                                         )
            
    def ingest_data(self, embeddata):
        for batch_context, batch_embeddings in tqdm(zip(batch_iterate(embeddata.contexts, self.batch_size), 
                                                        batch_iterate(embeddata.embeddings, self.batch_size)), 
                                                    total=len(embeddata.contexts)//self.batch_size, 
                                                    desc="Ingesting in batches"):

            self.client.upload_collection(collection_name=self.collection_name,
                                          vectors=batch_embeddings,
                                          payload=[{"context": context} for context in batch_context])

        self.client.update_collection(collection_name=self.collection_name,
                                      optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000)
                                     )
            
    

In [18]:
database = QdrantVDB("squad_collection")
database.define_client()
database.create_collection()
database.ingest_data(embeddata)

Ingesting in batches: 37it [00:21,  1.70it/s]                                                                          


# Retriever class 

In [24]:
import time 
class Retriever:

    def __init__(self, vector_db, embeddata):   
        self.vector_db = vector_db
        self.embeddata = embeddata
        
    def search(self, query):
        query_embedding = self.embeddata.embed_model.get_query_embedding(query)

        # Start the timer
        start_time = time.time()

        result = self.vector_db.client.search(
            collection_name=self.vector_db.collection_name,

            query_vector=query_embedding,

            search_params=models.SearchParams(
                quantization=models.QuantizationSearchParams(
                    ignore=True,
                    rescore=True,
                    oversampling=2.0,
                )
            ),

            timeout=1000,
        )

        # End the timer
        end_time = time.time()
        elapsed_time = end_time - start_time

        print(f"Execution time for the search: {elapsed_time:.4f} seconds")

        return result
        
    

In [28]:
Retriever(database, embeddata).search("sample_query")[0]

Execution time for the search: 0.1283 seconds


  result = self.vector_db.client.search(


ScoredPoint(id='1a5793a7-4ee6-4085-9ddb-3aad89b28d41', version=558, score=0.6062472462654114, payload={'context': 'Static analysis techniques for software verification can be applied also in the scenario of query languages. In particular, the *Abstract interpretation framework has been extended to the field of query languages for relational databases as a way to support sound approximation techniques. The semantics of query languages can be tuned according to suitable abstractions of the concrete domain of data. The abstraction of relational database system has many interesting applications, in particular, for security purposes, such as fine grained access control, watermarking, etc.'}, vector=None, shard_key=None, order_value=None)

# RAG class 

In [None]:
#pip install llama-index qdrant_client torch transformers

#pip install llama-index-embeddings-huggingface

#pip install llama-index-llms-ollama

#pip install llama-index-vector-stores-qdrant

In [29]:
from llama_index.llms.ollama import Ollama

class RAG:

    def __init__(self,
                 retriever,
                 llm_name="llama3.2:1b"):
        
        self.llm_name = llm_name
        self.llm = self._setup_llm()
        self.retriever = retriever
        self.qa_prompt_tmpl_str = """Context information is below.
                                     ---------------------
                                     {context}
                                     ---------------------
                                     
                                     Given the context information above I want you
                                     to think step by step to answer the query in a
                                     crisp manner, incase case you don't know the
                                     answer say 'I don't know!'
                                     
                                     ---------------------
                                     Query: {query}
                                     ---------------------
                                     Answer: """
        
        
    def _setup_llm(self):
        return Ollama(model=self.llm_name)
    
    
    def generate_context(self, query):
    
        result = self.retriever.search(query)
        context = [dict(data) for data in result]
        combined_prompt = []

        for entry in context:
            context = entry["payload"]["context"]

            combined_prompt.append(context)

        return "\n\n---\n\n".join(combined_prompt)
    
    def query(self, query):
        context = self.generate_context(query=query)
        
        prompt = self.qa_prompt_tmpl_str.format(context=context,
                                                query=query)
        
        response = self.llm.complete(prompt)
        
        return dict(response)['text']

In [30]:
retriever = Retriever(database, embeddata)

rag = RAG(retriever)

In [33]:
query = """The premium and VIP services in Airports
           are reserved for which type of passengers?"""

answer = rag.query(query)

  result = self.vector_db.client.search(


Execution time for the search: 0.0073 seconds


In [34]:
from IPython.display import Markdown, display

display(Markdown(str(answer)))

To determine the correct answer, let's break down the information provided:

1. Premium and VIP services are usually reserved for:
   - First class passengers
   - Business class passengers
   - Members of airline clubs

Given this information, we can conclude that the premium and VIP services in airports are typically reserved for **First and Business class passengers**.

If you're unsure or have any further questions, feel free to ask!

# Binary quantization 

Here We will only make change in quadrantdb and retriever class

In [None]:
class QdrantVDB:

    def __init__(self, collection_name, vector_dim=768, batch_size=512):
        self.collection_name = collection_name
        self.batch_size = batch_size
        self.vector_dim = vector_dim
        
    def define_client(self):
        self.client = QdrantClient(url="http://localhost:6333",
                                   prefer_grpc=True)
        
        
    def create_collection(self):
        
        if not self.client.collection_exists(collection_name=self.collection_name):

            self.client.create_collection(collection_name=self.collection_name,
                                          
                                          vectors_config=models.VectorParams(
                                                              size=self.vector_dim,
                                                              distance=models.Distance.DOT,
                                                              on_disk=True),
                                          
                                          optimizers_config=models.OptimizersConfigDiff(
                                                                            default_segment_number=5,
                                                                            indexing_threshold=0),
                                         quantization_config=models.BinaryQuantization(
                                                        binary=models.BinaryQuantizationConfig(always_ram=True)),
                                         )
            
    

In [35]:
class QdrantVDB:

    def __init__(self, collection_name, vector_dim=768, batch_size=512):
        self.collection_name = collection_name
        self.batch_size = batch_size
        self.vector_dim = vector_dim
        
    def define_client(self):
        self.client = QdrantClient(url="http://localhost:6333",
                                   prefer_grpc=True)
        
        
    def create_collection(self):
        
        if not self.client.collection_exists(collection_name=self.collection_name):

            self.client.create_collection(collection_name=self.collection_name,
                                          
                                          vectors_config=models.VectorParams(
                                                              size=self.vector_dim,
                                                              distance=models.Distance.DOT,
                                                              on_disk=True),
                                          
                                          optimizers_config=models.OptimizersConfigDiff(
                                                                            default_segment_number=5,
                                                                            indexing_threshold=0),
                                          quantization_config=models.BinaryQuantization(
                                                        binary=models.BinaryQuantizationConfig(always_ram=True)),
                                         )
            
    def ingest_data(self, embeddata):
        for batch_context, batch_embeddings in tqdm(zip(batch_iterate(embeddata.contexts, self.batch_size), 
                                                        batch_iterate(embeddata.embeddings, self.batch_size)), 
                                                    total=len(embeddata.contexts)//self.batch_size, 
                                                    desc="Ingesting in batches"):

            self.client.upload_collection(collection_name=self.collection_name,
                                          vectors=batch_embeddings,
                                          payload=[{"context": context} for context in batch_context])

        self.client.update_collection(collection_name=self.collection_name,
                                      optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000)
                                     )
            
    

In [36]:
class Retriever:

    def __init__(self, vector_db, embeddata):   
        self.vector_db = vector_db
        self.embeddata = embeddata
        
    def search(self, query):
        query_embedding = self.embeddata.embed_model.get_query_embedding(query)

        # Start the timer
        start_time = time.time()

        result = self.vector_db.client.search(
            collection_name=self.vector_db.collection_name,

            query_vector=query_embedding,

            search_params=models.SearchParams(
                quantization=models.QuantizationSearchParams(
                    ignore=False,
                    rescore=True,
                    oversampling=2.0,
                )
            ),

            timeout=1000,
        )

        # End the timer
        end_time = time.time()
        elapsed_time = end_time - start_time

        print(f"Execution time for the search: {elapsed_time:.4f} seconds")

        return result
        
    

In [41]:
database = QdrantVDB("squad_collection_qb")
database.define_client()
database.create_collection()
database.ingest_data(embeddata)
retriever = Retriever(database, embeddata)
rag = RAG(retriever)
query = """The premium and VIP services in Airports
           are reserved for which type of passengers?"""

answer = rag.query(query)

Ingesting in batches: 37it [00:20,  1.80it/s]                                                                          
  result = self.vector_db.client.search(


Execution time for the search: 0.1813 seconds


ReadTimeout: timed out

In [42]:
from IPython.display import Markdown, display

display(Markdown(str(answer)))

To determine the correct answer, let's break down the information provided:

1. Premium and VIP services are usually reserved for:
   - First class passengers
   - Business class passengers
   - Members of airline clubs

Given this information, we can conclude that the premium and VIP services in airports are typically reserved for **First and Business class passengers**.

If you're unsure or have any further questions, feel free to ask!