## load data

In [1]:
from datasets import load_dataset
# Fetch SQuAD through the `datasets` library; the 'train' split is read below.
dataset = load_dataset(path='squad')

In [2]:
# Cap on how many unique passages are embedded and indexed.
MAX_CONTEXTS = 12800

# The same passage can appear in many rows, so de-duplicate before embedding.
# dict.fromkeys keeps first-seen order; list(set(...)) would order the strings
# by their randomised hash, making the truncated subset differ on every kernel
# restart and the notebook non-reproducible.
contexts = [item['context'] for item in dataset['train']]
u_contexts = list(dict.fromkeys(contexts))
truncated_contexts = u_contexts[:MAX_CONTEXTS]

In [3]:
# utility functions
def chunky_iterate(data, chunk_size):
    """Yield consecutive slices of ``data`` holding at most ``chunk_size`` items.

    Args:
        data: A sliceable sequence (must support ``len()`` and slicing).
        chunk_size: Maximum number of items per chunk; must be positive.

    Yields:
        ``data[i:i + chunk_size]`` for each chunk in order; the last chunk may
        be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Because this is a
            generator, the error surfaces on first iteration.
    """
    # A negative step makes range() empty, which would silently drop all data.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    for start in range(0, len(data), chunk_size):
        yield data[start:start + chunk_size]


## embeddings

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


class Embedder:
    """Batching wrapper around a HuggingFace embedding model.

    Optimizations:
        * use a cache folder to cache the model
        * generate embeddings a batch at a time

    Args:
        model_name: HuggingFace model identifier to load.
        batch_size: Number of texts embedded per batch; must be positive.
        cache_folder: Directory where the downloaded model is cached.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    # Upper bound for the value forwarded as ``embed_batch_size``.
    # NOTE(review): llama-index is believed to validate this field as <= 2048;
    # confirm against the installed version.
    _MAX_EMBED_BATCH_SIZE = 2048

    def __init__(self, model_name = 'nomic-ai/nomic-embed-text-v1.5',
                batch_size = 32, cache_folder = './hf_cache'):
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
        self.batch_size = batch_size
        self.model_name = model_name
        self.cache_folder = cache_folder
        self.embed_model = self._load_embed_model()

    def _load_embed_model(self):
        """Build the underlying ``HuggingFaceEmbedding`` instance."""
        return HuggingFaceEmbedding(
            model_name=self.model_name,
            # Lets the model repository run its own code on load.
            trust_remote_code=True,
            cache_folder=self.cache_folder,
            # Without this the model re-batches each chunk at its own default
            # size, so ``batch_size`` would not control the real batch size.
            embed_batch_size=min(self.batch_size, self._MAX_EMBED_BATCH_SIZE),
        )

    def generate_batch_embeddings(self, context_batch):
        """Return one embedding per text in ``context_batch``."""
        return self.embed_model.get_text_embedding_batch(context_batch)

    def generate_embeddings(self, contexts):
        """Embed ``contexts`` in chunks of ``batch_size``.

        Returns:
            A flat list of embeddings in the same order as ``contexts``;
            an empty list when ``contexts`` is empty.
        """
        embeddings = []
        for batch_context in chunky_iterate(contexts, self.batch_size):
            embeddings.extend(self.generate_batch_embeddings(batch_context))
        return embeddings

    def generate_query_embeddings(self, query):
        """Return the embedding of a single search query string."""
        return self.embed_model.get_query_embedding(query)

## vector db

In [5]:
from qdrant_client import QdrantClient, models
class QdrantVectorDB:
    """Thin wrapper around one Qdrant collection of context embeddings.

    Optimizations:
        * use grpc
        * collection on disk for larger dataset
        * upload_collection uploads in batches automatically
        * indexing threshold is 0 when the collection is created, and is
          raised again once an ingest finishes

    Args:
        collection_name: Name of the collection to create or reuse.
        vector_dim: Dimensionality of the stored vectors (used on creation only).
        batch_size: Batch size passed to ``upload_collection``.
        host: Qdrant server host.
        port: Qdrant server port.
    """

    # Value the indexing threshold is set to after a bulk upload.
    _INDEXING_THRESHOLD = 20000

    def __init__(self, collection_name = "contexts_collection",
                 vector_dim = 768, batch_size = 512,
                 host = 'localhost', port = 6333):
        self.batch_size = batch_size
        self.collection_name = collection_name
        self.client = self._setup_client(host, port)
        self._setup_collection(vector_dim)

    def _setup_client(self, host, port):
        """Create the Qdrant client, preferring gRPC transport."""
        return QdrantClient(host= host, port= port, prefer_grpc=True)

    def _setup_collection(self, vector_dim):
        """Create the collection if it does not exist; otherwise leave it as is."""
        if self.client.collection_exists(collection_name= self.collection_name):
            return
        self.client.create_collection(
            collection_name= self.collection_name,
            vectors_config= models.VectorParams(
                size = vector_dim,
                distance = models.Distance.DOT,
                on_disk=True,
            ),
            # indexing_threshold=0 for the initial bulk upload; ingest_data
            # raises it again afterwards.
            optimizers_config=models.OptimizersConfigDiff(
                default_segment_number = 5, indexing_threshold=0),
        )

    def ingest_data(self, embeddings, contexts):
        """Upload ``embeddings`` with their source text, then raise the indexing threshold.

        Args:
            embeddings: One vector per context.
            contexts: Source texts, stored under the ``"context"`` payload key.

        Raises:
            ValueError: If both inputs are sized and their lengths differ.
        """
        try:
            n_vectors, n_contexts = len(embeddings), len(contexts)
        except TypeError:
            # Lazily generated inputs have no length to compare.
            pass
        else:
            if n_vectors != n_contexts:
                raise ValueError(
                    f"embeddings and contexts must have the same length, "
                    f"got {n_vectors} and {n_contexts}"
                )

        self.client.upload_collection(collection_name= self.collection_name,
            vectors= embeddings, batch_size= self.batch_size,
            payload=[{"context": context} for context in contexts]
        )

        self.client.update_collection(collection_name=self.collection_name,
            optimizers_config=models.OptimizersConfigDiff(
                indexing_threshold=self._INDEXING_THRESHOLD))

    def query_collection(self, query_embeddings, limit = 10, timeout = 1000):
        """Run a nearest-neighbour query against the collection.

        Args:
            query_embeddings: Query vector.
            limit: Maximum number of points to return.
            timeout: Timeout forwarded to ``query_points``.

        Returns:
            The response object from ``client.query_points``.
        """
        # NOTE(review): the collection is created here without a
        # quantization_config, so these settings may have no effect; confirm.
        quantization = models.QuantizationSearchParams(
            ignore = False,
            rescore = True,
            oversampling = 2.0
        )
        return self.client.query_points(
            collection_name = self.collection_name,
            query= query_embeddings,
            search_params = models.SearchParams(quantization = quantization),
            limit = limit,
            timeout = timeout
        )
        

## retriever

In [6]:
import time

class Retriever:
    """Embed a query and look up its nearest neighbours in the vector DB.

    Args:
        vectordb: Object exposing ``query_collection(query_embeddings)``.
        embedder: Object exposing ``generate_query_embeddings(query)``.
    """

    def __init__(self, vectordb, embedder):
        self.vectordb = vectordb
        self.embedder = embedder

    def search(self, query):
        """Return the vector-DB result for ``query`` and print the lookup time.

        Only the vector-DB call is timed; embedding the query is excluded.
        """
        query_embeddings = self.embedder.generate_query_embeddings(query)

        # perf_counter is monotonic and high-resolution; time.time() can jump
        # when the system clock is adjusted, giving wrong or negative durations.
        start_time = time.perf_counter()
        result = self.vectordb.query_collection(query_embeddings)
        elapsed_time = time.perf_counter() - start_time

        print(f'Execution of the query took : {elapsed_time} seconds')
        return result

        

## RAG class

In [7]:
from llama_index.llms.ollama import Ollama

class RAG:
    """Retrieval-augmented generation: retrieve contexts, then ask the LLM.

    Args:
        retriever: Object exposing ``search(query)`` whose result has a
            ``points`` sequence; each point carries a ``payload`` mapping.
        llm_model: Name of the Ollama model to use.
    """

    def __init__(self, retriever, llm_model = 'llama3.2:1b'):
        self.retriever = retriever
        self.llm = self._setup_llm(llm_model)
        self.qa_prompt_template = self._setup_prompt_template()

    def _setup_llm(self, llm_model):
        """Create the Ollama LLM client for ``llm_model``."""
        return Ollama(model=llm_model)

    def _setup_prompt_template(self):
        """Return the QA prompt with ``{context}`` and ``{query}`` placeholders."""
        return """
            Context information is below
            ----------------------------
            {context}
            ----------------------------

            Given the context information above i want you 
            to think step by step to answer the query in a 
            crisp manner, in case you don't know the answer,
            say 'I don't know'

            ------------------------------
            Query: {query}
            ------------------------------
            Answer:
            """

    def generate_context(self, query):
        """Retrieve contexts for ``query`` and join them into one string.

        Points whose payload is missing or has no ``'context'`` key are
        skipped rather than raising. Returns an empty string when nothing
        usable is retrieved.
        """
        result = self.retriever.search(query)
        contexts = [
            item.payload['context']
            for item in result.points
            if item.payload and 'context' in item.payload
        ]
        return '\n\n---\n\n'.join(contexts)

    def query(self, query):
        """Answer ``query`` from the retrieved contexts.

        Returns:
            The text of the LLM completion.
        """
        contexts = self.generate_context(query=query)
        prompt = self.qa_prompt_template.format(context=contexts, query=query)
        response = self.llm.complete(prompt)
        # Read the completion text from the response's attribute rather than
        # converting the whole response object to a dict.
        return response.text

In [8]:
# Stage 1: embed every passage once, up front.
embedder = Embedder()
embeddings = embedder.generate_embeddings(contexts=truncated_contexts)

# Stage 2: create (or reuse) the collection and upload vectors with their text.
vectordb = QdrantVectorDB()
vectordb.ingest_data(embeddings, truncated_contexts)

# Stage 3: wire retrieval and generation together.
retriever = Retriever(vectordb, embedder)
rag = RAG(retriever=retriever)

<All keys matched successfully>


In [11]:
# Sample question run through the full retrieve-then-generate pipeline.
query = (
    "The premium and VIP services in Airports "
    "are reserved for which type of passengers?"
)
result = rag.query(query=query)

Execution of the query took : 0.010528802871704102 seconds


In [12]:
from IPython.display import display, Markdown
# Render the answer string as Markdown in the cell output.
display(Markdown(result))

To determine which type of passenger is eligible for premium and VIP services at airports, we need to consider the following steps:

1. Identify the definition of premium and VIP services.
2. Analyze the context where these services are mentioned (e.g., in the provided text).
3. Determine which passengers or groups are typically considered as such.

Based on the given information, we can infer that:

- Premium and VIP services often require a high level of customer service or luxury experience. This suggests that they might be more suitable for business travelers, first-class passengers, or those who value exceptional comfort and convenience.
- Business class passengers are typically willing to pay a premium for their travel experience, indicating that these individuals may be more likely to benefit from the services offered.

Considering these points, it can be inferred that:

- First-class passengers at airports would likely be eligible for premium and VIP services. They often pay a higher fare than business class passengers and may expect a more luxurious experience during their flight.
- Business class passengers, who are typically willing to pay a premium for their travel experience, might also benefit from the available premium and VIP services.

Therefore, based on this analysis, I would say that:

I don't know