In [1]:
# Allow the asyncio event loop to be used in a nested fashion inside the
# notebook kernel; this prevents runtime errors when later cells trigger
# async operations.
import nest_asyncio
nest_asyncio.apply()

In [2]:
# setting up vector database client

import qdrant_client

# Name of the Qdrant collection that the index cells below read and write.
collection_name = "demo2"

# Client for the Qdrant server addressed at localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

In [3]:
# Loading Documents

from llama_index.core import SimpleDirectoryReader

# Directory that is scanned for source documents.
input_dir_path = './'

# Walk the directory tree (sub-folders included) and keep only PDF files.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".pdf"],
)
docs = loader.load_data()

In [4]:
# Inspect the first loaded document (its full repr is echoed as the cell output).
# NOTE(review): the echoed repr includes the document metadata, among it the
# absolute local `file_path`; clear this cell's output before sharing the notebook.
docs[0]

Document(id_='2fc7403e-991a-4828-b8a3-b690d9e65cac', embedding=None, metadata={'page_label': '1', 'file_name': 'fraud_scenarios.pdf', 'file_path': 'D:\\Personal Folder\\Vijender Project\\cyber-fraud-detection-chatbot\\fraud_scenarios.pdf', 'file_type': 'application/pdf', 'file_size': 45982, 'creation_date': '2025-02-10', 'last_modified_date': '2025-02-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text="Scenario  1:  I  received  an  email  stating  that  I  won  a  lottery.  I  am  being  asked  to  provide  \ndocuments.\n Remediation:  Do  not  respond  to  the  email  or  share  any  documents.  This  is  a  classic  lottery  \nscam\n 

In [5]:
"""Creating index using new documents"""

from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Initialize the embedding model.
# `trust_remote_code` is deliberately left at its default (off): when enabled,
# Python code shipped in the Hub repository may be executed at load time, and
# this checkpoint is a standard model that loads without any custom code.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

# Initialize the node parser, which decides how documents are cut into chunks
# (chunk size 512 with an overlap of 128 between neighbouring chunks).
node_parser = SimpleNodeParser.from_defaults(
    chunk_size=512,
    chunk_overlap=128
)

# Register the embedding model and the node parser as the global defaults.
Settings.embed_model = embed_model
Settings.node_parser = node_parser

# creating index for similarity search from documents
def create_index(documents):
    """Build a vector index over ``documents`` backed by the Qdrant collection.

    The vector store is created from the notebook-level ``client`` and
    ``collection_name``.

    Args:
        documents: Documents handed to ``VectorStoreIndex.from_documents``.

    Returns:
        The index returned by ``VectorStoreIndex.from_documents``.
    """
    storage_context = StorageContext.from_defaults(
        vector_store=QdrantVectorStore(
            client=client,
            collection_name=collection_name,
        )
    )
    return VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
    )


index = create_index(docs)

In [6]:
"""creating index using existing collection"""
# The string literal below holds a disabled alternative to the previous cell:
# it builds `index` from the existing Qdrant collection through
# `VectorStoreIndex.from_vector_store` instead of from freshly loaded documents.
# Because it is only a bare string expression, none of the code inside it
# runs; the notebook simply echoes the string as this cell's output.
"""
from llama_index.core import Settings, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore

# initializing embeding model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5",
                                  trust_remote_code=True)

# setting up embeding model and node_parser
Settings.embed_model = embed_model


# creating index for similarity search from existing collection

def index_from_vector_store(vector_store):
    
    
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store
                                           )
    return index


vector_store = QdrantVectorStore(client=client, collection_name=collection_name)    
index = index_from_vector_store(vector_store)
"""

'\nfrom llama_index.core import Settings, VectorStoreIndex\nfrom llama_index.embeddings.huggingface import HuggingFaceEmbedding\nfrom llama_index.vector_stores.qdrant import QdrantVectorStore\n\n# initializing embeding model\nembed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5",\n                                  trust_remote_code=True)\n\n# setting up embeding model and node_parser\nSettings.embed_model = embed_model\n\n\n# creating index for similarity search from existing collection\n\ndef index_from_vector_store(vector_store):\n    \n    \n    index = VectorStoreIndex.from_vector_store(vector_store=vector_store\n                                           )\n    return index\n\n\nvector_store = QdrantVectorStore(client=client, collection_name=collection_name)    \nindex = index_from_vector_store(vector_store)\n'

In [7]:
import os
from getpass import getpass

from llama_index.llms.openai import OpenAI

# Never hard-code the OpenAI API key in the notebook. Use the key that is
# already exported in the environment and only prompt for one when it is
# missing; getpass does not echo the input, so the key does not end up in the
# saved notebook. The previous unconditional assignment of a placeholder
# overwrote a real key coming from the environment.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Initialize LLM
llm = OpenAI(
    model="gpt-3.5-turbo",  # OpenAI model
    temperature=0.7,
    max_tokens=512
)
# Update the global settings so later cells use this LLM by default
Settings.llm = llm

In [8]:
# setting up query engine

# from llama_index.core import get_response_synthesizer
from llama_index.core.postprocessor import SentenceTransformerRerank

# Reranker: reorders the initially retrieved chunks by relevancy and keeps 3.
rerank = SentenceTransformerRerank(
    top_n=3,
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
)

# Query engine: retrieve the 10 most similar chunks, then apply the reranker.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=10,
)

In [9]:
# creating prompt template for context and query

from llama_index.core import PromptTemplate

# Prompt text used for answering. It contains the {context_str} and {query_str}
# placeholders and asks for a fixed "Scenario / Remediation / Points of contact"
# layout, with a fallback sentence when nothing in the context matches.
# NOTE(review): the leading spaces on the continuation lines are part of the
# string literal and therefore part of the prompt text.
template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Based on the context above, analyze the query and provide the response in the following format:
              
              Scenario: [Describe the situation from matching context]
              Remediation: [Provide specific prevention/remediation steps]
              Points of contact: [List relevant contact information/helplines]
              
              If no relevant information is found in the context, respond with "No matching scenario found."
              
              Query: {query_str}
              
              Response:"""

# Wrap the raw text in a PromptTemplate object.
qa_prompt_tmpl = PromptTemplate(template)

# Update query engine with new template, registered under the
# "response_synthesizer:text_qa_template" prompt key.
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

In [10]:
# debugging time taken to process user query
# if RateLimitError occurs, you may need to buy credits for using API

import time
from llama_index.core import get_response_synthesizer
from IPython.display import Markdown, display

class RAGDebugger:
    """Trace a single query through a RAG query engine and print timings.

    The wrapped engine only has to provide ``retriever.retrieve(query)`` and
    ``query(query)``. If the returned response exposes ``source_nodes``, those
    are printed as the context that was actually used for the answer.

    Attributes:
        query_engine: The engine being inspected.
        timings: Seconds spent per stage, keyed ``'vector_search'``,
            ``'llm_response'`` and ``'total'``. ``'llm_response'`` covers the
            complete ``query_engine.query`` call, i.e. everything the engine
            does for a query, not only the remote LLM request.
    """

    def __init__(self, query_engine):
        self.query_engine = query_engine
        self.timings = {}

    def _time_vector_search(self, query):
        """Time a standalone retrieval and return the retrieved nodes.

        The elapsed time is stored in ``timings['vector_search']`` even when
        the retrieval raises.
        """
        # perf_counter is monotonic, so the measured interval cannot be skewed
        # by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        try:
            # Use the engine's public accessor rather than its private
            # ``_retriever`` attribute.
            return self.query_engine.retriever.retrieve(query)
        finally:
            self.timings['vector_search'] = time.perf_counter() - start

    def _time_llm_response(self, query):
        """Time the full ``query_engine.query`` call and return its response.

        The elapsed time is stored in ``timings['llm_response']`` even when
        the call raises (for example on an API quota error).
        """
        start = time.perf_counter()
        try:
            return self.query_engine.query(query)
        finally:
            self.timings['llm_response'] = time.perf_counter() - start

    @staticmethod
    def _print_chunks(nodes):
        """Print the stripped text of every scored node, numbered from 1."""
        for position, scored_node in enumerate(nodes, start=1):
            print(f"\nChunk {position}:")
            print(f"{scored_node.node.text.strip()}")
            print("\n" + "-"*30)

    def print_debug_info(self, user_query):
        """
        Print debug information including timings for each step

        Args:
            user_query: The question to send through the query engine.

        Returns:
            The response object returned by ``query_engine.query``.

        Raises:
            Whatever the retriever or the query engine raises; the timing of
            the failing stage is still recorded in ``timings``.
        """
        print("\n" + "="*50)
        print("RAG DEBUG INFORMATION")
        print("="*50)

        # Drop results of a previous run so a failure cannot leave stale
        # timings behind (the dict object itself is kept).
        self.timings.clear()

        # Time the entire process
        total_start = time.perf_counter()

        print("\nUser Query:")
        print("-"*50)
        print(user_query)

        # Time vector search
        print("\nRetrieving chunks from Vector DB...")
        retrieved_nodes = self._time_vector_search(user_query)
        print(f"Time taken for Vector Search: {self.timings['vector_search']:.3f} seconds")

        # These are the raw retriever hits. The engine's node post-processors
        # have not been applied yet, so this is NOT the final LLM context.
        print("\nChunks retrieved from Vector DB (before post-processing):")
        print("-"*50)
        self._print_chunks(retrieved_nodes)

        # Time LLM response
        print("\nGetting response from OpenAI...")
        response = self._time_llm_response(user_query)
        print(f"Time taken for OpenAI Response: {self.timings['llm_response']:.3f} seconds")

        # Calculate total time
        self.timings['total'] = time.perf_counter() - total_start

        # The response's source nodes are the chunks the engine really used
        # to build the answer, so show those as the context.
        source_nodes = getattr(response, "source_nodes", None)
        if source_nodes:
            print("\nContext sent to OpenAI:")
            print("-"*50)
            self._print_chunks(source_nodes)

        print("\nFinal Response:")
        print("-"*50)
        print(str(response))

        print("\nTiming Summary:")
        print("-"*50)
        print(f"Vector Search Time: {self.timings['vector_search']:.3f} seconds")
        print(f"OpenAI Response Time: {self.timings['llm_response']:.3f} seconds")
        print(f"Total Time: {self.timings['total']:.3f} seconds")

        return response

# Example usage: trace one sample query end to end.
user_query = (
    "Got an email regarding an investment scheme promising 50% returns "
    "in 3 months via WhatsApp"
)
debugger = RAGDebugger(query_engine)
response = debugger.print_debug_info(user_query)


RAG DEBUG INFORMATION

User Query:
--------------------------------------------------
Got an email regarding an investment scheme promising 50% returns in 3 months via WhatsApp

Retrieving chunks from Vector DB...
Time taken for Vector Search: 0.603 seconds

Context being sent to OpenAI:
--------------------------------------------------

Chunk 1:
Scenario  1:  I  received  an  email  stating  that  I  won  a  lottery.  I  am  being  asked  to  provide  
documents.
 Remediation:  Do  not  respond  to  the  email  or  share  any  documents.  This  is  a  classic  lottery  
scam
 
attempting
 
to
 
steal
 
your
 
information.
 Points  of  contact:  Cyber  Crime  Portal  (cybercrime.gov.in)  or  call  National  Cybercrime  Helpline  
1930
  Scenario  2:  Someone  called  claiming  to  be  from  my  bank  requesting  my  OTP  to  update  KYC.  Remediation:  Banks  never  ask  for  OTP  over  phone.  Never  share  OTP/PIN/CVV  with  anyone.  Points  of  contact:  File  complaint  with  loc

Retrying llama_index.llms.openai.base.OpenAI._chat in 1.0 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}.
Retrying llama_index.llms.openai.base.OpenAI._chat in 1.6192885658252698 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}.


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}