In [30]:
import nest_asyncio

In [31]:
nest_asyncio.apply()

In [32]:
import qdrant_client


collection_name="demo1"

client = qdrant_client.QdrantClient(
    host="localhost",
    port=6333
)

In [33]:
from llama_index.core import SimpleDirectoryReader

input_dir_path = './docs'

loader = SimpleDirectoryReader(
            input_dir = input_dir_path,
            required_exts=[".pdf"],
            recursive=True
        )
docs = loader.load_data()

In [34]:
print(docs[0])

Doc ID: 462e4b62-2cd3-4173-b2ac-0836bca3f270
Text: Scenario  1:  I  received  an  email  stating  that  I  won  a
lottery.  I  am  being  asked  to  provide   documents.  Remediation:
Do  not  respond  to  the  email  or  share  any  documents.  This  is
a  classic  lottery   scam   attempting   to   steal   your
information.  Points  of  contact:  Cyber  Crime  Portal
(cybercrime.gov.in) ...


In [41]:
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.text_splitter import SentenceSplitter

node_parser = SimpleNodeParser.from_defaults(
    chunk_size=512,
    chunk_overlap=128
)

def create_index(documents):
    service_context = ServiceContext.from_defaults(node_parser=node_parser)
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        service_context=service_context
    )
    return index

In [43]:
from llama_index.core import Settings, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5",
                                  trust_remote_code=True)
Settings.embed_model = embed_model
Settings.node_parser = node_parser

def create_index(documents):
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(documents,
                                          storage_context=storage_context)
    return index

index = create_index(docs)

In [44]:
from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3.2:1b", request_timeout=120.0)

Settings.llm = llm

In [57]:
from llama_index.core import PromptTemplate

template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Based on the context above, analyze the query and provide the response in the following format:
              
              Scenario: [Describe the situation from matching context]
              Remediation: [Provide specific prevention/remediation steps]
              Points of contact: [List relevant contact information/helplines]
              
              If no relevant information is found in the context, respond with "No matching scenario found."
              
              Query: {query_str}
              
              Response:"""

qa_prompt_tmpl = PromptTemplate(template)

# Update query engine with new template
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

In [46]:
from llama_index.core.postprocessor import SentenceTransformerRerank

rerank = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-2-v2", 
    top_n=3
)

In [86]:
from llama_index.core import get_response_synthesizer

query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank]
)

def print_source_nodes(response):
    source_nodes = response.source_nodes
    print("\nRetrieved chunks:")
    for i, node in enumerate(source_nodes):
        print(f"\nChunk {i+1}:")
        print(node.text)

response = query_engine.query("Received job offer requiring payment for registration/training. What are the points of Contact?")
# print_source_nodes(response)

In [87]:
from IPython.display import Markdown, display

display(Markdown(str(response)))

Points of contact for receiving a job offer requiring payment for registration/training are:

1. Ministry of Labour & Employment portal (labour.gov.in)
2. Report to Consumer Affairs helpline (1915) or Cyber Crime Helpline (1930)