In [1]:
import nest_asyncio

# Patch asyncio so an already-running event loop (Jupyter runs one) can be re-entered.
# NOTE(review): presumably needed by the llama_index calls further down - confirm.
nest_asyncio.apply()

In [2]:
from llama_index.core import SimpleDirectoryReader

# The single source PDF that the rest of the notebook indexes and queries.
pdf_path = "../data/master-direction-rbi/Digital Payment Security Controls.pdf"

reader = SimpleDirectoryReader(input_files=[pdf_path])
docs = reader.load_data()

In [3]:
# Sanity check: show the metadata attached to the first loaded document.
first_doc = docs[0]
print(first_doc.metadata)

{'page_label': '1', 'file_name': 'Digital Payment Security Controls.pdf', 'file_path': '../data/master-direction-rbi/Digital Payment Security Controls.pdf', 'file_type': 'application/pdf', 'file_size': 267719, 'creation_date': '2025-01-17', 'last_modified_date': '2024-06-06'}


In [4]:
# Configure the LLM and the embedding model used by the RAG pipeline
import os

from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.mistralai import MistralAI

# The API key comes from the environment (optionally via a .env file); it is never hard-coded.
load_dotenv()
api_key = os.getenv("MISTRAL_API_KEY")

# Mistral generates the answers; the Ollama-served model produces the embeddings.
llm = MistralAI(api_key=api_key)
embed_model = OllamaEmbedding(model_name="mxbai-embed-large")

# Register both on the global llama_index Settings object.
Settings.llm = llm
Settings.embed_model = embed_model

In [5]:
# Build the vector store index once, then reuse the persisted copy on later runs
index_directory = '../data/master-direction-rbi/index'
persist_dir = index_directory + '/md-rbi-structured'

if os.path.exists(persist_dir):
    # A persisted index already exists: load it instead of re-embedding the documents.
    # NOTE(review): the cache is keyed on the directory only, so it is not refreshed if `docs` change.
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
else:
    index = VectorStoreIndex(docs)
    index.storage_context.persist(persist_dir=persist_dir)

In [6]:
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

# Re-scores the retrieved nodes and keeps the 5 best ones.
reranker = FlagEmbeddingReranker(model="BAAI/bge-reranker-large", top_n=5)

In [7]:
from typing import List

from pydantic import BaseModel, Field


# NOTE(review): the class docstring and the Field descriptions presumably end up in the
# schema shown to the LLM, so treat them as prompt text when editing - confirm.
class Output(BaseModel):
    """Output containing the response, page numbers, and confidence."""

    response: str = Field(
        ...,
        description="The answer to the question.",
    )
    page_numbers: List[int] = Field(
        ...,
        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
    )
    confidence: float = Field(
        ...,
        description="Confidence value between 0-1 of the correctness of the result.",
    )
    confidence_explanation: str = Field(
        ...,
        description="Explanation for the confidence score.",
    )


# Wrap the LLM so that its answers are returned in the shape of `Output`.
sllm = llm.as_structured_llm(output_cls=Output)

In [8]:
# Build the query engine: retrieve -> rerank -> summarise into the structured output.
#
# The retriever must fetch MORE candidates than the reranker keeps (top_n=5);
# with both set to 5 the reranker could only reorder nodes and never drop a weak one.
RETRIEVAL_TOP_K = 10

query_engine = index.as_query_engine(
    similarity_top_k=RETRIEVAL_TOP_K,
    node_postprocessors=[reranker],
    llm=sllm,
    response_mode="tree_summarize",
)

In [9]:
# Ask a question; printing the response shows its string form (JSON, as seen in the output).
question = "What controls are suggested for card payments security?"
response = query_engine.query(question)
print(response)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{"response":"Several controls are suggested for card payments security:\n\n1. **Adherence to Payment Card Industry (PCI) Standards**: This includes following various payment card standards such as PCI-PIN, PCI-PTS, PCI-HSM, and PCI-P2PE to ensure comprehensive payment card security.\n\n2. **Terminal Validation**: Ensuring that terminals installed at merchants for capturing card details are validated against the PCI-P2PE program and that PoS terminals with PIN entry are approved by the PCI-PTS program.\n\n3. **Securing Card Payment Infrastructure**: Acquirers should secure their card payment infrastructure using methods like Unique Key Per Terminal (UKPT), Derived Unique Key Per Transaction (DUKPT), or Terminal Line Encryption (TLE).\n\n4. **Hardware Security Module (HSM) Controls**: Implementing specific security controls at HSM, including logging, access control, and secure backups.\n\n5. **Staff Training**: Educating staff, especially those in fraud control, about frauds and training

In [10]:
import json
import pandas as pd

# The string form of the response is JSON; decode it into a plain dict ...
response_text = str(response)
raw_json = json.loads(response_text)

# ... and wrap that dict as a one-row DataFrame for tabular display.
df = pd.DataFrame(data=[raw_json])

In [11]:
# Plain-text dump of each field of the structured answer.
print(f"#Response: {raw_json['response']}")
print(f"#Page Source: {raw_json['page_numbers']}")
print(f"#Confidence: {raw_json['confidence']}")
print(f"#Explanation: {raw_json['confidence_explanation']}")

#Response: Several controls are suggested for card payments security:

1. **Adherence to Payment Card Industry (PCI) Standards**: This includes following various payment card standards such as PCI-PIN, PCI-PTS, PCI-HSM, and PCI-P2PE to ensure comprehensive payment card security.

2. **Terminal Validation**: Ensuring that terminals installed at merchants for capturing card details are validated against the PCI-P2PE program and that PoS terminals with PIN entry are approved by the PCI-PTS program.

3. **Securing Card Payment Infrastructure**: Acquirers should secure their card payment infrastructure using methods like Unique Key Per Terminal (UKPT), Derived Unique Key Per Transaction (DUKPT), or Terminal Line Encryption (TLE).

4. **Hardware Security Module (HSM) Controls**: Implementing specific security controls at HSM, including logging, access control, and secure backups.

5. **Staff Training**: Educating staff, especially those in fraud control, about frauds and training them in rel

In [12]:
# Flatten each list of page numbers into a comma-separated string for display.
# The isinstance guard keeps this cell idempotent: on a re-run the column already
# holds strings, and joining a string would split it into single characters.
df['page_numbers'] = df['page_numbers'].apply(
    lambda pages: pages if isinstance(pages, str) else ', '.join(map(str, pages))
)
df

Unnamed: 0,response,page_numbers,confidence,confidence_explanation
0,Several controls are suggested for card paymen...,"18, 12",0.95,The response is derived directly from the prov...


In [13]:
from IPython.display import Markdown

# Render the answer as Markdown so the model's lists and bold text are formatted.
response_md = Markdown('## Response: \n' + raw_json['response'])
display(response_md)

## Response: 
Several controls are suggested for card payments security:

1. **Adherence to Payment Card Industry (PCI) Standards**: This includes following various payment card standards such as PCI-PIN, PCI-PTS, PCI-HSM, and PCI-P2PE to ensure comprehensive payment card security.

2. **Terminal Validation**: Ensuring that terminals installed at merchants for capturing card details are validated against the PCI-P2PE program and that PoS terminals with PIN entry are approved by the PCI-PTS program.

3. **Securing Card Payment Infrastructure**: Acquirers should secure their card payment infrastructure using methods like Unique Key Per Terminal (UKPT), Derived Unique Key Per Transaction (DUKPT), or Terminal Line Encryption (TLE).

4. **Hardware Security Module (HSM) Controls**: Implementing specific security controls at HSM, including logging, access control, and secure backups.

5. **Staff Training**: Educating staff, especially those in fraud control, about frauds and training them in relevant skills and areas of expertise.

In [14]:
def _show_md(md_source):
    """Render one Markdown snippet in the cell output."""
    display(Markdown(md_source))


# Full structured answer: response text, source pages, confidence and its rationale.
_show_md('### Response:\n ' + raw_json['response'])
_show_md('**Page Source:** ' + str(raw_json['page_numbers']))
_show_md('**Confidence:** ' + str(raw_json['confidence']))
_show_md('**Explanation**: ' + raw_json['confidence_explanation'])

### Response:
 Several controls are suggested for card payments security:

1. **Adherence to Payment Card Industry (PCI) Standards**: This includes following various payment card standards such as PCI-PIN, PCI-PTS, PCI-HSM, and PCI-P2PE to ensure comprehensive payment card security.

2. **Terminal Validation**: Ensuring that terminals installed at merchants for capturing card details are validated against the PCI-P2PE program and that PoS terminals with PIN entry are approved by the PCI-PTS program.

3. **Securing Card Payment Infrastructure**: Acquirers should secure their card payment infrastructure using methods like Unique Key Per Terminal (UKPT), Derived Unique Key Per Transaction (DUKPT), or Terminal Line Encryption (TLE).

4. **Hardware Security Module (HSM) Controls**: Implementing specific security controls at HSM, including logging, access control, and secure backups.

5. **Staff Training**: Educating staff, especially those in fraud control, about frauds and training them in relevant skills and areas of expertise.

**Page Source:** [18, 12]

**Confidence:** 0.95

**Explanation**: The response is derived directly from the provided content on pages 18 and 12, which discuss various controls for card payments security.