In [3]:
import os
import boto3
from dotenv import load_dotenv

load_dotenv()

## The Titan Embeddings model is used to generate the embeddings

from langchain_community.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock
from langchain_community.chat_models.bedrock import BedrockChat

## Data Ingestion

import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Vector Embedding And Vector Store
# from langchain_community.vectorstores import FAISS
from langchain.vectorstores import FAISS

## LLM models
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

from pypdf import PdfReader
from langchain.schema import Document


In [44]:

## Bedrock Clients
# Shared Bedrock runtime client; the embeddings wrapper below and the chat
# model created later in the notebook both reuse it.
bedrock = boto3.client(service_name="bedrock-runtime")

# Titan text-embedding model wrapper bound to the shared client.
bedrock_embeddings = BedrockEmbeddings(
    client=bedrock,
    model_id="amazon.titan-embed-text-v1",
)


#Extract PDF Data
def extract_pdf_docs(filename):
    """Read a PDF and return one ``Document`` per page.

    Args:
        filename: path of the .pdf file to read.

    Returns:
        A list of ``Document`` objects, in page order, whose ``page_content``
        is the text extracted from the corresponding page.
    """
    pdf = PdfReader(filename)
    page_count = len(pdf.pages)
    return [
        Document(page_content=pdf.pages[page_no].extract_text())
        for page_no in range(page_count)
    ]


In [12]:
# Resolve the project directory that holds the PDFs and the FAISS index.
# The notebook is expected to run from an `ipython/` sub-folder of the project.
# Only strip that folder when it is the final path component: a blanket
# str.replace('ipython', '') would also mangle any other occurrence of
# "ipython" elsewhere in the path.
_cwd = os.getcwd()
_parent_dir, _leaf_dir = os.path.split(_cwd)
# Joining with '' guarantees a trailing separator, because later cells build
# file paths by plain concatenation (e.g. `src_dir + 'B.pdf'`).
src_dir = os.path.join(_parent_dir if _leaf_dir == 'ipython' else _cwd, '')
src_dir

'/Users/narenjhabakh/Desktop/aws bedrock/'

In [45]:
# Load the document under review (B.pdf) as one Document per page.
policy_pdf_path = src_dir + 'B.pdf'
test_docs = extract_pdf_docs(policy_pdf_path)
test_docs

[Document(page_content=' Bank Internal IT Policy  \n1. Introduction  This policy outlines the framework for managing ICT and security risks in \ncompliance with the EBA Guidelines on ICT and security risk management (EBA/GL/2019/04). \nIt aims to ensure the confidentiality, integrity, and availability of the bank’s ICT systems  and \ndata.  \n2. Scope  This policy applies to all ICT systems, services, processes, and staff within the bank, \nincluding third -party providers and contractors.  \n3. Governance and Strategy  \n• Management Body Responsibilities:  The management body is accountable for  the ICT \nstrategy, ensuring its alignment with the business strategy. It should establish an internal \ncontrol framework for ICT and security risks, set clear roles and responsibilities, and \nensure adequate resources and training.  \n• ICT Strategy:  The ICT stra tegy should define:  \no The evolution of ICT to support the business strategy.  \no Key dependencies on third parties.  \no In

In [16]:
# Location of the previously saved FAISS index of the guidelines.
path_index = f"{src_dir}faiss_index_guidelines"

# NOTE(review): allow_dangerous_deserialization=True opts in to loading a
# serialized index that is unsafe for untrusted files (LangChain documents
# this as pickle-based); only load an index that you created yourself.
vectorstore = FAISS.load_local(
    path_index,
    bedrock_embeddings,
    allow_dangerous_deserialization=True,
)

In [22]:
# Retriever over the guidelines index, configured to return k=3 results per query.
retriever_settings = {"k": 3}
retriever = vectorstore.as_retriever(search_kwargs=retriever_settings)

In [25]:
# Sanity check: use the text of the first page as the retrieval query.
first_page_text = test_docs[0].page_content
docs = retriever.get_relevant_documents(first_page_text)
docs

[Document(page_content='FINAL REPORT  ON GUIDELINES ON ICT AND SECURITY RISK MA NAGEMENT   \n \n4 \n \nThese guidelines provide detail s on how financial institutions should comply  in order  to address ICT \nand security risk s, with the following  provisions in the Capital Requirements Directive (CRD ) and \nPSD2: \n(i) Article  74 of Directive  2013/36/EU (CRD) , which strengthens the governance requirements for \ninstitutions , including the requirements to have robust governance arrangements with a clear \norganisational structure with well -defined, transparent and consistent lines of responsibility and  \neffective processes to identify, manage, monitor and report the risk they are or might be exposed \nto;  \n(ii) Article  95 of Directive  2015/2366/EU (PSD2) , which contains explicit provisions for the \nmanagement of operation and secu rity risks that requir e PSPs to have appropriate mitigation \nmeasures and control mechanisms to manage the operational and security risks an

In [26]:
# Same sanity check, with the text of the second page as the query.
second_page_text = test_docs[1].page_content
docs = retriever.get_relevant_documents(second_page_text)
docs

[Document(page_content='FINAL REPORT  ON GUIDELINES ON ICT AND SECURITY RISK MA NAGEMENT   \n \n4 \n \nThese guidelines provide detail s on how financial institutions should comply  in order  to address ICT \nand security risk s, with the following  provisions in the Capital Requirements Directive (CRD ) and \nPSD2: \n(i) Article  74 of Directive  2013/36/EU (CRD) , which strengthens the governance requirements for \ninstitutions , including the requirements to have robust governance arrangements with a clear \norganisational structure with well -defined, transparent and consistent lines of responsibility and  \neffective processes to identify, manage, monitor and report the risk they are or might be exposed \nto;  \n(ii) Article  95 of Directive  2015/2366/EU (PSD2) , which contains explicit provisions for the \nmanagement of operation and secu rity risks that requir e PSPs to have appropriate mitigation \nmeasures and control mechanisms to manage the operational and security risks an

In [50]:
# Prompt for checking ONE chunk (page) of the document under review.
#   {question} - text of the policy section being checked
#   {context}  - guideline text retrieved from the vector store
# The closing tags were previously written as `</question` and `</context`
# (missing '>'), leaving the delimiters sent to the model malformed.
prompt_template_compliance_chunks = """
Imagine you are a compliance officer for a bank checking if policies and guidelines are being met.
Check the sections of the following document on whether the policies and guidelines are being met.
<question>
{question}
</question>

The following are the policies and guidelines to be checked against:
<context>
{context}
</context>

Provide a high level response of the question with a single word - Yes, No or Partially Compliant.
Provide a detailed summary under the high level response for the non compliant or partially compliant 
sections of the context with quoted reference from the context above and suggested change. 
Please refer only to the document. 
Please be formal in your response. 
Please avoid any biases.
Assistant:"""

In [51]:
# Prompt object for the per-chunk check; the chain fills in the
# "context" and "question" placeholders of the template.
PROMPT1 = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template_compliance_chunks,
)

In [37]:
def get_response_llm(llm, vectorstore_faiss, query, PROMPT, k=1):
    """Answer ``query`` with a RetrievalQA "stuff" chain and return the answer text.

    Args:
        llm: language model passed to the chain.
        vectorstore_faiss: vector store whose ``as_retriever`` supplies the
            context documents.
        query: text used both as the retrieval query and as the question.
        PROMPT: prompt template handed to the chain via ``chain_type_kwargs``.
        k: number of documents to retrieve per query. Defaults to 1, the
            value that was previously hard-coded.

    Returns:
        The ``'result'`` entry of the chain output.
    """
    retriever = vectorstore_faiss.as_retriever(
        search_type="similarity", search_kwargs={"k": k}
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    # `invoke` replaces calling the chain object directly (`qa({...})`),
    # which LangChain has deprecated.
    answer = qa.invoke({"query": query})
    return answer['result']

In [38]:
def get_claude_llm(model_id="anthropic.claude-3-sonnet-20240229-v1:0", max_tokens=1000):
    """Create the Anthropic Claude chat model served through Bedrock.

    Args:
        model_id: Bedrock model identifier. Defaults to the Claude 3 Sonnet
            id that was previously hard-coded.
        max_tokens: value passed as ``max_tokens`` in ``model_kwargs``.
            Defaults to 1000, the previously hard-coded value.

    Returns:
        A ``BedrockChat`` instance bound to the module-level ``bedrock`` client.
    """
    llm = BedrockChat(
        model_id=model_id,
        client=bedrock,
        model_kwargs={'max_tokens': max_tokens},
    )
    return llm

In [42]:
# Run the per-chunk compliance check once for every page of the document.
# The LLM wrapper is loop-invariant, so it is built once here instead of
# being re-created on every iteration.
claude_llm = get_claude_llm()
responses = {}
for chunk, doc in enumerate(test_docs):
    chunk_no = chunk + 1  # 1-based number used for progress output and dict keys
    print(chunk_no)
    responses[f'Chunk{chunk_no}'] = get_response_llm(claude_llm, vectorstore, doc.page_content, PROMPT1)

1
2
3


In [43]:
# Show the per-page answers collected above, keyed 'Chunk1', 'Chunk2', ...
responses

{'Chunk1': 'Partially Compliant\n\nDetailed summary:\n\nThe provided context partially covers the policies outlined in the question. The following sections are not fully compliant or missing:\n\n1. Section 3 - Governance and Strategy:\nThe context does not explicitly mention the requirements stated in the question regarding the management body\'s accountability for the ICT strategy, ensuring its alignment with the business strategy, establishing an internal control framework, setting clear roles and responsibilities, and ensuring adequate resources and training.\n\nSuggested change: Include details on the management body\'s responsibilities as per the question.\n\n2. Section 4 - Risk Management Framework:\nThe context covers risk identification, assessment, and mitigation. However, it does not mention the specific requirements stated in the question, such as maintaining updated mappings of business functions, processes, and information assets, classifying assets based on confidentialit

In [46]:
def extract_pdf(filename):
    '''
    Concatenate the text of every page of a PDF into a single string.
    args :
        filename - name of the .pdf file
    returns:
        all_text - a single leading space followed by the extracted text of
                   each page, in page order, with no separator between pages
    '''
    pdf = PdfReader(filename)
    page_texts = [pdf.pages[page_no].extract_text() for page_no in range(len(pdf.pages))]
    # The leading " " mirrors the seed value the text has always started with.
    return " " + "".join(page_texts)

In [49]:
# Load the whole of B.pdf as one string for the full-document check.
policy_pdf_file = src_dir + 'B.pdf'
test_doc_txt = extract_pdf(policy_pdf_file)
print(test_doc_txt)

  Bank Internal IT Policy  
1. Introduction  This policy outlines the framework for managing ICT and security risks in 
compliance with the EBA Guidelines on ICT and security risk management (EBA/GL/2019/04). 
It aims to ensure the confidentiality, integrity, and availability of the bank’s ICT systems  and 
data.  
2. Scope  This policy applies to all ICT systems, services, processes, and staff within the bank, 
including third -party providers and contractors.  
3. Governance and Strategy  
• Management Body Responsibilities:  The management body is accountable for  the ICT 
strategy, ensuring its alignment with the business strategy. It should establish an internal 
control framework for ICT and security risks, set clear roles and responsibilities, and 
ensure adequate resources and training.  
• ICT Strategy:  The ICT stra tegy should define:  
o The evolution of ICT to support the business strategy.  
o Key dependencies on third parties.  
o Information security objectives focusing

In [52]:
# Prompt for checking the WHOLE document in one call, asking for a verdict
# per section.
#   {question} - full text of the policy being checked
#   {context}  - guideline text retrieved from the vector store
# The closing tags were previously written as `</question` and `</context`
# (missing '>'), leaving the delimiters sent to the model malformed.
prompt_template_compliance_all = """
Imagine you are a compliance officer for a bank checking if policies and guidelines are being met.
Check the sections of the following question on whether the policies and guidelines are being met.
<question>
{question}
</question>

The following are the policies and guidelines to be checked against:
<context>
{context}
</context>

Provide a high level response of the question with a single word - Yes, No or Partially Compliant for each section of the question.
Provide a detailed summary under the high level response for the non compliant or partially compliant 
sections of the context with quoted reference from the context above and suggested change. 
Please refer only to the document. 
Please be formal in your response. 
Please avoid any biases.
Assistant:"""

In [54]:
# Prompt object for the full-document check; the chain fills in the
# "context" and "question" placeholders of the template.
PROMPT2 = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template_compliance_all,
)

In [55]:
# Check the complete policy text in a single call using the full-document prompt.
full_doc_llm = get_claude_llm()
response_all = get_response_llm(full_doc_llm, vectorstore, test_doc_txt, PROMPT2)

In [56]:
# print() is used so the newlines in the model's answer are rendered as line breaks.
print(response_all)

I will review each section of the provided question against the guidelines mentioned in the context and provide a high-level response (Yes, No, or Partially Compliant) along with a detailed summary for non-compliant or partially compliant sections.

<question>
  Bank Internal IT Policy   
1. Introduction  
</question>
High-level response: Yes

<question>
2. Scope  
</question>
High-level response: Yes  

<question>
3. Governance and Strategy
</question>
High-level response: Partially Compliant

Detailed summary:
The context states under Section 3.2: "It requires the establishment of the financial institutions' ICT strategy, which should be aligned with their overall business strategy." However, the question does not explicitly mention aligning the ICT strategy with the overall business strategy.

Suggested change: Mention the need to align the ICT strategy with the business strategy in Section 3 of the question.

<question>
4. Risk Management Framework
</question>
High-level response: 