In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

MODEL = "gpt-4.1-mini-2025-04-14"

In [None]:
os.environ['OPENAI_API_KEY']="your-api-key"


In [None]:
import zipfile
import os
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

#Extract zip files from data folder
DATA_PATH='Eval_Data/'

#Step-1: load all pdfs (even inside subfolders)
def load_pdf_files(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

documents=load_pdf_files(data=DATA_PATH)
print(len(documents))

Extracted: Data(eval).zip
📄 Loaded 1570 PDF pages.
Length of Text Chunks 9830


In [None]:
def create_chunks(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

text_chunks=create_chunks(extracted_data=documents)
print('Length of Text Chunks', len(text_chunks))

In [5]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Create embeddings
embedding_model = OpenAIEmbeddings()

# Create FAISS vector store from documents
vectorstore = FAISS.from_documents(
    text_chunks,
    embedding=embedding_model
)


In [6]:
import pandas as pd

df = pd.DataFrame([d.page_content for d in text_chunks], columns=["text"])
df.head(10)

Unnamed: 0,text
0,Insurance Authority Board of Directors' Resolu...
1,Evidence Act for Civil and Commercial Transact...
2,Telecommunications Sector. \n- The Federal Law...
3,- The Federal Law No.)6 ( of 2007 on Establis...
4,- The Federal Law No. (20) of 2018 Concerning ...
5,Law No. (6) of 2007 Concerning the Establishme...
6,"amendments thereof; \n- And, based on the rec..."
7,Law: Federal Law No. (6) of 2007 Cconcerning t...
8,Company: The insurance company incorporated in...
9,practice any of the activities of Insurance Ag...


In [7]:
from giskard.rag import KnowledgeBase

knowledge_base=KnowledgeBase(df)

2025-06-27 15:05:41,592 pid:10332 MainThread giskard.llm.embeddings INFO     No embedding model set though giskard.llm.set_embedding_model. Defaulting to openai/text-embedding-3-small since OPENAI_API_KEY is set.


Generate the test set

In [8]:
from giskard.rag import generate_testset

testset=generate_testset(
    knowledge_base,
    num_questions=60,
    agent_description="You are a banking assistant for a UAE-based bank. Answer questions accurately based on the provided documents. Stick to the information in the context and avoid guessing. If the answer is not available, respond with “I don’t know based on the provided information."
)

2025-06-27 15:35:50,872 pid:10332 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2025-06-27 15:47:52,423 pid:10332 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


Generating questions:   0%|          | 0/60 [00:00<?, ?it/s]

In [9]:
test_set_df = testset.to_pandas()

for index, row in enumerate(test_set_df.head(3).iterrows()):
    print(f"Question {index + 1}: {row[1]['question']}")
    print(f"Reference answer: {row[1]['reference_answer']}")
    print("Reference context:")
    print(row[1]['reference_context'])
    print("******************", end="\n\n")

Question 1: What is required from a newly designated LVPS according to the Central Bank?
Reference answer: The Central Bank requires any SO and/or SI of a newly designated LVPS to inform the Central Bank in writing of the designation particulars within fourteen (14) working days after the notification of designation, including the name, place of business.
Reference context:
Document 3809: يعرضُعدمُالامتثالُلأيُمنُهذهُالمتطلباتُالطرفُ
المعنيُللجزاءاتُالمنصوصُعليهاُفيُقانونُالمصرفُ
المركزي.ُبالإضافةُالىُأحكامُمبادئُالبنيةُالتحتيةُللسوقُ
الماليُذاتُالصلة،ُنذكرُمنُالمتطلباتُالرئيسيةُالتالي: 
4.1. Submission of  particulars – the Central 
Bank requires any SO and/or SI of a newly 
designated LVPS to inform the Central 
Bank in writing of the designation 
particulars within fourteen (14) working 
days after the notification of designation, 
including the name, place of business,
******************

Question 2: What is the market value of Bank XYZ's long position in A Corp.?
Reference answer:

In [10]:

testset.save("test-set-after-hybridsearch.jsonl")

In [16]:

from langchain.prompts import PromptTemplate

template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
print(prompt.format(context="Here is some context", question="Here is a question"))


Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [None]:
from langchain.prompts import PromptTemplate
CUSTOM_PROMPT_TEMPLATE = """
your-customized-prompt

Context:
{context}

Question:
{question}

Start the answer directly. No small talk please.
"""

prompt = PromptTemplate.from_template(CUSTOM_PROMPT_TEMPLATE)
print(prompt.format(context="Here is some context", question="Here is a question"))




You are a helpful, professional, and knowledgeable banking assistant specializing in the UAE banking system. Your role is to provide clear, accurate, and up-to-date information about:

- UAE Central Bank regulations
- Islamic banking and conventional banking in UAE
- Accounts (current, savings, salary)
- Credit cards, loans, and financing options
- KYC (Know Your Customer) requirements
- Bank transfers (local, international, WPS)
- Digital banking and mobile apps
- Fees, charges, and interest rates
- Banking for residents vs. non-residents
- Bank working hours and public holidays
- Account opening for individuals and businesses

Always tailor your responses using the following guidelines:

1. **Context-Aware:** Respond based on the banking laws, financial institutions, and practices specific to the United Arab Emirates (UAE).
2. **Concise Yet Informative:** Give to-the-point responses but provide links to official resources if more detail is needed.
3. **Professional Tone:** Maintain 

In [13]:

vectorstore_retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
vectorstore_retriever

from langchain.retrievers import BM25Retriever, EnsembleRetriever
keyword_retriever=BM25Retriever.from_documents(text_chunks)
keyword_retriever.k=3

ensemble_retriever=EnsembleRetriever(retrievers=[vectorstore_retriever, keyword_retriever], weights=[0.5, 0.5] )
ensemble_retriever.get_relevant_documents("how can i apply for car loans?")

  ensemble_retriever.get_relevant_documents("how can i apply for car loans?")


[Document(id='9c9dd8a2-af06-45fe-9c17-15cec1f91160', metadata={'producer': 'Adobe PDF Library 16.0', 'creator': 'Adobe InDesign 16.4 (Windows)', 'creationdate': '2023-05-29T21:14:10+05:30', 'moddate': '2024-05-02T13:02:59+05:30', 'trapped': '/False', 'source': 'Data/Data\\CBUAE_EN_1702_VER2.pdf', 'total_pages': 8, 'page': 3, 'page_label': '4'}, page_content='Page  \n4\nصفحة\nBank Loans & Other Services Offered to Individual Customers 29/2011\nArticle (3) Car Loan\na) Car Loan: Is a loan extended by the bank or the finance company to its customer for the purpose of \npurchasing a private car.\nb) Car loan shall be treated as separate from the personal consumer loan, and should not exceed (80%) \neighty percent of the value of the financed vehicle.\nc) Repayment Period: The maximum period for repayment of the loan shall be (60) months.'),
 Document(metadata={'producer': 'Adobe PDF Library 16.0', 'creator': 'Adobe InDesign 16.4 (Windows)', 'creationdate': '2023-05-29T21:14:10+05:30', 'mod

In [None]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter
OPENAI_API_KEY="your-api-key"

model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)

chain = (
    {
        "context": itemgetter("question") | ensemble_retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [15]:

chain.invoke({"question": "how can i apply for car loans?"})

'To apply for a car loan in the UAE, you should:\n\n1. **Choose a bank or finance company** that offers car loans; both conventional and Islamic banks provide such financing.\n2. **Prepare required documents**, typically including:\n   - Valid Emirates ID and passport copy\n   - UAE residency visa (for residents)\n   - Salary certificate or proof of income\n   - Bank statements (usually last 3-6 months)\n   - Car sales invoice or proforma invoice if purchasing from a dealer\n3. **Submit a formal application** either online via the bank’s website/app or in person at a branch.\n4. The bank will evaluate your application based on income, credit history, and debt-to-income ratio.\n5. Car loans in the UAE must not exceed 80% of the vehicle’s value.\n6. Maximum repayment period is 60 months (5 years).\n7. The loan will be secured by a lien or mortgage on the car until fully repaid.\n\nFor detailed eligibility criteria, documentation, and application process, check with your preferred bank or


Evaluating the Model on the Test Set


We need to create a function that invokes the chain with a specific question and returns the answer.

In [16]:

def answer_fn(question, history=None):
    return chain.invoke({"question": question})


In [17]:
from giskard.rag import evaluate

report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent:   0%|          | 0/60 [00:00<?, ?it/s]

CorrectnessMetric evaluation:   0%|          | 0/60 [00:00<?, ?it/s]

In [18]:
display(report)

In [19]:
report.to_html("giskard_report_after_hybridsearch.html")