# **Compliance GPT with LangChain**

## **Setup**

In [1]:
# Notebook bootstrap: imports first, then the two one-off setup calls.
import nest_asyncio
from dotenv import load_dotenv

# Patch asyncio so nested event loops are allowed (the notebook kernel already runs one).
nest_asyncio.apply()

# Load variables from a .env file into the process environment. As the last
# expression of the cell, its return value is what the cell displays.
load_dotenv()

True

## **Config**

In [2]:
# Project helpers: the configuration loader, plus the model enum and factory used in later cells.
from utils.config import get_config
from utils.models import ModelName, get_model

# Shared configuration object; it is passed to the model, vector-store, retriever and chat-store calls below.
config = get_config()

In [4]:
# Identifiers passed to get_response / get_session_history to select the chat session.
USER_ID = 'arkan'
CONVERSATION_ID = 'arkan-2021-07-01-01'

## **Define Model**

In [5]:
# Pick the Azure OpenAI member of ModelName; get_model returns two objects,
# bound here as the LLM and the embedding model used by the rest of the notebook.
model_name = ModelName.AZURE_OPENAI
llm_model, embed_model = get_model(model_name=model_name, config=config)

## **Load Vector Store Index**

In [6]:
from databases.vector_store import RedisIndexManager

# Manager for the 'bi' index (db_id=0), built with the embedding model selected earlier.
redis = RedisIndexManager(
    index_name='bi',
    embed_model=embed_model,
    config=config,
    db_id=0,
)

# Handle to the vector index; the retriever cell consumes it.
vector_store = redis.load_vector_index()

  from tqdm.autonotebook import tqdm


## **Get Retriever**

In [7]:
from retriever.retriever_bi.retriever_bi import get_retriever_bi

# Build the BI retriever on top of the loaded vector store.
# NOTE(review): top_k=16 / top_n=5 look like "candidates fetched" vs. "documents kept" — confirm in get_retriever_bi.
retriever = get_retriever_bi(
    vector_store=vector_store,
    top_n=5,
    top_k=16,
    llm_model=llm_model,
    embed_model=embed_model,
    config=config,
)

## **Create Chain**

In [8]:
from constant.bi.prompt import CONTEXTUALIZE_Q_PROMPT_STR, QA_SYSTEM_PROMPT_STR
from databases.chat_store import RedisChatStore
from chain.chain_bi.chain_bi import create_bi_chain
from chain.rag_chain import create_chain_with_chat_history

# Chat store kept in Redis db 1 (the vector index above uses db 0).
# NOTE(review): k=3 is presumably the number of recent turns retained — confirm in RedisChatStore.
chat_store = RedisChatStore(k=3, config=config, db_id=1)

# Base chain: question-contextualisation prompt + QA system prompt over the retriever.
# The chat store is deliberately not passed here; history is attached by the wrapper below.
chain = create_bi_chain(
    contextualize_q_prompt_str=CONTEXTUALIZE_Q_PROMPT_STR,
    qa_system_prompt_str=QA_SYSTEM_PROMPT_STR,
    retriever=retriever,
    llm_model=llm_model,
)

# Wrap the base chain so that calls go through the chat store.
chain_history = create_chain_with_chat_history(final_chain=chain, chat_store=chat_store)

## **Evaluation**

#### **Get Response**

In [9]:
from chain.rag_chain import get_response

# Smoke test: send a single question through the history-aware chain for the configured session.
response = get_response(
    chain=chain_history,
    question="Berapa batas transaksi oleh qris?",
    user_id=USER_ID,
    conversation_id=CONVERSATION_ID
)

# Display the returned dict (keys seen in the output: 'rewrited question', 'answer', 'context').
response

{'rewrited question': 'Apa batas transaksi oleh qris?',
 'answer': 'Tidak ada informasi yang tersedia dalam konteks dan metadata yang diberikan mengenai batas transaksi oleh QRIS di Indonesia.',
 'context': '[]'}

#### **Chat Store Monitor**

In [12]:
# Print the stored chat history for the current user/conversation pair.
print(chat_store.get_session_history(user_id=USER_ID, conversation_id=CONVERSATION_ID))




In [11]:
# Empty the chat history via clear_all().
# NOTE(review): no user/conversation arguments are passed, so this presumably wipes every stored session — confirm before running against a shared store.
chat_store.clear_all()

#### **RAGAS Evaluation**

In [11]:
from constant.bi.question import QA_PAIRS

# Flatten the English and Indonesian QA entries (in that order) into one sequence;
# the per-language dict keys are not needed, only each entry's question/answer pair.
qa_entries = [entry for language in ("EN", "ID") for entry in QA_PAIRS[language].values()]

# Parallel lists: eval_questions[i] is answered by eval_answers[i].
eval_questions = [entry["question"] for entry in qa_entries]
eval_answers = [entry["answer"] for entry in qa_entries]

In [12]:
import ast
import time
from datasets import Dataset
from chain.rag_chain import get_response

# Pause between consecutive requests, in seconds.
# NOTE(review): presumably here to stay under the LLM endpoint's rate limit — confirm.
REQUEST_DELAY_SECONDS = 3

results = []    # generated answers, one per question
contexts = []   # per question: list of retrieved page contents
queries = eval_questions
ground_truths = eval_answers

# NOTE(review): every question is sent under the same USER_ID / CONVERSATION_ID, so earlier
# evaluation questions may influence how later ones are rewritten — confirm this is intended.
for query in queries:
    result = get_response(
        chain=chain_history,
        question=query,
        user_id=USER_ID,
        conversation_id=CONVERSATION_ID
    )

    results.append(result['answer'])

    # 'context' arrives as a string (e.g. '[]') that is parsed into a sequence of
    # dict-like sources. ast.literal_eval only accepts Python literals, so unlike
    # eval() it cannot execute arbitrary code embedded in the chain output.
    final_sources = list(ast.literal_eval(result["context"]))
    contexts.append([source.get('page_content') for source in final_sources])

    time.sleep(REQUEST_DELAY_SECONDS)

# Column-oriented dict of questions, answers, contexts and ground truths.
# Later cells dump it to JSON and rebuild the dataset from it.
d = {
    "question": queries,
    "answer": results,
    "contexts": contexts,
    "ground_truth": ground_truths
}

dataset = Dataset.from_dict(d)

Metadata key page_number not found in metadata. Setting to None. 
Metadata fields defined for this instance: ['file_id', 'title', 'file_name', 'file_link', 'date', 'type_of_regulation', 'sector', 'standardized_extracted_file_name', 'standardized_file_name', 'page_number']
Metadata key page_number not found in metadata. Setting to None. 
Metadata fields defined for this instance: ['file_id', 'title', 'file_name', 'file_link', 'date', 'type_of_regulation', 'sector', 'standardized_extracted_file_name', 'standardized_file_name', 'page_number']
Metadata key page_number not found in metadata. Setting to None. 
Metadata fields defined for this instance: ['file_id', 'title', 'file_name', 'file_link', 'date', 'type_of_regulation', 'sector', 'standardized_extracted_file_name', 'standardized_file_name', 'page_number']
Metadata key page_number not found in metadata. Setting to None. 
Metadata fields defined for this instance: ['file_id', 'title', 'file_name', 'file_link', 'date', 'type_of_regulati

KeyboardInterrupt: 

In [13]:
import json

# Persist the evaluation dict `d` so it can be reloaded later without re-querying the chain.
# `d` must already exist in the kernel (it is built by the evaluation loop cell).
with open("constant/bi/dict_eval.json", "w") as outfile:
    outfile.write(json.dumps(d))

NameError: name 'd' is not defined

In [None]:
# Imported locally so this reload path also works on a fresh kernel, without
# first running the cell that writes the file.
import json

# Reload the cached evaluation dict instead of re-running the evaluation loop.
with open('./constant/bi/dict_eval.json') as json_file:
    d = json.load(json_file)

In [None]:
# Imported locally so the dataset can be rebuilt from the cached dict without
# having executed the (slow) evaluation loop cell, which is the only other place it is imported.
from datasets import Dataset

# Rebuild the dataset from the evaluation dict `d`.
dataset = Dataset.from_dict(d)

# Tabular view for inspection; only the first rows are displayed.
df = dataset.to_pandas()
df.head()

In [None]:
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, answer_similarity, answer_correctness
from ragas.metrics.critique import harmfulness
from ragas import evaluate

# Score the dataset with all imported metrics, using the notebook's own LLM and
# embedding model as the evaluator models.
score = evaluate(
    dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
        context_precision,
        context_recall,
        answer_similarity,
        answer_correctness,
        harmfulness,
    ],
    llm=llm_model,
    embeddings=embed_model,
)

In [None]:
# json is imported here so the cell does not depend on another cell having imported it.
import json

import numpy as np

# One entry per dataset row: {row_index: {column: value}}.
# NOTE(review): this serialises `df` (the dataset rows), not `score` — confirm that
# "eval_results.json" is meant to hold the inputs rather than the metric values.
result_dict = df.to_dict(orient='index')

# ndarray cells are not JSON-serialisable; convert them to plain lists in place.
# Values that are already lists (or scalars) are left untouched.
for row in result_dict.values():
    for column, value in row.items():
        if isinstance(value, np.ndarray):
            row[column] = value.tolist()

# Save the dictionary to a JSON file
with open("eval_results.json", "w") as file:
    json.dump(result_dict, file, indent=4)

In [None]:
# Print one "<metric>: <value>" line per entry in the evaluation result.
for metric_name, metric_value in score.items():
    print(f"{metric_name}: {metric_value}")