In [13]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL = "gpt-3.5-turbo"


In [14]:

from langchain_community.document_loaders import DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)

DATA_PATH = './edata/'

loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    
documents = loader.load()

documents

[Document(page_content='THE AYURVEDIC PHARMACOPOEIA \nOF INDIA\nPART - II (FORMULATIONS)\nVOLUME - I\nFirst Edition\nMONOGRAPHS\ne-BOOK V.1.0\nGOVERNMENT OF INDIA\nMINISTRY OF HEALTH AND FAMILY WELFARE\nDEPARTMENT OF AYURVEDA, YOGA & NATUROPATHY, UNANI, SIDDHA AND HOMOEOPATHY, \nNEW DELHI\n                  2008', metadata={'source': 'edata\\API-Vol-2.1.pdf', 'page': 0}),
 Document(page_content='AVALEHA\nAVALEHA\nGeneral Description:\nAvaleha or Lehya  is a semi-solid preparation  of drugs, prepared with addition  of  jaggery, sugar or sugar-candy and boiled  with  \nprescribed juices or decoction.\nThese preparations generally have \n(1) KaÀ¡ya or other liquids, \n(2) Jaggery, sugar or sugar-candy, \n(3) Powders or pulps of certain drugs,\n(4) Ghee or oil and \n(5) Honey.  \nJaggery, sugar or sugar-candy is dissolved in the liquid and strained to remove the foreign particles. This solution is boiled over a  \nmoderate fire. When pressed between two fingers if P¡ka becomes thready (Tan

In [15]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(
    documents, embedding=OpenAIEmbeddings()    
)

In [25]:
import pandas as pd

df = pd.DataFrame([d.page_content for d in documents], columns=["text"])
df.head(10)

Unnamed: 0,text
0,THE AYURVEDIC PHARMACOPOEIA \nOF INDIA\nPART -...
1,AVALEHA\nAVALEHA\nGeneral Description:\nAvaleh...
2,"1.Astangavaleha\n1. AâÙË×GËVALEHA \n AFI, Par..."
3,Pack it in tightly closed containers to protec...
4,Thin layer chromatography: \nExtract 5 g of A...
5,2.Bhallataka modaka\n2. BHALLËTAKËDI MODAKA \...
6,Identification:\nMicroscopy: \nWeigh 5 g of th...
7,Water-soluble extractive: ...
8,Storage: Store in a cool place in tightly clos...
9,"3.Bilvadi leha\n3. BILVËDI LEHA \n AFI, Part-..."


In [26]:
from giskard.rag import KnowledgeBase

knowledge_base = KnowledgeBase(df)

In [62]:
from giskard.rag import generate_testset

testset = generate_testset(
    knowledge_base,
    num_questions=10,
    agent_description="A chatbot answering questions about Ayurvedic medicines and remedies for health problems",
)
testset.save("test-set.jsonl")

Generating questions: 100%|██████████| 10/10 [02:33<00:00, 15.33s/it]


In [66]:
from giskard.rag import QATestset

testset = QATestset.load("test-set.jsonl")
test_set_df = testset.to_pandas()

test_set_df.head()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
eb403659-b025-4b7d-94a0-26b77a5e024b,What are the ingredients used in the preparati...,The ingredients used in the preparation of Mru...,Document 9: 3.Bilvadi leha\n3. BILVËDI LEHA \...,[],"{'question_type': 'simple', 'seed_document_id'..."
29da763c-d6b0-40c7-b261-d136c0453255,What is the final step in the preparation of t...,The final step in the preparation of these Ayu...,Document 125: Take the ingredients ( Kalka dra...,[],"{'question_type': 'simple', 'seed_document_id'..."
f3edf118-3dcc-410a-ac59-ab89baedfe55,Can you identify the shared ingredients used i...,The common ingredients in the preparation of P...,Document 75: 20.Panchasama churna\n20. PAØCASA...,[],"{'question_type': 'complex', 'seed_document_id..."
e5e5587c-6431-4f57-9288-7884c2f5ff7a,What is the primary component used in the form...,The main ingredient in the preparation of Vasa...,Document 45: 11.Vasavaleha\n11. VËSËVALEHA \n ...,[],"{'question_type': 'complex', 'seed_document_id..."
082afa2c-3bc5-4f08-bd1b-311aa3d56826,Considering the storage requirements and thera...,Balachaturbhadrika churna is made from Ghana (...,Document 60: 15.Balachaturbhadraka churna \n15...,[],"{'question_type': 'distracting element', 'seed..."
9e248fb1-37f2-483f-8838-86a08fb62aee,What are the main ingredients in the KALYËÛAKA...,The main ingredients in the KALYËÛAKA GHÎTA Ay...,Document 105: 28.Jaatyaadi ghrutha\n28. JËTYËD...,[],"{'question_type': 'distracting element', 'seed..."
2f5d4143-a7a4-4b3d-8af7-7a7e9f90062e,As a health-conscious individual seeking natur...,The major spots shown in the thin layer chroma...,Document 33: Identification:\nMicroscopy: \nTa...,[],"{'question_type': 'situational', 'seed_documen..."
5409fc8e-cb42-4d4f-bfda-24ab9eab4df2,As a lactose intolerant individual looking for...,The recommended Anupāna are warm milk and warm...,"Document 119: Anupāna: Warm milk, Warm water .",[],"{'question_type': 'situational', 'seed_documen..."
b56dd8f9-4ff1-4eae-8696-bc7de21b738f,"What is the recommended dose, therapeutic uses...",The recommended dose is 5 to 10 g daily in div...,Document 56: Alcohol-soluble extractive: ...,[],"{'question_type': 'double', 'original_question..."
585b4c06-67c2-4196-aeab-8e88d73bead0,What are the steps to do this?,"1. Sand, stone, plant debris, glass etc. are f...",Document 135: GUGGULU\nGUGGULU\nGeneral Descri...,"[{'role': 'user', 'content': 'I'm thinking abo...","{'question_type': 'conversational', 'seed_docu..."


In [98]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

custom_prompt_template = """
You are an Ayurveda Advisor. Use the following pieces of information to answer the user's question in detail. When discussing 
medicines and remedies, ensure to include precautions and exceptions where necessary. 
Context: {context}
Question: {question}
Helpful answer: Lets think step by step
"""
prompt = PromptTemplate(template=custom_prompt_template , input_variables=["context","question"])

retriever = vectorstore.as_retriever()


In [120]:
from langchain_community.llms import HuggingFaceEndpoint
from ctransformers import AutoModelForCausalLM
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

def get_LLM(model_name):
    if(model_name=="TheBloke/Llama-2-7B-Chat-GGML"):
        llm = AutoModelForCausalLM.from_pretrained(
                                           model_path_or_repo_id="llama-2-7b-chat.ggmlv3.q8_0.bin",
                                           model_type="llama", 
                                           gpu_layers=50,
                                           max_new_tokens = 1000,
                                           context_length = 6000)
        return llm
    
    llm = HuggingFaceEndpoint(
        repo_id=model_name,
        huggingfacehub_api_token="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",
        max_new_tokens = 1024,
        temperature = 0.1,
        model_kwargs={"max_length": 64}
    )
    return llm

def update_chain(model_name):
    global chain
    chain = (
    {
        "context": itemgetter("query") | retriever,
        "question": itemgetter("query"),
    }
    | prompt
    | get_LLM(model_name)
    | StrOutputParser()
)

In [121]:
class EvalReport:
    def __init__(self, model_name, report):
        self.model_name = model_name
        self.report = report

def answer_fn(question,history=None,):
    return chain.invoke({"query": question})


In [122]:
from giskard.rag import evaluate
reports = []

def getResults(model_name):
    update_chain(model_name)
    report = evaluate(answer_fn,testset=testset, knowledge_base=knowledge_base)
    reportObj = EvalReport(model_name,report)
    reports.append(reportObj)

In [123]:
getResults("mistralai/Mistral-7B-Instruct-v0.2")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\Anubhav Chachra\.cache\huggingface\token
Login successful


Asking questions to the agent: 100%|██████████| 10/10 [02:52<00:00, 17.29s/it]
Correctness evaluation: 100%|██████████| 10/10 [00:43<00:00,  4.34s/it]


In [136]:
getResults("HuggingFaceH4/zephyr-7b-beta")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\Anubhav Chachra\.cache\huggingface\token
Login successful


Asking questions to the agent:  40%|████      | 4/10 [01:31<02:17, 22.93s/it]


HfHubHTTPError: 424 Client Error: Failed Dependency for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: aMBi0K8Xn0crLvxvdmn8h)

Request failed during generation: Server error: Out of available cache blocks: asked 100, only 11 free blocks

In [104]:
getResults("google/gemma-7b")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\Anubhav Chachra\.cache\huggingface\token
Login successful


Asking questions to the agent: 100%|██████████| 10/10 [02:17<00:00, 13.79s/it]
Correctness evaluation: 100%|██████████| 10/10 [00:40<00:00,  4.05s/it]


In [124]:
reports

[<__main__.EvalReport at 0x23a7a45e5e0>]

0.0