In [3]:
# Import the os module to interact with the operating system environment variables
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
file_path = "heartstart RAG.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

print(len(docs))

300


In [5]:
print(docs[0].page_content)
print(docs[0].metadata)

Instructions for Use
HeartStart Intrepid
Monitor /D efibrillator
867172 
 English

{'source': 'heartstart RAG.pdf', 'page': 0}


In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [7]:
from huggingface_hub import InferenceClient
import json

repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

llm_client = InferenceClient(
    model=repo_id,
    timeout=120,
    api_key=os.getenv('HUGGING_FACE_API_TOKEN')
)


def call_llm(inference_client: InferenceClient, prompt: str):
    response = inference_client.post(
        json={
            "inputs": prompt,
            "parameters": {"max_new_tokens": 1000},
            "task": "text-generation",
        },
    )
    return json.loads(response.decode())[0]["generated_text"]


In [20]:
QA_generation_prompt = """
Your task is to write a factoid question and an answer given a context.
Your factoid question should be answerable with a specific, concise piece of factual information from the context.
Your factoid question should be formulated in the same style as questions users could ask in a search engine.
Provide one relevant question and answer for the given context

Provide your answer as follows:

Output:::
Factoid question: (your factoid question)
Answer: (your answer to the factoid question)

Now here is the context.

Context: {context}\n
Output:::"""

In [26]:
import random
from tqdm.auto import tqdm
import pandas as pd
N_GENERATIONS = 10  # We intentionally generate only 10 QA couples here for cost and time considerations

print(f"Generating {N_GENERATIONS} QA couples...")

outputs = []
for sampled_context in tqdm(random.sample(splits, N_GENERATIONS)):
    # Generate QA couple
    output_QA_couple = call_llm(llm_client, QA_generation_prompt.format(context=sampled_context.page_content))
    try:
        #print(output_QA_couple)
        keyword ="Factoid question: (your factoid question)"
        before_keyword, keyword, after_keyword = output_QA_couple.partition(keyword)
        #print(after_keyword)
        actual_keyword = "Factoid question:"
        before_actual_question,actual_keyword,after_actual_keyword = after_keyword.partition(actual_keyword)
        #print("actual_question", after_actual_keyword)
        question = output_QA_couple.split("Factoid question: ")[-1].split("Answer: ")[0]
        answer = output_QA_couple.split("Answer: ")[-1]
        assert len(answer) < 300, "Answer is too long"
        print(sampled_context.metadata)
        outputs.append(
            {
                "context": sampled_context.page_content,
                "question": question,
                "answer": answer,
                "source_doc": sampled_context.metadata["source"],
                "page_no":sampled_context.metadata["page"]
            }
        )
    except:
        continue

Generating 10 QA couples...


  0%|          | 0/10 [00:00<?, ?it/s]



{'source': 'heartstart RAG.pdf', 'page': 236}
{'source': 'heartstart RAG.pdf', 'page': 82}
{'source': 'heartstart RAG.pdf', 'page': 274}
{'source': 'heartstart RAG.pdf', 'page': 236}
{'source': 'heartstart RAG.pdf', 'page': 168}
{'source': 'heartstart RAG.pdf', 'page': 131}
{'source': 'heartstart RAG.pdf', 'page': 284}
{'source': 'heartstart RAG.pdf', 'page': 180}
{'source': 'heartstart RAG.pdf', 'page': 192}
{'source': 'heartstart RAG.pdf', 'page': 251}


In [27]:
display(pd.DataFrame(outputs))

Unnamed: 0,context,question,answer,source_doc,page_no
0, Q-CPR meter 2 Accessories . . ...,What are some accessories for the Q-CPR meter ...,Some accessories for the Q-CPR meter 2 include...,heartstart RAG.pdf,236
1,"chest hair, moisture and lotions or powders on...",What is the purpose of the AED mode-related in...,The AED mode-related information in the AED Vi...,heartstart RAG.pdf,82
2,Info” and selecting ‘yes’. This action will re...,What standard does the HeartStart Intrepid mee...,The HeartStart Intrepid meets the IEC 60601-2-...,heartstart RAG.pdf,274
3,223\n18\nSupplies and Accessories\nThis chapte...,What type of accessories are discussed in this...,This chapter discusses approved supplies and a...,heartstart RAG.pdf,236
4,(if configured) that no compressions are detec...,What is the target area for the compression ra...,The target area for the compression rate indic...,heartstart RAG.pdf,168
5,9: Monitoring Noninvasive Blood Pressure and T...,How do you disable temperature alarms on the m...,"To disable temperature alarms on the monitor, ...",heartstart RAG.pdf,131
6,Electromagnetic Compatibility 20: Specificatio...,What is the limit level for the radiated RF el...,The limit level for the radiated RF electromag...,heartstart RAG.pdf,284
7,Daylight Saving Time (DST) changes occur when ...,What is the time format setting on the device?\n,The time format setting on the device is defin...,heartstart RAG.pdf,180
8,Configurable Parameters 14: Configuration\n 17...,What are the EAP types available for WPA_AES_P...,The EAP types available for WPA_AES_PSK are PE...,heartstart RAG.pdf,192
9,some wires in a 5-Lead cable \nare disconnecte...,What could be the reason for a technical alarm...,An electrode may be disconnected or the analyz...,heartstart RAG.pdf,251


In [28]:
qa_dataset = pd.DataFrame(outputs)
qa_dataset.to_csv("qa_dataset.csv")

In [37]:
question_groundedness_critique_prompt = """
You will be given a context and a question.
Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.
Give your answer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.

Provide your answer as follows:

Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)

You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.

Now here are the question and context.

Question: {question}\n
Context: {context}\n
Answer::: """

question_relevance_critique_prompt = """
You will be given a question.
Your task is to provide a 'total rating' representing how useful this question can be to users using the "Philips Heart Start Intrepid Monitor"
Give your answer on a scale of 1 to 5, where 1 means that the question is not useful at all, and 5 means that the question is extremely useful.

Provide your answer as follows:

Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)

You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.

Now here is the question.

Question: {question}\n
Answer::: """

question_standalone_critique_prompt = """
You will be given a question.
Your task is to provide a 'total rating' representing how context-independent this question is.
Give your answer on a scale of 1 to 5, where 1 means that the question depends on additional information to be understood, and 5 means that the question makes sense by itself.
For instance, if the question refers to a particular setting, like 'in the context' or 'in the document', the rating must be 1.

Provide your answer as follows:

Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)

You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.

Now here is the question.

Question: {question}\n
Answer::: """

In [38]:
print("Generating critique for each QA couple...")
for output in tqdm(outputs):
    evaluations = {
        "groundedness": call_llm(
            llm_client,
            question_groundedness_critique_prompt.format(context=output["context"], question=output["question"]),
        ),
        "relevance": call_llm(
            llm_client,
            question_relevance_critique_prompt.format(question=output["question"]),
        ),
        "standalone": call_llm(
            llm_client,
            question_standalone_critique_prompt.format(question=output["question"]),
        ),
    }
    try:
        for criterion, evaluation in evaluations.items():
            score, eval = (
                int(evaluation.split("Total rating: ")[-1].strip()),
                evaluation.split("Total rating: ")[-2].split("Evaluation: ")[1],
            )
            output.update(
                {
                    f"{criterion}_score": score,
                    f"{criterion}_eval": eval,
                }
            )
    except Exception as e:
        continue

Generating critique for each QA couple...


  0%|          | 0/10 [00:00<?, ?it/s]



In [39]:
display(pd.DataFrame(outputs))

Unnamed: 0,context,question,answer,source_doc,page_no,groundedness_score,groundedness_eval,relevance_score,relevance_eval,standalone_score,standalone_eval
0, Q-CPR meter 2 Accessories . . ...,What are some accessories for the Q-CPR meter ...,Some accessories for the Q-CPR meter 2 include...,heartstart RAG.pdf,236,5,The context lists several items under the head...,4.0,This question is useful for users who want to ...,5.0,This question is context-independent as it ref...
1,"chest hair, moisture and lotions or powders on...",What is the purpose of the AED mode-related in...,The AED mode-related information in the AED Vi...,heartstart RAG.pdf,82,5,The context provides a description of the AED ...,5.0,This question is useful for users of the Phili...,3.0,This question assumes knowledge of an 'AED Vie...
2,Info” and selecting ‘yes’. This action will re...,What standard does the HeartStart Intrepid mee...,The HeartStart Intrepid meets the IEC 60601-2-...,heartstart RAG.pdf,274,5,The context clearly states that the HeartStart...,4.0,This question is useful for those who are inte...,2.0,This question is context-dependent because it ...
3,223\n18\nSupplies and Accessories\nThis chapte...,What type of accessories are discussed in this...,This chapter discusses approved supplies and a...,heartstart RAG.pdf,236,5,The context lists three sections under the hea...,2.0,This question is related to the content of the...,2.0,"This question is context-dependent, as it refe..."
4,(if configured) that no compressions are detec...,What is the target area for the compression ra...,The target area for the compression rate indic...,heartstart RAG.pdf,168,5,The context provides a clear description of th...,4.0,This question is useful for users of the Phili...,2.0,This question is context-dependent because it ...
5,9: Monitoring Noninvasive Blood Pressure and T...,How do you disable temperature alarms on the m...,"To disable temperature alarms on the monitor, ...",heartstart RAG.pdf,131,5,The context provides a clear and unambiguous a...,4.0,This question is useful for users who want to ...,4.0,"This question is context-independent, as it re..."
6,Electromagnetic Compatibility 20: Specificatio...,What is the limit level for the radiated RF el...,The limit level for the radiated RF electromag...,heartstart RAG.pdf,284,5,The context provides the limit level for the r...,4.0,This question is useful for those who are conc...,5.0,This question is asking about the limit level ...
7,Daylight Saving Time (DST) changes occur when ...,What is the time format setting on the device?\n,The time format setting on the device is defin...,heartstart RAG.pdf,180,1,The context provides information about the con...,4.0,This question is useful for users who want to ...,4.0,This question is context-independent because i...
8,Configurable Parameters 14: Configuration\n 17...,What are the EAP types available for WPA_AES_P...,The EAP types available for WPA_AES_PSK are PE...,heartstart RAG.pdf,192,1,The context provides information about the EAP...,,,,
9,some wires in a 5-Lead cable \nare disconnecte...,What could be the reason for a technical alarm...,An electrode may be disconnected or the analyz...,heartstart RAG.pdf,251,5,The context provides several possible reasons ...,5.0,This question is useful for users of the Phili...,2.0,This question is context-dependent because it ...


In [40]:
import pandas as pd

pd.set_option("display.max_colwidth", None)

generated_questions = pd.DataFrame.from_dict(outputs)

print("Evaluation dataset before filtering:")
display(
    generated_questions[
        [
            "question",
            "answer",
            "groundedness_score",
            "relevance_score",
            "standalone_score",
        ]
    ]
)
generated_questions = generated_questions.loc[
    (generated_questions["groundedness_score"] >= 4)
    & (generated_questions["relevance_score"] >= 4)
    & (generated_questions["standalone_score"] >= 4)
]
print("============================================")
print("Final evaluation dataset:")
display(
    generated_questions[
        [
            "question",
            "answer",
            "groundedness_score",
            "relevance_score",
            "standalone_score",
        ]
    ]
)



Evaluation dataset before filtering:


Unnamed: 0,question,answer,groundedness_score,relevance_score,standalone_score
0,What are some accessories for the Q-CPR meter 2?\n,"Some accessories for the Q-CPR meter 2 include SpO2 cables and sensors, temperature monitoring probes and cables, and test loads and shorted plugs.",5,4.0,5.0
1,What is the purpose of the AED mode-related information in the AED View?\n,The AED mode-related information in the AED View displays important messages for the user and the configured energy for the current patient category while in AED mode.,5,5.0,3.0
2,What standard does the HeartStart Intrepid meet in terms of safety and EMC?\n,The HeartStart Intrepid meets the IEC 60601-2-30 ed. 1.1 and (EN) ISO 80601 safety and EMC standards.,5,4.0,2.0
3,What type of accessories are discussed in this chapter?\n,"This chapter discusses approved supplies and accessories, ECG and defibrillation accessories, and blood pressure air hoses and cuffs for the HeartStart Intrepid.",5,2.0,2.0
4,What is the target area for the compression rate indicator on the Q-CPR meter 2?\n,The target area for the compression rate indicator on the Q-CPR meter 2 is the green target zone on the speedometer needle.,5,4.0,2.0
5,How do you disable temperature alarms on the monitor?\n,"To disable temperature alarms on the monitor, press the Smart Select knob. The alarms are on by default.",5,4.0,4.0
6,What is the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply?\n,The limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply is 20 V/m for the frequency range of 80 MHz to 2.7 GHz.,5,4.0,5.0
7,What is the time format setting on the device?\n,The time format setting on the device is defined by the Time Format parameter in the Date/Time Format configuration. The available choices are 12-hour or 24-hour format.,1,4.0,4.0
8,What are the EAP types available for WPA_AES_PSK?\n,"The EAP types available for WPA_AES_PSK are PEAP-MSCHAP, EAP-TTLS, and EAP-TLS.",1,,
9,What could be the reason for a technical alarm in the ECG data?\n,An electrode may be disconnected or the analyzing algorithm cannot analyze the ECG signal.,5,5.0,2.0


Final evaluation dataset:


Unnamed: 0,question,answer,groundedness_score,relevance_score,standalone_score
0,What are some accessories for the Q-CPR meter 2?\n,"Some accessories for the Q-CPR meter 2 include SpO2 cables and sensors, temperature monitoring probes and cables, and test loads and shorted plugs.",5,4.0,5.0
5,How do you disable temperature alarms on the monitor?\n,"To disable temperature alarms on the monitor, press the Smart Select knob. The alarms are on by default.",5,4.0,4.0
6,What is the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply?\n,The limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply is 20 V/m for the frequency range of 80 MHz to 2.7 GHz.,5,4.0,5.0


In [41]:
eval_dataset = pd.DataFrame(generated_questions)
eval_dataset

Unnamed: 0,context,question,answer,source_doc,page_no,groundedness_score,groundedness_eval,relevance_score,relevance_eval,standalone_score,standalone_eval
0, Q-CPR meter 2 Accessories . . . . . . . . . . . . . p. 229\n SpO2 Cables and Sensors . . . . . . . . . . . . . . p. 230\n Temperature Monitoring Probes and Cables . . . . . . . . . . p. 230\n Test Loads and Shorted Plugs . . . . . . . . . . . . . p. 231,What are some accessories for the Q-CPR meter 2?\n,"Some accessories for the Q-CPR meter 2 include SpO2 cables and sensors, temperature monitoring probes and cables, and test loads and shorted plugs.",heartstart RAG.pdf,236,5,"The context lists several items under the heading ""Q-CPR meter 2 Accessories,"" providing a clear and unambiguous answer to the question.\n\n",4.0,This question is useful for users who want to know more about the accessories for the Q-CPR meter 2. It can help them to use the device more effectively and efficiently.\n,5.0,"This question is context-independent as it refers to a specific product, the Q-CPR meter 2, and asks for accessories for it.\n"
5,"9: Monitoring Noninvasive Blood Pressure and Temperature Monitoring T emperature\n118\n NOTE: Alarm notification is configurable. See “Alarm Management and Configuration” on page 42.\nIf alarms are enabled, alarm limits appear next to the value. If alarms are off, the Alarms Off \nsymbol replaces the limits. \nChanging Temperature Alarm Limits\n T o change the temperature alarm limits for the current incident:\n1 Press the Smart Select knob.\n2 T urn the Smart Select knob to highlight  and press the Smart Select \nknob. \n3 Select and press the Smart Select knob. \n4 Select  and press the Smart Select knob.\n5 T urn the Smart Select knob to change the high limit and press the Smart Select knob. \n6 T urn the Smart Select knob to change the low limit and press the Smart Select knob. \nEnabling/Disabling Temperature Alarms\n T o disable temperature alarms (the alarms are on by default):\n1 Press the Smart Select knob.",How do you disable temperature alarms on the monitor?\n,"To disable temperature alarms on the monitor, press the Smart Select knob. The alarms are on by default.",heartstart RAG.pdf,131,5,"The context provides a clear and unambiguous answer to the question, with step-by-step instructions on how to disable temperature alarms on the monitor.\n\n",4.0,"This question is useful for users who want to customize the settings of their Philips Heart Start Intrepid Monitor, specifically to disable temperature alarms. Disabling alarms can be important for ensuring that the monitor is not providing unnecessary alerts, which can be distracting or confusing for users.\n\n",4.0,"This question is context-independent, as it refers to a general action that can be performed on any monitor with temperature alarms. However, it does not specify the type of monitor or the brand, which could lead to confusion if the user is not familiar with the specific monitor in question.\n"
6,Electromagnetic Compatibility 20: Specifications and Safety\n 271\nThe emission of the HeartStart Intrepid can meet Class I limit level of CISPR 25 when powered by \na DC-DC power supply.\nRadiated RF \nElectromagnetic \nField\nIEC 60601-2-4 \n(see Para. \n202.6.2.3)\n20 V/m (only \ndefibrillation)\n80 MHz to 2.7 GHz\n20 V/m (only \ndefibrillation)\n80 MHz to 2.7 GHz\nEmergency medical services \nenvironment\nProfessional healthcare facility \nenvironment\nProximity fields \nfrom RF wireless \ncommunications \nequipment\nIEC 61000-4-3\nRefer to table 126 \nbelow\nRefer to table 126 below Emergency medical services \nenvironment\nProfessional healthcare facility \nenvironment\nPower frequency \nmagnetic field\nIEC 61000-4-8\n30 A/m\n50 Hz or 60 Hz\n30 A/m\n50 Hz or 60 Hz\nEmergency medical services \nenvironment\nProfessional healthcare facility \nenvironment\nTable 126 Proximity Fields from RF Wireless Communications Equipment \nT est \nFrequency \n(MHz)\nBand a) \n(MHz)\nService a) Modulation \nb)\nModulation \nb) \n(W)\nDistance\n(m),What is the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply?\n,The limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply is 20 V/m for the frequency range of 80 MHz to 2.7 GHz.,heartstart RAG.pdf,284,5,"The context provides the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply, which is 20 V/m for the frequency range of 80 MHz to 2.7 GHz. The context also specifies that the emission of the HeartStart Intrepid can meet Class I limit level of CISPR 25 when powered by a DC-DC power supply, which further supports the answer. Therefore, the question is clearly and unambiguously answerable with the context.\n\n",4.0,"This question is useful for those who are concerned about the electromagnetic compatibility of the HeartStart Intrepid when using a DC-DC power supply. The limit level for the radiated RF electromagnetic field is an important safety consideration, as it ensures that the device does not interfere with other medical equipment or devices in the vicinity. Knowing the limit level can also help in the proper installation and operation of the HeartStart Intrepid.\n",5.0,"This question is asking about the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply. It is clear what the question is asking, and it does not depend on any additional context. The question refers to a specific product, the HeartStart Intrepid, and a specific power source, a DC-DC power supply, but it is still clear what is being asked.\n\n"


# RAG Setup

In [42]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()



In [43]:
from langchain_pinecone import PineconeVectorStore
INDEX_NAME = "chatbotqa-index"
docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)

In [44]:
retriver = docsearch.as_retriever(search_kwargs={"k": 3})

In [45]:
from langchain.llms import HuggingFaceHub
import os
# Define the repo ID and connect to Mixtral model on Huggingface
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
llm_rag = HuggingFaceHub(
  repo_id=repo_id, 
  model_kwargs={"temperature": 0.8, "top_k": 50}, 
  huggingfacehub_api_token=os.getenv('HUGGING_FACE_API_TOKEN')
)

In [56]:
from langchain import PromptTemplate

template = """Use the following pieces of context to answer the question. Please follow the following rules:
1. Only share the final answer in the "result".
2. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
3. If you find the answer, write the answer in a concise way with five sentences maximum.

{context}

Question: {question}

Helpful Answer:
"""

prompt = PromptTemplate(
  template=template, 
  input_variables=["context", "question"]
)

In [57]:
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
rag_chain = RetrievalQA.from_chain_type(
            llm_rag, retriever=docsearch.as_retriever(), chain_type_kwargs={"prompt": prompt}
        )

In [68]:
final_outputs = []
for idx,row in eval_dataset.iterrows():
    question = row["question"]
    print(question)
    response_rag = rag_chain.invoke(question)
    print(response_rag)
    keyword = "Helpful Answer:"
    before_keyword, keyword, after_keyword = response_rag['result'].partition(keyword)

    result = {
            "question": question,
            "true_answer": row["answer"],
            "source_page": row["page_no"],
            "generated_answer": after_keyword
        }
    final_outputs.append(result)

What are some accessories for the Q-CPR meter 2?





{'query': 'What are some accessories for the Q-CPR meter 2?\n', 'result': 'Use the following pieces of context to answer the question. Please follow the following rules:\n1. Only share the final answer in the "result".\n2. If you don\'t know the answer, don\'t try to make up an answer. Just say "I can\'t find the final answer but you may want to check the following links".\n3. If you find the answer, write the answer in a concise way with five sentences maximum.\n\nPaper  .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      . 229\nPower Supply Accessories.      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      . 229\nQ-CPR meter 2 Accessories   .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      . 229\nSp



{'query': 'How do you disable temperature alarms on the monitor?\n', 'result': 'Use the following pieces of context to answer the question. Please follow the following rules:\n1. Only share the final answer in the "result".\n2. If you don\'t know the answer, don\'t try to make up an answer. Just say "I can\'t find the final answer but you may want to check the following links".\n3. If you find the answer, write the answer in a concise way with five sentences maximum.\n\nEnabling/Disabling Temperature Alarms\n\uf0a5 T o disable temperature alarms (the alarms are on by default):\n1 Press the Smart Select knob.\n2 T urn the Smart Select knob to highlight \uf04d\uf065\uf061\uf073\uf075\uf072\uf065\uf06d\uf065\uf06e\uf074\uf073\uf02f\uf041\uf06c\uf061\uf072\uf06d\uf073 and press the Smart Select \nknob.\n3 Select \uf054\uf065\uf06d\uf070.\n4 Select \uf041\uf06c\uf061\uf072\uf06d\uf073\uf020\uf04f\uf06e or \uf041\uf06c\uf061\uf072\uf06d\uf073\uf020\uf04f\uf066\uf066 and press the Smart Selec



{'query': 'What is the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply?\n', 'result': 'Use the following pieces of context to answer the question. Please follow the following rules:\n1. Only share the final answer in the "result".\n2. If you don\'t know the answer, don\'t try to make up an answer. Just say "I can\'t find the final answer but you may want to check the following links".\n3. If you find the answer, write the answer in a concise way with five sentences maximum.\n\nElectromagnetic Compatibility 20: Specifications and Safety\n 271\nThe emission of the HeartStart Intrepid can meet Class I limit level of CISPR 25 when powered by \na DC-DC power supply.\nRadiated RF \nElectromagnetic \nField\nIEC 60601-2-4 \n(see Para. \n202.6.2.3)\n20 V/m (only \ndefibrillation)\n80 MHz to 2.7 GHz\n20 V/m (only \ndefibrillation)\n80 MHz to 2.7 GHz\nEmergency medical services \nenvironment\nProfessional healthcare facility \n

In [74]:
final_df = pd.DataFrame(final_outputs)
final_df

Unnamed: 0,question,true_answer,source_page,generated_answer
0,What are some accessories for the Q-CPR meter 2?\n,"Some accessories for the Q-CPR meter 2 include SpO2 cables and sensors, temperature monitoring probes and cables, and test loads and shorted plugs.",236,"\nThe Q-CPR meter 2 accessories include M3718A Adult/child radiotransparent multifunction electrode pads (10 sets/case) and M3719A Infant radiotransparent multifunction electrode pads (5 sets/case). Additionally, there are SMART Pads II for adults, children, and infants (1 set), SMART Pads III for adults, children, and infants (1 set and 5 sets), HeartStart adult multifunction electrode pads (1 set and 5 sets), Adult/child preconnect multifunction electrode pads (10 sets)."
1,How do you disable temperature alarms on the monitor?\n,"To disable temperature alarms on the monitor, press the Smart Select knob. The alarms are on by default.",131,\n\nTo disable temperature alarms on the monitor:\n\n1. Press the Smart Select knob.\n2. Turn the Smart Select knob to highlight  and press the Smart Select knob.\n3. Select .\n4. Select  or  and press the Smart Select knob.
2,What is the limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply?\n,The limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply is 20 V/m for the frequency range of 80 MHz to 2.7 GHz.,284,"\n\nThe limit level for the radiated RF electromagnetic field of the HeartStart Intrepid when powered by a DC-DC power supply is 20 V/m, as stated in the CISPR 25 standard for the frequency range of 80 MHz to 2.7 GHz. This limit applies only during defibrillation."


In [70]:
EVALUATION_PROMPT = """###Task Description:
An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given.
1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general.
2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.
3. The output format should look as follows: \"Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}\"
4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output.

###The instruction to evaluate:
{instruction}

###Response to evaluate:
{response}

###Reference Answer (Score 5):
{reference_answer}

###Score Rubrics:
[Is the response correct, accurate, and factual based on the reference answer?]
Score 1: The response is completely incorrect, inaccurate, and/or not factual.
Score 2: The response is mostly incorrect, inaccurate, and/or not factual.
Score 3: The response is somewhat correct, accurate, and/or factual.
Score 4: The response is mostly correct, accurate, and factual.
Score 5: The response is completely correct, accurate, and factual.

###Feedback:"""

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import SystemMessage


evaluation_prompt_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(content="You are a fair evaluator language model."),
        HumanMessagePromptTemplate.from_template(EVALUATION_PROMPT),
    ]
)

In [73]:
from langchain.chat_models import ChatOpenAI

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

eval_chat_model = ChatOpenAI(model="gpt-4-1106-preview", temperature=0, openai_api_key=OPENAI_API_KEY)
evaluator_name = "GPT4"

In [79]:
final_eval_result = []
for idx,row in final_df.iterrows():
    eval_prompt = evaluation_prompt_template.format_messages(
            instruction=row["question"],
            response=row["generated_answer"],
            reference_answer=row["true_answer"],
        )
    eval_result = eval_chat_model.invoke(eval_prompt)
    feedback, score = [item.strip() for item in eval_result.content.split("[RESULT]")]
    result = {
            "question": row["question"],
            "true_answer": row["true_answer"], 
            "generated_answer": row["generated_answer"],
            "score": score,
            "feedback": feedback
        }
    final_eval_result.append(result)


In [80]:
final_eval_result

[{'question': 'What are some accessories for the Q-CPR meter 2?\n',
  'true_answer': 'Some accessories for the Q-CPR meter 2 include SpO2 cables and sensors, temperature monitoring probes and cables, and test loads and shorted plugs.',
  'generated_answer': '\nThe Q-CPR meter 2 accessories include M3718A Adult/child radiotransparent multifunction electrode pads (10 sets/case) and M3719A Infant radiotransparent multifunction electrode pads (5 sets/case). Additionally, there are SMART Pads II for adults, children, and infants (1 set), SMART Pads III for adults, children, and infants (1 set and 5 sets), HeartStart adult multifunction electrode pads (1 set and 5 sets), Adult/child preconnect multifunction electrode pads (10 sets).',
  'score': '3',
  'feedback': "Feedback: The response lists a variety of electrode pads and SMART Pads that are related to a CPR meter device, which suggests some familiarity with the types of accessories that might be used with such a device. However, the resp