# Evaluating Files declared in code


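This tutorial shows how to declare Humanloop Files (a Tool, a Prompt and a Flow) directly in code with the SDK decorators, combine them into a simple retrieval-augmented question-answering pipeline, and evaluate the resulting Flow against a dataset.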


### 1. Setting up imports


In [1]:
import os

from dotenv import load_dotenv
from humanloop import Humanloop
import chromadb
from openai import OpenAI
import pandas as pd

from levenshtein import compare_log_and_target
from exact_match import exact_match


# Load variables from a local .env file into the process environment.
# HL_API_KEY and OPENAI_API_KEY are read with os.getenv in later cells.
load_dotenv()

True

### 2. Instantiating the Humanloop client


In [None]:
# Build the Humanloop client from the HL_API_KEY environment variable
# (None is passed through when the variable is unset).
humanloop = Humanloop(api_key=os.environ.get("HL_API_KEY"))

### 3. Instantiating the vector database


In [3]:
# Create the Chroma client through the package's public entry point.
# `chromadb.chromadb` only resolves as a side effect of the package importing
# its own submodules, so it should not be relied upon.
chroma = chromadb.Client()
collection = chroma.get_or_create_collection(name="MedQA")

# Load the textbook passages and keep a small, reproducible sample
# (fixed random_state) to serve as the knowledge base.
knowledge_base = pd.read_parquet(
    "../../../assets/sources/textbooks.parquet"
).sample(5, random_state=42)

# Index the sampled passages: "contents" is the text to embed and search,
# "id" is the identifier stored alongside each document.
collection.add(
    documents=knowledge_base["contents"].to_list(),
    ids=knowledge_base["id"].to_list(),
)

### 4. Loading the evaluation dataset


In [4]:
# Read the JSON Lines evaluation set (one datapoint per line).
datapoints_df = pd.read_json("../../../assets/datapoints.jsonl", lines=True)

# Keep only the first 20 datapoints. The frame is sliced *before* the rows are
# converted to dicts, so no dict is built for rows that would be discarded.
datapoints = [row.to_dict() for _, row in datapoints_df.head(20).iterrows()]

### 5. Declare Humanloop Files via code


In [5]:
# Prompt template for the MedQA question-answering Prompt.
# It is rendered with str.format, so every {placeholder} below must be supplied:
#   question, option_A .. option_E  - the multiple-choice question and its options
#   retrieved_data                  - the passage returned by the retrieval tool
# The model is asked for three sections separated by "---": the chosen option,
# an explanation, and verbatim quotes from the retrieved data.
TEMPLATE = """Answer the following question factually.

Question: {question}

Options:
- {option_A}
- {option_B}
- {option_C}
- {option_D}
- {option_E}

---

Here is some retrieved information that might be helpful.
Retrieved data:
{retrieved_data}

---

Give your answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.
```
<chosen option verbatim>
---
<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>
---
<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>
```
"""

In [6]:
@humanloop.tool(path="Evaluations SDK Demo/Retrieval Tool")
def retrieval_tool(question: str) -> str:
    """Retrieve most relevant document from the vector db (Chroma) for the question."""
    # NOTE: the docstring above is kept verbatim on purpose. The tool schema
    # logged in the run output uses it as the tool's description.

    # Query with a single text and ask for a single result, then take the
    # first document of the first (and only) query's results.
    hits = collection.query(query_texts=[question], n_results=1)
    return hits["documents"][0][0]


@humanloop.prompt(
    path="Evaluations SDK Demo/MedQA Answer",
    model="gpt-4o",
    template=TEMPLATE,
    tools=[retrieval_tool.json_schema],
)
def ask_model(
    question: str,
    option_A: str,
    option_B: str,
    option_C: str,
    option_D: str,
    option_E: str,
) -> str:
    """Ask a question and get an answer using a simple RAG pipeline"""
    # The OpenAI client is built per call from OPENAI_API_KEY.
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Retrieve supporting context for the question and render the template
    # with the question, its five options and the retrieved passage.
    user_prompt = TEMPLATE.format(
        question=question,
        option_A=option_A,
        option_B=option_B,
        option_C=option_C,
        option_D=option_D,
        option_E=option_E,
        retrieved_data=retrieval_tool(question),
    )

    # Single-turn chat completion; temperature=0 is passed explicitly.
    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[{"role": "user", "content": user_prompt}],
    )

    # Return the text of the first choice.
    return completion.choices[0].message.content


@humanloop.flow(
    path="Evaluations SDK Demo/MedQA Answer Flow",
    attributes={"prompt": {"model": "gpt-4o", "environment": "evaluation"}},
)
def entrypoint(
    question: str,
    option_A: str,
    option_B: str,
    option_C: str,
    option_D: str,
    option_E: str,
):
    # Flow entry point: forwards the question and its options unchanged to
    # ask_model (as keyword arguments) and returns whatever it returns.
    answer_inputs = {
        "question": question,
        "option_A": option_A,
        "option_B": option_B,
        "option_C": option_C,
        "option_D": option_D,
        "option_E": option_E,
    }
    return ask_model(**answer_inputs)

### 6. Evaluate the Flow


In [7]:
# The Flow under evaluation: its Humanloop path plus the local callable.
flow_under_test = {
    "path": "Evaluations SDK Demo/MedQA Answer Flow",
    "callable": entrypoint,
    "type": "flow",
}

# The datapoints loaded earlier, stored under this dataset path.
medqa_dataset = {
    "datapoints": datapoints,
    "path": "Evaluations SDK Demo/Dataset",
}

# Code evaluators imported at the top of the notebook. Both are declared as
# requiring a target; one returns a number, the other a boolean.
code_evaluators = [
    {
        "path": "Evaluations SDK Demo/Levenshtein",
        "args_type": "target_required",
        "return_type": "number",
        "callable": compare_log_and_target,
    },
    {
        "path": "Evaluations SDK Demo/Exact Match",
        "args_type": "target_required",
        "return_type": "boolean",
        "callable": exact_match,
    },
]

# Run the evaluation with 8 workers.
humanloop.evaluations.run(
    file=flow_under_test,
    name="MedQA Evaluation Decorators",
    dataset=medqa_dataset,
    evaluators=code_evaluators,
    workers=8,
)

[96mEvaluating your flow function corresponding to `Evaluations SDK Demo/MedQA Answer Flow` on Humanloop[0m 



[96mNavigate to your Evaluation:[0m
http://localhost:3000/project/fl_f1LKeioSdpBr6vUXfFMgQ/evaluations/evr_TdSSqdd2e6SH48YI6bsT4/stats

[96mFlow Version ID: flv_GnVlk3RpLWCcFWCJN1flF[0m
[96mRun ID: rn_ZS3HRPc4fnGbcHeAX8YfY[0m
[96m
Running 'MedQA Answer Flow' over the Dataset 'Dataset' using 8 workers[0m 

[96m⏳ Evaluation Progress[0m
Total Logs: 17
Total Judgments: 30



[96m⏳ Evaluation Progress[0m
Total Logs: 17
Total Judgments: 30



[96m⏳ Evaluation Progress[0m
Total Logs: 19
Total Judgments: 37



[96m⏳ Evaluation Progress[0m
Total Logs: 20
Total Judgments: 40



[96m📊 Evaluation Results for Evaluations SDK Demo/MedQA Answer Flow [0m
+----------------------------------+---------------------+
|                                  |        Latest       |
+----------------------------------+---------------------+
|                           Run ID |        

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer Flow', 'flow': {'attributes': {'prompt': {'environment': 'evaluation', 'model': 'gpt-4o'}}}, 'output': "```\nCommon iliac artery aneurysm\n---\nThe patient's symptoms and ultrasound findings suggest a blockage or compression in the urinary tract, leading to dilation of the right ureter and renal pelvis. A common iliac artery aneurysm can compress the ureter, causing this dilation and resulting in flank pain. \n\n- Renal artery stenosis typically causes high blood pressure and kidney dysfunction, not ureteral dilation.\n- Benign prostatic hyperplasia usually causes lower urinary tract symptoms, not unilateral ureteral dilation.\n- Diabetic nephropathy affects kidney function but does not cause ureteral dilation.\n- Urethral stricture would cause difficulty urinating but not unilateral ureteral dilation.\n\n---\nNo relevant information from the retrieved data directly addresses the medical scenario or options provided.\n```", 'inpu

[##--------------------------------------] 1/20 (5.00%) | ETA: 0s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[####------------------------------------] 2/20 (10.00%) | ETA: 18s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[######----------------------------------] 3/20 (15.00%) | ETA: 21s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[########--------------------------------] 4/20 (20.00%) | ETA: 17s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer Flow', 'flow': {'attributes': {'prompt': {'environment': 'evaluation', 'model': 'gpt-4o'}}}, 'output': "```\nGeneration of free radicals\n---\nThe patient's symptoms of ringing in the ears and sensorineural hearing loss are indicative of ototoxicity, a known side effect of certain chemotherapy drugs. Cisplatin, a common drug used in the treatment of transitional cell carcinoma of the bladder, is known to cause ototoxicity. The beneficial effect of cisplatin is primarily due to its ability to generate free radicals, which cause damage to the DNA of cancer cells, leading to cell death. \n\nThe other options are incorrect for the following reasons:\n- Inhibition of thymidine synthesis is a mechanism of action for drugs like methotrexate, which is not typically associated with ototoxicity.\n- Inhibition of proteasome is the mechanism of action for drugs like bortezomib, used in multiple myeloma, not bladder cancer.\n- Hyperstabilizat

[##############--------------------------] 7/20 (35.00%) | ETA: 17s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[################------------------------] 8/20 (40.00%) | ETA: 20s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer Flow', 'flow': {'attributes': {'prompt': {'environment': 'evaluation', 'model': 'gpt-4o'}}}, 'output': '```\nRuxolitinib\n---\nThe patient in the scenario is likely suffering from myelofibrosis, a type of chronic leukemia characterized by bone marrow fibrosis, splenomegaly, and symptoms like fatigue, night sweats, and weight loss. The positive JAK2 mutation supports this diagnosis. Ruxolitinib is a JAK1/2 inhibitor and is specifically used to treat myelofibrosis by reducing symptoms and spleen size. \n\nCladribine is used for hairy cell leukemia, not myelofibrosis. Prednisone is a steroid and not a primary treatment for myelofibrosis. Imatinib is used for chronic myeloid leukemia with BCR-ABL translocation, not JAK2 mutations. Stem cell transplantation is a potential cure for myelofibrosis but is usually considered in younger patients or those with more aggressive disease due to its risks.\n\n---\nGynecology_Novak. 64. Berwick DM

[########################----------------] 12/20 (60.00%) | ETA: 12s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[##########################--------------] 13/20 (65.00%) | ETA: 12s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer Flow', 'flow': {'attributes': {'prompt': {'environment': 'evaluation', 'model': 'gpt-4o'}}}, 'output': "```\nIL-4\n---\nThe mediator described in the experimental study is IL-4. IL-4 is known to promote the class switching of B cells to produce IgE antibodies, which are involved in allergic responses and asthma. By targeting IL-4, the experimental therapy aims to reduce the production of IgE, thereby decreasing the exaggerated immune response during asthmatic attacks triggered by allergens like pollen.\n\n- IL-5 is primarily involved in the growth and activation of eosinophils, which are also related to asthma but not directly involved in antibody class switching.\n- IL-2 is mainly involved in the growth and proliferation of T cells, not in antibody class switching.\n- IL-10 is an anti-inflammatory cytokine that helps regulate immune responses but does not directly influence antibody class switching.\n- IL-13 is involved in asthm

[############################------------] 14/20 (70.00%) | ETA: 9s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer Flow', 'flow': {'attributes': {'prompt': {'environment': 'evaluation', 'model': 'gpt-4o'}}}, 'output': '```\nBenzodiazepine intoxication\n---\nThe symptoms presented by the patient, such as altered mental status, somnolence, slurred speech, diminished deep tendon reflexes, and ataxic gait, are consistent with benzodiazepine intoxication. Benzodiazepines are central nervous system depressants that can cause these symptoms. The blood alcohol concentration is relatively low at 0.04%, which is not typically associated with severe intoxication symptoms, making ethanol intoxication less likely. Hypoglycemia could cause altered mental status but would not typically cause diminished reflexes and ataxia without other symptoms like sweating or palpitations. Cerebral ischemia would likely present with focal neurological deficits rather than generalized symptoms. Cannabis intoxication usually does not cause significant somnolence or diminish

[################################--------] 16/20 (80.00%) | ETA: 6s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[##################################------] 17/20 (85.00%) | ETA: 4s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[####################################----] 18/20 (90.00%) | ETA: 3s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[######################################--] 19/20 (95.00%) | ETA: 1s

IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[########################################] 20/20 (100.00%) | DONE


IS_EVALUATED {'path': 'Evaluations SDK Demo/MedQA Answer', 'prompt': {'temperature': 0, 'provider': 'openai', 'endpoint': 'chat', 'model': 'gpt-4o', 'template': 'Answer the following question factually.\n\nQuestion: {question}\n\nOptions:\n- {option_A}\n- {option_B}\n- {option_C}\n- {option_D}\n- {option_E}\n\n---\n\nHere is some retrieved information that might be helpful.\nRetrieved data:\n{retrieved_data}\n\n---\n\nGive you answer in 3 sections using the following format. Do not include the quotes or the brackets. Do include the "---" separators.\n```\n<chosen option verbatim>\n---\n<clear explanation of why the option is correct and why the other options are incorrect. keep it ELI5.>\n---\n<quote relevant information snippets from the retrieved data verbatim. every line here should be directly copied from the retrieved data>\n```\n', 'tools': [{'name': 'retrieval_tool', 'description': 'Retrieve most relevant document from the vector db (Chroma) for the question.', 'parameters': {'t

[]