In [1]:
import numpy as np
from llama_cpp import Llama
from tooluniverse.execute_function import ToolUniverse
from sklearn.preprocessing import normalize
import time
from typing import List
import json
import pandas as pd
import pdb

In [2]:

# Create the ToolUniverse engine, load its tool definitions, and fetch the
# tool names and tool descriptions as two parallel sequences.
# NOTE(review): the same three calls are repeated in the "2. Load Your Tools"
# cell further down, which rebinds all three names — this cell looks redundant.
engine = ToolUniverse()
engine.load_tools()
tool_name_list, tool_desc_list = engine.refresh_tool_name_desc()


Tool files:
{'opentarget': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/opentarget_tools.json', 'fda_drug_label': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/fda_drug_labeling_tools.json', 'special_tools': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/special_tools.json', 'monarch': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/monarch_tools.json'}
Number of tools before load tools: 0
Number of tools after load tools: 214


In [3]:
# ==============================================================================
# Step 1: Create a Wrapper for the GGUF Embedding Model
# This makes our llama-cpp model behave like a standard embedding model.
# ==============================================================================

class LlamaCppEmbeddings:
    """
    Wrapper that exposes `embed_documents` / `embed_query` on top of a GGUF
    model loaded with llama-cpp-python in embedding mode.

    Every text is reduced to exactly one 1-D NumPy vector, regardless of
    whether the backend hands back a single pooled vector or a list of
    vectors for the input.
    """
    def __init__(self, repo_id, filename, **kwargs):
        """
        Download (if needed) and load the embedding model.

        Args:
            repo_id (str): The Hugging Face repository ID of the model.
            filename (str): The GGUF filename in the repository.
            **kwargs: Additional arguments forwarded to Llama.from_pretrained
                (e.g., n_gpu_layers, n_ctx). `verbose` defaults to False but
                may be overridden here.
        """
        print("Loading GGUF embedding model...")
        # Default to a quiet load, but let the caller opt back in instead of
        # failing with a duplicate-keyword TypeError.
        kwargs.setdefault("verbose", False)
        # embedding=True switches the model into embedding mode.
        self.llm = Llama.from_pretrained(
            repo_id=repo_id,
            filename=filename,
            embedding=True,
            **kwargs
        )
        print("Model loaded successfully.")

    def _embed_and_average(self, text: str) -> np.ndarray:
        """
        Embed one piece of text and return a single 1-D embedding vector.

        The backend result for the text is read from
        `result['data'][0]['embedding']`, which can take two shapes:

        * a flat list of floats (already one vector) -> returned as-is;
        * a list of vectors (presumably one per token when the model does no
          pooling — confirm against the llama-cpp-python docs) -> averaged
          element-wise into one vector.

        Raises:
            ValueError: if the backend returns no data or an embedding that is
                neither a non-empty vector nor a non-empty list of vectors.
        """
        embedding_result = self.llm.create_embedding(text)

        data = embedding_result['data']
        if not data:
            raise ValueError("Embedding backend returned no data for the input text.")

        vectors = np.asarray(data[0]['embedding'], dtype=float)

        if vectors.ndim == 1 and vectors.size > 0:
            # Already a single vector. Averaging over axis 0 here would
            # collapse it to a scalar, so return it untouched.
            return vectors
        if vectors.ndim == 2 and vectors.shape[0] > 0:
            # Several vectors for one text: mean-pool them into one.
            return vectors.mean(axis=0)

        raise ValueError(
            f"Unexpected embedding shape {vectors.shape}; expected a vector or a list of vectors."
        )

    def embed_documents(self, texts: List[str]) -> List[np.ndarray]:
        """
        Return one embedding vector per input text, in input order.
        """
        print(f"Generating embeddings for {len(texts)} documents...")
        # Texts are embedded one at a time so each gets its own pooling step.
        return [self._embed_and_average(text) for text in texts]

    def embed_query(self, text: str) -> np.ndarray:
        """
        Return the embedding vector for a single query text.
        """
        return self._embed_and_average(text)


In [4]:
# ==============================================================================
# Step 2: Implement the Tool Retriever using NumPy
# This class will manage the tool index and perform the similarity search.
# ==============================================================================

class ToolRetriever:
    """
    Retrieves relevant tools based on semantic similarity to a query.

    Tool descriptions are embedded once at construction time and stored as
    L2-normalised rows, so a query only needs one embedding call and one
    matrix product to rank all tools by cosine similarity.
    """
    def __init__(self, tool_names: List[str], tool_descriptions: List[str], embedding_model):
        """
        Initializes the retriever and builds the tool index.

        Args:
            tool_names (list[str]): A list of tool names.
            tool_descriptions (list[str]): Descriptions, aligned index-for-index
                with `tool_names`.
            embedding_model: Object providing `embed_documents(texts)` and
                `embed_query(text)`.

        Raises:
            ValueError: if either list is empty, if the two lists differ in
                length, or if the embedding model does not return exactly one
                equally-sized vector per description.
        """
        if not tool_names or not tool_descriptions:
            raise ValueError("Tool names and descriptions cannot be empty.")
        if len(tool_names) != len(tool_descriptions):
            # zip() would silently drop the surplus entries while the embedding
            # matrix would still cover every description, misaligning indices.
            raise ValueError(
                f"Got {len(tool_names)} tool names but {len(tool_descriptions)} "
                "tool descriptions; the two lists must have the same length."
            )

        self.tool_names = tool_names
        self.tool_descriptions = tool_descriptions
        self.embedding_model = embedding_model

        # Row i of the embedding matrix corresponds to self.tools[i].
        self.tools = [
            {"name": name, "description": desc}
            for name, desc in zip(tool_names, tool_descriptions)
        ]

        print("Building tool index...")
        start_time = time.time()
        raw_embeddings = np.asarray(
            self.embedding_model.embed_documents(self.tool_descriptions), dtype=float
        )
        if raw_embeddings.ndim != 2 or raw_embeddings.shape[0] != len(self.tools):
            raise ValueError(
                f"Expected one embedding vector per tool ({len(self.tools)} rows), "
                f"got an array of shape {raw_embeddings.shape}."
            )

        # Unit-length rows let a plain dot product act as cosine similarity.
        self.tool_embeddings = self._l2_normalize(raw_embeddings)

        end_time = time.time()
        print(f"Tool index built successfully in {end_time - start_time:.2f} seconds.")
        print(f"Indexed {len(self.tools)} tools with embedding dimension {self.tool_embeddings.shape[1]}.")

    @staticmethod
    def _l2_normalize(matrix: np.ndarray) -> np.ndarray:
        """Scale each row of a 2-D array to unit L2 norm; all-zero rows stay zero."""
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        norms[norms == 0.0] = 1.0  # avoid division by zero for all-zero rows
        return matrix / norms

    def retrieve(self, query: str, top_k: int = 5) -> List[dict]:
        """
        Retrieves the top_k most relevant tools for a given query.

        Args:
            query (str): The user's query.
            top_k (int): The number of tools to retrieve. Values larger than
                the index size return every tool; values <= 0 return an empty
                list.

        Returns:
            list[dict]: The most similar tools, best first, each a dict with
            'name' and 'description'.
        """
        print(f"\nRetrieving top {top_k} tools for query: '{query}'")

        # Guard first: with top_k == 0 the slice [-0:] would select everything.
        limit = min(top_k, len(self.tools))
        if limit <= 0:
            return []

        # 1. Embed the query as a single row and bring it to unit length.
        query_embedding = np.asarray(
            self.embedding_model.embed_query(query), dtype=float
        ).reshape(1, -1)
        query_embedding = self._l2_normalize(query_embedding)

        # 2. Both sides are unit vectors, so the dot product is the cosine similarity.
        similarities = np.dot(query_embedding, self.tool_embeddings.T)[0]

        # 3. argsort is ascending; reverse it and keep the first `limit` indices.
        top_k_indices = np.argsort(similarities)[::-1][:limit]

        # 4. Map indices back to the tool records.
        retrieved_tools = [self.tools[i] for i in top_k_indices]

        print("Retrieved tools:")
        for rank, (tool, index) in enumerate(zip(retrieved_tools, top_k_indices), start=1):
            print(f"  {rank}. {tool['name']} (Similarity: {similarities[index]:.4f})")

        return retrieved_tools

In [5]:
# ==============================================================================
# Step 3: Implement the Prompt Formatter
# This function creates the final string to inject into your agent's prompt.
# ==============================================================================

def format_tools_for_prompt(retrieved_tools: List[dict]) -> str:
    """
    Render retrieved tools as a plain-text block for inclusion in an LLM prompt.

    Args:
        retrieved_tools: Tool records, each with 'name' and 'description' keys.

    Returns:
        A fixed fallback sentence when no tools are given; otherwise an
        instruction line followed by one "- Tool / Description" entry per tool,
        wrapped in [AVAILABLE_TOOLS] ... [END_OF_TOOLS] markers.
    """
    if not retrieved_tools:
        return "No tools available for this query."

    preamble = (
        "You have access to the following tools. Use them if necessary to answer the user's question.\n\n"
        "[AVAILABLE_TOOLS]\n"
    )
    # One two-line, bullet-style entry per tool, in the order received.
    entries = "".join(
        f"- Tool: {entry['name']}\n  Description: {entry['description']}\n"
        for entry in retrieved_tools
    )
    return preamble + entries + "[END_OF_TOOLS]"

In [6]:
# GGUF embedding model to load: Hugging Face repo and the file inside it.
EMBEDDING_REPO_ID = "second-state/gte-Qwen2-1.5B-instruct-GGUF"
EMBEDDING_GGUF_FILE = "gte-Qwen2-1.5B-instruct-Q5_K_S.gguf"

# No n_ctx is passed, so the backend default applies (the load log below
# reports n_ctx_per_seq = 512). To widen the context window, forward it as an
# extra keyword argument, e.g. n_ctx=2048.
# NOTE(review): some queries later in this notebook are long — confirm they
# fit in the context window being used.
embedding_model = LlamaCppEmbeddings(
    repo_id=EMBEDDING_REPO_ID,
    filename=EMBEDDING_GGUF_FILE,
)



Loading GGUF embedding model...


llama_context: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_set_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_c4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mm_id_bf16_f16                (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64 

Model loaded successfully.


In [7]:
# --- 2. Load Your Tools ---
# Create the ToolUniverse engine, load its tool definitions, and fetch the
# tool names and descriptions as two parallel sequences.
engine = ToolUniverse()
engine.load_tools()
tool_name_list, tool_desc_list = engine.refresh_tool_name_desc()

# Strip the leading "<name>: " prefix so only the description text is embedded
# (assumes entries look like "<name>: <description>" — TODO confirm).
# Split on the FIRST ': ' only: a description that itself contains ': ' must
# not be cut short. Taking [-1] leaves an entry without the separator unchanged
# instead of raising IndexError.
tool_desc_list = [entry.split(': ', 1)[-1] for entry in tool_desc_list]


Tool files:
{'opentarget': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/opentarget_tools.json', 'fda_drug_label': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/fda_drug_labeling_tools.json', 'special_tools': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/special_tools.json', 'monarch': '/Users/awxlong/anaconda3/envs/ai/lib/python3.8/site-packages/tooluniverse/data/monarch_tools.json'}
Number of tools before load tools: 0
Number of tools after load tools: 214


In [8]:
# --- 3. Create the Tool Retriever ---
# Constructing the retriever embeds every tool description and builds the
# similarity index immediately; the recorded run below took about 40 seconds
# for 214 tools, so avoid re-running this cell unnecessarily.
retriever = ToolRetriever(
    tool_names=tool_name_list,
    tool_descriptions=tool_desc_list,
    embedding_model=embedding_model
)

Building tool index...
Generating embeddings for 214 documents...
Tool index built successfully in 40.60 seconds.
Indexed 214 tools with embedding dimension 1536.


In [11]:
# --- 4. Define a Query and Retrieve Tools ---
# Example queries. Each one has its own name so that switching examples is a
# one-line change below, rather than relying on the last of several
# reassignments of `user_query` winning.

# Short, direct question.
QUERY_WARFARIN_ASPIRIN = "Can a patient taking warfarin also take aspirin? I need to know the risks."

# Long free-form reasoning traces used as queries.
QUERY_OGIVRI_TRACE = "Okay, so I'm trying to figure out why Ogivri should be discontinued permanently if a patient experiences a severe infusion reaction. I remember that Ogivri is a medication used for treating infections, maybe something like a viral infection or a fungal infection. It's important to note that it's not a life-saving drug, but it's used in specific cases.\n\nFirst, I think about what a severe infusion reaction means. That usually happens when the body is trying to recover from a severe infection but isn't responding properly. The body's immune system isn't working well, and maybe the infection is too strong. So, if the patient is in a critical condition, Ogivri might not be the best option.\n\nI also recall that Ogivri is often used in cases where there's a viral infection, like HIV, or when there's a fungal infection that's causing severe symptoms. But if the reaction is severe, the body might not be able to handle it. So, discontinuing the medication would make sense because the patient's condition is too severe to allow the drug to work properly.\n\nLooking at the options, option A is to decrease the rate of infusion. But if the reaction is severe, reducing the rate might not help because the infection is already too advanced. Option B is to interrupt the infusion temporarily. That could help the body recover, but if the reaction is severe, the body might not be able to recover, so interrupting it might not be enough. Option D is to administer a higher dose. That seems counterproductive because if the reaction is severe, the body might not need a higher dose to recover.\n\nOption C is to discontinuate Ogivri permanently. This seems like the best option because if the reaction is severe, Ogivri isn't effective anymore, and the patient's condition is too critical. Discontinuing it would prevent further complications and allow the patient to receive proper care.\n\nI'm a bit confused about why option B isn't the answer. Maybe if the reaction is severe, the body can't recover, so interrupting the infusion temporarily might not be enough. The body might not be able to handle the infection anymore, so continuing the infusion would be worse. So, option B might not be the right choice.\n\nI think the key here is that Ogivri is a treatment for severe infections, and if the reaction is severe, it's not effective. Therefore, discontinuing it is the correct action. So, the answer should be option C."
QUERY_JRA_TRACE = "Okay, so I'm trying to figure out the best treatment for this 10-year-old with juvenile rheumatoid arthritis (JRA) who's a poor CYP2C9 metabolizer. Let me break this down step by step.\n\nFirst, I know that JRA is a chronic inflammatory condition, and it's often treated with non-steroidal anti-inflammatory drugs (NSAIDs) because they help reduce inflammation. But this child is a 10-year-old, so maybe the treatment isn't as intensive as adults, but still needs to be appropriate.\n\nNow, the genetic test shows poor CYP2C9 metabolism. CYP stands for Coenzyme Q10, and CYP2C9 is one of the eight enzymes in the CoA cycle. These enzymes are crucial for the body's immune response to inflammation. If a drug metabolizes CYP2C9, it can interfere with the body's ability to fight off the infection caused by JRA.\n\nI remember that drugs like Celecoxib are used for JRA. Celecoxib is a CYP2C9 dehydrogenase inhibitor, which means it prevents the body from breaking down CYP2C9. This would help reduce inflammation because the body can't effectively combat the infection anymore.\n\nLooking at the options, option A is Celecoxib 200 mg. Option B is First Aid Aspirin, which is an NSAID. Option C is Florexa, which I think is a CYP2C9 dehydrogenase inhibitor as well. Option D is none of the above.\n\nWait, Celecoxib is a CYP2C9 inhibitor, and Florexa is also a CYP2C9 inhibitor. So both A and C are possible candidates. But Celecoxib is a medication that's specifically used for JRA. First Aid Aspirin is more for pain relief and isn't specific to JRA. Florexa is a brand name for Celecoxib, I think.\n\nSo, if the child is a poor CYP2C9 metabolizer, Celecoxib would be the most appropriate because it targets the enzyme that's causing the issue. Florexa is the same drug, so it's not a separate option. Therefore, the correct answer should be Celecoxib."

# The query actually sent to the retriever (the JRA trace, as in the recorded run).
user_query = QUERY_JRA_TRACE
retrieved_tools1 = retriever.retrieve(query=user_query.strip(), top_k=3)



Retrieving top 3 tools for query: 'Okay, so I'm trying to figure out the best treatment for this 10-year-old with juvenile rheumatoid arthritis (JRA) who's a poor CYP2C9 metabolizer. Let me break this down step by step.

First, I know that JRA is a chronic inflammatory condition, and it's often treated with non-steroidal anti-inflammatory drugs (NSAIDs) because they help reduce inflammation. But this child is a 10-year-old, so maybe the treatment isn't as intensive as adults, but still needs to be appropriate.

Now, the genetic test shows poor CYP2C9 metabolism. CYP stands for Coenzyme Q10, and CYP2C9 is one of the eight enzymes in the CoA cycle. These enzymes are crucial for the body's immune response to inflammation. If a drug metabolizes CYP2C9, it can interfere with the body's ability to fight off the infection caused by JRA.

I remember that drugs like Celecoxib are used for JRA. Celecoxib is a CYP2C9 dehydrogenase inhibitor, which means it prevents the body from breaking down CY

In [15]:
# Multiple-choice questions used as retrieval queries. Each one has its own
# name so the earlier example is not a dead assignment that is immediately
# overwritten.
QUESTION_JRA = "A 10-year-old child diagnosed with juvenile rheumatoid arthritis (JRA) requires treatment. Genetic testing reveals the child is a poor CYP2C9 metabolizer. Which drug is the most appropriate for this patient? \nOptions: {'A': 'Celecoxib 200 mg', 'B': 'First Aid Direct Chewable Aspirin', 'C': 'Florexa', 'D': 'None of the above'}"
QUESTION_FCS = "A 70-year-old male with familial chylomicronemia syndrome (FCS) presents with memory issues, recurrent pancreatitis, and mild renal impairment (eGFR 60 mL/min). He has no known history of hypersensitivity reactions. What is the most suitable treatment option for this patient? \nOptions: {'A': 'TRYNGOLZA (Olezarsen Sodium) without dose adjustment', 'B': 'TRYNGOLZA with dose adjustment due to renal impairment', 'C': 'A triglyceride-lowering drug contraindicated for geriatric patients', 'D': 'A drug targeting APOC-III but requiring pediatric dosing'}"

# The question actually sent to the retriever (the FCS case, as in the recorded run).
user_question = QUESTION_FCS
retrieved_tools3 = retriever.retrieve(query=user_question.strip(), top_k=3)


Retrieving top 3 tools for query: 'A 70-year-old male with familial chylomicronemia syndrome (FCS) presents with memory issues, recurrent pancreatitis, and mild renal impairment (eGFR 60 mL/min). He has no known history of hypersensitivity reactions. What is the most suitable treatment option for this patient? 
Options: {'A': 'TRYNGOLZA (Olezarsen Sodium) without dose adjustment', 'B': 'TRYNGOLZA with dose adjustment due to renal impairment', 'C': 'A triglyceride-lowering drug contraindicated for geriatric patients', 'D': 'A drug targeting APOC-III but requiring pediatric dosing'}'
Retrieved tools:
  1. get_disease_therapeutic_areas_by_efoId (Similarity: 0.7154)
  2. get_overdosage_info_by_drug_name (Similarity: 0.7128)
  3. get_drug_names_by_overdosage_info (Similarity: 0.7039)


In [32]:
# Collect just the names of the retrieved tools, best match first. The bare
# name on the last line is what the cell displays; a bare expression on any
# earlier line is never shown, so none is kept here.
tools = [tool['name'] for tool in retrieved_tools3]
tools

['get_disease_therapeutic_areas_by_efoId',
 'get_overdosage_info_by_drug_name',
 'get_drug_names_by_overdosage_info']

In [34]:
# Look up the full tool records for the retrieved names, then convert them
# into the prompt-ready form (name / description / parameter schema, as shown
# in the output of the next cell).
picked_tools = engine.get_tool_by_name(tools)
picked_tools_prompt = engine.prepare_tool_prompts(picked_tools)

In [37]:
# Display the prompt-ready tool specs produced above.
picked_tools_prompt

[{'name': 'get_disease_therapeutic_areas_by_efoId',
  'description': 'Retrieve the therapeutic areas associated with a specific disease efoId.',
  'parameter': {'type': 'object',
   'properties': {'efoId': {'type': 'string',
     'description': 'The EFO ID of the disease.',
     'required': True}}}},
 {'name': 'get_overdosage_info_by_drug_name',
  'description': 'Retrieve information about signs, symptoms, and laboratory findings of acute overdosage based on the drug name.',
  'parameter': {'type': 'object',
   'properties': {'drug_name': {'type': 'string',
     'description': 'The name of the drug.',
     'required': True},
    'limit': {'type': 'integer',
     'description': 'The number of records to return.',
     'required': False},
    'skip': {'type': 'integer',
     'description': 'The number of records to skip.',
     'required': False}}}},
 {'name': 'get_drug_names_by_overdosage_info',
  'description': 'Retrieve drug names based on information about signs, symptoms, and labora

In [45]:
# Pass the first prepared tool spec through extract_function_call_json and
# display the result. In the recorded output the dict comes back unchanged.
# NOTE(review): this passes a tool spec rather than a function call — confirm
# that is what this helper is meant to receive.
engine.extract_function_call_json(picked_tools_prompt[0])

{'name': 'get_disease_therapeutic_areas_by_efoId',
 'description': 'Retrieve the therapeutic areas associated with a specific disease efoId.',
 'parameter': {'type': 'object',
  'properties': {'efoId': {'type': 'string',
    'description': 'The EFO ID of the disease.',
    'required': True}}}}

In [62]:
# Build the function-call payload first, then execute it through the engine.
# NOTE(review): in the recorded output, EFO_0020034 resolves to "familial
# Behcet-like autoinflammatory syndrome", not the familial chylomicronemia
# syndrome from the question above — confirm the intended EFO id.
efo_lookup_call = {
    "name": "get_disease_therapeutic_areas_by_efoId",
    "arguments": {"efoId": "EFO_0020034"},
}
engine.run_one_function(efo_lookup_call)

loaded function call json {'name': 'get_disease_therapeutic_areas_by_efoId', 'arguments': {'efoId': 'EFO_0020034'}}


{'data': {'disease': {'id': 'EFO_0020034',
   'name': 'familial Behcet-like autoinflammatory syndrome',
   'therapeuticAreas': [{'id': 'EFO_0000540', 'name': 'immune system disease'},
    {'id': 'OTAR_0000018',
     'name': 'genetic, familial or congenital disease'}]}}}