In [None]:
# Importing required libraries
import pandas as pd
import json
import os

from groq import Groq

###############################################################################
# 1. Setup: load CSV/TSV data, merge them, etc.
###############################################################################
# Read the API key from the environment rather than embedding it in the
# notebook, so the secret is never saved, committed, or shared with the file.
# Set it before starting Jupyter, e.g.:  export GROQ_API_KEY="<your key>"
# NOTE: any key that was previously pasted into this cell should be treated as
# compromised and rotated.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in your environment before running this notebook."
    )
client = Groq(api_key=api_key)

# Model name passed to every chat completion request below
MODEL = "llama3-70b-8192"

In [5]:
# Read the two tab-separated DrugBank exports. `header=0` makes pandas treat
# each file's first row as a header and replace it with the explicit `names`.
df_drugs = pd.read_csv(
    "drugbank.xml.drugs.txt",
    sep="\t",
    header=0,
    names=["drug_id", "drug_name", "restrictions", "drug_description", "pathways", "general_references"],
)

df_targets = pd.read_csv(
    "drugbank.xml.drug_target.txt",
    sep="\t",
    header=0,
    names=["drug_id", "target_id", "gene_name"],
)

# Left-join the targets onto the drugs: every drug row is kept, and drugs with
# no target entry get missing values in target_id / gene_name.
df = df_drugs.merge(df_targets, on="drug_id", how="left")

In [8]:
# Preview the first five rows of the merged drug/target table
df.head(n=5)

Unnamed: 0,drug_id,drug_name,restrictions,drug_description,pathways,general_references,target_id,gene_name
0,DB03846,5-Hydroxymethyluridine-2'-Deoxy-5'-Monophosphate,experimental,,,,BE0001218,tmk
1,DB09189,Daledalin,experimental,Daledalin (UK-3557-15) is an antidepressant wh...,,,,
2,DB07504,(2R)-1-{4-[(4-anilino-5-bromopyrimidin-2-yl)am...,experimental,,,,BE0001072,CDK2
3,DB07569,CIS-4-METHYL-N-[(1S)-3-(METHYLSULFANYL)-1-(PYR...,experimental,,,,BE0001730,ERG11
4,DB12540,Lecozotan,investigational,Lecozotan has been used in trials studying the...,,,,


In [16]:
def search_table(gene_name="", pathway="", drug_name=""):
    """
    Filter the merged dataframe by gene_name, pathway, and/or drug_name.

    Each non-empty argument is matched as a case-insensitive *literal*
    substring against its column. When several arguments are given, a row
    must match all of them. Rows whose value in a filtered column is missing
    never match.

    Args:
        gene_name: Substring to look for in the "gene_name" column.
        pathway: Substring to look for in the "pathways" column.
        drug_name: Substring to look for in the "drug_name" column.

    Returns:
        The matching rows as a list of dict records (every row if all
        arguments are empty).
    """
    filters = (
        ("gene_name", gene_name),
        ("pathways", pathway),
        ("drug_name", drug_name),
    )

    # Boolean indexing always produces a new frame, so the module-level `df`
    # is never modified and no defensive copy of the whole table is needed.
    filtered_df = df
    for column, needle in filters:
        if not needle:
            continue
        # regex=False: search terms come from free text (often produced by the
        # LLM) and may contain regex metacharacters such as "(", "*" or "+".
        # Treating them as patterns would raise re.error or match the wrong rows.
        matches = filtered_df[column].str.contains(
            needle, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[matches]

    return filtered_df.to_dict(orient="records")

###############################################################################
# 3. Define the "tool" the LLM can call: drugbank_search
###############################################################################
def drugbank_search(gene_name: str = "", pathway: str = "", drug_name: str = ""):
    """
    Search the DrugBank data by gene_name, pathway, and/or drug_name.

    Delegates the filtering to `search_table` and serialises the matching
    records for use as a tool result.

    Args:
        gene_name: Gene name filter forwarded to `search_table`.
        pathway: Pathway filter forwarded to `search_table`.
        drug_name: Drug name filter forwarded to `search_table`.

    Returns:
        A JSON string of the form {"results": [...]} (indented by 2 spaces),
        where missing (NaN) cell values are emitted as null.
    """
    results = search_table(gene_name=gene_name, pathway=pathway, drug_name=drug_name)

    # Missing cells arrive as float NaN, which json.dumps would write as the
    # bare token `NaN` -- not valid JSON. NaN is the only float that is not
    # equal to itself, so `value != value` detects it without extra imports.
    cleaned = [
        {
            key: (None if isinstance(value, float) and value != value else value)
            for key, value in record.items()
        }
        for record in results
    ]
    return json.dumps({"results": cleaned}, indent=2)

###############################################################################
# 4. Declare the function schema for function-calling
###############################################################################
# Tool schema advertised to the model. All three filters are plain strings, so
# their property entries are generated from (argument, description) pairs.
tools = [
    {
        "type": "function",
        "function": {
            "name": "drugbank_search",
            "description": "Search for drug information filtered by gene_name, pathway, or drug_name",
            "parameters": {
                "type": "object",
                "properties": {
                    argument: {"type": "string", "description": description}
                    for argument, description in (
                        ("gene_name", "Gene name to filter on (e.g. 'ADRA1D')"),
                        ("pathway", "Pathway substring to filter on (e.g. 'Oxytetracycline Action Pathway')"),
                        ("drug_name", "Drug name or partial name to filter on (e.g. 'Oxytetracycline')"),
                    )
                },
                # Every filter is optional; the model may supply any subset.
                "required": [],
            },
        },
    }
]

###############################################################################
# 5. Conversation logic
###############################################################################
def _execute_tool_call(tool_call, available_functions):
    """
    Run a single model-requested tool call and return the tool result string.

    Problems caused by the model's request (unknown function name, malformed
    or non-object JSON arguments, arguments the function does not accept) are
    returned as a JSON {"error": ...} payload instead of being raised, so the
    conversation can continue and the model can see what went wrong.
    """
    function_name = tool_call.function.name
    function_to_call = available_functions.get(function_name)
    if function_to_call is None:
        return json.dumps({"error": f"Unknown function '{function_name}'"})

    try:
        # An empty/absent argument string is treated as "no arguments".
        function_args = json.loads(tool_call.function.arguments or "{}")
    except json.JSONDecodeError as exc:
        return json.dumps({"error": f"Arguments are not valid JSON: {exc}"})

    if not isinstance(function_args, dict):
        return json.dumps({"error": "Arguments must be a JSON object"})

    try:
        return function_to_call(**function_args)
    except TypeError as exc:
        # Typically an unexpected keyword or a wrongly typed value from the model.
        return json.dumps({"error": f"Invalid arguments for '{function_name}': {exc}"})


def run_conversation(user_prompt):
    """
    Orchestrates the conversation. The LLM can call 'drugbank_search' if needed.

    The model is first called with the tool schema. If it requests tool calls,
    each one is executed, its result is appended to the conversation, and the
    model is called a second time to produce the final answer.

    Args:
        user_prompt: The user's question, sent as the user message.

    Returns:
        The content of the model's final message: the second response when
        tools were called, otherwise the first response.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are a function-calling LLM that answers questions about drug–gene–pathway "
                "relationships and also drug descriptions. If relevant, call the 'drugbank_search' "
                "function with the appropriate arguments (gene_name, pathway, drug_name). "
                "If no function is needed, simply respond."
            )
        },
        {
            "role": "user",
            "content": user_prompt,
        }
    ]

    # First LLM call
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        tool_choice="auto",  # let the model decide
        max_tokens=1024
    )

    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    if not tool_calls:
        # No function call, so just take the LLM's direct answer
        return response_message.content

    available_functions = {
        "drugbank_search": drugbank_search
    }

    # The assistant message carrying the tool calls must precede their results
    messages.append(response_message)

    for tool_call in tool_calls:
        # Every tool call gets a reply, including failed ones (as an error
        # payload), so no requested call is left unanswered in the history.
        messages.append({
            "tool_call_id": tool_call.id,
            "role": "tool",
            "name": tool_call.function.name,
            "content": _execute_tool_call(tool_call, available_functions),
        })

    # Second call to the model, allowing it to process the function result
    second_response = client.chat.completions.create(
        model=MODEL,
        messages=messages
    )
    return second_response.choices[0].message.content



In [18]:
# Example usage: ask which drugs are linked to a given gene
user_prompt = "Show me drugs related to the CSF1R gene."
gene_answer = run_conversation(user_prompt)
print(gene_answer)

The tool yielded the following drugs related to the CSF1R gene:

1. 5-CYANO-FURAN-2-CARBOXYLIC ACID [5-HYDROXYMETHYL-2-(4-METHYL-PIPERIDIN-1-YL)-PHENYL]-AMIDE (DB07167) - Experimental
2. Imatinib (DB00619) - Approved
   Description: Imatinib is a small molecule kinase inhibitor used to treat certain types of cancer.
3. Sunitinib (DB01268) - Approved, Investigational
   Description: Sunitinib is an oral, small-molecule, multi-targeted receptor tyrosine kinase (RTK) inhibitor that was approved by the FDA for the treatment of renal cell carcinoma (RCC) and imatinib-resistant gastrointestinal stromal tumor (GIST).
4. 6-CHLORO-3-(3-METHYLISOXAZOL-5-YL)-4-PHENYLQUINOLIN-2(1H)-ONE (DB07202) - Experimental
5. ABT-869 (DB06080) - Investigational
   Description: ABT-869 is a small molecule vascular endothelial growth factor (VEGF) receptor-based kinase inhibitor that is designed to suppress tumor growth by preventing the formation of new blood vessels that supply the tumor with oxygen and nutrie

In [24]:
# Example usage: ask for the stored description of a single drug
user_prompt = "What is the description for Oxytetracycline?"
description_answer = run_conversation(user_prompt)
print(description_answer)

According to the results, the description for Oxytetracycline is:

"A tetracycline analog isolated from the actinomycete streptomyces rimosus and used in a wide variety of clinical conditions."
