In [8]:
from smolagents import ToolCollection, ToolCallingAgent, OpenAIServerModel
from mcp import StdioServerParameters
# from pydantic import BaseModel, Field
import os
# from typing import Optional, List, Literal

### Configuration

In [9]:
# Load the Nebius API key from the local `secret.txt` file into the environment.
# The `with` block closes the file handle as soon as the key has been read,
# instead of leaving it open until garbage collection.
with open('secret.txt', 'r') as secret_file:
    os.environ['NEBIUS_API_KEY'] = secret_file.read().strip()

In [10]:
# Stdio server parameters: run `uvx opengenes-mcp stdio` as the MCP server command.
server = StdioServerParameters(command="uvx", args=["opengenes-mcp", "stdio"])

In [11]:
# Identifier of the chat model requested from the endpoint below.
MODEL = "Qwen/Qwen3-235B-A22B-Instruct-2507"

# Client for the Nebius endpoint; the API key is read from the environment.
model = OpenAIServerModel(
    api_base="https://api.studio.nebius.com/v1/",
    api_key=os.environ["NEBIUS_API_KEY"],
    model_id=MODEL,
    temperature=0,  # presumably chosen for reproducible answers - TODO confirm
)

### Checks

In [16]:
with ToolCollection.from_mcp(
    server_parameters=server,
    trust_remote_code=True,
    structured_output=True,
) as tools:
    # Verify that the tools were actually picked up from the MCP server.
    print([mcp_tool.name for mcp_tool in tools.tools])

['opengenes_get_schema_info', 'opengenes_example_queries', 'opengenes_db_query']


In [115]:
def check_tool(tool):
    """Call one MCP tool by name, without arguments, and print its result.

    Opens a fresh ``ToolCollection`` against the module-level ``server``
    (with ``structured_output=False``), picks the first tool whose ``name``
    equals ``tool``, invokes it with no arguments and prints the output.

    Args:
        tool: Name of the tool to call, e.g. ``"opengenes_example_queries"``.

    Raises:
        IndexError: If no tool with that name is exposed. The message lists
            the tool names that are available.
    """
    with ToolCollection.from_mcp(server, trust_remote_code=True, structured_output=False) as tc:
        selected = next((t for t in tc.tools if t.name == tool), None)
        if selected is None:
            # Same exception type as the former `[...][0]` lookup, but with a
            # message that says what went wrong and what names would work.
            available = [t.name for t in tc.tools]
            raise IndexError(f"No MCP tool named {tool!r}; available tools: {available}")
        print(selected())

In [116]:
# Call the `opengenes_example_queries` tool and print what it returns.
check_tool(tool="opengenes_example_queries")

[{"category":"Lifespan Effects - Ordered by Magnitude","description":"Genes that increase lifespan, ordered by greatest extension first","query":"SELECT HGNC, model_organism, effect_on_lifespan, lifespan_percent_change_mean FROM lifespan_change WHERE effect_on_lifespan = 'increases lifespan' AND lifespan_percent_change_mean IS NOT NULL ORDER BY lifespan_percent_change_mean DESC","key_concept":"Always order lifespan results by magnitude for relevance. Use LIMIT only when user specifically asks for 'top N' or similar"},{"category":"Lifespan Effects - Ordered by Magnitude","description":"Genes that decrease lifespan, ordered by greatest reduction first","query":"SELECT HGNC, model_organism, effect_on_lifespan, lifespan_percent_change_mean FROM lifespan_change WHERE effect_on_lifespan = 'decreases lifespan' AND lifespan_percent_change_mean IS NOT NULL ORDER BY lifespan_percent_change_mean ASC","key_concept":"Use ASC ordering for lifespan reductions to show largest decreases first. Use LIMI

In [117]:
# Call the `opengenes_get_schema_info` tool and print what it returns.
check_tool(tool="opengenes_get_schema_info")

{"database_overview":{"description":"OpenGenes database contains aging and lifespan research data with 4 main tables linked by HGNC gene symbols","total_tables":4,"primary_key":"HGNC (gene symbol) - links all tables together"},"critical_query_guidelines":{"multi_value_fields":{"description":"Some columns contain comma-separated values. ALWAYS use LIKE queries with wildcards for these fields.","fields":["gene_hallmarks.'hallmarks of aging' - contains multiple aging hallmarks per gene","lifespan_change.intervention_deteriorates - multiple biological processes that deteriorate","lifespan_change.intervention_improves - multiple biological processes that improve"],"example_syntax":"WHERE \"hallmarks of aging\" LIKE '%stem cell exhaustion%'"},"lifespan_metrics":{"description":"Database contains both mean and maximum lifespan change metrics. When user asks about lifespan effects without specifying, show both.","mean_vs_max":"lifespan_percent_change_mean shows average effect, lifespan_percent_

### Prompts

In [12]:
# Read the system prompt text from disk. The `with` block closes the file
# handle right after reading instead of leaving it open.
with open('opengenes_system_prompt.txt', 'r') as prompt_file:
    SYSTEM_PROMPT = prompt_file.read()

In [25]:
# Gene symbol interpolated into the user prompt below.
GENE = "NRF2"

# Task prompt sent to the agent. The fallback answer "Info not available" is
# part of the instructions and must stay verbatim.
USER_PROMPT = f"""
What are:
- Longevity/lifespan/healthspan association
- Modification effects
- Known genetic interventions
- Disease involvement
for the gene: {GENE} across different species?
If there are any known synonyms for the gene, use them in your queries as well.
If some information is not available in the database for this name of the gene, try to find synonyms and fetch data for them.
If this attempt also fails, return "Info not available".
"""

### Run

In [24]:
# Build and run the tool-calling agent inside the ToolCollection context.
with ToolCollection.from_mcp(
    server, trust_remote_code=True, structured_output=False
) as tools:
    agent = ToolCallingAgent(
        tools=list(tools.tools),
        model=model,
        max_steps=5,
        add_base_tools=False,
    )
    # Replace the agent's system prompt template with the one loaded from disk.
    agent.prompt_templates["system_prompt"] = SYSTEM_PROMPT
    result = agent.run(USER_PROMPT)

print(result)

The gene NRF2 (also known as NFE2L2) has been analyzed across multiple dimensions of aging and longevity research. Here is a comprehensive summary based on the available data:

### 1. Longevity/Lifespan/Healthspan Association
- **Lifespan Effects**: No direct experimental data from model organisms (e.g., mouse, C. elegans, fly) is available in the database for either NRF2 or its synonym NFE2L2 regarding changes in lifespan (no entries in the `lifespan_change` table).
- **Healthspan Protection**: Despite the lack of lifespan extension data, NFE2L2 is strongly associated with protection against age-related impairment. This suggests a role in improving healthspan rather than extending maximum lifespan.

### 2. Modification Effects
- **Gene Activity Changes**: Alterations in NFE2L2 activity are linked to protective effects against age-related functional decline. Specifically, changes in its activity help protect against age-related impairments, indicating that upregulation or activation of

### Test

In [29]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
from opengenes import run_query

In [None]:
# Exercise `run_query` from the local `opengenes` module with the gene symbol "NRF2".
output = run_query("NRF2")

In [None]:
# Bare expression: the notebook displays `output` as the cell result.
output