In [8]:
import os
import sys
from pathlib import Path

from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_openai import ChatOpenAI


load_dotenv("../.env")
model_name ="gpt-5-nano"

# 1. Setup Database and LLM
llm = ChatOpenAI(model=model_name)

client = MultiServerMCPClient(  
    {
        "math": {
            "transport": "stdio",  # Local subprocess communication
            "command": "python",
            # Absolute path to your math_server.py file
            "args": ["/home/bipin/Documents/genai/bot/learning/notebook/pandas_csv.py"],
        },
    }
)

tools = await client.get_tools()  
system_prompt = f"""
You are an agent that answers questions using CSV files via pandas tools.

Workflow (MANDATORY):
1. Always call `pd_list_tables` first to see available CSV files.
2. Inspect the relevant CSV using `pd_schema` before querying.
3. Answer questions ONLY by calling `pd_query` with valid pandas `.query()` syntax.

Rules:
- Never assume column names or data types.
- Use pandas query syntax only (no SQL).
- Select and filter only what is needed.
- If row count is not specified, limit results to 5.
- If a query fails, fix the pandas logic and retry.
- Do NOT modify, save, or overwrite any CSV.
- If the question is ambiguous, ask for clarification.

Return a concise, data-backed answer.
"""

agent = create_agent(
    model=llm,
    tools=tools,
    system_prompt=system_prompt,
)




In [9]:
query1="show all the csv files"

result = await agent.ainvoke(
    {
        "messages": [
            {"role": "user", "content": query1}
        ]
    }
)


In [10]:

# Display the full response returned by agent.ainvoke; its "messages" entry
# holds the message history of the run (user message, AI tool calls, ...).
result


{'messages': [HumanMessage(content='show all the csv files', additional_kwargs={}, response_metadata={}, id='99d76386-12b4-4c4e-a3ae-5eb8aa920021'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 213, 'prompt_tokens': 336, 'total_tokens': 549, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 192, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CuUZHAlmztI2i5yJv35zklY2cJEez', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019b8bf8-f11d-7612-b072-8b4950d0dbbb-0', tool_calls=[{'name': 'pd_list_tables', 'args': {}, 'id': 'call_EHGgyvUPhNsi8VhuXmfwcXyF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 336, 'output_tokens': 213, 'total_tokens': 549, 'input_token

In [11]:
# Print only the last message of the run, i.e. the agent's final reply.
result["messages"][-1].pretty_print()



Available CSV files:
- hospital_visits.csv
- flipkard.csv
- Heart_disease.csv
- college_students.csv
- logistics_shipments.csv

Which file would you like to inspect (pd_schema) or query (pd_query)? If you want a quick preview, I can show the first few rows as well.


In [13]:
query2="which student have highest cgpa , iwant to know collegname and student name"

result = await agent.ainvoke(
    {
        "messages": [
            {"role": "user", "content": query2}
        ]
    }
)

In [15]:
# Display the full response returned by agent.ainvoke; its "messages" entry
# holds the message history of the run (user message, AI tool calls, ...).
result

{'messages': [HumanMessage(content='which student have highest cgpa , iwant to know collegname and student name', additional_kwargs={}, response_metadata={}, id='1fd49656-7e67-4ab1-99b1-57b88fed4c50'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 1045, 'prompt_tokens': 347, 'total_tokens': 1392, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 1024, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CuUbbfY3aGlVIzVJMUoC9YQu8q0KT', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019b8bfb-2703-7e82-b871-278593aa8220-0', tool_calls=[{'name': 'pd_list_tables', 'args': {}, 'id': 'call_uRST5e77WPnu22dJyj8QIf98', 'type': 'tool_call'}], usage_metadata={'input_tokens': 347,

In [20]:
# Print the message just before the final reply. In the recorded run this is
# the `pd_query` tool output; the fixed index -2 relies on that ordering.
result["messages"][-2].pretty_print()


Name: pd_query

[{'type': 'text', 'text': '[{"student_id":"STU2006","student_name":"Sneha Joshi","college_name":"Delhi University","course":"Civil","year_of_study":4,"cgpa":9.76}]', 'id': 'lc_0c185bb7-2dd6-44b0-9a99-49e62cd49489'}]


In [14]:
# Print only the last message of the run, i.e. the agent's final reply.
result["messages"][-1].pretty_print()



- Student name: Sneha Joshi
- College name: Delhi University
- CGPA: 9.76


In [22]:
query3="which patient in hospital_visits.csv file have highest bill amount and its value"

result = await agent.ainvoke(
    {
        "messages": [
            {"role": "user", "content": query3}
        ]
    }
)

In [23]:
# Print only the last message of the run, i.e. the agent's final reply.
result["messages"][-1].pretty_print()



Highest bill amount in hospital_visits.csv:
- Patient ID: PAT1050
- Patient name: Rahul Singh
- Hospital: Narayana Health
- Department: Neurology
- Diagnosis: Back Pain
- Visit date: 2025-06-17
- Bill amount (INR): 49,592.44
