In [1]:
import sqlite3

from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI
# Load environment variables first (ensure OPENAI_API_KEY is set in ../.env):
# ChatOpenAI looks the key up in the environment when it is constructed, so the
# .env file has to be loaded before the client is built.
env_loaded = load_dotenv("../.env")

# Chat model shared by the SQL toolkit and the agent in the cells below.
model_name = "gpt-5-nano"
llm = ChatOpenAI(model=model_name)

# Echo load_dotenv's return value as the cell output.
env_loaded

True

In [2]:
# SQLite database file the agent will query (relative path).
db_name = "banking_customers_indian.db"

# Setup LangChain SQLDatabase
# (The chat model `llm` is already created in the setup cell, so it is not
# rebuilt here.)
db = SQLDatabase.from_uri(f"sqlite:///{db_name}")

# Quick sanity check of the connection: dialect, visible tables, and a few rows.
print(f"Dialect: {db.dialect}")
print(f"Available tables: {db.get_usable_table_names()}")
print(f'Sample output: {db.run("SELECT * FROM consumers LIMIT 5;")}')

Dialect: sqlite
Available tables: ['consumers']
Sample output: [(1, 'Riya Malhotra', '623056607556', 'IndusInd Bank', 67937.01, 2003699.05, 'Jaipur, RJ', 'Dining'), (2, 'Diya Mehta', '177962195503', 'IndusInd Bank', 97708.29, 392321.56, 'Hyderabad, TG', 'Travel'), (3, 'Saanvi Verma', '099452241336', 'Axis Bank', 150740.89, 1229736.46, 'Hyderabad, TG', 'Groceries'), (4, 'Riya Iyer', '783172869001', 'Punjab National Bank (PNB)', 40197.47, 704246.41, 'Kolkata, WB', 'Fuel'), (5, 'Suresh Saxena', '291992070489', 'Punjab National Bank (PNB)', 155855.37, 1903204.13, 'Hyderabad, TG', 'Travel')]


In [3]:
# Bundle the database and the chat model into the SQL toolkit and fetch its tools.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
tools = toolkit.get_tools()

# One "name: description" entry per tool; the trailing newline in each entry
# leaves a blank line between tools when printed.
tool_summaries = [f"{tool.name}: {tool.description}\n" for tool in tools]
for summary in tool_summaries:
    print(summary)

sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.

sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3

sql_db_list_tables: Input is an empty string, output is a comma-separated list of tables in the database.

sql_db_query_checker: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!



In [6]:
# Echo the raw tool objects: the repr shows each tool's class and the SQLDatabase instance it is bound to.
tools

[QuerySQLDatabaseTool(description="Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x751e4339cec0>),
 InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x751e4339cec0>),
 ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x751e4339cec0>),
 QuerySQLCheckerTool(description='Use this tool to double check

In [5]:
# Short system prompt: a fixed three-step workflow (list tables, read the schema,
# run the query) plus a hint for bank-name lookups that come back empty.
# NOTE(review): `prompt_template` and `system_prompt` are both reassigned by the
# next prompt cell before the agent is created, so in a top-to-bottom run this
# version is never the one passed to the agent.
prompt_template = """
You are an expert banking assistant. 
Follow these steps for every query:
1. List tables to see what is available.
2. Query the schema of relevant tables.
3. Execute the SQL query.

If a query for 'HDFC' returns no results, try 'HDFC Bank' or check unique values in the `bank_name` column.
Limit results to {top_k}.
"""
# Fill the {top_k} placeholder with a row limit of 5.
system_prompt = prompt_template.format(top_k=5)

In [9]:


# Define Custom System Prompt
# The template has two placeholders that are filled in at the bottom of this cell:
#   {dialect} - the SQL dialect reported by `db.dialect`
#   {top_k}   - default cap on the number of rows a query should return
# NOTE(review): the column list below omits `id`, which the schema output further
# down shows as the table's primary key - confirm the omission is intentional.
prompt_template = """
You are an expert banking assistant designed to interact with a SQL database containing customer financial data.
Your goal is to help users understand customer demographics, spending habits, and financial status.

The database has a table named `consumers` with the following columns:
- `customer_name`: Name of the customer.
- `account_number`: Unique identifier for the account.
- `bank_name`: Name of the bank where the account is held (e.g., SBI, HDFC).
- `monthly_average_balance`: Customer's average monthly balance in INR.
- `yearly_income`: Customer's annual income in INR.
- `place`: City and State of the customer (e.g., Mumbai, MH).
- `top_spending_category`: The category where the customer spends the most.

Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

Rules:
1. You can order the results by relevant columns (`yearly_income`, `monthly_average_balance`) to return interesting examples.
2. Never query for all columns from a specific table, only ask for the relevant columns given the question.
3. You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.
4. DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.
5. If asked about "rich" customers, assume it refers to high `yearly_income` or `monthly_average_balance`.

To start you should ALWAYS look at the tables in the database to see what you can query. Do NOT skip this step.
Then you should query the schema of the most relevant tables.
"""

# Substitute the live database dialect and a default row limit of 5.
system_prompt = prompt_template.format(dialect=db.dialect, top_k=5)

In [10]:
# Print (rather than echo) the rendered prompt so its newlines show as line breaks
# and the substituted placeholder values can be checked by eye.
print(system_prompt)


You are an expert banking assistant designed to interact with a SQL database containing customer financial data.
Your goal is to help users understand customer demographics, spending habits, and financial status.

The database has a table named `consumers` with the following columns:
- `customer_name`: Name of the customer.
- `account_number`: Unique identifier for the account.
- `bank_name`: Name of the bank where the account is held (e.g., SBI, HDFC).
- `monthly_average_balance`: Customer's average monthly balance in INR.
- `yearly_income`: Customer's annual income in INR.
- `place`: City and State of the customer (e.g., Mumbai, MH).
- `top_spending_category`: The category where the customer spends the most.

Given an input question, create a syntactically correct sqlite query to run,
then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your
query to at most 5 results.

Rules:
1. You ca

In [11]:
# Build the agent from the chat model, the SQL toolkit's tools, and the custom
# system prompt rendered above.
agent = create_agent(
    llm,
    tools,
    system_prompt=system_prompt,
)

In [12]:
# Alternative question to try:
#query1 = "Which customer in Mumbai has the highest yearly income?"
query1 = "what is the average balance of mumbai customers?"

# Stream the run and, for every streamed state, pretty-print only its most
# recent message so the tool calls and results appear in order.
query1_input = {"messages": [{"role": "user", "content": query1}]}
for state in agent.stream(query1_input, stream_mode="values"):
    latest_message = state["messages"][-1]
    latest_message.pretty_print()


what is the average balance of mumbai customers?
Tool Calls:
  sql_db_list_tables (call_QhRqFojOkAYGdKM7dLkx0Gvi)
 Call ID: call_QhRqFojOkAYGdKM7dLkx0Gvi
  Args:
Name: sql_db_list_tables

consumers
Tool Calls:
  sql_db_schema (call_ZpbPeOFCbzRIR52UHx9jlWFs)
 Call ID: call_ZpbPeOFCbzRIR52UHx9jlWFs
  Args:
    table_names: consumers
Name: sql_db_schema


CREATE TABLE consumers (
	id INTEGER, 
	customer_name TEXT, 
	account_number TEXT, 
	bank_name TEXT, 
	monthly_average_balance REAL, 
	yearly_income REAL, 
	place TEXT, 
	top_spending_category TEXT, 
	PRIMARY KEY (id)
)

/*
3 rows from consumers table:
id	customer_name	account_number	bank_name	monthly_average_balance	yearly_income	place	top_spending_category
1	Riya Malhotra	623056607556	IndusInd Bank	67937.01	2003699.05	Jaipur, RJ	Dining
2	Diya Mehta	177962195503	IndusInd Bank	97708.29	392321.56	Hyderabad, TG	Travel
3	Saanvi Verma	099452241336	Axis Bank	150740.89	1229736.46	Hyderabad, TG	Groceries
*/
Tool Calls:
  sql_db_query_checker (

In [13]:
query2 = "which is the spending category which having highest frequncy or count?"

# Run the agent in a single blocking call and keep the returned state
# for inspection in the following cells.
query2_messages = [{"role": "user", "content": query2}]
output = agent.invoke({"messages": query2_messages})


In [14]:
# Echo the full returned state: a dict whose 'messages' list starts with the human question followed by the AI tool-call messages.
output

{'messages': [HumanMessage(content='which is the spending category which having highest frequncy or count?', additional_kwargs={}, response_metadata={}, id='4dd28d04-55bb-4008-b955-2a6e14f3f4af'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 405, 'prompt_tokens': 790, 'total_tokens': 1195, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 384, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CrxMoctjxuj1RaiqtGkkaJZj1Occc', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019b67f4-5830-72d1-a60f-51c961d0e9ac-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': 'call_n5HLuDSLjcVbmeXrRhukNI9x', 'type': 'tool_call'}], usage_metadata={'input_tokens': 790, 'o

In [15]:
# The last message in the returned state carries the agent's final answer; print just its text.
print(output['messages'][-1].content)

The highest-frequency spending categories are Electronics and Healthcare, tied with 17 customers each.

Top categories by frequency:
- Electronics: 17
- Healthcare: 17
- Groceries: 14
- Utilities: 13
- Dining: 11

Would you like me to break this down further by bank, place, or other filters?


In [27]:
# Pretty-print the agent's final message. Index from the end rather than using a
# fixed position: the number of messages depends on how many tool calls the model
# makes, so a hard-coded index can point at the wrong message (or raise
# IndexError) on a re-run.
output['messages'][-1].pretty_print()


The highest-frequency spending categories are Electronics and Healthcare, tied with 17 customers each.

Top categories by frequency:
- Electronics: 17
- Healthcare: 17
- Groceries: 14
- Utilities: 13
- Dining: 11

Would you like me to break this down further by bank, place, or other filters?


In [None]:
# Further example questions for the agent (this cell has not been executed).
# NOTE(review): this rebinds `query2`, which an earlier cell already uses for the
# spending-category question - consider distinct names if both are needed.
query2 = "What is the average monthly balance of HDFC Bank customers?"

query3 = "List the names of customers who spend most on 'Groceries' and live in Bangalore."
