In [None]:
# pip install \
#   groq requests \
#   langgraph langchain langchain-community langchain-groq \
#   llama-index llama-index-llms-groq

In [3]:
import requests
import dotenv

dotenv.load_dotenv()

True

In [None]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0
)

llm.invoke("hello")

AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 11, 'total_tokens': 35, 'completion_time': 0.054431395, 'completion_tokens_details': None, 'prompt_time': 0.000319605, 'prompt_tokens_details': None, 'queue_time': 0.065704875, 'total_time': 0.054751}, 'model_name': 'meta-llama/llama-4-scout-17b-16e-instruct', 'system_fingerprint': 'fp_37da608fc1', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019b7434-0d7b-7d21-8f03-b638e2a63bf6-0', usage_metadata={'input_tokens': 11, 'output_tokens': 24, 'total_tokens': 35})

In [18]:
from IPython.display import display, Markdown

# SELECT *
# FROM customers
# WHERE kyc_status = 'VERIFIED';

# LLM:
# SELECT * 
# FROM customer 
# WHERE kyc_status = 'VERIFIED';

prompt1 = str("generate an sql query to get everything from customer table whose kyc status is 'VERIFIED'")

# EXPECTED:
# SELECT *
# FROM loans
# WHERE disbursed_at BETWEEN '2023-01-01' AND '2023-12-31';

# LLM:
# SELECT * 
# FROM loans 
# WHERE disbursement_date BETWEEN '2023-01-01' AND '2023-06-30';

prompt2 = str("I need info of loans which were disbursed between jan 2023 and june 2023")

response = llm.invoke([{
            "role": "system",
            "content": "You're an experience developer with vast knowledge in SQL and Python. You have to generate SQL queries for the user, SQL queries no headings, explanations, or anything.",
        },
        {
            "role": "user",
            "content": f"{prompt2}",
        }])

display(Markdown(response.content))

```sql
SELECT * 
FROM loans 
WHERE disbursement_date BETWEEN '2023-01-01' AND '2023-06-30';
```

In [38]:
from langchain_community.utilities import SQLDatabase

POSTGRES_URI = "postgresql+psycopg2://postgres:Nst%401995@localhost:5432/postgres"

db = SQLDatabase.from_uri(
    POSTGRES_URI,
    include_tables=["branches", "customers", "deposit_accounts", "loans", "repayments"],  # important
    sample_rows_in_table_info=2
)


In [39]:
db.run("""SELECT *
FROM customers
WHERE kyc_status = 'VERIFIED' limit 5;""")

"[(1, 'Customer_1', 'VERIFIED', datetime.datetime(2024, 1, 6, 0, 0)), (5, 'Customer_5', 'VERIFIED', datetime.datetime(2023, 11, 30, 0, 0)), (8, 'Customer_8', 'VERIFIED', datetime.datetime(2021, 11, 20, 0, 0)), (9, 'Customer_9', 'VERIFIED', datetime.datetime(2020, 7, 8, 0, 0)), (14, 'Customer_14', 'VERIFIED', datetime.datetime(2022, 2, 14, 0, 0))]"

In [40]:
from langchain_community.agent_toolkits import SQLDatabaseToolkit

toolkit = SQLDatabaseToolkit(db=db, llm=llm)

tools = toolkit.get_tools()

for tool in tools:
    print(f"{tool.name}: {tool.description}\n")

sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.

sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3

sql_db_list_tables: Input is an empty string, output is a comma-separated list of tables in the database.

sql_db_query_checker: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!



In [43]:
db.dialect

'postgresql'

In [41]:
dialect=db.dialect
top_k=5

system_prompt = f"""
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results."""

# You can order the results by a relevant column to return the most interesting
# examples in the database. Never query for all the columns from a specific table,
# only ask for the relevant columns given the question.

# You MUST double check your query before executing it. If you get an error while
# executing a query, rewrite the query and try again.

# DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
# database.

# To start you should ALWAYS look at the tables in the database to see what you
# can query. Do NOT skip this step.

# Then you should query the schema of the most relevant tables.
# """

In [42]:
from langchain.agents import create_agent


agent = create_agent(
    llm,
    tools,
    system_prompt=system_prompt,
)

question = "I need info of loans which were disbursed between jan 2023 and june 2023"

for step in agent.stream(
    {"messages": [{"role": "user", "content": question}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


I need info of loans which were disbursed between jan 2023 and june 2023

To get the information about loans disbursed between January 2023 and June 2023, I will need to know the exact table and column names in the database. 

First, I will list all the tables in the database.
Tool Calls:
  sql_db_list_tables (8wzrdds9b)
 Call ID: 8wzrdds9b
  Args:
    tool_input:
Name: sql_db_list_tables

branches, customers, deposit_accounts, loans, repayments
Tool Calls:
  sql_db_query (hep7scqm1)
 Call ID: hep7scqm1
  Args:
    query: SELECT * FROM loans WHERE disbursed_date BETWEEN '2023-01-01' AND '2023-06-30' LIMIT 5
Name: sql_db_query

Error: (psycopg2.errors.UndefinedColumn) column "disbursed_date" does not exist
LINE 1: SELECT * FROM loans WHERE disbursed_date BETWEEN '2023-01-01...
                                  ^
HINT:  Perhaps you meant to reference the column "loans.disbursed_at".

[SQL: SELECT * FROM loans WHERE disbursed_date BETWEEN '2023-01-01' AND '2023-06-30' LIMIT 5]
(Backgroun

### GOOD OUTPUT

```
================================[1m Human Message [0m=================================

I need info of loans which were disbursed between jan 2023 and june 2023
==================================[1m Ai Message [0m==================================

To get the information about loans disbursed between January 2023 and June 2023, I will need to know the exact table and column names in the database. 

First, I will list all the tables in the database.
Tool Calls:
  sql_db_list_tables (8wzrdds9b)
 Call ID: 8wzrdds9b
  Args:
    tool_input:
=================================[1m Tool Message [0m=================================
Name: sql_db_list_tables

branches, customers, deposit_accounts, loans, repayments
==================================[1m Ai Message [0m==================================
Tool Calls:
  sql_db_query (hep7scqm1)
 Call ID: hep7scqm1
  Args:
    query: SELECT * FROM loans WHERE disbursed_date BETWEEN '2023-01-01' AND '2023-06-30' LIMIT 5
=================================[1m Tool Message [0m=================================
Name: sql_db_query

Error: (psycopg2.errors.UndefinedColumn) column "disbursed_date" does not exist
LINE 1: SELECT * FROM loans WHERE disbursed_date BETWEEN '2023-01-01...
                                  ^
HINT:  Perhaps you meant to reference the column "loans.disbursed_at".

[SQL: SELECT * FROM loans WHERE disbursed_date BETWEEN '2023-01-01' AND '2023-06-30' LIMIT 5]
(Background on this error at: https://sqlalche.me/e/20/f405)
==================================[1m Ai Message [0m==================================
Tool Calls:
  sql_db_schema (qp4zxc69r)
 Call ID: qp4zxc69r
  Args:
    table_names: loans
=================================[1m Tool Message [0m=================================
Name: sql_db_schema


CREATE TABLE loans (
	loan_id SERIAL NOT NULL, 
	customer_id INTEGER, 
	branch_id INTEGER, 
	loan_type TEXT, 
	principal NUMERIC(12, 2), 
	interest_rate NUMERIC(5, 2), 
	tenure_months INTEGER, 
	disbursed_at DATE, 
	status TEXT, 
	CONSTRAINT loans_pkey PRIMARY KEY (loan_id), 
	CONSTRAINT loans_branch_id_fkey FOREIGN KEY(branch_id) REFERENCES branches (branch_id), 
	CONSTRAINT loans_customer_id_fkey FOREIGN KEY(customer_id) REFERENCES customers (customer_id), 
	CONSTRAINT loans_loan_type_check CHECK (loan_type = ANY (ARRAY['HOME'::text, 'PERSONAL'::text, 'AUTO'::text])), 
	CONSTRAINT loans_status_check CHECK (status = ANY (ARRAY['ACTIVE'::text, 'CLOSED'::text, 'NPA'::text]))
)

/*
2 rows from loans table:
loan_id	customer_id	branch_id	loan_type	principal	interest_rate	tenure_months	disbursed_at	status
1	42	10	PERSONAL	1458636.00	11.16	12	2020-01-20	CLOSED
2	37	4	HOME	1352665.00	13.38	120	2021-06-27	ACTIVE
*/
==================================[1m Ai Message [0m==================================
Tool Calls:
  sql_db_query (9e3q9n0bq)
 Call ID: 9e3q9n0bq
  Args:
    query: SELECT * FROM loans WHERE disbursed_at BETWEEN '2023-01-01' AND '2023-06-30' LIMIT 5
=================================[1m Tool Message [0m=================================
Name: sql_db_query

[(3, 5, 8, 'PERSONAL', Decimal('781924.00'), Decimal('10.85'), 60, datetime.date(2023, 1, 17), 'NPA'), (9, 28, 18, 'HOME', Decimal('1529891.00'), Decimal('9.12'), 24, datetime.date(2023, 1, 22), 'ACTIVE'), (19, 28, 12, 'AUTO', Decimal('1301100.00'), Decimal('15.86'), 60, datetime.date(2023, 5, 26), 'NPA'), (21, 35, 7, 'PERSONAL', Decimal('1991071.00'), Decimal('12.31'), 36, datetime.date(2023, 6, 30), 'CLOSED'), (28, 13, 5, 'HOME', Decimal('521105.00'), Decimal('8.46'), 60, datetime.date(2023, 6, 5), 'ACTIVE')]
==================================[1m Ai Message [0m==================================

The loans disbursed between January 2023 and June 2023 are:

- Loan ID: 3, Customer ID: 5, Branch ID: 8, Loan Type: PERSONAL, Principal: 781924.00, Interest Rate: 10.85, Tenure: 60 months, Disbursed Date: 2023-01-17, Status: NPA
- Loan ID: 9, Customer ID: 28, Branch ID: 18, Loan Type: HOME, Principal: 1529891.00, Interest Rate: 9.12, Tenure: 24 months, Disbursed Date: 2023-01-22, Status: ACTIVE
- Loan ID: 19, Customer ID: 28, Branch ID: 12, Loan Type: AUTO, Principal: 1301100.00, Interest Rate: 15.86, Tenure: 60 months, Disbursed Date: 2023-05-26, Status: NPA
- Loan ID: 21, Customer ID: 35, Branch ID: 7, Loan Type: PERSONAL, Principal: 1991071.00, Interest Rate: 12.31, Tenure: 36 months, Disbursed Date: 2023-06-30, Status: CLOSED
- Loan ID: 28, Customer ID: 13, Branch ID: 5, Loan Type: HOME, Principal: 521105.00, Interest Rate: 8.46, Tenure: 60 months, Disbursed Date: 2023-06-05, Status: ACTIVE
```

In [45]:
def call_agent(question):
    agent = create_agent(
        llm,
        tools,
        system_prompt=system_prompt,
    )

    # question = "fetch the genre of all musics in music table"

    for step in agent.stream(
        {"messages": [{"role": "user", "content": question}]},
        stream_mode="values",
    ):
        step["messages"][-1].pretty_print()

In [46]:
call_agent("fetch the genre of all musics in music table")


fetch the genre of all musics in music table
Tool Calls:
  sql_db_query (bsynjsqvt)
 Call ID: bsynjsqvt
  Args:
    query: SELECT genre FROM music
Name: sql_db_query

Error: (psycopg2.errors.UndefinedTable) relation "music" does not exist
LINE 1: SELECT genre FROM music
                          ^

[SQL: SELECT genre FROM music]
(Background on this error at: https://sqlalche.me/e/20/f405)
Tool Calls:
  sql_db_list_tables (vf0m4xec4)
 Call ID: vf0m4xec4
  Args:
    tool_input:
Name: sql_db_list_tables

branches, customers, deposit_accounts, loans, repayments
Tool Calls:
  sql_db_query (a39p8ad6x)
 Call ID: a39p8ad6x
  Args:
    query: SELECT genre FROM music
Name: sql_db_query

Error: (psycopg2.errors.UndefinedTable) relation "music" does not exist
LINE 1: SELECT genre FROM music
                          ^

[SQL: SELECT genre FROM music]
(Background on this error at: https://sqlalche.me/e/20/f405)

It seems that there is no table named "music" in the database. The available tables are

In [None]:
call_agent("")