Create Sql Agent

In [17]:
print("Sql Agent")

Sql Agent


In [18]:
%%capture --no-stderr
%pip install --upgrade --quiet langchain-community langgraph langchain-openai  python-dotenv


In [19]:
!curl -s https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql | sqlite3 Chinook.db


In [20]:

from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM Artist LIMIT 10;")

sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']


"[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham')]"

In [21]:
# Application State
from typing_extensions import TypedDict

class State(TypedDict):
    question: str
    query: str
    result: str
    answer: str

In [22]:
from langchain_openai import ChatOpenAI
# from dotenv import load_dotenv

# load_dotenv()  # Load variables from .env file

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)



In [23]:
from langchain_core.prompts import ChatPromptTemplate

system_message = """
Given an input question, create a syntactically correct {dialect} query to
run to help find the answer. Unless the user specifies in his question a
specific number of examples they wish to obtain, always limit your query to
at most {top_k} results. You can order the results by a relevant column to
return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the
few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema
description. Be careful to not query for columns that do not exist. Also,
pay attention to which column is in which table.

Only use the following tables:
{table_info}
"""

user_prompt = "Question: {input}"

query_prompt_template = ChatPromptTemplate(
    [("system", system_message), ("user", user_prompt)]
)

for message in query_prompt_template.messages:
    message.pretty_print()



Given an input question, create a syntactically correct [33;1m[1;3m{dialect}[0m query to
run to help find the answer. Unless the user specifies in his question a
specific number of examples they wish to obtain, always limit your query to
at most [33;1m[1;3m{top_k}[0m results. You can order the results by a relevant column to
return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the
few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema
description. Be careful to not query for columns that do not exist. Also,
pay attention to which column is in which table.

Only use the following tables:
[33;1m[1;3m{table_info}[0m


Question: [33;1m[1;3m{input}[0m


In [24]:
from typing_extensions import Annotated


class QueryOutput(TypedDict):
    """Generated SQL query."""

    query: Annotated[str, ..., "Syntactically valid SQL query."]


def write_query(state: State):
    """Generate SQL query to fetch information."""
    prompt = query_prompt_template.invoke(
        {
            "dialect": db.dialect,
            "top_k": 10,
            "table_info": db.get_table_info(),
            "input": state["question"],
        }
    )
    structured_llm = llm.with_structured_output(QueryOutput)
    result = structured_llm.invoke(prompt)
    return {"question": state["question"], "query": result["query"]}

In [25]:
write_query({"question": "How many Employees are there?"})

{'question': 'How many Employees are there?',
 'query': 'SELECT COUNT(*) AS EmployeeCount FROM Employee;'}

In [26]:
def execute_query(state: State):
    """Execute SQL query."""
    return {
        "question": state["question"], 
        "query": state["query"], 
        "result": db.run(state["query"])
    }

In [27]:
execute_query({
    "question": "How many employees are there?", 
    "query": "SELECT COUNT(EmployeeId) AS EmployeeCount FROM Employee;"
})

{'question': 'How many employees are there?',
 'query': 'SELECT COUNT(EmployeeId) AS EmployeeCount FROM Employee;',
 'result': '[(8,)]'}

In [28]:
def generate_answer(state: State):
    """Answer question using retrieved information as context."""
    prompt = (
        "Given the following user question, corresponding SQL query, "
        "and SQL result, answer the user question.\n\n"
        f'Question: {state["question"]}\n'
        f'SQL Query: {state["query"]}\n'
        f'SQL Result: {state["result"]}'
    )
    response = llm.invoke(prompt)
    return {"answer": response.content}

generate_answer({
    "question": "How many employees are there?", 
    "query": "SELECT COUNT(EmployeeId) AS EmployeeCount FROM Employee;",
    "result": "[(8,)]"
})

{'answer': 'There are 8 employees.'}

In [29]:
from langchain_core.runnables import RunnableLambda

chain = RunnableLambda(write_query) | RunnableLambda(execute_query) | RunnableLambda(generate_answer)

In [30]:
results = chain.invoke({"question": "How many employees are there?"})


In [31]:

print("Results:", results)
print("\nAnswer:", results["answer"])

Results: {'answer': 'There are 8 employees.'}

Answer: There are 8 employees.


Building a SQL Agent



In [32]:
from langchain_community.agent_toolkits import SQLDatabaseToolkit

toolkit = SQLDatabaseToolkit(db=db, llm=llm)

tools = toolkit.get_tools()

for tool in tools:
    print(f"Tool: {tool.__class__.__name__}")
    print(f"Description: {tool.description}\n{'-'*60}")

Tool: QuerySQLDatabaseTool
Description: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.
------------------------------------------------------------
Tool: InfoSQLDatabaseTool
Description: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3
------------------------------------------------------------
Tool: ListSQLDatabaseTool
Description: Input is an empty string, output is a comma-separated list of tables in the database.
------------------------------------------------------------
Tool: QuerySQLCheckerTool
Descr

In [33]:
system_message = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.

You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.

To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.

Then you should query the schema of the most relevant tables.
""".format(
    dialect="SQLite",
    top_k=5,
)

In [34]:
#Initializing agent
# We will use a prebuilt LangGraph agent to build our agent

from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent

agent_executor = create_react_agent(llm, tools, prompt=system_message)

In [35]:
question = "Hi, Which country's customers spent the most?"

results = agent_executor.invoke(
    {"messages": [HumanMessage(content=question)]}
)

for msg in results["messages"]:
    print("="*40)
    print(f"Type: {type(msg).__name__}")
    if hasattr(msg, "content"):
        print("Content:")
        print(msg.content)
    else:
        print(msg)
    print()

Type: HumanMessage
Content:
Hi, Which country's customers spent the most?

Type: AIMessage
Content:


Type: ToolMessage
Content:
Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track

Type: AIMessage
Content:


Type: ToolMessage
Content:

CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Employee" ("EmployeeId")
)

/*
3 rows from Customer table:
CustomerId	FirstName	LastName	Company	Address	City	State	Country	PostalCode	Phone	Fax	Email	SupportRepId
1	Luís	Gonçalves	Embraer - Empresa Brasileira de Aeronáutica S.A.	Av. Brigadeiro Fa