In [1]:
import requests, pathlib

# Public location of the Chinook sample SQLite database and the local file
# (relative to the current working directory) that it is cached in.
url = "https://storage.googleapis.com/benchmarks-artifacts/chinook/Chinook.db"

filepath = pathlib.Path("Chinook.db")

if filepath.exists():
    # Nothing to fetch: a previous run already left the file in place.
    print(f"File {filepath} already exists.")
else:
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang the notebook on "Run All".
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        filepath.write_bytes(response.content)
        print(f"Downloaded {filepath}")
    else:
        # Best-effort: report the HTTP status and leave no partial file behind.
        print(f"Failed to download file: {response.status_code}")

File Chinook.db already exists.


In [None]:
import os
from langchain_community.utilities import SQLDatabase
from mypackage import docroot

# Build the database path as "<data dir><separator><file name>".
# NOTE(review): the download cell writes `filepath` relative to the current
# working directory, while this cell looks for it inside the data directory —
# confirm both refer to the same location.
_data_dir = docroot.get_data_dir() + os.sep
db_path = f"{_data_dir}{filepath}"

# SQLite creates an empty database when the file is missing, which would only
# surface later as an empty table list or a "no such table" error. Fail early
# with a clear message instead.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

# Quick sanity check of the connection: dialect, table names, and a few rows.
print(f"Dialect: {db.dialect}")
print(f"Available tables: {db.get_usable_table_names()}")
print(f'Sample output: {db.run("SELECT * FROM Artist LIMIT 5;")}')

Dialect: sqlite
Available tables: ['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']
Sample output: [(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains')]


In [None]:
from langchain_ollama import ChatOllama

# Chat model handle for the "llama3.2" Ollama model, created with temperature 0.
llm = ChatOllama(temperature=0, model="llama3.2")

In [30]:
from langchain.chat_models import init_chat_model

# Rebinds `llm`, replacing the ChatOllama instance created in the earlier cell.
# NOTE(review): that cell uses temperature=0 while this one uses 0.7 — confirm
# which setting is intended for SQL generation.
llm = init_chat_model(
    "llama3.2",
    model_provider="ollama",
    temperature=0.7,
    # The Ollama chat model names its output-length limit `num_predict`;
    # `max_tokens` is not one of its fields, so the 1024-token cap is passed
    # under the name the provider actually reads.
    num_predict=1024,
)

In [31]:
from langchain_community.agent_toolkits import SQLDatabaseToolkit

# Combine the database handle and the chat model into a SQL toolkit, then ask
# it for the tools it provides.
toolkit = SQLDatabaseToolkit(llm=llm, db=db)
tools = toolkit.get_tools()

# Print one line per tool with its name and description.
for tool in tools:
    print(
        f"Tool name: {tool.name}, description: {tool.description}"
    )

Tool name: sql_db_query, description: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.
Tool name: sql_db_schema, description: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3
Tool name: sql_db_list_tables, description: Input is an empty string, output is a comma-separated list of tables in the database.
Tool name: sql_db_query_checker, description: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!


In [27]:
# Values substituted into the prompt template: the SQL dialect reported by the
# database handle and the default cap on the number of rows per query.
_prompt_values = {"dialect": db.dialect, "top_k": 5}

# System prompt for the SQL agent; the {dialect} and {top_k} placeholders are
# filled in from `_prompt_values`.
system_prompt = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.

You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.

To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.

Then you should query the schema of the most relevant tables.
""".format(**_prompt_values)

In [32]:
from langchain.agents import create_agent
# Create the agent from the chat model, the SQL toolkit's tools, and the
# system prompt prepared above.
agent = create_agent(model=llm, tools=tools, system_prompt=system_prompt)

In [None]:
from mypackage import userinput

# Obtain the question to ask; the helper is given a prompt text and a default
# question (presumably used when the user enters nothing — verify in
# mypackage.userinput).
user_query = userinput.get_user_input(
    "Stelle deine Frage!",
    default="List the 10 longest tracks in the database.",
)

# The agent takes a list of chat messages; send the question as one user message.
agent_input = {"messages": [{"role": "user", "content": user_query}]}

# Each streamed step carries a "messages" list; print only its newest entry.
for step in agent.stream(agent_input, stream_mode="values"):
    latest_message = step["messages"][-1]
    latest_message.pretty_print()


List the 10 longest tracks in the database.
Tool Calls:
  sql_db_list_tables (9047c936-e542-4d67-b16c-6988f3a9104a)
 Call ID: 9047c936-e542-4d67-b16c-6988f3a9104a
  Args:
  sql_db_schema (4c0d88f5-47e7-4099-85a0-a16dec8b5e8f)
 Call ID: 4c0d88f5-47e7-4099-85a0-a16dec8b5e8f
  Args:
    table_names: track
  sql_db_query_checker (92bac3ac-4a09-4dca-af96-adf59108466b)
 Call ID: 92bac3ac-4a09-4dca-af96-adf59108466b
  Args:
    query: SELECT length(track_name) FROM track ORDER BY length(track_name) DESC LIMIT 10
Name: sql_db_query_checker

SELECT length(track_name) FROM track ORDER BY length(track_name) DESC LIMIT 10

In the database schema for 'track', there are no columns that can provide us with information on track length. The Track table contains columns such as track_id, album_id, artist_id, title, and duration (in milliseconds). We can use this to answer the question.
{"name": "sql_db_schema", "parameters": {"table_names":"['album', 'artist']"}}
