## SQL AGENT

In [1]:
import warnings
import os
from dotenv import load_dotenv
# Silence library warnings so they do not clutter the cell outputs below.
warnings.filterwarnings("ignore")

# Load variables from a local .env file into the process environment
# (presumably the OpenAI credentials used by ChatOpenAI later -- confirm).
# Called once instead of twice; it stays the last expression so the cell
# still displays its boolean return value.
load_dotenv()


True

In [None]:
from langchain_community.utilities.sql_database import SQLDatabase
import os

# Resolve the database file relative to the kernel's current working directory.
current_dir = os.getcwd()

db_path = os.path.join(current_dir, "data", "northwind.sqlite")
print(db_path)


# Fail fast with a specific exception when the database file is missing.
# isfile() also rejects a directory that happens to sit at this path.
# FileNotFoundError subclasses Exception, so existing handlers still match.
if not os.path.isfile(db_path):
    raise FileNotFoundError("데이터베이스 파일이 존재하지 않습니다.")

# Build the SQLite connection URI.
db_uri = f"sqlite:///{db_path}"

# Create the LangChain SQLDatabase wrapper; sample_rows_in_table_info=0 asks
# for no sample rows in the table info it generates.
db = SQLDatabase.from_uri(db_uri, sample_rows_in_table_info=0)

print(db)

print("Databases dialect:", db.dialect)

# Optional sanity checks, left disabled.
# # List the usable table names
# print("Usable table names:", db.get_usable_table_names())

# Fetch every row of the employees table
# result = db.run("SELECT * FROM employees")
# print(result)

## OpenAI + LangChain Text-to-SQL

In [3]:
from langchain_community.utilities.sql_database import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI
import os


# Chat model at temperature 0; later cells reuse it for the chain and the agent.
llm = ChatOpenAI(temperature=0)

# Chain that turns a natural-language question into a SQL string for `db`.
chain = create_sql_query_chain(llm=llm, db=db)

# Question (Korean): "Show the names of the first 10 employees".
response = chain.invoke(
    {"question": "처음 10명의 직원의 이름을 표시해줘"}
)

# Show the generated SQL as the cell output.
response

'SELECT "employee_id", "first_name", "last_name" \nFROM employees \nLIMIT 10;'

In [4]:
# Execute the SQL generated in the previous cell and display the raw result string.
# NOTE(review): the recorded output repeats the literal 'employee_id' in every row,
# which suggests the generated "employee_id" column does not exist and SQLite fell
# back to treating the quoted name as a string -- confirm against the real schema.
db.run(response)

"[('employee_id', 'Nancy', 'Freehafer'), ('employee_id', 'Andrew', 'Cencini'), ('employee_id', 'Jan', 'Kotas'), ('employee_id', 'Mariya', 'Sergienko'), ('employee_id', 'Steven', 'Thorpe'), ('employee_id', 'Michael', 'Neipper'), ('employee_id', 'Robert', 'Zare'), ('employee_id', 'Laura', 'Giussani'), ('employee_id', 'Anne', 'Hellung-Larsen')]"

In [5]:
# Question (Korean): "How many employees are there in total?"
# Generate the SQL with the chain, then execute it against the database.
response = chain.invoke({"question": "직원은 모두 몇명이야?"})
db.run(response)

'[(9,)]'

In [6]:
# SQL 데이터베이스와 상호 작용하기 위한 도구
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

# Tool that executes a SQL string against `db`.
execute_query = QuerySQLDataBaseTool(db=db)

# Chain that uses the LLM to turn a natural-language question into a SQL query.
write_query = create_sql_query_chain(llm, db)

# Compose generation and execution into one chain.
# This rebinds `chain`, which an earlier cell used for the write-only chain.
chain = write_query | execute_query

# Question (Korean): "How many employees are there in total?"
chain.invoke({"question": "직원은 모두 몇명이야?"})

'[(9,)]'

In [7]:
# LangChain agent를 활용해서 SQL query를 실행하고 결과를 출력하는 코드

# SQL 데이터베이스와 상호작용할 수 있는 agent를 생성하는 함수를 임포트합니다
from langchain_community.agent_toolkits import create_sql_agent

# Create the SQL agent.
agent_executor = create_sql_agent(
    llm,  # language model (LLM) that drives the agent
    db=db,  # database the agent connects to
    agent_type="openai-tools",  # use the OpenAI tools agent type
    verbose=True,  # enable verbose output (prints the agent's tool-call trace)
)

In [8]:
# Question (Korean): "Which region has the most employees?"
response = agent_executor.invoke("직원이 많이 있는 지역은?")["output"]
# "output" holds the agent's final natural-language answer, not a SQL query,
# so the printed label says so.
print("Agent answer:", response)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcustomers, employee_privileges, employees, inventory_transaction_types, inventory_transactions, invoices, order_details, order_details_status, orders, orders_status, orders_tax_status, privileges, products, purchase_order_details, purchase_order_status, purchase_orders, sales_reports, shippers, strings, suppliers[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'employees'}`


[0m[33;1m[1;3m
CREATE TABLE employees (
)[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT COUNT(*) AS employee_count, city FROM employees GROUP BY city ORDER BY employee_count DESC LIMIT 10'}`
responded: It seems like the schema information for the `employees` table is not available. Let me try querying the table again to get more details.

[0m[36;1m[1;3m[(4, 'Seattle'), (3, 'Redmond'), (1, 'Kirkland'), (1, 'Bellevue')][0m[32;1m[1;3mThe regio

In [9]:
# Ask, in English, which region has the most orders.
response = agent_executor.invoke("What is the region with the most orders?")["output"]
# "output" holds the agent's final natural-language answer, not a SQL query,
# so the printed label says so.
print("Agent answer:", response)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcustomers, employee_privileges, employees, inventory_transaction_types, inventory_transactions, invoices, order_details, order_details_status, orders, orders_status, orders_tax_status, privileges, products, purchase_order_details, purchase_order_status, purchase_orders, sales_reports, shippers, strings, suppliers[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'orders, customers'}`


[0m[33;1m[1;3m
CREATE TABLE customers (
)


CREATE TABLE orders (
)[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT region, COUNT(*) AS order_count FROM orders GROUP BY region ORDER BY order_count DESC LIMIT 1'}`
responded: I will query the "orders" table to find the region with the most orders.

[0m[36;1m[1;3mError: (sqlite3.OperationalError) no such column: region
[SQL: SELECT region, COUNT(*) AS order_count FROM orders GROUP BY region

In [10]:
# Hand-written reference query, assuming the needed columns exist:
# join customers to orders, count orders per customer city, keep the top city.
# NOTE(review): LIMIT 1 has no tie-breaker, and the agent output later in this
# notebook shows several cities tied at 6 orders, so which city is returned is
# presumably arbitrary -- confirm whether that matters.
correct_query = """
SELECT customers.city, COUNT(orders.id) AS order_count
FROM customers
JOIN orders ON customers.id = orders.customer_id
GROUP BY customers.city
ORDER BY order_count DESC
LIMIT 1;
"""
result = db.run(correct_query)
print("Correct Query Result:", result)


Correct Query Result: [('Portland', 6)]


In [11]:
from langchain_community.agent_toolkits import create_sql_agent
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_openai import ChatOpenAI
import os
from  dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Current working directory of the kernel.
current_dir = os.getcwd()

# Path to the database file.
db_path = os.path.join(current_dir, "data", "northwind.sqlite")

# Same guard as the first database cell: stop with a clear error when the
# file is missing instead of continuing with a bad path.
if not os.path.isfile(db_path):
    raise FileNotFoundError("데이터베이스 파일이 존재하지 않습니다.")

# Build the SQLite connection URI.
db_uri = f"sqlite:///{db_path}"

# Create the LangChain SQLDatabase wrapper, this time with default settings.
db = SQLDatabase.from_uri(db_uri)

# Initialise the chat model at temperature 0.
llm = ChatOpenAI(temperature=0)

# Schema description supplied by hand for the customers and orders tables.
# The agent traces earlier in this notebook show `sql_db_schema` returning
# empty column lists, so this text is given to the agent instead.
# NOTE(review): the column names and types below are hand-written; verify
# them against the actual database.
schema_info = """
customers table:
- id (integer, primary key)
- company (varchar)
- last_name (varchar)
- first_name (varchar)
- email_address (varchar)
- job_title (varchar)
- business_phone (varchar)
- home_phone (varchar)
- mobile_phone (varchar)
- fax_number (varchar)
- address (text)
- city (varchar)
- state_province (varchar)
- zip_postal_code (varchar)
- country_region (varchar)

orders table:
- id (integer, primary key)
- employee_id (integer, foreign key to employees.id)
- customer_id (integer, foreign key to customers.id)
- order_date (datetime)
- shipped_date (datetime)
- shipper_id (integer, foreign key to shippers.id)
- ship_name (varchar)
- ship_address (text)
- ship_city (varchar)
- ship_state_province (varchar)
- ship_zip_postal_code (varchar)
- ship_country_region (varchar)
- shipping_fee (decimal)
- taxes (decimal)
- payment_type (varchar)
- paid_date (datetime)
- notes (text)
- tax_rate (double)
- tax_status_id (tinyint)
- status_id (tinyint)
"""

# Create the SQL agent, this time passing the hand-written schema text in its prompt.
agent_executor = create_sql_agent(
    llm,
    db=db,
    agent_type="openai-tools",
    verbose=True,  # print the agent's tool-call trace
    top_k=10,  # presumably the row limit the agent is told to use per query -- confirm
    extra_tools=[],  # no additional tools beyond the defaults
    # Embed the schema description into the prompt suffix.
    suffix=f"When analyzing the database, use this schema information:\n{schema_info}\n\nHuman: ",
)

# Ask which region has the most orders. The original passed the stray
# literal "v" instead of the question this cell is meant to ask.
response = agent_executor.invoke("What is the region with the most orders?")["output"]
# "output" holds the agent's final natural-language answer, not a SQL query,
# so the printed label says so.
print("Agent answer:", response)

# Run the hand-written reference query directly, for comparison with the agent's answer:
# join customers to orders, count orders per customer city, keep the top city.
# NOTE(review): LIMIT 1 has no tie-breaker, and the recorded agent output shows
# several cities tied at 6 orders -- confirm whether an arbitrary pick is acceptable.
correct_query = """
SELECT customers.city, COUNT(orders.id) AS order_count
FROM customers
JOIN orders ON customers.id = orders.customer_id
GROUP BY customers.city
ORDER BY order_count DESC
LIMIT 1;
"""
result = db.run(correct_query)
print("Correct Query Result:", result)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT city, COUNT(*) as order_count FROM customers JOIN orders ON customers.id = orders.customer_id GROUP BY city ORDER BY order_count DESC LIMIT 10'}`


[0m[36;1m[1;3m[('Portland', 6), ('Milwaukee', 6), ('Chicago', 6), ('New York', 5), ('Miami', 4), ('Memphis', 4), ('Las Vegas', 4), ('Denver', 4), ('Los Angelas', 3), ('Seattle', 2)][0m[32;1m[1;3mThe regions with the highest number of orders are:
1. Portland - 6 orders
2. Milwaukee - 6 orders
3. Chicago - 6 orders
4. New York - 5 orders
5. Miami - 4 orders
6. Memphis - 4 orders
7. Las Vegas - 4 orders
8. Denver - 4 orders
9. Los Angeles - 3 orders
10. Seattle - 2 orders[0m

[1m> Finished chain.[0m
Generated Query: The regions with the highest number of orders are:
1. Portland - 6 orders
2. Milwaukee - 6 orders
3. Chicago - 6 orders
4. New York - 5 orders
5. Miami - 4 orders
6. Memphis - 4 orders
7. Las Vegas - 4 orders