In [2]:
import boto3
import json

from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_aws import ChatBedrock
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import StrOutputParser
from langchain.callbacks.tracers import ConsoleCallbackHandler
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

In [8]:
# Open the department-store SQLite database. Three sample rows are included
# in each table description that LangChain renders.
db = SQLDatabase.from_uri(
    "sqlite:///data/department_store.sqlite",
    sample_rows_in_table_info=3,
)

# Smoke checks: neither call is the last expression of the cell, so their
# return values are discarded and nothing is displayed.
db.get_usable_table_names()
db.run("SELECT * FROM Customers LIMIT 10;")

# Print the schema text (DDL plus sample rows) reported by the database.
context = db.get_context()
print(context["table_info"])


CREATE TABLE "Addresses" (
	address_id INTEGER, 
	address_details VARCHAR(255), 
	PRIMARY KEY (address_id)
)

/*
3 rows from Addresses table:
address_id	address_details
1	28481 Crist Circle
East Burdettestad, IA 21232
2	0292 Mitchel Pike
Port Abefurt, IA 84402-4249
3	4062 Mante Place
West Lindsey, DE 76199-8015
*/


CREATE TABLE "Customer_Addresses" (
	customer_id INTEGER NOT NULL, 
	address_id INTEGER NOT NULL, 
	date_from DATETIME NOT NULL, 
	date_to DATETIME, 
	PRIMARY KEY (customer_id, address_id), 
	FOREIGN KEY(customer_id) REFERENCES "Customers" (customer_id), 
	FOREIGN KEY(address_id) REFERENCES "Addresses" (address_id)
)

/*
3 rows from Customer_Addresses table:
customer_id	address_id	date_from	date_to
2	9	2017-12-11 05:00:22	2018-03-20 20:52:34
1	6	2017-10-07 23:00:26	2018-02-28 14:53:52
10	8	2017-04-04 20:00:27	2018-02-27 20:08:33
*/


CREATE TABLE "Customer_Orders" (
	order_id INTEGER, 
	customer_id INTEGER NOT NULL, 
	order_status_code VARCHAR(10) NOT NULL, 
	order_date DA

### Langchain을 통한 DB 연결

https://python.langchain.com/v0.1/docs/use_cases/sql/prompting/

In [4]:
# AWS region passed to the Bedrock client.
region_name = "us-west-2"

# Anthropic-specific request parameters, handed to ChatBedrock as model_kwargs.
model_kwargs = dict(
    anthropic_version="bedrock-2023-05-31",
    max_tokens=2048,
    temperature=0,
    stop_sequences=["\n\nHuman"],
)

# Claude 3 Sonnet chat model. Streaming is enabled and the stdout callback
# echoes the generated text while it is produced.
llm = ChatBedrock(
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",  # foundation model to use
    model_kwargs=model_kwargs,
    region_name=region_name,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)

### Fewshot

In [5]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


# Shape of the JSON object the model is asked to return. The class is handed
# to JsonOutputParser below via `pydantic_object`.
class SQLOutput(BaseModel):
    question: str = Field(description="User Question")  # the question being answered
    sql_query: str = Field(description="SQL Query to run")  # the generated SQL text
    
# Parser for the model's JSON reply; its format instructions are inserted into
# the prompt suffix when the few-shot prompt is built.
output_parser = JsonOutputParser(pydantic_object=SQLOutput)

In [6]:
# Few-shot demonstrations: each entry pairs a natural-language question
# ("input") with the SQL that answers it ("query").
# NOTE(review): the same questions appear at the head of the evaluation run
# logged further down in this notebook — confirm whether that overlap between
# demonstrations and scored questions is intended.
examples = [
    dict(
        input="What are the ids of the top three products that were purchased in the largest amount?",
        query="SELECT product_id FROM product_suppliers ORDER BY total_amount_purchased DESC LIMIT 3",
    ),
    dict(
        input="Give the ids of the three products purchased in the largest amounts.",
        query="SELECT product_id FROM product_suppliers ORDER BY total_amount_purchased DESC LIMIT 3",
    ),
    dict(
        input="What are the product id and product type of the cheapest product?",
        query="SELECT product_id ,  product_type_code FROM products ORDER BY product_price LIMIT 1",
    ),
    dict(
        input="Give the id and product type of the product with the lowest price.",
        query="SELECT product_id ,  product_type_code FROM products ORDER BY product_price LIMIT 1",
    ),
    dict(
        input="Find the number of different product types.",
        query="SELECT count(DISTINCT product_type_code) FROM products",
    ),
]

In [9]:
# Prompt prefix: SQLite-expert instructions, the table schema ({table_info})
# and a self-review checklist for the drafted query. {dialect} and {top_k}
# are template variables. It is passed as `prefix` to the few-shot prompt, so
# the rendered examples follow this text.
prefix_template = """
You are a SQLite expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Only use the following tables:
{table_info}

Write an initial draft of the query. Then double check the {dialect} query for common mistakes, including:
- Using NOT IN with NULL values
- Using UNION when UNION ALL should have been used
- Using BETWEEN for exclusive ranges
- Data type mismatch in predicates
- Properly quoting identifiers
- Using the correct number of arguments for functions
- Casting to the correct data type
- Using the proper columns for joins

Below are a number of examples of questions and their corresponding SQL queries.
"""

# Prompt suffix: the user's question, the JSON output-format instructions
# ({output_format_instruction}) and the fallback rule that asks for a JSON
# object with empty string values when no suitable answer exists.
suffix_template = """
User input: {input}
SQL query: 

{output_format_instruction}
SQL query part should be written in one line.

If you don't have any appropriate answer, just return json with empty string values.

Skip the preamble and go straight into json
"""

# Annotated schema text: the DDL and three sample rows for every table, with
# a one-line description comment after each CREATE TABLE statement. It is
# bound to {table_info} when the few-shot prompt is built.
# The Supplier_Addresses description used to repeat the Addresses description
# verbatim; it now describes the supplier-to-address mapping, mirroring the
# wording already used for Customer_Addresses.
test = '''
CREATE TABLE "Addresses" (
	address_id INTEGER, 
	address_details VARCHAR(255), 
	PRIMARY KEY (address_id)
)
/*
Table containing the addresses of suppliers and customers.
*/

/*
3 rows from Addresses table:
address_id	address_details
1	28481 Crist Circle, East Burdettestad, IA 21232
2	0292 Mitchel Pike, Port Abefurt, IA 84402-4249
3	4062 Mante Place, West Lindsey, DE 76199-8015
*/

CREATE TABLE "Customer_Addresses" (
	customer_id INTEGER NOT NULL, 
	address_id INTEGER NOT NULL, 
	date_from DATETIME NOT NULL, 
	date_to DATETIME, 
	PRIMARY KEY (customer_id, address_id), 
	FOREIGN KEY(customer_id) REFERENCES "Customers" (customer_id), 
	FOREIGN KEY(address_id) REFERENCES "Addresses" (address_id)
)
/*
A table that maps customers to their addresses with the duration of each address association.
*/

/*
3 rows from Customer_Addresses table:
customer_id	address_id	date_from	date_to
2	9	2017-12-11 05:00:22	2018-03-20 20:52:34
1	6	2017-10-07 23:00:26	2018-02-28 14:53:52
10	8	2017-04-04 20:00:27	2018-02-27 20:08:33
*/

CREATE TABLE "Customer_Orders" (
	order_id INTEGER, 
	customer_id INTEGER NOT NULL, 
	order_status_code VARCHAR(10) NOT NULL, 
	order_date DATETIME NOT NULL, 
	PRIMARY KEY (order_id), 
	FOREIGN KEY(customer_id) REFERENCES "Customers" (customer_id)
)
/*
Table containing information about the orders placed by customers.
*/

/*
3 rows from Customer_Orders table:
order_id	customer_id	order_status_code	order_date
1	12	Completed	2018-02-10 15:44:48
2	4	New	2018-01-31 17:49:18
3	1	PartFilled	2018-02-26 12:39:33
*/

CREATE TABLE "Customers" (
	customer_id INTEGER, 
	payment_method_code VARCHAR(10) NOT NULL, 
	customer_code VARCHAR(20), 
	customer_name VARCHAR(80), 
	customer_address VARCHAR(255), 
	customer_phone VARCHAR(80), 
	customer_email VARCHAR(80), 
	PRIMARY KEY (customer_id)
)
/*
Table containing personal information and contact details of customers.
*/

/*
3 rows from Customers table:
customer_id	payment_method_code	customer_code	customer_name	customer_address	customer_phone	customer_email
1	Credit Card	401	Ahmed	75099 Tremblay Port Apt. 163, South Norrisland, SC 80546	254-072-4068x33935	margarett.vonrueden@example.com
2	Credit Card	665	Chauncey	8408 Lindsay Court, East Dasiabury, IL 72656-3552	+41(8)1897032009	stiedemann.sigrid@example.com
3	Direct Debit	844	Lukas	7162 Rodolfo Knoll Apt. 502, Lake Annalise, TN 35791-8871	197-417-3557	joelle.monahan@example.com
*/

CREATE TABLE "Department_Store_Chain" (
	dept_store_chain_id INTEGER, 
	dept_store_chain_name VARCHAR(80), 
	PRIMARY KEY (dept_store_chain_id)
)
/*
Table containing the names of department store chains.
*/

/*
3 rows from Department_Store_Chain table:
dept_store_chain_id	dept_store_chain_name
1	South
2	West
3	East
*/

CREATE TABLE "Department_Stores" (
	dept_store_id INTEGER, 
	dept_store_chain_id INTEGER, 
	store_name VARCHAR(80), 
	store_address VARCHAR(255), 
	store_phone VARCHAR(80), 
	store_email VARCHAR(80), 
	PRIMARY KEY (dept_store_id), 
	FOREIGN KEY(dept_store_chain_id) REFERENCES "Department_Store_Chain" (dept_store_chain_id)
)
/*
Table containing information about individual department stores including their chain affiliation.
*/

/*
3 rows from Department_Stores table:
dept_store_id	dept_store_chain_id	store_name	store_address	store_phone	store_email
1	1	store_name	01290 Jeremie Parkway Suite 753, North Arielle, MS 51249	(948)944-5099x2027	bmaggio@example.com
2	3	store_name	082 Purdy Expressway, O'Connellshire, IL 31732	877-917-5029	larissa10@example.org
3	4	store_name	994 Travis Plains, North Wadeton, WV 27575-3951	1-216-312-0375	alexandro.mcclure@example.net
*/

CREATE TABLE "Departments" (
	department_id INTEGER, 
	dept_store_id INTEGER NOT NULL, 
	department_name VARCHAR(80), 
	PRIMARY KEY (department_id), 
	FOREIGN KEY(dept_store_id) REFERENCES "Department_Stores" (dept_store_id)
)
/*
Table containing information about departments within department stores.
*/

/*
3 rows from Departments table:
department_id	dept_store_id	department_name
1	5	human resource
2	11	purchasing
3	4	marketing
*/

CREATE TABLE "Order_Items" (
	order_item_id INTEGER, 
	order_id INTEGER NOT NULL, 
	product_id INTEGER NOT NULL, 
	PRIMARY KEY (order_item_id), 
	FOREIGN KEY(product_id) REFERENCES "Products" (product_id), 
	FOREIGN KEY(order_id) REFERENCES "Customer_Orders" (order_id)
)
/*
Table containing details about items included in customer orders.
*/

/*
3 rows from Order_Items table:
order_item_id	order_id	product_id
1	9	7
2	1	3
3	5	2
*/

CREATE TABLE "Product_Suppliers" (
	product_id INTEGER NOT NULL, 
	supplier_id INTEGER NOT NULL, 
	date_supplied_from DATETIME NOT NULL, 
	date_supplied_to DATETIME, 
	total_amount_purchased VARCHAR(80), 
	total_value_purchased DECIMAL(19, 4), 
	PRIMARY KEY (product_id, supplier_id), 
	FOREIGN KEY(product_id) REFERENCES "Products" (product_id), 
	FOREIGN KEY(supplier_id) REFERENCES "Suppliers" (supplier_id)
)
/*
Table containing information about products and their suppliers.
*/

/*
3 rows from Product_Suppliers table:
product_id	supplier_id	date_supplied_from	date_supplied_to	total_amount_purchased	total_value_purchased
4	3	2017-06-19 00:49:05	2018-03-24 19:29:18	89366.05	36014.6000
8	4	2017-07-02 00:35:12	2018-03-25 07:30:49	25085.57	36274.5600
3	3	2017-10-14 19:15:37	2018-03-24 02:29:44	15752.45	7273.7400
*/

CREATE TABLE "Products" (
	product_id INTEGER, 
	product_type_code VARCHAR(10) NOT NULL, 
	product_name VARCHAR(80), 
	product_price DECIMAL(19, 4), 
	PRIMARY KEY (product_id)
)
/*
Table containing details about the products available.
*/

/*
3 rows from Products table:
product_id	product_type_code	product_name	product_price
1	Clothes	red jeans	734.7300
2	Clothes	yellow jeans	687.2300
3	Clothes	black jeans	695.1600
*/

CREATE TABLE "Staff" (
	staff_id INTEGER, 
	staff_gender VARCHAR(1), 
	staff_name VARCHAR(80), 
	PRIMARY KEY (staff_id)
)
/*
Table containing personal information about the staff.
*/

/*
3 rows from Staff table:
staff_id	staff_gender	staff_name
1	1	Tom
2	1	Malika
3	1	Katelynn
*/

CREATE TABLE "Staff_Department_Assignments" (
	staff_id INTEGER NOT NULL, 
	department_id INTEGER NOT NULL, 
	date_assigned_from DATETIME NOT NULL, 
	job_title_code VARCHAR(10) NOT NULL, 
	date_assigned_to DATETIME, 
	PRIMARY KEY (staff_id, department_id), 
	FOREIGN KEY(staff_id) REFERENCES "Staff" (staff_id), 
	FOREIGN KEY(department_id) REFERENCES "Departments" (department_id)
)
/*
Table mapping staff members to their department assignments and job titles.
*/

/*
3 rows from Staff_Department_Assignments table:
staff_id	department_id	date_assigned_from	job_title_code	date_assigned_to
5	4	2017-06-11 22:55:20	Department Manager	2018-03-23 21:59:11
10	5	2017-12-18 19:12:15	Sales Person	2018-03-23 20:25:24
1	5	2018-02-14 03:15:29	Clerical Staff	2018-03-24 19:57:56
*/

CREATE TABLE "Supplier_Addresses" (
	supplier_id INTEGER NOT NULL, 
	address_id INTEGER NOT NULL, 
	date_from DATETIME NOT NULL, 
	date_to DATETIME, 
	PRIMARY KEY (supplier_id, address_id), 
	FOREIGN KEY(supplier_id) REFERENCES "Suppliers" (supplier_id), 
	FOREIGN KEY(address_id) REFERENCES "Addresses" (address_id)
)

/*
A table that maps suppliers to their addresses with the duration of each address association.
*/

/*
3 rows from Supplier_Addresses table:
supplier_id	address_id	date_from	date_to
4	5	2016-09-22 16:41:31	2018-03-14 20:06:37
3	9	2014-11-07 19:18:49	2018-03-16 16:39:58
3	2	2008-11-22 12:01:25	2018-03-02 19:50:22
*/


CREATE TABLE "Suppliers" (
	supplier_id INTEGER, 
	supplier_name VARCHAR(80), 
	supplier_phone VARCHAR(80), 
	PRIMARY KEY (supplier_id)
)

/*
Table containing the name and phone of suppliers 
*/

/*
3 rows from Suppliers table:
supplier_id	supplier_name	supplier_phone
1	Lidl	(692)009-5928
2	AB Store	1-483-283-4742
3	Tesco	287-071-1153x254
*/

'''

# Template used to render each few-shot pair inside the prompt.
example_prompt = PromptTemplate.from_template("User input: {input}\nSQL query: {query}")

# Few-shot prompt assembled from the prefix, the rendered examples and the
# suffix. The JSON format instructions and the annotated schema text are
# fixed here as partial variables; "input", "top_k" and "dialect" stay open.
prompt = FewShotPromptTemplate(
    prefix=prefix_template,
    examples=examples[:5],  # at most the first five demonstrations
    example_prompt=example_prompt,
    suffix=suffix_template,
    input_variables=["input", "top_k", "dialect"],
    partial_variables={
        "output_format_instruction": output_parser.get_format_instructions(),
        "table_info": test,
    },
)

In [12]:
# Build the question -> SQL generation chain around the custom few-shot
# prompt, then print the chain's first prompt for inspection.
write_query = create_sql_query_chain(llm=llm, db=db, prompt=prompt)
write_query.get_prompts()[0].pretty_print()


You are a SQLite expert. Given an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Only use the following tables:

CREATE TABLE "Addresses" (
	address_id INTEGER, 
	address_deta

### 잘되는 쿼리 실행

In [13]:
# Sample question used for the single end-to-end run of the chain below.
question = "How many customers are there?"

In [14]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Question -> SQL: the generation chain followed by the JSON parser, so the
# parsed output carries "question" and "sql_query" keys.
chain = write_query | output_parser
result = chain.invoke({"question": question})

# The prompt suffix tells the model to return empty string values when it has
# no suitable answer, so only hit the database when there is SQL to execute.
generated_sql = result.get("sql_query") or ""
query_result = db.run(generated_sql) if generated_sql.strip() else ""
query_result

{"question": "How many customers are there?", "sql_query": "SELECT COUNT(*) FROM \"Customers\""}

'[(15,)]'

In [23]:
# Display the parsed chain output (the question plus the generated SQL).
result

{'question': 'How many customers are there?',
 'sql_query': 'SELECT COUNT(*) FROM "Customers"'}

In [21]:
# Invoke the chain again on the same question and display the parsed output directly.
chain.invoke({"question": question})

{"question": "How many customers are there?", "sql_query": "SELECT COUNT(*) FROM \"Customers\""}

{'question': 'How many customers are there?',
 'sql_query': 'SELECT COUNT(*) FROM "Customers"'}

In [24]:


# Answer-synthesis prompt: given a question, the SQL that was run and its
# result, ask the model for a natural-language answer.
advanced_query_generation_template = '''

H: You are a expert.
Given the following user Question, corresponding SQLQuery, and SQLResult, Answer the user Question.
|
Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: 

A: 
'''

# The SQL and its result from the earlier run are fixed into the prompt as
# partial variables, which leaves {question} as the only runtime input.
answer_prompt = PromptTemplate.from_template(
    advanced_query_generation_template,
    partial_variables=dict(query=result["sql_query"], result=query_result),
)

# prompt -> chat model -> plain string
answer = answer_prompt | llm | StrOutputParser()
response = answer.invoke({"question": question})

# One call, newline-separated: emits the same text as three separate prints.
print("\n================", "\n\nRESPONSE:\n", response, sep="\n")

{"question": "How many customers are there?", "sql_query": "SELECT COUNT(*) FROM \"Customers\""}Based on the provided SQL query `SELECT COUNT(*) FROM "Customers"` and the SQL result `[(15,)]`, the answer to the question "How many customers are there?" is:

Answer: There are 15 customers.

The `COUNT(*)` function in SQL is used to count the number of rows in a table. When applied to the "Customers" table, it returns the total number of rows, which represents the number of customers. The result `[(15,)]` indicates that the count is 15, so there are 15 customers in the database.


RESPONSE:

Based on the provided SQL query `SELECT COUNT(*) FROM "Customers"` and the SQL result `[(15,)]`, the answer to the question "How many customers are there?" is:

Answer: There are 15 customers.

The `COUNT(*)` function in SQL is used to count the number of rows in a table. When applied to the "Customers" table, it returns the total number of rows, which represents the number of customers. The result `[

## Evaluation QA set 생성

In [16]:
## Keep only the examples that belong to the target database
target_db = 'department_store'
with open('./evaluation/train_data/train_spider.json', 'rb') as ofp:
    train = json.load(ofp)

# (question, gold query) pairs for the target database, in file order.
res = [(t['question'], t['query']) for t in train if t['db_id'] == target_db]


# Destination file for the questions
question_path = './evaluation/question.txt'

# Destination file for the gold answers
answer_path = './evaluation/answer.txt'

# Write the questions and the gold answers to two parallel files
def write_to_files(data, file1_path, file2_path, db_id=None):
    """Write question/answer pairs to two line-aligned text files.

    Args:
        data: iterable of items whose element 0 is the question text and
            element 1 is the gold SQL query (both strings).
        file1_path: destination for the questions, one per line.
        file2_path: destination for the answers; each line is the query,
            a tab, and the database id.
        db_id: database id appended to every answer line. When omitted, the
            notebook-level ``target_db`` is used, so existing three-argument
            calls behave exactly as before.
    """
    if db_id is None:
        # Backward-compatible default: fall back to the notebook global.
        db_id = target_db
    with open(file1_path, 'w') as file1, open(file2_path, 'w') as file2:
        for item in data:
            file1.write(item[0] + '\n')
            file2.write(item[1] + f'\t{db_id}' + '\n')

# Call the function to create both files
write_to_files(res, question_path, answer_path)

print(f"Data has been written to {question_path} and {answer_path}")


Data has been written to ./evaluation/question.txt and ./evaluation/answer.txt


### pred data 생성

In [24]:
import re

def normalize_sql(query):
    """Return `query` on one line with double quotes around single words removed.

    Every double-quoted run of word characters ("name") is replaced by the
    bare word, then each run of whitespace becomes a single space and the
    ends are trimmed.

    NOTE(review): the pattern cannot tell identifiers from values, so a
    double-quoted one-word string literal loses its quotes as well.
    """
    unquoted = re.sub(r'"(\w+)"', r'\1', query)
    single_spaced = re.sub(r'\s+', ' ', unquoted)
    return single_spaced.strip()



# File that holds the questions
question_path = './evaluation/question.txt'

# File that holds the gold answers
answer_path = './evaluation/answer.txt'

# One question per line: blank lines are skipped and anything after a tab is dropped.
with open(question_path) as f:
    qlist = [line.strip().split('\t')[0] for line in f if len(line.strip()) > 0]


# Quick-run switch: when True only the first five questions are kept.
# Named `quick_run` rather than `test`, because `test` already holds the
# annotated schema string that is bound to {table_info} in the prompt.
quick_run = False
file_prefix = 'All_ver2'
if quick_run:
    qlist = qlist[:5]
    

In [17]:
import re

def normalize_sql(query):
    """Strip double quotes from quoted single words and collapse whitespace.

    The first substitution rewrites each "word" as word; the second squeezes
    every whitespace run to one space. Leading and trailing whitespace is
    removed from the result.

    NOTE(review): this definition repeats an identical one earlier in the
    notebook; whichever cell ran last is the one in effect.
    """
    without_quotes = re.sub(r'"(\w+)"', lambda match: match.group(1), query)
    return re.sub(r'\s+', ' ', without_quotes).strip()



# File that holds the questions
question_path = './evaluation/question.txt'

# File that holds the gold answers
answer_path = './evaluation/answer.txt'

# One question per line: blank lines are skipped and anything after a tab is dropped.
with open(question_path) as f:
    qlist = [line.strip().split('\t')[0] for line in f if len(line.strip()) > 0]


# Quick-run switch: when True only the first five questions are kept.
# Named `quick_run` rather than `test`, because `test` already holds the
# annotated schema string that is bound to {table_info} in the prompt.
quick_run = False
# Prefix of the prediction file written by the scoring loop.
file_prefix = 'no_fewshot_yes_description'
if quick_run:
    qlist = qlist[:5]
    

## Get score

In [18]:
# Written in place of an empty prediction. The prompt allows the model to
# return empty string values, and an empty line in the prediction file could
# break its line-by-line pairing with the gold file.
# NOTE(review): assumes evaluation.py pairs gold and predicted queries by
# line position — confirm against the script.
NO_SQL_PLACEHOLDER = 'SELECT NULL'

pred = []

for q in qlist:
    print('question===>', q)
    p = chain.invoke({"question": q})
    # A missing key or an empty value both count as "no prediction".
    predicted_sql = normalize_sql(p.get('sql_query') or '')
    pred.append(predicted_sql or NO_SQL_PLACEHOLDER)

## save file

# One normalized query per line, in the same order as the questions.
pred_path = f'./evaluation/{file_prefix}_pred.txt'
print(pred_path)
with open(pred_path, 'w') as file1:
    for item in pred:
        file1.write(item + '\n')


question===> What are the ids of the top three products that were purchased in the largest amount?
{"question": "What are the ids of the top three products that were purchased in the largest amount?", "sql_query": "SELECT \"product_id\" FROM \"Product_Suppliers\" ORDER BY \"total_amount_purchased\" DESC LIMIT 3;"}question===> Give the ids of the three products purchased in the largest amounts.
{"question": "Give the ids of the three products purchased in the largest amounts.", "sql_query": "SELECT \"product_id\" FROM \"Product_Suppliers\" ORDER BY \"total_amount_purchased\" DESC LIMIT 3;"}question===> What are the product id and product type of the cheapest product?
{"question": "What are the product id and product type of the cheapest product?", "sql_query": "SELECT \"product_id\", \"product_type_code\" FROM \"Products\" ORDER BY \"product_price\" LIMIT 1;"}question===> Give the id and product type of the product with the lowest price.
{"question": "Give the id and product type of the

{"question": "What are the distinct names of customers with an order status of Pending, sorted by customer id?", "sql_query": "SELECT DISTINCT \"customer_name\" FROM \"Customers\" c JOIN \"Customer_Orders\" o ON c.\"customer_id\" = o.\"customer_id\" WHERE o.\"order_status_code\" = 'Pending' ORDER BY c.\"customer_id\";"}question===> Find the name and address of the customers who have both New and Pending orders.
{"question": "Find the name and address of the customers who have both New and Pending orders.", "sql_query": "SELECT c.customer_name, c.customer_address FROM Customers c JOIN Customer_Orders co ON c.customer_id = co.customer_id WHERE co.order_status_code IN ('New', 'PartFilled') GROUP BY c.customer_id HAVING COUNT(DISTINCT co.order_status_code) = 2;"}question===> What are the names and addressed of customers who have both New and Pending orders?
{
  "question": "What are the names and addressed of customers who have both New and Pending orders?",
  "sql_query": "SELECT c.custom

{"question": "What are the distinct ids of customers who made an order after any order that was Cancelled?", "sql_query": "SELECT DISTINCT c.customer_id FROM Customers c JOIN Customer_Orders co ON c.customer_id = co.customer_id WHERE co.order_date > (SELECT MAX(order_date) FROM Customer_Orders WHERE order_status_code = 'Cancelled')"}question===> What is id of the staff who had a Staff Department Assignment earlier than any Clerical Staff?
{"question": "What is id of the staff who had a Staff Department Assignment earlier than any Clerical Staff?", "sql_query": "SELECT staff_id FROM Staff_Department_Assignments WHERE date_assigned_from < (SELECT MIN(date_assigned_from) FROM Staff_Department_Assignments WHERE job_title_code = 'Clerical Staff') AND job_title_code <> 'Clerical Staff' ORDER BY date_assigned_from LIMIT 1;"}question===> Return the id of the staff whose Staff Department Assignment was earlier than that of any Clerical Staff.
{"question": "Return the id of the staff whose Staff

{"question": "Return the average total amount purchased and total value purchased for the supplier who supplies the greatest number of products.", "sql_query": "SELECT AVG(total_amount_purchased) AS avg_total_amount_purchased, AVG(total_value_purchased) AS avg_total_value_purchased FROM product_suppliers WHERE supplier_id = (SELECT supplier_id FROM product_suppliers GROUP BY supplier_id ORDER BY COUNT(product_id) DESC LIMIT 1);"}question===> What is the largest and smallest customer codes?
{"question": "What is the largest and smallest customer codes?", "sql_query": "SELECT MAX(\"customer_code\") AS largest_code, MIN(\"customer_code\") AS smallest_code FROM \"Customers\""}question===> Return the maximum and minimum customer codes.
{"question": "Return the maximum and minimum customer codes.", "sql_query": "SELECT MAX(\"customer_code\") AS max_customer_code, MIN(\"customer_code\") AS min_customer_code FROM \"Customers\""}question===> List the names of all the distinct customers who boug

{"question": "Find the average price of all product clothes.", "sql_query": "SELECT AVG(\"product_price\") FROM \"Products\" WHERE \"product_type_code\" = 'Clothes'"}question===> What is the average price of clothes?
{"question": "What is the average price of clothes?", "sql_query": "SELECT AVG(\"product_price\") FROM \"Products\" WHERE \"product_type_code\" = 'Clothes'"}question===> Find the name of the most expensive hardware product.
{"question": "Find the name of the most expensive hardware product.", "sql_query": "SELECT \"product_name\" FROM \"Products\" WHERE \"product_type_code\" = 'Hardware' ORDER BY \"product_price\" DESC LIMIT 1;"}question===> What is the name of the hardware product with the greatest price?
{"question": "What is the name of the hardware product with the greatest price?", "sql_query": "SELECT \"product_name\" FROM \"Products\" WHERE \"product_type_code\" = 'Hardware' ORDER BY \"product_price\" DESC LIMIT 1;"}./evaluation/no_fewshot_yes_description_pred.txt


In [21]:
import subprocess
import sys

# Command that runs the evaluation script. It is built as an argument list so
# that paths are passed verbatim without shell quoting, and it uses the
# kernel's own interpreter instead of whichever `python` is first on PATH.
command = [
    sys.executable, 'evaluation.py',
    '--gold', answer_path,
    '--pred', pred_path,
    '--db', './data/database',
    '--table', './data/tables.json',
    '--etype', 'exec',
]

print(' '.join(command))
# Run the command with subprocess. The outcome is stored as `eval_run`, not
# `result`: `result` holds the parsed chain output that the answer-prompt
# cell reads through result["sql_query"].
eval_run = subprocess.run(command, capture_output=True, text=True)

# Show the captured output
print(eval_run.stdout)
print(eval_run.stderr)


python evaluation.py --gold './evaluation/answer.txt' --pred './evaluation/no_fewshot_yes_description_pred.txt' --db './data/database' --table './data/tables.json' --etype exec 
[pred]
[('7308 Joan Lake Suite 346\nLizethtown, DE 56522',)]
SELECT customer_address FROM Customers WHERE customer_id = 10;
[gold]
[("36594 O'Keefe Lock\nNew Cali, RI 42319",)]
SELECT T1.address_details FROM addresses AS T1 JOIN customer_addresses AS T2 ON T1.address_id  =  T2.address_id WHERE T2.customer_id  =  10
eval_err_num:1
------------------
[pred]
[('7308 Joan Lake Suite 346\nLizethtown, DE 56522',)]
SELECT c.customer_address FROM Customers c WHERE c.customer_id = 10 LIMIT 1;
[gold]
[("36594 O'Keefe Lock\nNew Cali, RI 42319",)]
SELECT T1.address_details FROM addresses AS T1 JOIN customer_addresses AS T2 ON T1.address_id  =  T2.address_id WHERE T2.customer_id  =  10
eval_err_num:2
------------------
[pred]
[(13, 3)]
SELECT product_id, COUNT(*) AS order_count FROM Order_Items GROUP BY product_id ORDER BY 

In [9]:
# Ad-hoc check: customers whose address contains "WY" and who do not pay by
# credit card, written with single-quoted literals and `<>`. Result kept in `a`.
a = db.run(''' SELECT customer_id, customer_name FROM Customers WHERE customer_address LIKE '%WY%' AND payment_method_code <> 'Credit Card' ''')

In [10]:
# Same filter written with double-quoted literals, `!=` and a lowercase table
# name. Result kept in `b`, presumably for comparison with `a`.
b = db.run(''' SELECT customer_id ,  customer_name FROM customers WHERE customer_address LIKE "%WY%" AND payment_method_code != "Credit Card" ''')
