In [1]:
# SQL example: run a hand-written reference query against the Chinook database.
import sqlite3
from contextlib import closing
from pprint import pprint


question = "가장 많이 판매된 앨범 이름은?"

sql = """
SELECT Album.Title, COUNT(InvoiceLine.InvoiceLineId) AS SalesCount
FROM InvoiceLine
JOIN Track ON InvoiceLine.TrackId = Track.TrackId
JOIN Album ON Track.AlbumId = Album.AlbumId
GROUP BY Album.AlbumId
ORDER BY SalesCount DESC
LIMIT 1;
"""

# closing() releases the connection even when execute() raises; the plain
# connect()/close() pair only closed it on the success path.
with closing(sqlite3.connect("Chinook_Sqlite.sqlite")) as conn:
    cursor = conn.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()

print(question)
pprint(result)

가장 많이 판매된 앨범 이름은?
[('Minha Historia', 27)]


In [2]:
from langgraph.prebuilt import create_react_agent
# from openai import AzureOpenAI
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_ollama import ChatOllama
from pydantic import BaseModel
from langchain.tools import tool
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import re
from langchain_core.messages import HumanMessage

# Called before the os.getenv() lookups below (presumably so that values from
# a local .env file are available as environment variables).
load_dotenv()

# Initialise the Azure OpenAI chat client used by the selector agent.
# Deployment name, endpoint and API key are read from environment variables;
# the API version and sampling temperature are fixed here.
llm_selector = AzureChatOpenAI(
    azure_deployment=os.getenv("SKCC_AZURE_MODEL"),#"gpt-4o",
    azure_endpoint=os.getenv("SKCC_AZURE_ENDPOINT"),
    api_key=os.getenv("SKCC_AZURE_OPENAI_KEY"),
    api_version="2024-02-15-preview",
    temperature=0.7
)
#llm_selector = ChatOllama(model="qwen3:4b", temperature=0.2, format="json")
import json
# with open('Chinook_Column_Config.json', 'r', encoding='utf-8') as f:

#     json_data = json.load(f)

# global full_schema_json

# full_schema_json = json_data

@dataclass
class TableSchema:
    """In-memory view of one table entry from the schema JSON.

    Only ``table_name`` and ``columns`` are required. ``descriptions`` maps a
    column name to its description and ``samples`` holds example rows; both
    may be ``None`` when the schema entry does not provide them, which is why
    they are annotated as ``Optional``.
    """

    table_name: str
    columns: List[str]
    descriptions: Optional[Dict[str, str]] = None
    samples: Optional[List[Dict[str, Any]]] = None

def _tokenize(s: str) -> List[str]:
    return re.findall(r"[A-Za-z0-9_]+", s.lower())

def _score(text: str, q_tokens: List[str], keyword_map: Dict[str, List[str]], relation_bonus_map: Dict[str, List[str]]) -> int:
    base_score = sum(1 for qt in q_tokens if qt in text.lower())

    # keyword_map = {
    #     "판매": ["invoice", "invoiceline", "order", "payment", "track", "album"],
    #     "앨범": ["album", "track", "invoice"],
    #     "음악": ["track", "artist", "album"],
    #     "고객": ["customer", "invoice"],
    #     "직원": ["employee", "supportrep"],
    # }
    # relation_bonus_map = {
    #     "invoice": ["invoiceline", "track"],
    #     "track": ["album", "invoiceline"],
    #     "album": ["track", "artist"],
    # }

    semantic_bonus = 0
    for qt in q_tokens:
        for k, synonyms in keyword_map.items():
            if qt == k and any(word in text.lower() for word in synonyms):
                semantic_bonus += 2

    for key, related in relation_bonus_map.items():
        if key in text.lower() and any(r in text.lower() for r in related):
            semantic_bonus += 1

    return base_score + semantic_bonus


def _table_text_blob(ts: TableSchema) -> str:
    """Flatten a table into one lowercase, space-separated search string.

    The string is made of the table name, then every column name, then the
    description of every column (an empty string where a column has none).
    """
    description_of = ts.descriptions or {}
    columns_text = " ".join(ts.columns)
    descriptions_text = " ".join(description_of.get(col, "") for col in ts.columns)
    return "{} {} {}".format(ts.table_name, columns_text, descriptions_text).lower()

# Argument schema passed as args_schema to the find_relevant_tables tool.
#   query            - the user's natural-language question
#   top_k            - maximum number of tables to return (defaults to 5)
#   full_schema_json - list of per-table schema dicts; the tool reads the keys
#                      "table_name", "column_names", "description" and
#                      "sample_rows" from each entry
class FindTablesInput(BaseModel):
    query: str
    top_k: int = 5
    full_schema_json: List[Dict[str, Any]]

# Shape of one ranked result: a table name and its relevance score.
# Used as the element type in find_relevant_tables' return annotation; that
# function builds plain dicts with these two keys rather than instances.
class FindTablesOutput(BaseModel):
    table_name: str
    score: float

@tool("find_relevant_tables", args_schema=FindTablesInput, return_direct=False)
def find_relevant_tables(query: str, full_schema_json: List[Dict[str, Any]], top_k: int = 5) -> List[FindTablesOutput]:
    """
    사용자 질문(query)과 DB 스키마(db_schema)를 입력받아 관련도가 높은 테이블 top_k개를 반환
    반환 형식: [{"table_name": "...", "score": 7}, ...]
    """
    # NOTE(review): the docstring above is kept verbatim on purpose. The @tool
    # decorator presumably exposes it to the model as the tool description, so
    # editing it would change runtime behaviour.
    #
    # In English: rank every table in full_schema_json by how well its name,
    # column names and column descriptions match the tokens of `query`, and
    # return at most `top_k` entries, best first, as
    # [{"table_name": ..., "score": ...}, ...].

    q_tokens = _tokenize(query)
    scored = []

    for t in full_schema_json:
        column_names = t.get("column_names", [])
        ts = TableSchema(
            table_name=t["table_name"],
            columns=column_names,
            # "description" is a list parallel to "column_names"; a missing or
            # null value yields an empty mapping.
            descriptions=dict(zip(column_names, t.get("description", []) or [])),
            samples=t.get("sample_rows", []),
        )
        # _score() also takes a keyword map and a relation-bonus map. The
        # earlier two-argument call raised TypeError on every invocation.
        # This tool is not given those maps, so empty ones are passed and the
        # score is the plain token-overlap count.
        sc = _score(_table_text_blob(ts), q_tokens, {}, {})
        scored.append({"table_name": ts.table_name, "score": sc})

    scored.sort(key=lambda x: x["score"], reverse=True)
    # A negative top_k would slice from the end; treat it as "no results".
    return scored[:max(top_k, 0)]

# Argument schema passed as args_schema to the get_table_columns tool.
#   table_name       - name of the table to look up (compared case-insensitively)
#   full_schema_json - list of per-table schema dicts to search
class GetColumnsInput(BaseModel):
    table_name: str
    full_schema_json: List[Dict[str, Any]]


@tool("get_table_columns", args_schema=GetColumnsInput, return_direct=False)
def get_table_columns(table_name: str, full_schema_json: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    단일 테이블의 컬럼, 타입, 설명을 반환.
    """
    # In English: return the columns, types, descriptions and sample rows of a
    # single table. The first schema entry whose name matches `table_name`
    # case-insensitively wins; when none matches, a small error dict is
    # returned instead of raising.
    match = next(
        (
            entry
            for entry in full_schema_json
            if entry["table_name"].lower() == table_name.lower()
        ),
        None,
    )
    if match is None:
        return {"table_name": table_name, "error": "not found"}

    details = {"table_name": match["table_name"]}
    details["columns"] = match.get("column_names", [])
    details["types"] = match.get("column_types", [])
    details["descriptions"] = match.get("description", [])
    details["samples"] = match.get("sample_rows", [])
    return details

# System prompt for the selector agent. It tells the model to pick only the
# tables and columns relevant to the question from the supplied schema, names
# the two tools it may call, and requires a JSON-only reply with the keys
# selected_tables, selected_columns, rationale and confidence.
# The text is sent to the model as-is, so it is runtime data, not a comment.
SELECTOR_SYSTEM_PROMPT = """
당신은 SQLite Text-to-SQL용 MAC-SQL Selector Agent입니다.
당신의 임무는 **사용자 질문과 전체 DB 스키마(JSON 리스트)**를 기반으로 관련된 테이블과 컬럼만 선택하는 것입니다.
입력으로 주어진 'query'에 대해 'full_schema_json', 'table_relation', 'join_hint', 'keyword_map', 'relation_bonus_map' 를 참고하여 어떤 데이터가 연결되어야 하는지 고려하세요.


당신은 다음 두 가지 도구를 사용할 수 있습니다:
1) find_relevant_tables: 관련 테이블 Top-K 후보 검색
2) get_table_columns: 특정 테이블의 컬럼 및 설명 확인

모든 결과는 아래 형식의 JSON으로만 반환해야 합니다:
{
  "selected_tables": ["TBL1", "TBL2", ...],
  "selected_columns": {
    "TBL1": ["col_a", "col_b"],
    "TBL2": ["col_x"]
  },
  "rationale": "왜 이 테이블과 컬럼을 선택했는지 한국어로 설명",
  "confidence": 0.0_to_1.0
}

규칙:
- 정확성을 우선합니다 (적지만 핵심적인 테이블만 선택)
- DB 스키마에 존재하지 않는 테이블/컬럼은 절대 생성하지 마세요
- 출력은 반드시 JSON 형식만 (백틱, 문장 불가)
- 모든 설명은 한국어로 작성하세요.
"""

# Tools the selector agent is allowed to call.
selector_tools = [find_relevant_tables, get_table_columns]

# Selector agent: the selector LLM, the two schema tools and the prompt above.
# NOTE(review): the captured cell output reports create_react_agent as
# deprecated in LangGraph 1.0 in favour of langchain.agents.create_agent.
selector_agent = create_react_agent(
    llm_selector,
    tools=selector_tools,
    prompt=SELECTOR_SYSTEM_PROMPT,
)

#nl_question = "각 국가의 최신 통화코드를 알려줘"
#nl_question = "How did each salesperson's annual total sales compare to their annual sales quota? Provide the difference between their total sales and the quota for each year, organized by salesperson and year."
#nl_question = "가장 많이 판매된 앨범 이름은?"





#result = selector_agent.invoke({
#    "messages": [
#        HumanMessage(content=f"query:\n{nl_question}")
#    ]
#})

C:\Users\worb1\AppData\Local\Temp\ipykernel_12104\2804277544.py:161: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  selector_agent = create_react_agent(


In [3]:

# Chat client for the decomposer agent; same Azure settings as the selector
# client but with a lower temperature (0.4).
llm_decomposer = AzureChatOpenAI(
    azure_deployment=os.getenv("SKCC_AZURE_MODEL"),#"gpt-4o",
    azure_endpoint=os.getenv("SKCC_AZURE_ENDPOINT"),
    api_key=os.getenv("SKCC_AZURE_OPENAI_KEY"),
    api_version="2024-02-15-preview",
    temperature=0.4
)


# System prompt for the decomposer agent. It asks the model to break the
# question into ordered logical steps for writing the SQL, taking the given
# 'related_tables' into account, and to reply with JSON only
# (decomposition_steps, reasoning, confidence).
# The text is sent to the model as-is, so it is runtime data, not a comment.
DECOMPOSER_SYSTEM_PROMPT = """
당신은 Text-to-SQL 시스템의 Decomposer Agent입니다.
입력된 자연어 질문을 SQL로 작성하기 위한 **논리적 단계(step)** 로 분해하세요.

입력으로 주어진 'related_tables'를 참고하여 어떤 데이터가 연결되어야 하는지 고려하세요.

출력 형식(JSON만 허용):
{
  "decomposition_steps": [
    {"step": 1, "subquery": "첫 번째 단계 설명"},
    {"step": 2, "subquery": "두 번째 단계 설명"},
    ...
  ],
  "reasoning": "왜 이런 단계로 분해했는지",
  "confidence": 0.0_to_1.0
}

규칙:
- 모든 단계는 SQL 작성의 논리적 순서(집계 → 정렬 → 필터 → 출력)에 따라 작성하세요.
- 불필요한 말이나 백틱(``) 없이, JSON만 출력하세요.
- 관련 테이블 간의 조인을 명확히 고려하세요.
- 모든 설명은 한국어로 작성하세요.
"""

# Decomposer agent: no tools, it only transforms the prompt input into steps.
decomposer_agent = create_react_agent(
    llm_decomposer,
    tools=[],  
    prompt=DECOMPOSER_SYSTEM_PROMPT,
)


C:\Users\worb1\AppData\Local\Temp\ipykernel_12104\245651428.py:34: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  decomposer_agent = create_react_agent(


In [4]:
# Chat client for the composer agent (temperature 0.4).
llm_composer = AzureChatOpenAI(
    azure_deployment=os.getenv("SKCC_AZURE_MODEL"),#"gpt-4o",
    azure_endpoint=os.getenv("SKCC_AZURE_ENDPOINT"),
    api_key=os.getenv("SKCC_AZURE_OPENAI_KEY"),
    api_version="2024-02-15-preview",
    temperature=0.4
)

# Key join relationships in the Chinook database (reference only; this list
# is not part of the prompt below):
# - Album.ArtistId → Artist.ArtistId
# - Track.AlbumId → Album.AlbumId
# - Track.GenreId → Genre.GenreId
# - InvoiceLine.TrackId → Track.TrackId
# - InvoiceLine.InvoiceId → Invoice.InvoiceId
# - Invoice.CustomerId → Customer.CustomerId

# System prompt for the composer agent. It asks the model to write an
# executable SQLite query from the question, the decomposition steps and the
# related tables, and to reply with JSON only (sql, rationale, confidence).
# The text is sent to the model as-is, so it is runtime data, not a comment.
COMPOSER_PROMPT = """
당신은 Text-to-SQL Composer Agent입니다.
입력된 자연어 질문, decomposition_steps, 그리고 related_tables 정보를 기반으로
SQLite 데이터베이스에서 실행 가능한 SQL 쿼리를 작성하세요.

출력은 반드시 JSON 형식만으로 출력하며, 코드블록(```)이나 'json' 문자열은 절대 포함하지 마세요.

출력 형식(JSON만 허용):
{
  "sql": "SELECT ...",
  "rationale": "쿼리 구성 이유",
  "confidence": 0.0~1.0
}



규칙:
- SQLite 문법을 따르세요.
- 키워드는 대문자(SELECT, FROM, JOIN, WHERE, GROUP BY, ORDER BY, LIMIT 등).
- Table alias 사용 가능 (예: a, t, i).
- 문자열 결합은 `||` 연산자를 사용하세요. (예: FirstName || ' ' || LastName)
- 날짜 비교는 문자열(YYYY-MM-DD) 비교로 처리하세요. (SQLite는 TEXT 타입 날짜를 지원)
- 질문에 '가장 많이 판매된', '상위', 'Top' 등이 포함되면 반드시 GROUP BY + COUNT + ORDER BY + LIMIT 절을 사용하세요.
- LIMIT 절로 결과 개수를 제한하세요. (예: LIMIT 1)
- 불필요한 세미콜론(;)은 제거해도 됩니다.
"""
# Composer agent: no tools, it only turns the prompt input into a SQL string.
composer_agent = create_react_agent(llm_composer, tools=[], prompt=COMPOSER_PROMPT)


C:\Users\worb1\AppData\Local\Temp\ipykernel_12104\856257992.py:42: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  composer_agent = create_react_agent(llm_composer, tools=[], prompt=COMPOSER_PROMPT)


In [5]:
# Chat client for the refiner agent (temperature 0.4).
llm_refiner = AzureChatOpenAI(
    azure_deployment=os.getenv("SKCC_AZURE_MODEL"),#"gpt-4o",
    azure_endpoint=os.getenv("SKCC_AZURE_ENDPOINT"),
    api_key=os.getenv("SKCC_AZURE_OPENAI_KEY"),
    api_version="2024-02-15-preview",
    temperature=0.4
)

# System prompt for the refiner agent. It asks the model to review the given
# SQL for syntax errors and missing JOIN / GROUP BY / LIMIT clauses and to
# reply with JSON only (refined_sql, changes, confidence).
# The text is sent to the model as-is, so it is runtime data, not a comment.
REFINER_PROMPT = """
당신은 SQL Refiner Agent입니다.
입력된 SQL Query 를 점검하고, 문법적 오류와 누락된 JOIN, GROUP BY, LIMIT 등을 찾고 이를 개선하세요.
반드시 아래 형식의 **JSON만 출력**하세요.
코드블록(```)이나 추가 문장은 절대 포함하지 마세요.

규칙 : 
- 입력된 SQL 쿼리를 점검하여 문법적 오류나 누락된 조인, GROUP BY, LIMIT 등을 개선하세요.
- JOIN 누락으로 인한 컬럼 reference 오류를 검증하세요.
- 코드블록(```)이나 추가 문장은 절대 포함하지 마세요.

출력 형식(JSON만):
{
  "refined_sql": "...",
  "changes": ["어떤 개선을 했는지 목록"],
  "confidence": 0.0~1.0
}
"""
# Refiner agent: no tools, it only rewrites the SQL it is given.
refiner_agent = create_react_agent(llm_refiner, tools=[], prompt=REFINER_PROMPT)

C:\Users\worb1\AppData\Local\Temp\ipykernel_12104\4290923746.py:27: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  refiner_agent = create_react_agent(llm_refiner, tools=[], prompt=REFINER_PROMPT)


In [6]:
def _parse_agent_json(content: str) -> Any:
    """Parse an agent's final message as JSON, tolerating one Markdown fence.

    The prompts ask for bare JSON, but a chat model may still wrap its answer
    in a code fence (three backticks, optionally tagged ``json``). A single
    fence surrounding the whole message is stripped before parsing.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    text = content.strip()
    fenced = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        text = fenced.group(1)
    return json.loads(text)


def make_query(nl_question:str):
    """Translate a natural-language question into a SQLite query string.

    Runs the four agents in sequence:

    1. selector   - picks the relevant tables from the schema file;
    2. decomposer - breaks the question into logical steps;
    3. composer   - writes a SQL query from the steps and tables;
    4. refiner    - reviews the SQL and returns the corrected version.

    The refiner's raw reply is printed (prefixed with ``##``) before parsing.

    Args:
        nl_question: The user's question in natural language.

    Returns:
        The ``refined_sql`` string from the refiner agent's JSON reply.

    Raises:
        OSError: if ``Chinook_Column_Config.json`` cannot be opened.
        json.JSONDecodeError: if the schema file or an agent reply is not
            valid JSON.
        KeyError: if an agent reply lacks the expected key.
    """
    # Kept as a module-level global because the original published the schema
    # this way; code outside this function may rely on it.
    global full_schema_json

    with open('Chinook_Column_Config.json', 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    full_schema_json = json_data

    table_relations = ["Album.ArtistId → Artist.ArtistId"
                       , "Track.AlbumId → Album.AlbumId"
                       , "Track.GenreId → Genre.GenreId"
                       , "InvoiceLine.TrackId → Track.TrackId"
                       , "InvoiceLine.InvoiceId → Invoice.InvoiceId"
                       , "Invoice.CustomerId → Customer.CustomerId"]

    # NOTE(review): the first two hints are identical. They are left untouched
    # because the list is interpolated into the selector prompt.
    table_join_hint = [""" "판매", "주문", "결제", "구매" 관련 질문 → Invoice 또는 InvoiceLine 관련 """
                       , """ "판매", "주문", "결제", "구매" 관련 질문 → Invoice 또는 InvoiceLine 관련 """
                       , """ "앨범", "음악", "트랙", "곡", "아티스트" 관련 질문 → Album, Track, Artist 관련 """
                       , """ "고객", "국가", "도시" 관련 → Customer, Invoice """]

    keyword_map = {
        "판매": ["invoice", "invoiceline", "order", "payment", "track", "album"],
        "앨범": ["album", "track", "invoice"],
        "음악": ["track", "artist", "album"],
        "고객": ["customer", "invoice"],
        "직원": ["employee", "supportrep"],
    }

    relation_bonus_map = {
        "invoice": ["invoiceline", "track"],
        "track": ["album", "invoiceline"],
        "album": ["track", "artist"],
    }

    # Step 1: table selection. The whole schema and the hint maps are embedded
    # in the message text for the selector agent.
    selector_result = selector_agent.invoke(
        {
    "messages": [
        HumanMessage(content=f"""
                     query:{nl_question}\n
                     table_relation: {table_relations}\n
                     join_hint:{table_join_hint}\n
                     keyword_map:{keyword_map}\n
                     relation_bonus_map:{relation_bonus_map}\n
                     full_schema_json: {full_schema_json}
""")
    ]
    }
    )

    # Each agent's answer is the content of the last message in its result.
    selected_tables = _parse_agent_json(selector_result["messages"][-1].content)["selected_tables"]

    # Step 2: decomposition into logical steps.
    decomposer_result = decomposer_agent.invoke({
        "messages": [
            HumanMessage(content=f"query: {nl_question}\nrelated_tables: {selected_tables}")
        ]
    })

    dec_steps = _parse_agent_json(decomposer_result["messages"][-1].content)

    # Step 3: SQL composition.
    composer_result = composer_agent.invoke({"messages": [
        HumanMessage(content=f"query: {nl_question}\nrelated_tables: {selected_tables}\ndecomposition_steps: {dec_steps}")
    ]})

    sql = _parse_agent_json(composer_result["messages"][-1].content)["sql"]

    # Step 4: refinement of the composed SQL.
    refiner_result = refiner_agent.invoke({"messages": [
        HumanMessage(content=f"sql: {sql}\nquestion: {nl_question}")
    ]})
    print("##", refiner_result['messages'][-1].content)
    res = _parse_agent_json(refiner_result['messages'][-1].content)

    return res['refined_sql']

In [7]:
# Load the Chinook column/schema configuration into json_data at notebook
# level; a later cell assigns it to `schema`.
import json
with open('Chinook_Column_Config.json', 'r', encoding='utf-8') as f:

    json_data = json.load(f)

In [8]:
# Evaluation: best-selling album and its sales volume.
# Compares the result of the LLM-generated query with a hand-written answer.
import sqlite3
from contextlib import closing
from pprint import pprint


question = "가장 많이 판매된 앨범 이름과 판매량은?"
schema = json_data
# Generate the SQL before opening the database so that a failure in the agent
# pipeline cannot leave a connection open.
llm_sql = make_query(nl_question=question)
print("LLM QUERY : ", llm_sql)
answer_sql = """
SELECT 
a.Title AS AlbumName, SUM(il.Quantity) AS TotalSales 
FROM InvoiceLine il 
JOIN Track t ON il.TrackId = t.TrackId 
JOIN Album a ON t.AlbumId = a.AlbumId 
GROUP BY a.Title 
ORDER BY TotalSales DESC LIMIT 1
"""

# llm_sql is model-generated and therefore untrusted: open the database
# read-only so a stray write statement fails instead of modifying the file.
# closing() releases the connection even if a query raises.
with closing(sqlite3.connect("file:Chinook_Sqlite.sqlite?mode=ro", uri=True)) as conn:
    cursor = conn.cursor()

    cursor.execute(llm_sql)
    llm_query_result = cursor.fetchall()

    cursor.execute(answer_sql)
    answer_query_result = cursor.fetchall()

print("--- 질문 ---")
print(question)
print("--- LLM QUERY RESULT ---")
print(llm_query_result)
print("--- ANSWER QUERY RESULT ---")
print(answer_query_result)

## {
  "refined_sql": "SELECT a.Title AS AlbumName, SUM(il.Quantity) AS TotalSales FROM Album a JOIN Track t ON a.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId GROUP BY a.Title ORDER BY TotalSales DESC LIMIT 1",
  "changes": ["GROUP BY 컬럼을 AlbumId에서 Title로 변경하여 SELECT에 사용된 Title 컬럼과 일치하도록 수정", "문법적 오류 없음 확인"],
  "confidence": 0.95
}
LLM QUERY :  SELECT a.Title AS AlbumName, SUM(il.Quantity) AS TotalSales FROM Album a JOIN Track t ON a.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId GROUP BY a.Title ORDER BY TotalSales DESC LIMIT 1
--- 질문 ---
가장 많이 판매된 앨범 이름과 판매량은?
--- LLM QUERY RESULT ---
[('Minha Historia', 27)]
--- ANSWER QUERY RESULT ---
[('Minha Historia', 27)]


In [9]:
# Evaluation: country with the most customers.
# Compares the result of the LLM-generated query with a hand-written answer.
import sqlite3
from contextlib import closing
from pprint import pprint


question = "고객이 가장 많은 국가는 어디인가요?"
# Generate the SQL before opening the database so that a failure in the agent
# pipeline cannot leave a connection open.
llm_sql = make_query(question)
print("LLM QUERY : ", llm_sql)
answer_sql = """
SELECT Country, COUNT(CustomerId) AS CustomerCount
FROM Customer
GROUP BY Country
ORDER BY CustomerCount DESC
LIMIT 1;
"""

# llm_sql is model-generated and therefore untrusted: open the database
# read-only so a stray write statement fails instead of modifying the file.
# closing() releases the connection even if a query raises.
with closing(sqlite3.connect("file:Chinook_Sqlite.sqlite?mode=ro", uri=True)) as conn:
    cursor = conn.cursor()

    cursor.execute(llm_sql)
    llm_query_result = cursor.fetchall()

    cursor.execute(answer_sql)
    answer_query_result = cursor.fetchall()

print("--- 질문 ---")
print(question)
print("--- LLM QUERY RESULT ---")
print(llm_query_result)
print("--- ANSWER QUERY RESULT ---")
print(answer_query_result)

## {
  "refined_sql": "SELECT Country, COUNT(*) AS CustomerCount FROM Customer GROUP BY Country ORDER BY CustomerCount DESC LIMIT 1",
  "changes": ["No changes were necessary as the SQL query is syntactically correct and includes GROUP BY, ORDER BY, and LIMIT clauses."],
  "confidence": 1.0
}
LLM QUERY :  SELECT Country, COUNT(*) AS CustomerCount FROM Customer GROUP BY Country ORDER BY CustomerCount DESC LIMIT 1
--- 질문 ---
고객이 가장 많은 국가는 어디인가요?
--- LLM QUERY RESULT ---
[('USA', 13)]
--- ANSWER QUERY RESULT ---
[('USA', 13)]


In [10]:
# Evaluation: the five customers with the highest total purchase amount.
# Compares the result of the LLM-generated query with a hand-written answer.
import sqlite3
from contextlib import closing
from pprint import pprint


question = "총 구매 금액이 가장 많은 고객 5명을 알려주세요."
# Generate the SQL before opening the database so that a failure in the agent
# pipeline cannot leave a connection open.
llm_sql = make_query(question)
print("LLM QUERY : ", llm_sql)
answer_sql = """
SELECT c.FirstName || ' ' || c.LastName AS CustomerName, SUM(i.Total) AS TotalSpent
FROM Customer c
JOIN Invoice i ON c.CustomerId = i.CustomerId
GROUP BY c.CustomerId, c.FirstName, c.LastName
ORDER BY TotalSpent DESC
LIMIT 5;
"""

# An alternative query kept for reference (it was a no-op string literal in
# this cell); note that it joins InvoiceLine to Customer on il.CustomerId:
#   SELECT c.CustomerId, c.FirstName || ' ' || c.LastName AS FullName,
#          SUM(il.UnitPrice * il.Quantity) AS TotalSpent
#   FROM InvoiceLine il JOIN Customer c ON il.CustomerId = c.CustomerId
#   GROUP BY c.CustomerId, c.FirstName, c.LastName
#   ORDER BY TotalSpent DESC LIMIT 5

# llm_sql is model-generated and therefore untrusted: open the database
# read-only so a stray write statement fails instead of modifying the file.
# closing() releases the connection even if a query raises.
with closing(sqlite3.connect("file:Chinook_Sqlite.sqlite?mode=ro", uri=True)) as conn:
    cursor = conn.cursor()

    cursor.execute(llm_sql)
    llm_query_result = cursor.fetchall()

    cursor.execute(answer_sql)
    answer_query_result = cursor.fetchall()

print("--- 질문 ---")
print(question)
print("--- LLM QUERY RESULT ---")
print(llm_query_result)
print("--- ANSWER QUERY RESULT ---")
print(answer_query_result)

## {
  "refined_sql": "SELECT c.CustomerId, c.FirstName || ' ' || c.LastName AS CustomerName, SUM(i.Total) AS TotalPurchase FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId, c.FirstName, c.LastName ORDER BY TotalPurchase DESC LIMIT 5",
  "changes": [
    "GROUP BY 절에 누락된 c.FirstName, c.LastName 추가",
    "문법 및 논리적 오류 없음 확인"
  ],
  "confidence": 1.0
}
LLM QUERY :  SELECT c.CustomerId, c.FirstName || ' ' || c.LastName AS CustomerName, SUM(i.Total) AS TotalPurchase FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId, c.FirstName, c.LastName ORDER BY TotalPurchase DESC LIMIT 5
--- 질문 ---
총 구매 금액이 가장 많은 고객 5명을 알려주세요.
--- LLM QUERY RESULT ---
[(6, 'Helena Holý', 49.62), (26, 'Richard Cunningham', 47.62), (57, 'Luis Rojas', 46.62), (45, 'Ladislav Kovács', 45.62), (46, "Hugh O'Reilly", 45.62)]
--- ANSWER QUERY RESULT ---
[('Helena Holý', 49.62), ('Richard Cunningham', 47.62), ('Luis Rojas', 46.62), ('Ladislav Kovács', 45.62), ("

In [11]:
# Evaluation: best-selling music genre among invoices dated before 2022.
# Compares the result of the LLM-generated query with a hand-written answer.
import sqlite3
from contextlib import closing
from pprint import pprint


question = "2022년 이전 송장(Invoice) 기준으로 가장 많이 판매된 음악 장르는 무엇인가요?"
# Generate the SQL before opening the database so that a failure in the agent
# pipeline cannot leave a connection open.
llm_sql = make_query(question)
print("LLM QUERY : ", llm_sql)
answer_sql = """
SELECT g.Name AS GenreName, COUNT(il.InvoiceLineId) AS SalesCount
FROM Invoice i
JOIN InvoiceLine il ON i.InvoiceId = il.InvoiceId
JOIN Track t ON il.TrackId = t.TrackId
JOIN Genre g ON t.GenreId = g.GenreId
WHERE i.InvoiceDate < '2022-01-01'
GROUP BY g.GenreId, g.Name
ORDER BY SalesCount DESC
LIMIT 1;
"""

# SQL the LLM actually produced in an earlier run, kept for reference (it was
# a no-op string literal in this cell). It returns the GenreId rather than the
# genre name because it never joins Genre:
#   SELECT t.GenreId, COUNT(il.InvoiceLineId) AS TotalSales
#   FROM InvoiceLine il
#   JOIN Track t ON il.TrackId = t.TrackId
#   JOIN Invoice i ON il.InvoiceId = i.InvoiceId
#   WHERE i.InvoiceDate < '2022-01-01'
#   GROUP BY t.GenreId
#   ORDER BY TotalSales DESC
#   LIMIT 1

# llm_sql is model-generated and therefore untrusted: open the database
# read-only so a stray write statement fails instead of modifying the file.
# closing() releases the connection even if a query raises.
with closing(sqlite3.connect("file:Chinook_Sqlite.sqlite?mode=ro", uri=True)) as conn:
    cursor = conn.cursor()

    cursor.execute(llm_sql)
    llm_query_result = cursor.fetchall()

    cursor.execute(answer_sql)
    answer_query_result = cursor.fetchall()

print("--- 질문 ---")
print(question)
print("--- LLM QUERY RESULT ---")
print(llm_query_result)
print("--- ANSWER QUERY RESULT ---")
print(answer_query_result)

## {
  "refined_sql": "SELECT g.Name AS GenreName, COUNT(il.TrackId) AS SalesCount FROM Invoice i JOIN InvoiceLine il ON i.InvoiceId = il.InvoiceId JOIN Track t ON il.TrackId = t.TrackId JOIN Genre g ON t.GenreId = g.GenreId WHERE i.InvoiceDate < '2022-01-01' GROUP BY g.Name ORDER BY SalesCount DESC LIMIT 1",
  "changes": ["SQL 쿼리에는 문법적 오류가 없으며, 모든 필요한 JOIN이 포함되어 있습니다. GROUP BY와 LIMIT도 적절히 사용되었습니다. 따라서 수정이 필요하지 않습니다."],
  "confidence": 1.0
}
LLM QUERY :  SELECT g.Name AS GenreName, COUNT(il.TrackId) AS SalesCount FROM Invoice i JOIN InvoiceLine il ON i.InvoiceId = il.InvoiceId JOIN Track t ON il.TrackId = t.TrackId JOIN Genre g ON t.GenreId = g.GenreId WHERE i.InvoiceDate < '2022-01-01' GROUP BY g.Name ORDER BY SalesCount DESC LIMIT 1
--- 질문 ---
2022년 이전 송장(Invoice) 기준으로 가장 많이 판매된 음악 장르는 무엇인가요?
--- LLM QUERY RESULT ---
[('Rock', 180)]
--- ANSWER QUERY RESULT ---
[('Rock', 180)]
