In [1]:
import csv
import pandas as pd
import pprint
import re
from typing import List, Dict, Any, Generator
import sqlglot
from typing import TypedDict, Any, Union, cast
from tqdm.notebook import tqdm
import json

## Helper functions

In [3]:
DIALECT = "mysql"

log_csv_file_path = '/mnt/Code/code/AI/agentic-AI/SQL-QA/logs/turn_log.csv'

DELIMITER = (
        # "\t"  # Using tab as delimiter which is less likely to appear in text content
        "|"
    )

CANDIDATE_NAMES = [
    'direct_generation',
    'cot_generation',
    'dac_cot_genration',
    'query_plan_generation'
]

In [4]:
def pd_read_csv(file_path: str) -> pd.DataFrame:
    df = pd.read_csv(
        file_path,
        encoding="utf-8",
        quoting=1,  # QUOTE_ALL mode
        escapechar="\\",
        on_bad_lines="warn",
    )
    return df

def pd_save_csv(df: pd.DataFrame, csv_result_path: str):
    df.to_csv(
        csv_result_path,
        encoding="utf-8",
        quoting=1,  # QUOTE_ALL mode
        escapechar="\\",
        index=False
    )

    

In [5]:
def normalize_sql(sql: str, pretty=True) -> str:
    # TODO: sqlglot not supporting UTF-8
    """
    Normalize SQL queries by removing extra whitespace and converting to lowercase.
    """
    try:
        return sqlglot.transpile(
                    sql, write=DIALECT, pretty=pretty,
            )[0]

    except Exception as e:
        print(f"Error normalizing SQL: {e} at\nQuery: {sql}")
        # Fallback to a simple normalization if sqlglot fails
        return sql
    # return sqlglot.parse_one(sql).sql().lower().strip()
def is_exact_match(src_sql: str, tg_sql: str) -> bool:
    """
    Check if the query matches the answer exactly.
    """
    # return query.strip() == answer.strip()
    try:
        return normalize_sql(src_sql) == normalize_sql(tg_sql)
    except Exception as e:
        print(f"Error normalizing SQL: {e} at\nQuery: {src_sql}\nAnswer: {tg_sql}")
        return False

def normalize_answer(answer: str) -> str:   
    """
    Normalize the answer by removing extra whitespace and converting to lowercase.
    """
    return answer.strip().lower()
    
def is_execution_match(src_result: str, tg_result: str) -> bool:
    """
    Check if the query matches the answer after execution.
    """
    try:
        # Normalize both query and answer
        normalized_src_query = normalize_answer(src_result)
        normalized_tg_answer = normalize_answer(tg_result)
        
        # Compare normalized versions
        return normalized_src_query == normalized_tg_answer
    except Exception as e:
        print(f"Error normalizing execution result: {e} at\nQuery: {src_result}\nAnswer: {tg_result}")
        return False


regex = r"Câu lệnh SQL \d: ```sql([^`]*)```"

def parse_sql_candidates(regex=regex, merge_prompt='') -> List[str]:

    matches = re.finditer(regex, merge_prompt, re.MULTILINE | re.DOTALL)
    candidates = []
    for matchNum, match in enumerate(matches, start=1):
        
        # print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
        
        for groupNum in range(0, len(match.groups())):
            groupNum = groupNum + 1
            
            # print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
            # print(match.group(groupNum).strip())
            # print('-' * 20)
            sql = match.group(groupNum).strip()
            sql = normalize_sql(sql)
            candidates.append(sql)

    return candidates


In [None]:
def gen_get_row_sql_gen_candidates(
    csv_file_path: str, delimiter: str = DELIMITER
) :
    """
    Extract Rows from a CSV file and yield each row as a dictionary.
    Each row contains fields like:
    
    """
    with open(
        csv_file_path, "r", newline="", encoding="utf-8"
    ) as csvfile:
        reader = csv.DictReader(
            csvfile, delimiter=delimiter, quoting=csv.QUOTE_ALL
        )
        i = 0
        for row in reader:
            # i += 1
            # if i > 5: break
            # print(f'----------Row {i}---------')
            # pprint.pprint(row, indent=4, width=80)
            yield row

In [7]:
get_row_candidates = gen_get_row_sql_gen_candidates(log_csv_file_path)

In [8]:
row = next(get_row_candidates)

In [9]:
row

{'created_date': '2025-06-20 11:05:02',
 'question': 'khách hàng nào chi tiêu nhiều nhất trong tháng 4',
 'linking_structured_result': "{'schema_linking': 'patient:Bảng này chứa thông tin về khách hàng (bệnh nhân).\\nsale_invoice:Bảng này chứa thông tin về các hóa đơn bán hàng, bao gồm cả tổng số tiền đã chi tiêu.\\nservice_invoice:Bảng này chứa thông tin về các hóa đơn dịch vụ, bao gồm cả tổng số tiền đã chi tiêu.\\nexpense_voucher:Bảng này chứa thông tin về các phiếu chi, có thể liên quan đến việc chi tiêu của khách hàng.', 'tables': ['patient', 'sale_invoice', 'service_invoice', 'expense_voucher']}",
 'filtered_schema_tables': '[\n    "sale_invoice",\n    "expense_voucher",\n    "patient",\n    "service_invoice"\n]',
 'direct_generation_prompt': '---retry--- \n \n\n    **CHÚ Ý**: \n    - Phải tuân thủ đúng cú pháp và các quy tắc của hệ quản trị cơ sở dữ liệu MYSQL.\n    - Câu truy vấn SQL phải chứa thông tin có ý nghĩa và dễ hiểu cho người dùng.\n    \n    \n\n**Yêu cầu**: Hãy suy n

In [16]:
from langchain_community.utilities import SQLDatabase

conn = 'mysql+pymysql://root:your_password@103.72.98.83:3306/gsv'
db =  SQLDatabase.from_uri(conn)

def run_sql(sql: str) -> Any:
    """
    Execute the SQL query and return the result.
    """
    # try:
    #     result = db.execute(sql)
    #     return result.fetchall()
    # except Exception as e:
    #     print(f"Error executing SQL: {e} at\nSQL: {sql}")
    #     return None
    result = db.run_no_throw(sql)
    return result

In [None]:
test_sql = ['SELECT\n  id,\n  branch_name,\n  branch_code,\n  description\nFROM branch\nWHERE\n  is_active = 1',
 'SELECT\n  branch_name,\n  branch_code,\n  description\nFROM branch\nWHERE\n  is_active = 1']

for sql in test_sql:
    print(f'Original SQL: \n```{sql}\n```')
    normalized_sql = normalize_sql(sql)
    print(f'Normalized SQL: \n```{normalized_sql}```')
    print(f'Is exact match: {is_exact_match(sql, normalized_sql)}')
    print('-' * 40)

In [None]:

candidates_gen = gen_get_row_sql_gen_candidates(csv_file_path=log_csv_file_path, delimiter=DELIMITER)
# row = next(iter(candidates_gen))  # Skip the first row (header)
row = next(candidates_gen)  # Get the first row
print(row['user_question'])

Liệt kê tất cả các chi nhánh hiện đang hoạt động.



In [None]:
import sys

csv.field_size_limit(sys.maxsize)

csv_result_path = "/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/gen_success_data_results_LLM_judge_updated.csv"

df = pd_read_csv(csv_result_path)

df["candidate_generations"] = None
df["candidate_em"] = None
df["candidate_ex"] = None
df["candidate_least_correct"] = None
df["candidate_passes"] = None

candidates_gen = gen_get_row_sql_gen_candidates(
    csv_file_path=log_csv_file_path, delimiter=DELIMITER
)
pbar = tqdm(df.iterrows(), total=len(df), leave=False)
for idx, llm_row in pbar:
    # if idx > 5:
    #     break
    pbar.write(f"----------Row {idx}---------")
    pbar.write(f'Processing llm_row with question: {llm_row["question"]}')
    while True:
        try:
            candidate_row = next(candidates_gen)
        except StopIteration:
            pbar.write(f"No candidate row found for index {idx}")
            candidate_row = None
            break

        if candidate_row["user_question"] != llm_row["question"]:
            pbar.write(f'skip candidate row:\n{candidate_row["user_question"]}')
            continue  # Skip if the question matches the current row's question
        else:
            break
    if not candidate_row:
        break
    sql_candidates = parse_sql_candidates(merge_prompt=candidate_row["merger_prompt"])
    df.at[idx, "candidate_generations"] = sql_candidates
    em_candidates = [
        is_exact_match(c, llm_row["ground_truth_sql"]) for c in sql_candidates
    ]
    df.at[idx, "candidate_em"] = em_candidates
    ex_canddiates = [
        is_execution_match(run_sql(c), llm_row["ground_truth_result"])
        for c in sql_candidates
    ]
    df.at[idx, "candidate_ex"] = ex_canddiates
    df.at[idx, "candidate_least_correct"] = [
        c
        for c, em, ex in zip(sql_candidates, em_candidates, ex_canddiates)
        if (em) or (ex)
    ]
    df.at[idx, "candidate_passes"] = [
        CANDIDATE_NAMES[i]
        for i, (em, ex) in enumerate(zip(em_candidates, ex_canddiates))
        if (em) or (ex)
    ]
    # pbar.write(f'Row {idx} candidates: {row["candidate_generations"]}')

    # if int( idx ) > 5:
    #     break
    # pbar.write(llm_row)
    # pbar.write(llm_row['question'])
    # pbar.write(llm_row['candidate_generations'])
    # pbar.write(llm_row['candidate_em'])
    # pbar.write(llm_row['candidate_ex'])
    # pbar.write(llm_row['candidate_least_correct'])
    pass
pbar.close()

In [None]:
pd_save_csv(df, csv_result_path)

In [103]:
for idx, row in df.iterrows():
    df.at[idx, "candidate_passes"] = [CANDIDATE_NAMES[i] for i, (em, ex) in enumerate(zip(row['candidate_em'], row['candidate_ex'])) if em or ex]

In [None]:
df.iloc[-5:]

In [None]:
df['candidate_least_correct'].value_counts()

In [None]:
df['llm_exact_match'].value_counts()

llm_exact_match
True     127
False     97
Name: count, dtype: int64

In [None]:
df['llm_execution_match'].value_counts()

llm_execution_match
True     135
False     89
Name: count, dtype: int64

In [106]:
candidate_counter = {}
for candidate in CANDIDATE_NAMES:
    candidate_counter[candidate] = df['candidate_passes'].apply(lambda x: candidate in x).sum()
candidate_counter
# df['candidate_passes']

{'direct_generation': np.int64(100),
 'cot_generation': np.int64(109),
 'dac_cot_genration': np.int64(104),
 'query_plan_generation': np.int64(102)}

## GT_question + PRED_sql -> GT_execution + PRED_sql

In [7]:
import pandas as pd
import csv

In [14]:
csv_log_file = '/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge_KT_KD_results_results.csv'

In [15]:
df = pd_read_csv(csv_log_file)
# df['gt_execution'] = None
# df['llm_final_generation'] = None
# df['llm_execution'] = None

In [None]:
for idx, row in df.iterrows():
    df.at[idx, 'gt_execution'] = run_sql(df.at[idx, 'sql'])
    # df.at[idx, 'llm_final']


In [None]:
# pd_save_csv(df, csv_log_file)

## Eval on first 200

In [None]:
# df = df[:200]

In [54]:
df['eval_em'] = None
df['eval_ex'] = None
df['llm_em'] = None
df['llm_ex'] = None

In [17]:
len(df)

48

In [None]:
max_len = 1000
pbar = tqdm(df.iterrows(), total=len(df))

accepted_ids = []
for idx, row in pbar:
    if len(accepted_ids) > 200: 
        break
    try:
        if (
            len(str(row["generated_sql_query"])) > max_len
            or len(str(row["generated_raw_result"])) > max_len
        ):
            continue

        df.at[idx, "eval_em"] = is_exact_match(row["sql"], row["generated_sql_query"])
        df.at[idx, "eval_ex"] = is_execution_match(
            str(row["gt_execution"]), str(row["generated_raw_result"])
        )
        accepted_ids.append(idx)
    except Exception as e:
        print("-------")
        print(idx)
        print(row)
        print("-------")
        raise (e)

pbar.close()

In [61]:
df['eval_ex'].value_counts()

eval_ex
False    105
True      96
Name: count, dtype: int64

In [62]:
df['eval_em'].value_counts()

eval_em
False    199
True       2
Name: count, dtype: int64

In [63]:
saved_df = df[['question', 'sql', 'gt_execution', 'generated_sql_query', 'generated_raw_result', 'llm_em', 'llm_ex']]

In [65]:
csv_log_file = '/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results-llm-as-judge.csv'
pd_save_csv(saved_df, csv_log_file)

## Extract table, column

In [6]:
from sqlglot import parse_one, exp, transpile

In [7]:
import pandas as pd

In [8]:
csv_file_path = '/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge_KT_KD_results.csv'

In [9]:
df = pd.read_csv(csv_file_path)

In [11]:
print(transpile(sql, write="mysql", pretty=True)[0])

WITH MonthlyAppointments AS (
  SELECT
    doctor_id,
    DATE_FORMAT(appointment_time, '%Y-%m') AS appointment_month,
    COUNT(*) AS appointment_count
  FROM appointment
  WHERE
    appointment_time >= DATE_SUB(CURDATE(), INTERVAL (INTERVAL '1' MONTH) DAY)
    AND appointment_time <= CURDATE()
  GROUP BY
    doctor_id,
    appointment_month
), MonthlyChanges AS (
  SELECT
    ma1.doctor_id,
    ma1.appointment_month,
    ma1.appointment_count,
    LAG(ma1.appointment_count, 1, 0) OVER (PARTITION BY ma1.doctor_id ORDER BY ma1.appointment_month) AS previous_month_count
  FROM MonthlyAppointments AS ma1
), GrowthRates AS (
  SELECT
    doctor_id,
    appointment_month,
    appointment_count,
    previous_month_count,
    (
      appointment_count - previous_month_count
    ) AS growth
  FROM MonthlyChanges
  WHERE
    appointment_month = DATE_FORMAT(CURDATE(), '%Y-%m')
)
SELECT
  e.first_name,
  e.last_name,
  gr.growth
FROM GrowthRates AS gr
JOIN employee AS e
  ON gr.doctor_id = e.id


In [12]:
def extract_with_detailed_info(sql_string, dialect=""):
    """
    Extract tables and columns with more detailed information including aliases.
    
    Args:
        sql_string (str): The SQL query string to analyze
        dialect (str): SQL dialect
    
    Returns:
        dict: Detailed information about tables and columns
    """
    try:
        parsed = sqlglot.parse_one(sql_string, dialect=dialect)
        
        result = {
            "tables": {},
            "columns": [],
            "table_aliases": {}
        }
        
        # Extract table information including aliases
        for table in parsed.find_all(exp.Table):
            table_name = table.name
            table_alias = table.alias if table.alias else None
            
            result["tables"][table_name] = {
                "alias": table_alias,
                "columns": []
            }
            
            if table_alias:
                result["table_aliases"][table_alias] = table_name
        
        # Extract column information
        for column in parsed.find_all(exp.Column):
            column_info = {
                "column_name": column.name,
                "table_reference": None,
                "actual_table": None
            }
            
            if column.table:
                column_info["table_reference"] = column.table
                # Check if it's an alias
                if column.table in result["table_aliases"]:
                    column_info["actual_table"] = result["table_aliases"][column.table]
                else:
                    column_info["actual_table"] = column.table
            
            result["columns"].append(column_info)
            
            # Add column to the appropriate table
            if column_info["actual_table"]:
                if column_info["actual_table"] in result["tables"]:
                    result["tables"][column_info["actual_table"]]["columns"].append(column.name)
        
        # Remove duplicates from column lists
        for table_info in result["tables"].values():
            table_info["columns"] = list(set(table_info["columns"]))
        
        return result
        
    except Exception as e:
        return {"error": str(e)}

In [75]:
extract_with_detailed_info(sql)

{'tables': {'GrowthRates': {'alias': 'gr', 'columns': ['doctor_id', 'growth']},
  'employee': {'alias': 'e', 'columns': ['last_name', 'id', 'first_name']},
  'appointment': {'alias': None, 'columns': []},
  'MonthlyAppointments': {'alias': 'ma1',
   'columns': ['appointment_count', 'appointment_month', 'doctor_id']},
  'MonthlyChanges': {'alias': None, 'columns': []}},
 'columns': [{'column_name': 'first_name',
   'table_reference': 'e',
   'actual_table': 'employee'},
  {'column_name': 'last_name',
   'table_reference': 'e',
   'actual_table': 'employee'},
  {'column_name': 'growth',
   'table_reference': 'gr',
   'actual_table': 'GrowthRates'},
  {'column_name': 'doctor_id',
   'table_reference': 'gr',
   'actual_table': 'GrowthRates'},
  {'column_name': 'id', 'table_reference': 'e', 'actual_table': 'employee'},
  {'column_name': 'growth',
   'table_reference': 'gr',
   'actual_table': 'GrowthRates'},
  {'column_name': 'doctor_id', 'table_reference': None, 'actual_table': None},
  {'

In [28]:
pd_save_csv(df, csv_file_path)

## Domain related 49

In [9]:
csv_data_file = '/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge_KT_KD_-question-then-enhanced_results.csv'

In [11]:
ex_df = pd.read_csv('/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge - Sheet1.csv')

In [10]:
df = pd.read_csv(csv_data_file)

In [12]:
df['gt_sql'] = ex_df['gt_sql']

In [34]:
df.tail()

Unnamed: 0,question,enhanced_question,generated_sql_query,generated_query_result,generated_sql_error,generated_raw_result,gt_sql,em,ex,gt_execution
43,Sản phẩm nào bán chạy nhất hôm nay?,"Dựa trên số lượng bán ra, thống kê và cho biết...","SELECT i.item_name AS product_name, SUM(sid.qu...",Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,,"SELECT sid.item_name AS product_name, SUM(sid....",False,False,
44,Top 5 sản phẩm bán chậm nhất trong tháng 5.,Thống kê số lượng bán ra của tất cả các sản ph...,"SELECT i.item_name, SUM(sid.quantity) AS total...",Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,,"SELECT sid.item_name AS product_name, SUM(sid....",False,False,
45,Thống kê sản phẩm bán chạy nhất quý 1.,Thống kê tổng số lượng bán ra của mỗi sản phẩm...,"SELECT sid.item_name AS product_name, SUM(sid....",Sản phẩm có số lượng bán ra cao nhất trong quý...,,"[('BQV test quantity', 255.0)]","SELECT sid.item_name AS product_name, SUM(sid....",False,False,"[('BQV test quantity', 255.0), ('Mydocalm 150 ..."
46,Báo cáo chi phí theo từng hạng mục trong quý 1.,Lập báo cáo tổng hợp chi phí toàn hệ thống tro...,"SELECT reason AS 'Hạng mục chi phí', SUM(amoun...",Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,,"SELECT rc.value AS category, SUM(ev.amount) AS...",False,False,
47,Có bao nhiêu lịch hẹn có trạng thái là 4?,Thống kê tổng số lượng lịch hẹn có mã trạng th...,"SELECT COUNT(A.id) AS total_appointments, RC.v...",Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,"[(0, None)]",SELECT COUNT(id) AS TotalAppointmentsWithStatu...,False,False,"[(0,)]"


In [17]:
from pandas import isna
from tqdm.notebook import tqdm
from pprint import pprint

pbar = tqdm(df.iterrows())

for idx, row in pbar:
    try:
        # if pd.isna(row["generated_query_result"]): continue
        df.at[idx, "em"] = is_exact_match(row["gt_sql"], row["generated_sql_query"])
        
        gt_exec = str( run_sql(row["gt_sql"]) )
        df.at[idx, "ex"] = is_execution_match(
            gt_exec, row["generated_raw_result"]
        )
        df.at[idx, 'gt_execution'] = gt_exec
    except Exception as e:
        print('-'*20 + str(idx) + '-'*20)
        print('-'*10 + f'Error: {e}' + '-'*20)
        pprint(row.to_dict(), indent=2)
        print('-'*20)
        pass
pbar.close()

0it [00:00, ?it/s]

Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: 
Answer: nan
Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: 
Answer: nan
Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: 
Answer: nan
Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: [('Nguyễn Thị ', 'Tuyết'), ('Trần Hậu Thạch ', 'Lâm'), ('Bùi Văn ', 'Vương'), ('Vinh', 'Bác sĩ'), ('Admin', None), ('Hoàng Trọng', 'Bình')]
Answer: nan
Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: 
Answer: nan
Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: [('Nguyễn Thị Thu  Hiền',), ('Nguyễn Thị Thu  Hằng',), ('Đinh Thị  Gấm',), ('Phạm Thị  Mai',)]
Answer: nan
Error normalizing execution result: 'float' object has no attribute 'strip' at
Query: [('Vinh Marketing',)]
Answer: nan
Error normalizing SQL: argument 'sql': 'float' object

In [43]:
pd_save_csv(df, csv_data_file)

In [23]:
df['em'].value_counts()

em
False    47
True      1
Name: count, dtype: int64

In [24]:
df['ex'].value_counts()

ex
False    42
True      6
Name: count, dtype: int64

In [29]:
len(df[df['gt_execution'] == ''])

12

In [39]:
df['generated_raw_result'].isna().value_counts()

generated_raw_result
True     26
False    22
Name: count, dtype: int64

Result on `accountant` domain's gen question + original question -> text2sql

In [46]:
df['em'].value_counts()

em
False    27
Name: count, dtype: int64

In [47]:
df['ex'].value_counts()

ex
False    20
True      7
Name: count, dtype: int64

In [40]:
pd_save_csv(df, csv_data_file)

In [41]:
df.columns

Index(['question', 'enhanced_question', 'generated_sql_query',
       'generated_query_result', 'generated_sql_error', 'generated_raw_result',
       'gt_sql', 'em', 'ex', 'gt_execution'],
      dtype='object')

In [42]:
df = pd_read_csv(csv_data_file)

In [43]:
df['em'].value_counts()

em
False    43
True      5
Name: count, dtype: int64

In [44]:
df['ex'].value_counts()

ex
False    32
True     16
Name: count, dtype: int64

### Leveled question

In [45]:
question_file_path = '/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge_KT_KD_-question-only.csv'

In [46]:
qdf = pd.read_csv(question_file_path)


In [47]:
df['level'] = qdf['level']

In [48]:
df.groupby('level')['em'].value_counts()

level  em   
easy   False    20
       True      5
hard   False    23
Name: count, dtype: int64

In [49]:
df.groupby('level')['ex'].value_counts()

level  ex   
easy   True     14
       False    11
hard   False    21
       True      2
Name: count, dtype: int64

## No-domain 49

In [9]:
import pandas as pd
import csv

In [None]:
mdf = pd.read_csv('/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge_KT_KD_results.csv')

In [10]:
mdf['question'].to_csv('/mnt/Code/code/AI/agentic-AI/SQL-QA/data/GSV/generated-data/GSV-data-Nam-200_results_eval_LLM-as-judge_KT_KD_-question-only.csv', index=False, quoting=csv.QUOTE_ALL)

In [49]:
mdf['gt_sql'] = df['gt_sql']

In [54]:
from pandas import isna
from tqdm.notebook import tqdm

pbar = tqdm(mdf.iterrows())

for idx, row in pbar:
    try:
        # if pd.isna(row["generated_query_result"]): continue
        mdf.at[idx, "em"] = is_exact_match(row["gt_sql"], row["gen_sql"])
        gt_exec = str( run_sql(row["gt_sql"]) )
        gen_exe = str(run_sql(row['gen_sql']))
        mdf.at[idx, "ex"] = is_execution_match(
            gt_exec, gen_exe
        )
        mdf.at[idx, 'gt_execution'] = gt_exec
    except Exception as e:
        print('-'*20)
        print(row)
        print('ERR: ' + str(e))
        print('-'*20)
        pass
pbar.close()

0it [00:00, ?it/s]

  mdf.at[idx, "em"] = is_exact_match(row["gt_sql"], row["gen_sql"])
  mdf.at[idx, "ex"] = is_execution_match(


Error normalizing SQL: Invalid expression / Unexpected token. Line 1, Col: 37.
  SELECT SUM(sid.total_amount) AS `[4mTổng[0m doanh thu` FROM sale_invoice_detail sid JOIN sale_invoice si ON sid.sale_invoice_id = si.id JOIN it at
Query: SELECT SUM(sid.total_amount) AS `Tổng doanh thu` FROM sale_invoice_detail sid JOIN sale_invoice si ON sid.sale_invoice_id = si.id JOIN item i ON sid.item_name = i.item_name JOIN reference_code rc ON i.item_type_id = rc.id WHERE MONTH(si.created_on) = MONTH(CURRENT_DATE()) AND YEAR(si.created_on) = YEAR(CURRENT_DATE()) AND rc.value IN ('Thuốc', 'Mỹ phẩm');
Error normalizing SQL: Invalid expression / Unexpected token. Line 1, Col: 37.
  SELECT SUM(sid.total_amount) AS `[4mTổng[0m doanh thu` FROM sale_invoice_detail sid JOIN sale_invoice si ON sid.sale_invoice_id = si.id JOIN it at
Query: SELECT SUM(sid.total_amount) AS `Tổng doanh thu` FROM sale_invoice_detail sid JOIN sale_invoice si ON sid.sale_invoice_id = si.id JOIN item i ON sid.item_name = i.item_

In [55]:
mdf['em'].value_counts()

em
False    28
True     20
Name: count, dtype: int64

In [56]:
mdf['ex'].value_counts()

ex
True     27
False    21
Name: count, dtype: int64

In [57]:
mdf.head()

Unnamed: 0,ROLE,question,gt_sql,gt_execution,gen_sql,gen_execution,llm_em,llm_ex,gen_response,gen_sql_error,gt_schema,role,gen_schema,em,ex,llm_judge_em,llm_judge_ex
0,1.0,Chi nhánh nào có tổng chi phí dịch vụ phát sin...,"SELECT b.branch_name AS branch_name, SUM(si.to...",,"SELECT b.branch_name AS branch_name, SUM(si.to...",,False,True,,,"{\n ""appointment_service"": {\n ""alias"": ""a...",,"{\n ""service_invoice"": {\n ""alias"": ""si"",\...",True,True,,
1,1.0,Bác sĩ nào có số lượng lịch hẹn tăng trưởng nh...,WITH MonthlyAppointments AS ( SELECT e.id as d...,,WITH MonthlyAppointments AS ( SELECT doctor_id...,,False,True,,,"{\n ""DoctorTrends"": {\n ""alias"": null,\n ...",,"{\n ""GrowthRates"": {\n ""alias"": ""gr"",\n ...",False,True,,
2,1.0,Liệt kê các telesale và số lượng lịch hẹn họ t...,"SELECT CONCAT(e.first_name, ' ', e.last_name) ...",,"SELECT e.first_name, e.last_name, COUNT(a.tele...",,False,True,,,"{\n ""employee"": {\n ""alias"": ""e"",\n ""co...",,"{\n ""employee"": {\n ""alias"": ""e"",\n ""co...",False,True,,
3,1.0,Tìm danh sách các bác sĩ đã thực hiện lịch hẹn...,"SELECT DISTINCT e.first_name, e.last_name FROM...","[('Nguyễn Thị ', 'Tuyết'), ('Trần Hậu Thạch ',...","SELECT e.first_name, e.last_name, e.nickname, ...","[('Vinh', 'Marketing', None, '0987654321', 'Ch...",False,False,,,"{\n ""employee"": {\n ""alias"": ""e"",\n ""co...",,"{\n ""employee"": {\n ""alias"": ""e"",\n ""co...",False,False,,
4,1.0,Tìm danh sách các kỹ thuật viên đã thực hiện l...,"SELECT DISTINCT CONCAT(e.first_name, ' ', e.la...",,"SELECT DISTINCT e.first_name AS Ten, e.last_na...",,False,True,,,"{\n ""employee"": {\n ""alias"": ""e"",\n ""co...",,"{\n ""employee"": {\n ""alias"": ""e"",\n ""co...",False,True,,


In [58]:
mdf.tail()

Unnamed: 0,ROLE,question,gt_sql,gt_execution,gen_sql,gen_execution,llm_em,llm_ex,gen_response,gen_sql_error,gt_schema,role,gen_schema,em,ex,llm_judge_em,llm_judge_ex
43,1.0,Sản phẩm nào bán chạy nhất hôm nay?,"SELECT sid.item_name AS product_name, SUM(sid....",,"SELECT sid.item_name AS product_name, SUM(sid....",,,,Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,"""argument 'sql': 'float' object cannot be conv...",,"{\n ""sale_invoice_detail"": {\n ""alias"": ""s...",True,True,,
44,1.0,Top 5 sản phẩm bán chậm nhất trong tháng 5.,"SELECT sid.item_name AS product_name, SUM(sid....",,"SELECT sid.item_name AS product_name, SUM(sid....",,,,Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,"""argument 'sql': 'float' object cannot be conv...",,"{\n ""sale_invoice"": {\n ""alias"": ""si"",\n ...",True,True,,
45,1.0,Thống kê sản phẩm bán chạy nhất quý 1.,"SELECT sid.item_name AS product_name, SUM(sid....","[('BQV test quantity', 255.0), ('Mydocalm 150 ...","SELECT sid.item_name AS product_name, SUM(sid....","[('BQV test quantity', 255.0), ('Mydocalm 150 ...",,,Dưới đây là thống kê 10 sản phẩm bán chạy nhất...,,"""argument 'sql': 'float' object cannot be conv...",,"{\n ""sale_invoice_detail"": {\n ""alias"": ""s...",True,True,,
46,1.0,Báo cáo chi phí theo từng hạng mục trong quý 1.,"SELECT rc.value AS category, SUM(ev.amount) AS...",,"SELECT rc.value AS category, SUM(ev.amount) AS...",,,,Không tìm thấy dữ liệu phù hợp với yêu cầu của...,,"""argument 'sql': 'float' object cannot be conv...",,"{\n ""expense_voucher"": {\n ""alias"": ""ev"",\...",True,True,,
47,,Có bao nhiêu lịch hẹn có trạng thái là 4?,SELECT COUNT(id) AS TotalAppointmentsWithStatu...,"[(0,)]",SELECT COUNT(id) AS TotalAppointmentsWithStatu...,"[(0,)]",False,True,,,"{\n ""appointment"": {\n ""alias"": ""a"",\n ...",,"{\n ""appointment"": {\n ""alias"": null,\n ...",True,True,,
