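# Evaluation SQL Query Result (Refactored)

This notebook runs the generated SQL queries from the inference worksheet against the configured databases, scores their results against the expected query results (precision, recall, F1, accuracy), and uploads the evaluation to Google Sheets.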

**Authors**
1. Alfan Dinda Rahmawan (alfan.d.rahmawan@gdplabs.id)

## Installation

In [None]:
# Install the pinned mysql-connector-python package into the active kernel's environment.
# To install the project's full dependency list instead, run: !pip install -r requirements.txt
%pip install -q mysql-connector-python==9.2.0

## Setup and Configuration

In [1]:
import os
import pandas as pd
import logging
from dotenv import load_dotenv
from modules.google_sheets_writer import GoogleUtil
from tqdm import tqdm
import ast

from modules.database_connection import connect_to_mariadb
from modules.sql_query_tester import SQLQueryTester
from modules.evaluator import TextToSQLEvaluator
from modules.constants import (
    ColumnName
)

# Configure logging once for the whole notebook: INFO level, with
# timestamp, logger name and level on every record.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Load environment variables before any of the os.getenv lookups further down.
load_dotenv()


# Registry of experiments. For each experiment, every test-data type maps to
# the worksheet that holds its inference output and the worksheet that
# receives its evaluation result.
# NOTE(review): "exp_id" is not read by any visible cell, and EXP_ID below is
# set independently ("l" here vs "1" in the registry) — confirm which is intended.
EXPERIMENT = {
    "ft_qwen_7b_coder": dict(
        exp_id="1",
        core_employee=dict(
            inference_sheet="inference_core_employee",
            evaluation_sheet="eval_result_core_employee",
        ),
        time_management=dict(
            inference_sheet="inference_time_management",
            evaluation_sheet="eval_result_time_management",
        ),
        core_employee_old=dict(
            inference_sheet="inference_core_employee_old",
            evaluation_sheet="eval_result_core_employee_old",
        ),
    ),
}

# Spreadsheet that is read for inference rows and written with evaluation rows.
GOOGLE_SPREADSHEET_ID = "1dDMqrol_DrEMjvLy88IRu2WdHN7T5BU0LrD8ORLuNPI"
GOOGLE_SPREADSHEET_URL = f"https://docs.google.com/spreadsheets/d/{GOOGLE_SPREADSHEET_ID}/edit?usp=sharing"


# Selection of the run to evaluate: experiment, test-data type, and the
# experiment id used to filter the inference rows.
EXPERIMENT_NAME = "ft_qwen_7b_coder"
DATA_TEST_TYPE = "core_employee"
EXP_ID = "l"

# Resolve both worksheet names from the registry in a single lookup pass.
INFERENCE_SHEET_NAME, EVAL_SHEET_NAME = (
    EXPERIMENT[EXPERIMENT_NAME][DATA_TEST_TYPE][sheet_key]
    for sheet_key in ("inference_sheet", "evaluation_sheet")
)

# Environment-driven settings. Every lookup yields None when the variable is unset.

# Database names passed to connect_to_mariadb in the connection cell.
DB_CORE_MANAGEMENT = os.environ.get('CORE_DB_NAMES')
DB_TIME_MANAGEMENT = os.environ.get('TIME_MANAGEMENT_DB_NAMES')

# Credentials for the Google Sheets client.
GOOGLE_SHEETS_CLIENT_EMAIL = os.environ.get('GOOGLE_SHEETS_CLIENT_EMAIL')
GOOGLE_SHEETS_PRIVATE_KEY = os.environ.get('GOOGLE_SHEETS_PRIVATE_KEY')

# Shared client: reads the inference worksheet and is handed to the sheet writer.
# Note the argument order: private key first, then client e-mail.
google: GoogleUtil = GoogleUtil(GOOGLE_SHEETS_PRIVATE_KEY, GOOGLE_SHEETS_CLIENT_EMAIL)

## Database Connection

In [2]:
# Open one connection per database, time management first and core second
# (the same order as before, so the connection messages appear unchanged).
time_management_db, core_employee_db = (
    connect_to_mariadb(db_name)
    for db_name in (DB_TIME_MANAGEMENT, DB_CORE_MANAGEMENT)
)

# Lookup table handed to SQLQueryTester; keys are the short database labels.
databases = {'core': core_employee_db, 'time': time_management_db}

Connected to MariaDB Server version 8.0.41-0ubuntu0.22.04.1
Connected to MariaDB Server version 8.0.41-0ubuntu0.22.04.1


### Sanity Check

In [3]:
# Smoke-test one connection with a known query before running the evaluation.
# Point `connection` at time_management_db instead to check the other database.
connection = core_employee_db
query = """
SELECT e.name AS 'nama_karyawan' FROM employees e JOIN termination_entries te ON e.id = te.employee_id WHERE YEAR(te.effective_date) = 2025;
"""
# dictionary=True makes each fetched row a dict keyed by column name.
cursor = connection.cursor(dictionary=True)
try:
    cursor.execute(query)
    result = cursor.fetchall()
finally:
    # Always release the cursor, even when the query raises.
    cursor.close()
print(result)

[{'nama_karyawan': 'Pham Rubert'}, {'nama_karyawan': 'Timmy Turner'}]


## Load and Process Data

In [4]:
# Load prediction results
from typing import List

# The first worksheet row is used as the header; the remaining rows are the data.
rows: List[list] = google.retrieve_worksheet(GOOGLE_SPREADSHEET_ID, INFERENCE_SHEET_NAME)
df_generator_result: pd.DataFrame = pd.DataFrame(rows[1:], columns=rows[0])

# Keep only the rows of the selected experiment. `.copy()` detaches the result
# from the unfiltered frame, so the later cell that adds the generated-result
# column writes to an independent frame instead of a slice
# (which can trigger pandas' SettingWithCopyWarning).
df_generator_result = df_generator_result[
    df_generator_result[ColumnName.EXP_ID] == str(EXP_ID)
].copy()
if df_generator_result.empty:
    # An EXP_ID that matches nothing would otherwise yield an empty evaluation silently.
    logger.warning(
        "No rows with %s == %r found in worksheet %r",
        ColumnName.EXP_ID, str(EXP_ID), INFERENCE_SHEET_NAME,
    )

# Extract queries and results
list_generated_sql_query = df_generator_result[ColumnName.GENERATED_SQL_QUERY].tolist()
list_expected_query_result = df_generator_result[ColumnName.EXPECTED_QUERY_RESULT].tolist()

# Convert string representations to Python objects.
# ast.literal_eval raises on any cell that is not a valid Python literal.
exp_text_to_sql = [ast.literal_eval(result_str) for result_str in list_expected_query_result]

## Execute Queries

In [5]:
from modules.postprocess import extract_sql_query, extract_clean_sql_query

# Execute every generated query against its target database and collect the
# raw results, in row order, for the evaluator.
pred_text_to_sql = []
for idx, row in tqdm(df_generator_result.iterrows(), total=len(df_generator_result), desc="Processing queries"):
    database_type = row[ColumnName.DATABASE_TYPE]
    # The sheet stores the raw model output (the preview shows JSON-wrapped
    # text); extract_sql_query pulls the SQL statement out of it.
    query = extract_sql_query(row[ColumnName.GENERATED_SQL_QUERY])
    # A tester is built per row because default_db depends on the row.
    # NOTE(review): if SQLQueryTester is safe to reuse, one instance per
    # database type would avoid rebuilding it every iteration — confirm.
    query_tester = SQLQueryTester(databases, default_db=database_type, refresh_interval=3600)
    results, error_message = query_tester.execute_query(query)  # used for fine tuning result.
    if error_message:
        # Surface execution failures instead of silently discarding them.
        # Assumes error_message is falsy on success — TODO confirm.
        logger.warning("Query at row %s failed: %s", idx, error_message)
    pred_text_to_sql.append(results)

# Store the stringified results with a single column assignment rather than
# one cell-level write per loop iteration.
df_generator_result[ColumnName.GENERATED_QUERY_RESULT] = [str(results) for results in pred_text_to_sql]


Processing queries: 100%|██████████| 52/52 [00:00<00:00, 230.97it/s]


## Evaluate Results

In [6]:
# Preview the first two rows, which now include the generated query results.
df_generator_result.head(n=2)

Unnamed: 0,Exp ID,No,Prompt,Generated SQL Query,Expected SQL Query,Expected Query Result,Database,Time Taken,Generated Query Result
208,l,1,Bagaimana perbandingan jumlah karyawan berdasa...,"```json\n{\n ""sql_query"": ""SELECT o.name AS '...","SELECT organizations.name AS ""organization_nam...",[{'organization_name': 'Information Technology...,core,7.0638086795806885,"[{'organization_name': 'Human Resources', 'tot..."
209,l,2,Bagaimana data termination berdasarkan nama ja...,"{\n ""sql_query"": ""SELECT e.name AS 'employee_...","SELECT job_titles.name AS ""job_title_name"", CO...","[{'job_title_name': 'Information Technology', ...",core,4.123811483383179,"[{'employee_name': 'Timmy Turner', 'job_title_..."


In [7]:
# Guard against stale kernel state: predictions and expectations must describe
# the same rows, which only holds when the load and execute cells ran in order.
if len(pred_text_to_sql) != len(exp_text_to_sql):
    raise ValueError(
        f"Prediction/expectation count mismatch: {len(pred_text_to_sql)} predicted vs "
        f"{len(exp_text_to_sql)} expected; re-run the load and execute cells in order."
    )

# Create evaluator instance (predicted results first, expected results second)
evaluator = TextToSQLEvaluator(pred_text_to_sql, exp_text_to_sql)

# Create evaluation DataFrame; this is the frame uploaded in the next section
eval_df = evaluator.create_evaluation_dataframe(df_generator_result, list_generated_sql_query, EXP_ID)

# Print evaluation summary (overall averages followed by per-query details)
evaluator.print_evaluation_summary()


Overall Metrics:
Average Precision: 0.1321
Average Recall: 0.1538
Average F1 Score: 0.1386
Average Accuracy: 0.1154

Detailed Results:

Query 1:
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
Accuracy: 1.0000

Query 2:
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Accuracy: 0.0000

Items in prediction but not in expected (Predicted but not in Ground Truth):
  {'Employee Name': 'Timmy Turner', 'Job Title Id': '06a8aa72-37aa-47cb-9315-f2a265e200f7', 'Effective Date': '2019-08-05', 'Termination Reason': 'Sakit berkepanjangan dan tidak dapat lanjut bekerja setelah 12 (dua belas) bulan'}

Items in expected but not in prediction (Ground Truth but not Predicted):
  {'Job Title Name': 'Information Technology', 'Termination Count': 1}

Query 3:
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Accuracy: 0.0000

Items in prediction but not in expected (Predicted but not in Ground Truth):
  {'Current Location': 'Bandung', 'Point Of Hire Location': None, 'Number Of Employees': 2}
  {'Curre

## Upload to Google Sheets

In [None]:
from modules.google_sheets_writer import GoogleSheetsWriter, GoogleUtil


# Writer bound to the evaluation worksheet of the configured spreadsheet.
# NOTE(review): batch_delay is presumably in seconds — confirm against GoogleSheetsWriter.
writer = GoogleSheetsWriter(
    google_util=google,
    sheet_id=GOOGLE_SPREADSHEET_ID,
    worksheet_name=EVAL_SHEET_NAME,
    max_retries=5,
    batch_size=10,
    batch_delay=2,
)

# Push the evaluation rows to the worksheet.
result = writer.write_dataframe(eval_df)

# Report the number of written rows, then list every row-level failure, if any.
logger.info(f"Successfully wrote {result.successful_rows} rows")
if result.failed_rows > 0:
    logger.error(f"Failed to write {result.failed_rows} rows")
    for error in result.errors:
        logger.error(f"Row {error['row_number']}: {error['error']}")