# Installation

In [28]:
# Install the notebook's dependencies: the ollama client, everything listed in
# requirements.txt, and tqdm (used for progress bars further down).
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH, which may
# not be the kernel's environment — consider `%pip` so packages land in the
# interpreter that runs this notebook.
!pip install -q ollama
!pip install -r requirements.txt
!pip install -q tqdm

Note: you may need to restart the kernel to use updated packages.


## Install the GLAIR SDK

### Install from local

In [None]:
import os
import getpass
# Set environment variables for Git credentials.
# getpass is used so the values are not echoed into the notebook output.
username = getpass.getpass("Input Your GitHub Username: ")
token = getpass.getpass("Input Your GitHub Personal Access Token: ")

# Exported so the `!` shell lines below can expand $GIT_USERNAME / $GIT_TOKEN.
os.environ['GIT_USERNAME'] = username
os.environ['GIT_TOKEN'] = token

# Clone using the credential helper
# NOTE(review): this changes the *global* git config, and the `>` redirect
# below overwrites any existing ~/.git-credentials file. The token is stored
# there in plaintext and stays after the notebook finishes — confirm this is
# acceptable on the machine running the notebook.
!git config --global credential.helper store
!echo "https://$GIT_USERNAME:$GIT_TOKEN@github.com" > ~/.git-credentials
!git clone https://github.com/GDP-ADMIN/gen-ai-internal.git
# Editable install of the gllm-retrieval library from the cloned checkout.
!cd gen-ai-internal/libs/gllm-retrieval && pip install -e .

### Install from GitHub

In [None]:
import getpass
import subprocess
import sys

def install_sdk_library():
    """Install `gllm-retrieval` from the private GitHub repository with pip.

    Prompts for a GitHub personal access token, then installs the
    `libs/gllm-retrieval` subdirectory of the `f/gllm-retriever-text-to-sql`
    branch into the interpreter that is running this notebook.

    Raises:
        ValueError: If no token was entered.
        subprocess.CalledProcessError: If pip exits with a non-zero status. The
            token is masked in the reported command and in the echoed output.
    """
    token = getpass.getpass("Input Your Personal Access Token: ").strip()
    if not token:
        # An empty token would also make the masking below corrupt every string.
        raise ValueError("A GitHub personal access token is required.")

    requirement = (
        f"gllm-retrieval @ git+https://{token}@github.com/GDP-ADMIN/gen-ai-internal.git"
        "@f/gllm-retriever-text-to-sql#subdirectory=libs/gllm-retrieval"
    )
    # Argument list without a shell: the token is never parsed by a shell, and
    # `sys.executable -m pip` installs into the running kernel's environment.
    cmd = [sys.executable, "-m", "pip", "install", requirement]

    def _redact(text):
        """Mask the access token wherever it appears in `text`."""
        return text.replace(token, "***")

    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if completed.returncode != 0:
        sys.stdout.write(_redact(completed.stderr))
        # Report a masked command so the token does not end up in the traceback.
        raise subprocess.CalledProcessError(
            returncode=completed.returncode,
            cmd=[_redact(part) for part in cmd],
        )

    sys.stdout.write(_redact(completed.stdout))

# Run the installer; this prompts interactively for the access token.
install_sdk_library()

# Evaluate models for text-to-SQL

## Load test data from Google Sheets

In [8]:
import os
import pandas as pd
import json

from typing import List
from dotenv import load_dotenv

# load_dotenv() (python-dotenv) makes the variables read below available
# through the process environment.
load_dotenv()

# Spreadsheet holding the evaluation data — put your own spreadsheet id here.
GOOGLE_SPREADSHEET_ID: str = "1dDMqrol_DrEMjvLy88IRu2WdHN7T5BU0LrD8ORLuNPI"
# Shareable link derived from the id above.
GOOGLE_SPREADSHEET_URL: str = (
    f"https://docs.google.com/spreadsheets/d/{GOOGLE_SPREADSHEET_ID}/edit?usp=sharing"
)
# Worksheet (tab) that contains the test cases.
DATA_TEST_SHEET_NAME: str = "catapa_test_core_employee"

# Google Sheets credentials; each value is None when the variable is unset.
GOOGLE_SHEETS_CLIENT_EMAIL: str = os.environ.get('GOOGLE_SHEETS_CLIENT_EMAIL')
GOOGLE_SHEETS_PRIVATE_KEY: str = os.environ.get('GOOGLE_SHEETS_PRIVATE_KEY')

In [9]:
## Google Auth
# Google Authentication
from modules.google_sheets_writer import GoogleUtil
from IPython.display import display, Markdown

# Build the GoogleUtil helper from the credentials loaded from the environment;
# `google` is reused below to read the test worksheet and to upload results.
PRIVATE_KEY = GOOGLE_SHEETS_PRIVATE_KEY
google: GoogleUtil = GoogleUtil(PRIVATE_KEY, GOOGLE_SHEETS_CLIENT_EMAIL)


In [10]:
## Load Data Test
# The first worksheet row is used as the header; the remaining rows are the test cases.
rows: List[list] = google.retrieve_worksheet(GOOGLE_SPREADSHEET_ID, DATA_TEST_SHEET_NAME)
if not rows:
    # Fail with a readable message instead of an IndexError on rows[0].
    raise ValueError(
        f"Worksheet '{DATA_TEST_SHEET_NAME}' in spreadsheet {GOOGLE_SPREADSHEET_ID} returned no rows."
    )
df_data_test: pd.DataFrame = pd.DataFrame(rows[1:], columns=rows[0])
display(df_data_test.head(2))

Unnamed: 0,No,Prompt,Category,Expected SQL Query,Expected Query Result,Expected SQL Query (base from catapa),Expected Query Result (base from catapa),Catapa Prompts,Database
0,1,Bagaimana perbandingan jumlah karyawan berdasa...,medium,"SELECT organizations.name AS ""organization_nam...",[{'organization_name': 'Information Technology...,SELECT\n organizations.name AS organiza...,"[{'organization_name': 'ICT', 'total_employees...",System: **System Instruction**:\n - You are a ...,core
1,2,Bagaimana data termination berdasarkan nama ja...,medium,"SELECT job_titles.name AS ""job_title_name"", CO...","[{'job_title_name': 'Information Technology', ...",SELECT\n job_titles.name AS job_title_n...,"[{'job_title': 'HRD - Finance', 'termination_c...",System: **System Instruction**:\n - You are a ...,core


## Inference Model

### Set Database Connection

In [5]:
from dotenv import load_dotenv
import mysql.connector
from mysql.connector import Error
from typing import Dict, List, Any, Optional


def connect_to_mariadb(database_name: str) -> Optional[mysql.connector.connection.MySQLConnection]:
    """
    Connect to the MariaDB database.

    Connection settings are taken from the environment variables
    `MARIADB_HOST`, `MARIADB_PORT`, `MARIADB_USER` and `MARIADB_PASSWORD`.
    Any variable that is unset falls back to the local demo value this
    notebook originally hard-coded, so existing setups keep working.

    Args:
        database_name: Name of the database to select on the server.

    Returns:
        Optional[mysql.connector.connection.MySQLConnection]: A connection object if successful, None otherwise.
    """
    import os  # local import so this cell does not rely on an earlier cell's imports

    try:
        connection = mysql.connector.connect(
            host=os.getenv("MARIADB_HOST", "localhost"),
            port=int(os.getenv("MARIADB_PORT", "33062")),
            user=os.getenv("MARIADB_USER", "app_user_demo"),
            # NOTE(review): fallback kept only for backward compatibility; move
            # the password to the environment/.env and drop this default.
            password=os.getenv("MARIADB_PASSWORD", "StrongPassw0rd!"),
            database=database_name,
        )
    except Error as e:
        print(f"Error connecting to MariaDB: {e}")
        return None

    if not connection.is_connected():
        # Previously this case returned None without any message.
        print(f"Error connecting to MariaDB: connection to '{database_name}' is not active")
        return None

    db_info = connection.get_server_info()
    print(f"Connected to MariaDB Server version {db_info}")
    return connection

def execute_query(connection: mysql.connector.connection.MySQLConnection, query: str) -> List[Dict[str, Any]]:
    """
    Execute a query on the MariaDB database.

    Args:
        connection: The database connection object.
        query: The SQL query to execute.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the query results.

    Raises:
        Whatever the cursor raises while executing or fetching; the cursor is
        closed before the exception propagates.
    """
    # dictionary=True is passed so rows are returned as dicts (see Returns).
    cursor = connection.cursor(dictionary=True)
    try:
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # Always release the cursor, including when the query fails.
        cursor.close()

# Open one connection per evaluation database; each name is None if connecting failed.
time_management_db = connect_to_mariadb("ru4f_time_management")
core_employee_db = connect_to_mariadb("ru4f_core_employee")


Connected to MariaDB Server version 5.5.5-10.5.28-MariaDB-ubu2004
Connected to MariaDB Server version 5.5.5-10.5.28-MariaDB-ubu2004


## Inference using API endpoint

### Set OpenAI/DeepSeek connection

In [4]:
from gllm_inference.lm_invoker.openai_compatible_lm_invoker import OpenAICompatibleLMInvoker
import os

# OpenAI endpoint settings, read from the environment (None when unset).
OPEN_AI_KEY = os.environ.get("OPEN_AI_KEY")
OPENAI_ENDPOINT = os.environ.get("OPENAI_ENDPOINT")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")

# DeepSeek endpoint settings, read the same way; not passed to the invoker below.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
DEEPSEEK_ENDPOINT = os.environ.get("DEEPSEEK_ENDPOINT")
DEEPSEEK_MODEL = os.environ.get("DEEPSEEK_MODEL")


# The invoker is configured with the OpenAI values.
lm_invoker = OpenAICompatibleLMInvoker(
    api_key=OPEN_AI_KEY,
    base_url=OPENAI_ENDPOINT,
    model_name=OPENAI_MODEL,
)


### Inference 

#### System prompt

In [5]:
system_prompt = """**System Instruction**:
- You are a SQL generator expert.
- Your role is to create SQL queries based on provided database schema, table relationships, master data, the current date, and a user's instruction.
- If the query uses any table listed in `Data Trustee Enabled Tables`, you must modify the query to include data trustee compliance requirements. Otherwise, you must not add any JOIN or filter related to data trustee compliance.

**Database Schema**:
- The schema for the database is as follows:
  {schema}

**Table Relationships**:
- The relationships between these tables are described below:
  {relations}

**Master Data**:
- Master data in the database are listed here:
  {master_data}

**Data Trustee Enabled Tables**:
- List of tables requiring a data trustee and specific columns for JOIN operations. Entries may indicate a prerequisite join with another table before joining to the `employment_statuses` table, denoted by `table_name.column_name`. A join must first occur with `table_name` then followed by a join to `employment_statuses` using `column_name` from `table_name`:
  {data_trustee_tables}

**Anonymized Entities**:
- List of anonymized entities along with their descriptions that may be used in the `WHERE` clause:
  {anonymized_entities_description}

**Query Examples**:
- Here are the query examples for some cases:

> Turnover Rate:
Calculate the turnover rate for each year over the last 10 years. When querying turnover rate, do not filter the approval status in the WHERE clause.
query: WITH year_series AS (
SELECT YEAR(DATE_SUB(CURRENT_DATE, INTERVAL n YEAR)) AS year
FROM (
    SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
) numbers
)
SELECT
ys.year AS `Year`,
SUM(
CASE
    WHEN te.effective_date BETWEEN STR_TO_DATE(CONCAT(ys.year, '-01-01'), '%Y-%m-%d')
        AND STR_TO_DATE(CONCAT(ys.year, '-12-31'), '%Y-%m-%d')
        AND te.approval_status = 'APPROVED'
    THEN 1
    ELSE 0
END
) / SUM(
CASE
    WHEN employees.join_date <= STR_TO_DATE(CONCAT(ys.year, '-01-01'), '%Y-%m-%d')
        AND (
            te.id IS NULL
            OR te.effective_date >= STR_TO_DATE(CONCAT(ys.year, '-01-01'), '%Y-%m-%d')
            OR te.approval_status != 'APPROVED'
        )
    THEN 1
    ELSE 0
END
) * 100 AS `Turnover Rate`
FROM
employees
JOIN employment_statuses ON employees.id = employment_statuses.employee_id
CROSS JOIN
year_series ys
LEFT JOIN
termination_entries te ON employees.id = te.employee_id
WHERE employment_statuses.organization_id IN ('[ORGANIZATION_IDS]')
AND employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]')
AND employment_statuses.location_id IN ('[LOCATION_IDS]')
GROUP BY
ys.year;

Retrieve the turnover rate for the current year by default if the user does not specify a turnover rate period.
query: SELECT
    YEAR(CURRENT_DATE) AS `Year`,
    (SUM(
        CASE
            WHEN te.effective_date BETWEEN STR_TO_DATE(CONCAT(YEAR(CURRENT_DATE), '-01-01'), '%Y-%m-%d')
                AND STR_TO_DATE(CONCAT(YEAR(CURRENT_DATE), '-12-31'), '%Y-%m-%d')
                AND te.approval_status = 'APPROVED'
        THEN 1
        ELSE 0
    END
    ) / SUM(
        CASE
            WHEN employees.join_date <= STR_TO_DATE(CONCAT(YEAR(CURRENT_DATE), '-01-01'), '%Y-%m-%d')
                AND (
                    te.id IS NULL
                    OR te.effective_date >= STR_TO_DATE(CONCAT(YEAR(CURRENT_DATE), '-01-01'), '%Y-%m-%d')
                    OR te.approval_status != 'APPROVED'
                )
        THEN 1
        ELSE 0
    END
    )) * 100 AS `Turnover Rate`
FROM
    employees
JOIN employment_statuses ON employees.id = employment_statuses.employee_id
LEFT JOIN
    termination_entries te ON employees.id = te.employee_id
WHERE employment_statuses.organization_id IN ('[ORGANIZATION_IDS]')
AND employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]')
AND employment_statuses.location_id IN ('[LOCATION_IDS]');

> Quarterly new employee count:
Calculate the number of employees hired each quarter.
query: SELECT
  YEAR(employees.join_date) AS `Year`,
  QUARTER(employees.join_date) AS `Quarter`,
  COUNT(*) AS `New Employee Count`
FROM
  employees
JOIN employment_statuses ON employees.id = employment_statuses.employee_id
WHERE
  employees.active = TRUE
  AND employment_statuses.organization_id IN ('[ORGANIZATION_IDS]')
  AND employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]')
  AND employment_statuses.location_id IN ('[LOCATION_IDS]')
GROUP BY
  YEAR(employees.join_date),
  QUARTER(employees.join_date)
ORDER BY
  `Year`,
  `Quarter`;

> Employee Managerial Status:
Display all employees' names along with their managerial status (Manager/Non-Manager).
query: SELECT
  employees.name AS 'Employee Name',
  CASE WHEN employees.id IN (SELECT manager_id FROM employees) THEN 'Manager' ELSE 'Non-Manager' END AS 'Managerial Status'
FROM
  employees
JOIN employment_statuses ON employees.id = employment_statuses.employee_id
WHERE
  employees.active = TRUE
  AND employment_statuses.organization_id IN ('[ORGANIZATION_IDS]')
  AND employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]')
  AND employment_statuses.location_id IN ('[LOCATION_IDS]');

> Attendance After Holiday:
Show the attendance status of all employees the day after a holiday.
query: SELECT
  employees.name,
  attendance_statuses.name,
  attendances.date
FROM
  attendances
  JOIN employees ON attendances.employee_id = employees.id
  JOIN attendance_statuses ON attendances.attendance_status_in_id = attendance_statuses.id
  JOIN employment_statuses ON employees.id = employment_statuses.employee_id
WHERE
  attendances.date IN (
    SELECT
      date + INTERVAL 1 DAY
    FROM
      employee_roster_view
    WHERE
      shift_id IS NULL
  )
AND employees.active = TRUE
AND employment_statuses.organization_id IN ('[ORGANIZATION_IDS]')
AND employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]')
AND employment_statuses.location_id IN ('[LOCATION_IDS]');

> Job Titles List:
List all job titles.
query: SELECT
  job_titles.name AS `Job Title`
FROM
  job_titles;

Explanation:
- In this case, the `job_titles` table is not listed in the `Data Trustee Enabled Tables`. Therefore, no JOIN or filter related to data trustee compliance is added, and the query remains simple.

> Example of Query Requiring Prior Join:
Retrieve all loan installments.
Explanation: The `loan_installments` table requires a prior join with the `loan_entries` table before joining with the `employment_statuses` table for data trustee compliance.
query: SELECT * FROM loan_installments
 JOIN loan_entries ON loan_installments.loan_entry_id = loan_entries.id
 JOIN employment_statuses ON loan_entries.employee_id = employment_statuses.employee_id
WHERE employment_statuses.organization_id IN ('[ORGANIZATION_IDS]')
AND employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]')
AND employment_statuses.location_id IN ('[LOCATION_IDS]');

**Current Date**:
- The current date is provided for context:
  {current_date}

**Definitions**:
- Quarter: A quarter is a three-month period on a financial calendar.
  First quarter: January 1 - March 31
  Second quarter: April 1 - June 30
  Third quarter: July 1 - September 30
  Fourth quarter: October 1 - December 31
- Semester: A semester is a six-month period on a financial calendar.
  First semester: January 1 - June 30
  Second semester: July 1 - December 31

**User's Instruction**:
- Below is the user's instruction:
  {user_instruction}

**Task**:
1. **Generate SQL Query**:
   - Generate an SQL query that aggregates data relevant to the user's instruction.

2. **Check for Data Trustee Requirements**:
   - If the query uses any table listed in `Data Trustee Enabled Tables`, modify the query to:
     - Add a JOIN operation with the `employment_statuses` table using the `employee_id` column.
     - Add filters for `employment_statuses.organization_id IN ('[ORGANIZATION_IDS]'), employment_statuses.job_level_id IN ('[JOB_LEVEL_IDS]'), employment_statuses.location_id IN ('[LOCATION_IDS]')`.
     - Ensure prerequisite JOINs are added before joining with the `employment_statuses` table as per the data trustee table list.
   - If the query does not use any table in the `Data Trustee Enabled Tables`, do not add the data trustee modifications.

**Specific SQL Requirements**:
- The SQL query should:
   - Be executable directly in MariaDB 10.4.
   - Do NOT `SELECT` any identifiers like `id` or `employee_id`, etc., except for aggregate functions like `MAX`, `SUM`, `AVG`, etc.
   - Always show name instead of id, for example use religion.name instead of religion.id. If the name is not directly available, join to the respective table to get the name.
   - Always prefix column names with the table name to avoid ambiguity and ensure clarity.
   - Use aliases in the `SELECT`, clause using snake_case.
   - Do NOT use aliases in `FROM` clause. e.g `FROM employees AS e` is not allowed.
   - Correctly handle aggregate functions like `MAX`, `SUM`, `AVG`, etc., within the `HAVING` or `SELECT` clauses, not in the `WHERE` clause.
   - Ensure that JOIN conditions match the correct foreign keys and are structured properly.
   - Always wrap date string in `STR_TO_DATE()` function. Example: STR_TO_DATE('2020-01-01', '%Y-%m-%d').
   - When comparing dates or using date ranges, ensure the comparator is in date type by using `CAST()`. Example:
     ```sql
     WHERE attendance.date BETWEEN CAST(DATE_FORMAT(CURRENT_DATE, '%Y-01-01') AS DATE)
       AND CAST(DATE_FORMAT(CURRENT_DATE, '%Y-12-31') AS DATE)
     ```
   - When trying to find the last day of some period, avoid using `LAST_DAY()` function.
   - Include conditional logic based on table data:
     - If including data from "employees" but not "termination_entries", add:
       `"WHERE employees.active = TRUE"`
     - If including data from "termination_entries", add:
       `"WHERE termination_entries.approval_status = 'APPROVED'"`
     - Avoid combining both `WHERE` statements in the same query.
  - Ensure `WHERE` clauses are placed before the `GROUP BY` clause to filter rows before grouping.
  - Use `HAVING` clauses after `GROUP BY` to apply conditions on aggregated data such as sums, counts, or max values.
  - Do NOT use aliases in `HAVING` clauses.
  - Do not `CAST` the column `custom_data.value`.
  - When performing division, ensure that the denominator is not zero to avoid division by zero.
  - Rename the column names based on the user's instructions, opting for a human-readable format instead of snake case. Make sure the column name and the user question are in the same language.
  - Ensure you reference the correct column name using backticks (`).
  - When filtering with anonymized values (marked with `<...>`):
    - Avoid compare against the `id` column or `id` foreign key column (e.g., `location_id`).
    - Anonymized values follow this pattern: `<ENTITY_NAME_NUMBER>` (e.g., `<LOCATION_1>`)
    - Examples:
        - Correct: `WHERE locations.name = '<LOCATION_1>'`
        - Incorrect: `WHERE locations.id = '<LOCATION_1>'` or `WHERE location_id = '<LOCATION_1>'`
    - This applies to all anonymized entities listed in `Anonymized Entities` section

**SQL Query**:
- Your generated and modified SQL query should be placed here.
- Do NOT add anything else besides the SQL query."""

#### Generate SQL queries using the API

In [None]:
import os
import asyncio
from datetime import datetime
from time import time
from typing import Dict, Any, List, Optional

from tqdm import tqdm
import pandas as pd
from gllm_inference.prompt_builder import OpenAIPromptBuilder
from gllm_inference.request_processor import LMRequestProcessor
from gllm_retrieval.query_transformer.text_to_sql_query_transformer import OneToOneQueryTransformer
from gllm_retrieval.utils.sql_utils import format_sql_query

from modules.database_info.schema import employee_schema, time_management_schema
from modules.database_info.master_data import employee_master_data, time_management_master_data
from modules.database_info.relation import employee_relations, time_management_relations
from modules.database_info.trustee_tables import data_trustee_employee, data_trustee_time_management
from modules.database_info.anonymize_entities import anonymized_entities_description

async def process_sql_queries(
    df_data_test: pd.DataFrame,
    inference_result_dir: str,
    model_name: str,
    system_prompt: str,
    lm_invoker: Any,
    context_data: Dict[str, str]
) -> pd.DataFrame:
    """Generate a SQL query for every test row and checkpoint results to CSV.

    Results are stored in `<inference_result_dir>/<model_name>-using-catapa-prompt.csv`.
    If that file already exists, rows whose `No` is present in it are skipped,
    so an interrupted run can be resumed. A row that fails during generation is
    reported and skipped; it is not written to the CSV.

    Args:
        df_data_test: DataFrame containing test data. The columns `No`,
            `Prompt`, `Expected SQL Query` and `Expected Query Result` are read.
        inference_result_dir: Directory to store results (created if missing).
        model_name: Name of the model used for inference; part of the CSV name.
        system_prompt: System prompt template.
        lm_invoker: LM invoker instance.
        context_data: Dictionary with context data for the prompt; the
            `user_instruction` key is added per row.

    Returns:
        DataFrame with the rows previously on disk followed by the rows
        generated in this call, each row appearing exactly once.
    """
    result_columns = [
        'No',
        'Prompt',
        'Generated SQL Query',
        'Expected SQL Query',
        'Expected Query Result',
        'Time Taken'
    ]
    # Save after this many newly generated rows to limit loss on interruption.
    checkpoint_every = 5

    # Ensure result directory exists
    os.makedirs(inference_result_dir, exist_ok=True)

    # Setup result path and load existing results if available
    sql_generator_result_path = os.path.join(inference_result_dir, f"{model_name}-using-catapa-prompt.csv")

    if os.path.exists(sql_generator_result_path):
        df_previous = pd.read_csv(sql_generator_result_path)
        processed_nos = set(df_previous['No'].astype(int).tolist())
    else:
        df_previous = pd.DataFrame(columns=result_columns)
        processed_nos = set()

    new_results: List[Dict[str, Any]] = []

    def _save_checkpoint() -> pd.DataFrame:
        """Write the previous rows plus all new rows to the CSV and return them."""
        df_new = pd.DataFrame(new_results, columns=result_columns)
        # Always rebuild from the rows originally on disk. Appending the full
        # `new_results` list to an already-extended frame duplicated rows.
        if df_previous.empty:
            df_all = df_new
        else:
            df_all = pd.concat([df_previous, df_new], ignore_index=True)
        df_all.to_csv(sql_generator_result_path, index=False)
        return df_all

    # Setup the query transformer
    prompt_builder = OpenAIPromptBuilder(system_prompt)
    lm_request_processor = LMRequestProcessor(prompt_builder, lm_invoker)
    text_to_sql = OneToOneQueryTransformer(lm_request_processor)

    df_query_result = df_previous
    unsaved_rows = 0

    # Process each row
    with tqdm(total=len(df_data_test), desc="Generating SQL queries") as pbar:
        for _, df_row in df_data_test.iterrows():
            no = int(df_row['No'])

            # Skip already processed items
            if no in processed_nos:
                pbar.update(1)
                pbar.set_postfix({"Status": f"Skipped {no}"})
                continue

            user_instruction = df_row['Prompt']

            try:
                # Measure time and generate SQL
                start_time = time()
                queries = await text_to_sql.transform({
                    **context_data,
                    "user_instruction": user_instruction
                })
                time_taken = time() - start_time

                sql_query = format_sql_query(queries[0])
            except Exception as e:
                # Best effort: report the failing row and continue with the next one.
                pbar.update(1)
                pbar.set_postfix({"Status": f"Error on {no}", "Error": str(e)[:20]})
                print(f"Error processing query {no}: {str(e)}")
                continue

            new_results.append({
                'No': no,
                'Prompt': df_row['Prompt'],
                'Generated SQL Query': sql_query,
                'Expected SQL Query': df_row['Expected SQL Query'],
                'Expected Query Result': df_row['Expected Query Result'],
                'Time Taken': time_taken
            })
            unsaved_rows += 1

            # Update progress
            pbar.update(1)
            pbar.set_postfix({"Status": f"Processed {no}", "Time": f"{time_taken:.2f}s"})

            # Save periodically to prevent data loss
            if unsaved_rows >= checkpoint_every:
                df_query_result = _save_checkpoint()
                unsaved_rows = 0

    # Save remaining results
    if unsaved_rows:
        df_query_result = _save_checkpoint()

    return df_query_result


# Initialize directories and configuration
inference_result_dir = "sql_generator_result"
model_name = OPENAI_MODEL
database_type = "core"
current_date = datetime.now().strftime("%d %B %Y")

if database_type == "core":
    schema = employee_schema
    relations = employee_relations
    master_data = employee_master_data
    data_trustee_tables = data_trustee_employee
    master_data = employee_master_data
else:
    schema = time_management_schema
    relations = time_management_relations
    master_data = time_management_master_data
    data_trustee_tables = data_trustee_time_management
    master_data = time_management_master_data

context_data = {
    "schema": schema,
    "relations": relations,
    "master_data": master_data,
    "data_trustee_tables": data_trustee_tables,
    "anonymized_entities_description": anonymized_entities_description,
    "current_date": current_date
}

# Test a single query first
prompt_builder = OpenAIPromptBuilder(system_prompt)
lm_request_processor = LMRequestProcessor(prompt_builder, lm_invoker)
text_to_sql = OneToOneQueryTransformer(lm_request_processor)

test_query = "Bagaimana perbandingan jumlah karyawan berdasarkan organisasi? Munculkan nama organisasi dan total karyawan."
test_result = await text_to_sql.transform({**context_data, "user_instruction": test_query})
print("Test query result:")
print(format_sql_query(test_result[0]))

# Process all queries
df_query_result = await process_sql_queries(
    df_data_test=df_data_test,
    inference_result_dir=inference_result_dir,
    model_name=model_name,
    system_prompt=system_prompt,
    lm_invoker=lm_invoker,
    context_data=context_data
)

print(f"SQL generation complete. Results saved to {os.path.join(inference_result_dir, f'{model_name}-using-catapa-prompt.csv')}")

### Upload results to Google Sheets

In [48]:
from modules.google_sheets_writer import GoogleSheetsWriter
import logging

# Name of the worksheet that receives the generated queries.
generate_sql_query = "generate_sql_query_open_ai"

# Writer targeting the same spreadsheet the test data was read from.
# NOTE(review): the exact semantics of batch_size / max_retries / batch_delay
# (e.g. whether the delay is in seconds) are defined in
# modules.google_sheets_writer — confirm there before tuning.
writer = GoogleSheetsWriter(
    google_util=google,  # GoogleUtil instance created in the auth cell
    sheet_id=GOOGLE_SPREADSHEET_ID,
    worksheet_name=generate_sql_query,
    batch_size=10,  # Customize batch size
    max_retries=5,  # Customize retry attempts
    batch_delay=2  # Customize delay between batches
)
# Write the DataFrame produced by process_sql_queries
result = writer.write_dataframe(df_query_result)

# Log a summary, then one line per row that could not be written
logging.info(f"Successfully wrote {result.successful_rows} rows")
if result.failed_rows > 0:
    logging.error(f"Failed to write {result.failed_rows} rows")
    for error in result.errors:
        logging.error(f"Row {error['row_number']}: {error['error']}")

  0%|          | 0/3 [00:00<?, ?it/s]2025-02-28 13:34:23,812 - INFO - Successfully wrote row 1/30
2025-02-28 13:34:26,475 - INFO - Successfully wrote row 2/30
2025-02-28 13:34:29,170 - INFO - Successfully wrote row 3/30
2025-02-28 13:34:31,799 - INFO - Successfully wrote row 4/30
2025-02-28 13:34:34,279 - INFO - Successfully wrote row 5/30
2025-02-28 13:34:36,714 - INFO - Successfully wrote row 6/30
2025-02-28 13:34:39,173 - INFO - Successfully wrote row 7/30
2025-02-28 13:34:41,834 - INFO - Successfully wrote row 8/30
2025-02-28 13:34:44,599 - INFO - Successfully wrote row 9/30
2025-02-28 13:34:47,363 - INFO - Successfully wrote row 10/30
 33%|███▎      | 1/3 [00:28<00:57, 28.59s/it]2025-02-28 13:34:51,884 - INFO - Successfully wrote row 11/30
2025-02-28 13:34:54,327 - INFO - Successfully wrote row 12/30
2025-02-28 13:34:56,797 - INFO - Successfully wrote row 13/30
2025-02-28 13:34:59,509 - INFO - Successfully wrote row 14/30
2025-02-28 13:35:02,004 - INFO - Successfully wrote row 15/

# =============================================================================