In [1]:
from llama_index.utils.workflow import draw_all_possible_flows
import sys
import os
import base64
import requests
# Add the parent directory to sys.path to enable absolute imports
sys.path.append(os.path.abspath("../.."))  # Go up two levels to project root
sys.path.append(os.path.abspath("../../slm-engine"))
sys.path.append(os.path.abspath("../../slm-engine/src"))  # Add src directory specifically

from core.utils import schema_parser

In [2]:
api_url = "http://localhost:9191/api/v1/db/get-schema/sqlite"
# --- End of configuration ---

# --- Function to call API to get Schema ---
def fetch_schema_from_api(db_file_path, endpoint_url=api_url):
    """
    Upload a SQLite file to the schema API and return the schema payload.

    The file is read fully into memory and POSTed as a multipart upload under
    the form field ``file``. The call counts as successful only when the JSON
    response has ``code == 0`` and a non-empty ``data`` object that contains a
    ``tables`` key.

    Args:
        db_file_path: Path of the SQLite database file to upload.
        endpoint_url: URL of the schema endpoint (defaults to ``api_url``).

    Returns:
        The ``data`` part of the API response on success, otherwise ``None``.
        Every failure (unreadable/empty file, connection problem, timeout,
        HTTP error, invalid JSON, API error code) is printed and reported as
        ``None`` instead of being raised.
    """
    # Fallback label: every handler below formats `file_name`, so it must be
    # bound before anything can fail (including basename() on a bad argument).
    file_name = str(db_file_path)

    # --- Step 1: read the file ---
    # Kept in its own try block: requests' exceptions derive from OSError, so
    # an `except OSError` sharing a try with the HTTP call would swallow them.
    try:
        file_name = os.path.basename(db_file_path)
        with open(db_file_path, 'rb') as f:
            db_content = f.read()
    except FileNotFoundError:
        print(f"  Error: File not found: {db_file_path}")
        return None
    except OSError as e:
        # e.g. the path is a directory or the file is not readable.
        print(f"  Error: Cannot read file '{db_file_path}': {e}")
        return None
    except Exception as e:
        print(f"  Unexpected error when reading file: '{file_name}' -> {e}")
        return None

    if not db_content:
        print(f"  Error: File is empty or cannot be read: {db_file_path}")
        return None

    # --- Step 2: send the file and interpret the response ---
    try:
        files = {'file': (file_name, db_content, 'application/octet-stream')}

        print(f"  Sending '{file_name}' to API...")
        response = requests.post(endpoint_url, files=files, timeout=60)
        response.raise_for_status()  # turn HTTP 4xx/5xx into an exception
        result = response.json()

        if result.get("code") == 0:
            schema_data = result.get("data", {})
            if not schema_data or 'tables' not in schema_data:  # Check if 'tables' exists
                print(f"  Error: API returned success code but missing 'tables' data for '{file_name}'.")
                return None
            print(f"  Success: Schema retrieved for '{file_name}'.")
            return schema_data  # Only return the 'data' part containing the schema

        error_message = result.get("message", "Unknown error from API")
        print(f"  API Error: '{file_name}' -> {error_message} (Error code: {result.get('code')})")
        return None

    except requests.exceptions.ConnectionError:
        print(f"  Error: Cannot connect to API at {endpoint_url}.")
        return None  # Can add sys.exit(1) if you want to stop completely
    except requests.exceptions.Timeout:
        print(f"  Error: API request timed out for file '{file_name}'.")
        return None
    except requests.exceptions.JSONDecodeError:
        # Must come before RequestException: JSONDecodeError is one of its
        # subclasses, so the generic handler would otherwise shadow this one.
        print(f"  Error: Cannot decode JSON from API response for '{file_name}'.")
        print(f"  Response received (first 100 characters): {response.text[:100]}...")
        return None
    except requests.exceptions.RequestException as e:
        print(f"  Request Error: '{file_name}' -> {e}")
        return None
    except Exception as e:
        print(f"  Unexpected error when calling API: '{file_name}' -> {e}")
        return None

In [14]:
import json

# Maximum number of question / gold-SQL pairs written to the output file.
MAX_EXAMPLES = 10

# Fetch the schema of the target database from the API
file_path = "../../database/company_employee/company_employee.sqlite"
schema = fetch_schema_from_api(file_path)
if schema is None:
    # fetch_schema_from_api signals every failure by returning None; stop here
    # with a clear message instead of failing later on schema['tables'].
    raise RuntimeError(f"Could not retrieve schema for {file_path}; see the messages printed above.")

# The database ID is the file name without its extension.
db_id = os.path.splitext(os.path.basename(file_path))[0]
print(f"Database ID: {db_id}")

# Load questions and gold SQL queries from the dataset
dataset_paths = ['../dataset/test_set.json', '../dataset/0_train_spider.json']
all_examples = []

for path in dataset_paths:
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Keep only the examples that belong to the selected database.
        # Collected per file first, so a file that fails midway adds nothing.
        matching_examples = [
            {'question': item['question'], 'gold_sql': item['query']}
            for item in data
            if item['db_id'] == db_id
        ]
    except (OSError, ValueError, KeyError, TypeError) as e:
        # Best effort: a missing, malformed or unexpectedly shaped dataset file
        # is reported and skipped so the remaining files are still processed.
        print(f"Error loading {path}: {e}")
    else:
        all_examples.extend(matching_examples)

# Create output directory if it doesn't exist
os.makedirs("./output", exist_ok=True)

# NOTE(review): schema_parser comes from core.utils; its result is written to
# the file as text below, so it is presumably a string - confirm.
schema_prompt = schema_parser(schema['tables'], "Synthesis", include_sample_data=True)

# Write schema and examples to file (UTF-8 so non-ASCII content is written
# the same way on every platform)
with open(f"./output/{db_id}.txt", "w", encoding="utf-8") as f:
    f.write(schema_prompt)
    f.write("\n\n========= Questions and Gold SQL Queries =========\n")
    for i, example in enumerate(all_examples[:MAX_EXAMPLES], 1):
        f.write(f"\nExample {i}: {example['question']}\n")
        f.write(f"Gold SQL: {example['gold_sql']}\n")

  Sending 'company_employee.sqlite' to API...
  Success: Schema retrieved for 'company_employee.sqlite'.
Database ID: company_employee
