# In [1]:
import os
import json
import sqlite3
from glob import glob

# Root directory that holds one sub-directory per database (e.g. bike_1,
# concert_singer). main() looks inside each sub-directory for a file named
# "<sub-directory>.sqlite" or "<sub-directory>.db".
BASE_DIR = "./database"  # Replace with the correct path

def get_tables(conn):
    """Return the names of all tables recorded in ``sqlite_master``.

    Args:
        conn: An open ``sqlite3`` connection.

    Returns:
        A list of table names, in the order SQLite yields them.
    """
    query = "SELECT name FROM sqlite_master WHERE type='table'"
    names = []
    # Each result row carries a single field: the table name.
    for row in conn.execute(query):
        names.append(row[0])
    return names

def get_columns(conn, table_name):
    """Retrieve column names and primary-key columns for a given table.

    Args:
        conn: An open ``sqlite3`` connection.
        table_name: Name of the table to inspect.

    Returns:
        A ``(columns, primary_keys)`` tuple of two lists of column names,
        both in table-definition order.
    """
    # PRAGMA arguments cannot be bound as parameters, so quote the name as an
    # SQL identifier (doubling embedded quotes). This keeps names that contain
    # spaces or reserved words from producing a syntax error.
    quoted_name = '"' + table_name.replace('"', '""') + '"'

    # PRAGMA table_info(table_name) returns rows of the form:
    # (cid, name, type, notnull, dflt_value, pk)
    cursor = conn.cursor()
    cursor.execute(f"PRAGMA table_info({quoted_name})")
    columns_info = cursor.fetchall()

    columns = [col[1] for col in columns_info]  # col[1] is column name
    # col[5] is 0 for non-key columns and the 1-based position within the
    # primary key otherwise, so any positive value marks a key column.
    # Testing "== 1" would drop all but the first column of a composite key.
    primary_keys = [col[1] for col in columns_info if col[5] > 0]
    return columns, primary_keys

def get_foreign_keys(conn, table_name):
    """Retrieve foreign key info for a given table.

    Args:
        conn: An open ``sqlite3`` connection.
        table_name: Name of the table whose foreign keys are listed.

    Returns:
        A dict of ``{"from_column": "referenced_table.referenced_column"}``.
        When a constraint names no target column (``REFERENCES parent``), the
        target is resolved from the referenced table's primary key. If that
        cannot be resolved, the column part is the string ``"None"``.
    """

    def quote(name):
        # PRAGMA arguments cannot be bound as parameters; quote as an SQL
        # identifier so names with spaces or reserved words stay valid.
        return '"' + name.replace('"', '""') + '"'

    # PRAGMA foreign_key_list(table_name) returns rows:
    # (id, seq, table, from, to, on_update, on_delete, match)
    cursor = conn.cursor()
    cursor.execute(f"PRAGMA foreign_key_list({quote(table_name)})")
    fkeys_info = cursor.fetchall()

    foreign_keys = {}
    parent_pk_cache = {}  # referenced table -> its primary-key columns, in key order
    for fkey in fkeys_info:
        seq = fkey[1]        # position of this column within the constraint
        ref_table = fkey[2]  # referenced table name
        from_col = fkey[3]   # column in the current table
        to_col = fkey[4]     # column in the referenced table, or None

        if to_col is None:
            # Implicit reference to the parent's primary key: look the key up
            # and pick the column at the same position as this FK column.
            if ref_table not in parent_pk_cache:
                parent_info = conn.execute(
                    f"PRAGMA table_info({quote(ref_table)})"
                ).fetchall()
                key_columns = sorted(
                    (col for col in parent_info if col[5] > 0),
                    key=lambda col: col[5],
                )
                parent_pk_cache[ref_table] = [col[1] for col in key_columns]
            parent_pk = parent_pk_cache[ref_table]
            if seq < len(parent_pk):
                to_col = parent_pk[seq]

        foreign_keys[from_col] = f"{ref_table}.{to_col}"
    return foreign_keys

def infer_description(table_name):
    """Build a placeholder one-sentence description for a table.

    The text is generic boilerplate derived only from the table name; it is
    meant to be replaced or edited by hand later if a real description is
    wanted.
    """
    template = "This is the {} table."
    return template.format(table_name)

def main():
    """Dump the schema of every database under BASE_DIR to schema_info.json.

    Each sub-directory of BASE_DIR is treated as one database whose file is
    named "<dir>.sqlite" or "<dir>.db" (".sqlite" wins if both exist).
    Sub-directories with neither file are reported and skipped. For every
    table the columns, primary keys, foreign keys and a placeholder
    description are recorded, and the result is written as JSON to
    "schema_info.json" in the current working directory.
    """
    schema_data = {}

    # Sort the directory names: os.listdir order is arbitrary, and a stable
    # order makes the generated JSON reproducible between runs.
    db_dirs = sorted(
        d for d in os.listdir(BASE_DIR) if os.path.isdir(os.path.join(BASE_DIR, d))
    )

    for db_id in db_dirs:
        # Candidate files in order of preference: .sqlite first, then .db.
        candidates = [
            os.path.join(BASE_DIR, db_id, f"{db_id}{ext}") for ext in (".sqlite", ".db")
        ]
        db_path = next((path for path in candidates if os.path.exists(path)), None)
        if db_path is None:
            print(f"No .sqlite or .db file found for {db_id}, skipping...")
            continue

        conn = sqlite3.connect(db_path)
        try:
            db_tables_info = {}
            for table_name in get_tables(conn):
                columns, primary_keys = get_columns(conn, table_name)
                db_tables_info[table_name] = {
                    "columns": columns,
                    "primary_keys": primary_keys,
                    "foreign_keys": get_foreign_keys(conn, table_name),
                    "description": infer_description(table_name),
                }
        finally:
            # Release the connection even if introspection of a table fails.
            conn.close()

        schema_data[db_id] = {"tables": db_tables_info}

    # Write out the schema_info.json
    with open("schema_info.json", "w", encoding="utf-8") as f:
        json.dump(schema_data, f, indent=2)

    print("schema_info.json generated successfully.")

# Run the schema export only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# Output: schema_info.json generated successfully.
