In [1]:
import os
from sql_nameguard.llm_suggest import LLMSuggester
from sql_nameguard.analyze import SQLAnalyzer

from dotenv import load_dotenv
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
openai_api_key = os.getenv("OPENAI_API_KEY")

In [3]:
sample_sql = """
WITH customer_order_totals AS (          -- ‚úÖ GOOD CTE name
    SELECT
        cust.customer_id,               -- ‚úÖ clear column name
        SUM(item.quantity * item.unit_price) AS lifetime_value  -- ‚úÖ good semantic alias
    FROM customers AS cust              -- ‚úÖ good table alias
    JOIN orders AS ord                  -- ‚úÖ good table alias
        ON cust.customer_id = ord.customer_id
    JOIN order_items AS item            -- ‚úÖ good table alias
        ON ord.order_id = item.order_id
    WHERE ord.status = 'COMPLETED'
    GROUP BY cust.customer_id
),

t2 AS (                                 -- ‚ùå POOR CTE name (what is t2?)
    SELECT
        cot.customer_id AS cid,         -- üòê meh alias: shorter but less clear
        cot.lifetime_value AS v,        -- ‚ùå bad alias: single letter, opaque meaning
        COUNT(o2.order_id) AS cnt       -- ‚ùå vague alias, "cnt" of what?
    FROM customer_order_totals AS cot   -- ‚úÖ good table alias
    JOIN orders AS o2                   -- üòê okay but a bit cryptic if more tables existed
        ON cot.customer_id = o2.customer_id
    WHERE o2.status = 'COMPLETED'
    GROUP BY cot.customer_id, cot.lifetime_value
),

high_value_customers AS (               -- ‚úÖ GOOD CTE name
    SELECT
        cid AS customer_id,             -- üòê fixing cid‚Ä¶ but alias hop is unnecessary
        v   AS lifetime_value,          -- ‚ùå still opaque ‚Üí better to keep original name
        cnt AS completed_order_count    -- ‚úÖ finally a descriptive alias
    FROM t2
    WHERE v > 1000
)

SELECT
    hvc.customer_id,                    -- ‚úÖ clear
    hvc.lifetime_value,                 -- ‚úÖ clear
    hvc.completed_order_count,          -- ‚úÖ clear
    CONCAT(cust.first_name, ' ', cust.last_name) AS full_name,  -- ‚úÖ good column alias
    cust.email,
    cust.city AS customer_city          -- ‚úÖ descriptive alias
FROM high_value_customers AS hvc        -- ‚úÖ good table alias
JOIN customers AS cust                  -- ‚úÖ good table alias
    ON cust.customer_id = hvc.customer_id
ORDER BY hvc.lifetime_value DESC;

"""

In [4]:
from sql_nameguard.parse_sql import SQLParser

In [5]:
# initial_aliases = SQLParser.parse(sample_sql)

In [6]:
# initial_aliases

In [7]:
analyzer = SQLAnalyzer()

aliases = analyzer.analyze(sample_sql, threshold=0.5)

Alias 't2' appears to be a poor name for the code (similarity=0.220)

code:
SELECT cot.customer_id AS cid /* üòê meh alias: shorter but less clear */, cot.lifetime_value AS v /* ‚ùå bad alias: single letter, opaque meaning */, COUNT(o2.order_id) AS cnt /* ‚ùå vague alias, "cnt" of what? */ FROM customer_order_totals AS cot /* ‚úÖ good table alias */ JOIN orders AS o2 /* üòê okay but a bit cryptic if more tables existed */ ON cot.customer_id = o2.customer_id WHERE o2.status = 'COMPLETED' GROUP BY cot.customer_id, cot.lifetime_value

Alias 'lifetime_value' appears to be a poor name for the code (similarity=0.159)

code:
SUM(item.quantity * item.unit_price)

Alias 'v' appears to be a poor name for the code (similarity=0.326)

code:
cot.lifetime_value

Alias 'lifetime_value' appears to be a poor name for the code (similarity=0.371)

code:
v

Alias 'hvc' appears to be a poor name for the code (similarity=0.493)

code:
high_value_customers AS hvc /* ‚úÖ good table alias */

Alias 'ord' app

In [8]:
aliases

[{'alias': 'customer_order_totals',
  'code': "SELECT cust.customer_id /* ‚úÖ clear column name */, SUM(item.quantity * item.unit_price) AS lifetime_value /* ‚úÖ good semantic alias */ FROM customers AS cust /* ‚úÖ good table alias */ JOIN orders AS ord /* ‚úÖ good table alias */ ON cust.customer_id = ord.customer_id JOIN order_items AS item /* ‚úÖ good table alias */ ON ord.order_id = item.order_id WHERE ord.status = 'COMPLETED' GROUP BY cust.customer_id",
  'type': 'CTE',
  'similarity': 0.6159325},
 {'alias': 't2',
  'code': 'SELECT cot.customer_id AS cid /* üòê meh alias: shorter but less clear */, cot.lifetime_value AS v /* ‚ùå bad alias: single letter, opaque meaning */, COUNT(o2.order_id) AS cnt /* ‚ùå vague alias, "cnt" of what? */ FROM customer_order_totals AS cot /* ‚úÖ good table alias */ JOIN orders AS o2 /* üòê okay but a bit cryptic if more tables existed */ ON cot.customer_id = o2.customer_id WHERE o2.status = \'COMPLETED\' GROUP BY cot.customer_id, cot.lifetime_value'

In [9]:
suggester = LLMSuggester(model='openai/gpt-5-nano', api_key=openai_api_key)

In [10]:
responses = []
for alias in aliases:
    if alias['similarity'] < 0.4:
        print(f'Selecting alternates for alias: {alias["alias"]}...')
        response = suggester.suggest_aliases(alias_type=alias['type'], code=alias['code'])
        responses.append(response)

Selecting alternates for alias: t2...
Selecting alternates for alias: lifetime_value...
Selecting alternates for alias: v...
Selecting alternates for alias: lifetime_value...
Selecting alternates for alias: cot...


In [None]:
responses

[{'response_object': {}, 'raw_response': ''},
 {'response_object': {}, 'raw_response': ''},
 {'response_object': {}, 'raw_response': ''},
 {'response_object': {}, 'raw_response': ''},
 {'response_object': {}, 'raw_response': ''}]

: 