In [None]:
# DANKE at a glance: 

# i. User writes a question
# ii. DANKE finds keywords -> tells the LLM which tables/columns are needed
# iii. We pre-join tables into a view 
# iv. LLM writes SQL easily without worrying about joins

In [2]:
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import faiss
from google import genai

# Load settings from a .env file (if present) into the process environment,
# then read the Gemini key from the environment so it is never hardcoded here.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)

In [24]:
 
 
# Natural-language question used as the running example by the cells below.
user_question = "Show all customers from New York who made orders above 500."


Keyword matching with DANKE (DANKE keeps a keyword dictionary of the database):
    DANKE finds the important words in the question and reports which table and column each one belongs to.
    For the example question, DANKE reports:

    “New York” is in Customer.city

    “orders above 500” is in Orders.amount
    

In [None]:
def danke_keyword_match(user_question):
    """Return the keyword -> table/column matches for a question (stub).

    This is a placeholder for the real DANKE lookup, which would query its
    keyword dictionary. The argument is accepted but not inspected: the same
    fixed mapping is returned for every question.

    Args:
        user_question: The natural-language question (currently unused).

    Returns:
        dict mapping each matched phrase to ``{"table": ..., "column": ...}``.
    """
    # (phrase, table, column) triples standing in for DANKE's dictionary hits.
    canned_hits = (
        ("New York", "Customer", "city"),
        ("orders above 500", "Orders", "amount"),
    )
    return {
        phrase: {"table": table_name, "column": column_name}
        for phrase, table_name, column_name in canned_hits
    }

# Run the keyword matcher on the example question and show the resulting mapping.
KM = danke_keyword_match(user_question)
print(KM)


{'New York': {'table': 'Customer', 'column': 'city'}, 'orders above 500': {'table': 'Orders', 'column': 'amount'}}


In [17]:
def find_joins(tables, schema):
    """Build a FROM-clause join expression that links ``tables``.

    Starting from the first table, each remaining table is attached to a table
    that is already part of the join, using a foreign key declared in
    ``schema`` in either direction. The result is one chained expression
    (``A JOIN B ON ... JOIN C ON ...``).

    Args:
        tables: Iterable of table names. Order is kept; duplicates are ignored.
        schema: Mapping ``table -> {column: ref}``. ``ref`` is ``None`` for a
            plain column, or a ``(referenced_table, referenced_column)`` tuple
            for a foreign key.

    Returns:
        The join expression as a string: ``""`` for no tables, the bare table
        name for a single table.

    Raises:
        KeyError: If a requested table is missing from ``schema``.
        ValueError: If some table cannot be linked to the others through the
            foreign keys in ``schema``.
    """
    tables = list(dict.fromkeys(tables))
    if not tables:
        return ""

    def fk_conditions(anchor, candidate):
        # Equality conditions for every foreign key between the two tables,
        # checking candidate -> anchor first and then anchor -> candidate.
        conditions = []
        for child, parent in ((candidate, anchor), (anchor, candidate)):
            for col, ref in schema[child].items():
                # Plain columns carry ref=None and must be skipped, not indexed.
                if ref is not None and ref[0] == parent:
                    conditions.append(f"{parent}.{ref[1]} = {child}.{col}")
        return conditions

    joined = [tables[0]]
    pending = tables[1:]
    clause = tables[0]
    while pending:
        # Find any not-yet-joined table that connects to the join built so far,
        # so the result does not depend on the tables arriving in FK order.
        link = None
        for candidate in pending:
            for anchor in joined:
                conditions = fk_conditions(anchor, candidate)
                if conditions:
                    link = (candidate, conditions)
                    break
            if link is not None:
                break
        if link is None:
            raise ValueError(
                f"No foreign-key relationship links {pending} to {joined}"
            )
        candidate, conditions = link
        # Several foreign keys between the same pair are combined with AND.
        clause += f" JOIN {candidate} ON {' AND '.join(conditions)}"
        joined.append(candidate)
        pending.remove(candidate)
    return clause


In [None]:
# Toy schema: table -> {column: reference}. A reference is None for a plain
# column, or (referenced_table, referenced_column) for a foreign key.
schema = {
    "Customer": {"id": None},
    "Orders": {"customer_id": ("Customer", "id")}
}

# NOTE: KM is intentionally not redefined in this cell. The mapping returned by
# danke_keyword_match already has the "table" entries the view synthesis needs,
# and overwriting it here would strip the column matches from the LLM prompt
# when the notebook is run top to bottom.


def danke_synthesize_view(KM, schema):
    """Build the ``CREATE VIEW V`` statement that pre-joins the matched tables.

    Args:
        KM: Keyword matches, ``phrase -> {"table": ..., ...}``. Only the
            ``"table"`` entry of each match is read.
        schema: Foreign-key description passed through to ``find_joins``.

    Returns:
        The ``CREATE VIEW V AS SELECT * FROM ...;`` statement as a string.

    Raises:
        ValueError: If no FROM clause can be built (no tables were matched, or
            ``find_joins`` returned nothing for two or more tables).
    """
    # De-duplicate while keeping KM's insertion order. A set would hand the
    # tables to find_joins in an arbitrary order that can change between
    # kernel restarts, making the generated SQL non-reproducible.
    tables = list(dict.fromkeys(match["table"] for match in KM.values()))
    # Find join paths between tables using schema relationships
    join_clauses = find_joins(tables, schema)
    if not join_clauses:
        if len(tables) != 1:
            raise ValueError(f"Cannot build a FROM clause for tables {tables}")
        # A single table needs no join; select from it directly.
        join_clauses = tables[0]
    view_sql = f"CREATE VIEW V AS SELECT * FROM {join_clauses};"
    return view_sql
# Build the CREATE VIEW statement for the matched tables and display it.
view_sql = danke_synthesize_view(KM, schema)
print(view_sql)


CREATE VIEW V AS SELECT * FROM Customer JOIN Orders ON Customer.id = Orders.customer_id;


LLM generates SQL using DANKE info

In [20]:
# Assemble the LLM prompt from the synthesized view, the keyword matches
# (interpolated as the dict's printed form), and the user's question.
prompt = f"""
You are an AI assistant that generates SQL queries.

Database view:
{view_sql}

Keyword matches from DANKE:
{KM}

User question: {user_question}

Generate the SQL query using the view and keywords above.
"""


In [25]:

# Ask Gemini for the query; temperature 0 keeps the answer as repeatable as possible.
response = client.models.generate_content(
    model="gemini-3-flash-preview",
    contents=prompt,
    config={"temperature": 0}
)

# The recorded output shows the model wrapping its answer in a Markdown fence
# (```sql ... ```), and response.text may be None when no text comes back.
# Normalise both so sql_query holds bare SQL rather than Markdown.
sql_query = (response.text or "").strip()
if sql_query.startswith("```"):
    first_line, newline, remainder = sql_query.partition("\n")
    # Multi-line answer: drop the whole opening fence line, language tag included.
    # Single-line answer: only the three leading backticks can be removed.
    sql_query = remainder if newline else first_line[3:]
    sql_query = sql_query.rstrip()
    if sql_query.endswith("```"):
        sql_query = sql_query[:-3]
    sql_query = sql_query.strip()
print("Generated SQL:\n", sql_query)


Generated SQL:
 ```sql
SELECT *
FROM V
WHERE city = 'New York' AND amount > 500;
```
