In [2]:
from langchain_community.utilities import SQLDatabase

# Open the SQLite database file ../data/catastici.db through LangChain's
# SQLDatabase wrapper. The URI uses a relative path, so it is presumably resolved
# against the notebook's working directory.
db = SQLDatabase.from_uri("sqlite:///../data/catastici.db")

# Smoke test of the connection: show the SQL dialect, the tables the wrapper
# exposes, and one sample row of the `catastici` table.
print(db.dialect)
print(db.get_usable_table_names())
# As the cell output shows, db.run returns the result rows as one string, so the
# answer comparisons in later cells are string comparisons.
db.run("SELECT * FROM catastici LIMIT 1;")

sqlite
['catastici']


"[(3183, 'liberal', 'campi', 'casa e bottega da barbier', 70, 'campo vicino alla chiesa')]"

In [52]:
import pandas as pd
# Load the generated-query results to evaluate. Later cells read the columns
# 'question', 'question_id', 'evidence', 'true_query', 'true_answer' and
# 'generated_query' from this frame.
query_res = pd.read_csv('./test_data_generated.csv')

In [53]:
import ast
def clean_query(sql_query):
    """Turn a stored batch of generated SQL into a list of single statements.

    Each candidate is truncated at its first ';' and the terminator is put
    back, so any text emitted after the first statement is dropped.

    Args:
        sql_query: Normally the string form of a Python list of SQL strings
            (e.g. "['SELECT 1; ...', 'SELECT 2']"). A list/tuple of SQL
            strings, a quoted single SQL string, or a raw SQL string is also
            accepted and treated as the corresponding list of candidates.

    Returns:
        list[str]: One entry per candidate, each ending in exactly one ';'.

    Raises:
        ValueError: If `sql_query` is neither a string nor a list/tuple
            (for example a NaN coming from an empty CSV cell).
    """
    if isinstance(sql_query, (list, tuple)):
        # Already parsed (e.g. the function is called a second time).
        candidates = list(sql_query)
    else:
        try:
            candidates = ast.literal_eval(sql_query)
        except (ValueError, SyntaxError):
            if not isinstance(sql_query, str):
                raise
            # Not a Python literal: treat the text as one raw SQL query
            # instead of aborting the whole evaluation loop.
            candidates = sql_query

    if isinstance(candidates, str):
        # A bare string would otherwise be iterated character by character.
        candidates = [candidates]

    # Keep only the first statement of every candidate.
    return [query.split(';')[0] + ';' for query in candidates]

def check_sql_executability(query, db):
    """Run `query` against `db` and report whether it executed.

    Args:
        query: SQL text to execute.
        db: Object exposing a `run(query)` method (the SQLDatabase wrapper
            in this notebook).

    Returns:
        Whatever `db.run(query)` returns on success, or the sentinel string
        "ERROR" if running the query raised an exception.
    """
    try:
        return db.run(query)
    except Exception:
        # Only ordinary failures count as a non-executable query;
        # KeyboardInterrupt / SystemExit must still be able to stop the notebook.
        return "ERROR"

In [54]:
# clean output
# For every row, keep the first generated candidate that executes and store its
# answer next to it.
# NOTE: 'generated_query' is overwritten in place with plain SQL text instead of
# the original list literal, so reload the CSV before re-running this cell.
for idx, row in query_res.iterrows():
    query_list_clean = clean_query(row['generated_query'])
    final_out = None
    for out in query_list_clean:
        answer = check_sql_executability(out, db)
        if answer != "ERROR":
            final_out = out
            break
    if final_out is None:
        # No candidate ran: keep all cleaned candidates (one per line) for
        # later inspection and mark the row as failed.
        final_out = '\n'.join(query_list_clean)
        answer = "ERROR"
    query_res.loc[idx, 'generated_answer'] = answer
    query_res.loc[idx, 'generated_query'] = final_out

# Check

In [55]:
# Rows whose generated query could not be executed.
is_error = query_res['generated_answer'] == 'ERROR'
query_res.loc[is_error, 'output'] = 'ERROR'

# Rows whose generated answer equals the reference answer (exact match).
# Applied second, so it takes precedence wherever both masks are true;
# every other row keeps a missing 'output'.
is_exact_match = query_res['generated_answer'] == query_res['true_answer']
query_res.loc[is_exact_match, 'output'] = 'EM'

In [70]:
# Count rows per label; rows still unlabelled (NaN) are left out by the
# default behaviour of value_counts.
query_res['output'].value_counts()

output
EM       253
ERROR     13
Name: count, dtype: int64

In [13]:
# The API key is expected to be available in the environment. Uncomment the two
# lines below only to set it for the current session, and never commit a real key.
# import os
# os.environ["OPENAI_API_KEY"] = ''

from openai import OpenAI
# No key is passed explicitly; presumably the client picks up OPENAI_API_KEY
# from the environment (see the commented lines above) — confirm for the
# installed openai version.
client = OpenAI()

In [65]:
# Judge instructions 1: the model is told it receives the schema, the question,
# the true SQL query and the generated SQL query, and must answer YES or NO.
# NOTE(review): the text contains the typo "accapted". It is left untouched
# because editing the prompt changes what is sent to the model and would make
# the recorded outputs below stale.
system_prompt_1 = """You are an assistant that is an expert in assessing Sqlite SQL queries.
You are given a Database Schema, a question, a true SQL query that answers the question and generated SQL query to answer the question.
Compare the true and generated SQL queries and evaluate if the generated query answers the question and acceptable.
Note that true and generated SQL queries are not the same, but both may be accapted as long as they both answer the question.
Respond with YES if generated query is acceptable, or NO if generated query does not answer the question.
"""

# Judge instructions 2: the model is told it receives the schema, the question
# and a single SQL query, and must answer YES or NO.
system_prompt_2 = """You are an assistant that is an expert in assessing Sqlite SQL queries.
You are given a Database Schema, a question and an SQL query to answer the question.
Look at the SQL query and assess if the query answers the question and acceptable.
Respond with YES if the query is acceptable, or NO if generated query does not answer the question.
"""

# User-message template with the placeholders {question}, {true_query} and
# {generated_query}; it embeds the `catastici` table schema.
# NOTE(review): the template always contains a "True SQL query" section, while
# system_prompt_2 describes only one SQL query as input — confirm whether a
# template without the true query was intended for the second judge.
prompt = """### Database Schema
CREATE TABLE [catastici]
(
    [Owner_ID] INT, -- Unique ID of each owner of the property
    [Owner_First_Name] NVARCHAR(30), -- First name of the owner of the property
    [Owner_Family_Name] NVARCHAR(30), -- Family name of the owner of the property
    [Property_Type] NVARCHAR(100), -- Specific type of the property given in Italian. For example, "casa", "bottega da barbier", "bottega da fruttariol".
    [Rent_Income] INT, -- Rent price of the property that the owner receives as income, given in Venice ancient gold coin ducato.
    [Property_Location] NVARCHAR(100) -- Ancient approximate toponym of the property given in Italian.
);

### Question
{question}

### True SQL query
{true_query}

### Generated SQL query
{generated_query}

### Response
"""

In [58]:
# Take one row that is neither an execution error nor an exact match
# (its 'output' is still missing) and show its question and both queries.
unlabelled_rows = query_res[query_res['output'].isna()]
example = unlabelled_rows.iloc[10]
for field in ('question', 'true_query', 'generated_query'):
    print(example[field])

Could you enumerate all individuals who possess ownership of properties?
SELECT DISTINCT "Owner_First_Name", "Owner_Family_Name" 
FROM catastici

SELECT "Owner_ID", "Owner_First_Name", "Owner_Family_Name"
FROM catastici
GROUP BY "Owner_ID", "Owner_First_Name", "Owner_Family_Name"
HAVING COUNT(*) > 1;


In [48]:
# Ask the judge model about the single `example` row, using the first set of
# judge instructions, and print its verdict.
example_user_message = prompt.format(
    question=example['question'],
    true_query=example['true_query'],
    generated_query=example['generated_query'],
)
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[
        {"role": "system", "content": system_prompt_1},
        {"role": "user", "content": example_user_message},
    ],
)
print(response.choices[0].message.content)

NO


In [66]:
# Judge every row that is neither an execution error nor an exact match with
# the first set of judge instructions; verdicts are keyed by the row's index.
responses_1 = {}
rows_to_judge = query_res[query_res.output.isna()]
for idx, r in rows_to_judge.iterrows():
    user_message = prompt.format(
        question=r['question'],
        true_query=r['true_query'],
        generated_query=r['generated_query'],
    )
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt_1},
            {"role": "user", "content": user_message},
        ],
    )
    responses_1[idx] = response.choices[0].message.content

In [59]:
# Judge the same unlabelled rows again with the second set of judge
# instructions; verdicts are keyed by the row's index.
# NOTE(review): the user message is built from the shared `prompt` template,
# which includes the true SQL query, although system_prompt_2 only mentions a
# single SQL query — confirm this is intended.
responses_2 = {}
rows_to_judge = query_res[query_res.output.isna()]
for idx, r in rows_to_judge.iterrows():
    user_message = prompt.format(
        question=r['question'],
        true_query=r['true_query'],
        generated_query=r['generated_query'],
    )
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt_2},
            {"role": "user", "content": user_message},
        ],
    )
    responses_2[idx] = response.choices[0].message.content

In [None]:
# responses_2 = responses.copy()

In [72]:
# Rows without a label (neither execution error nor exact match), without the
# all-missing 'output' column.
query_wrong = query_res[query_res['output'].isna()].drop(columns='output')

In [69]:
# Row indices on which the two judges returned different verdicts (also lists a
# key of responses_2 that is missing from responses_1).
[k for k, v in responses_2.items() if responses_1.get(k) != v]

[67, 107, 111, 131, 171, 176, 186, 187, 256, 262, 345, 365, 382, 421, 488]

In [73]:
# Write both judges' verdicts into query_wrong, one column per judge
# (all of output_1 first, then all of output_2).
for column, verdicts in (('output_1', responses_1), ('output_2', responses_2)):
    for k, v in verdicts.items():
        query_wrong.loc[k, column] = v

In [75]:
# Verdict distribution of the first judge (system_prompt_1).
query_wrong.output_1.value_counts()

output_1
NO     159
YES     75
Name: count, dtype: int64

In [76]:
# Verdict distribution of the second judge (system_prompt_2).
query_wrong.output_2.value_counts()

output_2
NO     172
YES     62
Name: count, dtype: int64

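**Results**
- Not an exact match -> 234<br>
    - Judged acceptable by the LLM judge (`output_1`) -> 75
    - Judged wrong by the LLM judge (`output_1`) -> 159
- Execution error -> 13<br>
- Exact match -> 253
- Total correct (exact match + judged acceptable, 253 + 75) -> **328**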

In [80]:
# Print every row on which the two judges disagree, for manual inspection.
judges_disagree = query_wrong.output_1 != query_wrong.output_2
for idx, row in query_wrong[judges_disagree].iterrows():
    header = f"{idx} - {row['question_id']}"
    verdicts = f"1-{row['output_1']}, 2-{row['output_2']}"
    print(header, verdicts, sep='\n')
    print(f"Question: {row['question']}")
    print(f"Evidence: {row['evidence']}")
    print()
    print('True SQL:', row['true_query'], sep='\n')
    print('Generated SQL:', row['generated_query'], sep='\n')
    print('\n\n')

67 - 13
1-YES, 2-NO
Question: Across the dataset, what quantity of various property types exists?
Evidence: nan

True SQL:
SELECT COUNT(DISTINCT "Property_Type") AS num_property_types
FROM catastici;
Generated SQL:

SELECT "Property_Type", COUNT("Property_Type") AS Property_Count
FROM catastici
GROUP BY "Property_Type"
ORDER BY Property_Count DESC;



107 - 21
1-YES, 2-NO
Question: What sum of rent is accumulated from every "bottega da casarol" category property?
Evidence: "Property_Type" = "bottega da casarol"

True SQL:
SELECT SUM("Rent_Income") 
FROM catastici 
WHERE "Property_Type" = 'bottega da casarol';
Generated SQL:
SELECT "Property_Type", SUM("Rent_Income") 
FROM catastici 
WHERE "Property_Type" = 'bottega da casarol';



111 - 22
1-YES, 2-NO
Question: Whose properties yield a rental income of 30?
Evidence: nan

True SQL:
SELECT "Owner_First_Name", "Owner_Family_Name" 
FROM catastici 
WHERE "Rent_Income" = 30;
Generated SQL:

SELECT "Owner_First_Name", "Owner_Family_Name", "Pr