# Evaluation

In [None]:
import json

def _extract_result(
        all_data_path="./database/structured_database/bfsi_ground_truth_easy.json",
        cypher_data_path="./database/structured_database/output/bfsi_cypher_gpt4o_mini_easy.json",
        sql_data_path="./database/structured_database/output/bfsi_sql_gpt4o_easy.json"
):
    with open(all_data_path, "r") as f:
        all_data = json.load(f)

    with open(cypher_data_path, "r") as f:
        cypher_data = json.load(f)

    with open(sql_data_path, "r") as f:
        sql_data = json.load(f)

    cypher_data_adj = dict([(item['id'], item) for item in cypher_data])
    sql_data_adj = dict([(item['id'], item) for item in sql_data])

    all_data_adj = []
    for item in all_data:
        idx = item['id']
        ground_truth = item['answer']
        sql_answer = sql_data_adj[idx]["sql_answer"] if idx in sql_data_adj else ""
        cypher_answer = cypher_data_adj[idx]["cypher_answer"] if idx in cypher_data_adj else ""
        item = {
            "id": idx,
            "ground_truth": ground_truth,
            "sql_answer": sql_answer,
            "cypher_answer": cypher_answer
        }
        all_data_adj.append(item)
    return all_data_adj

In [2]:
import re
def extract_figure(text):
    if 'None' in text:
        return 0.0
    else:
        numbers = re.findall(r"\d+", text)
        numbers = list(map(float, numbers))
        if len(numbers)>=1:
            return numbers[0]
        if len(numbers)==0:
            return 0.0
        else:
            return numbers

In [3]:
def compare_fn(item):
    result = {}
    ground_truth = item["ground_truth"]
    cypher_answer = item["cypher_answer"]
    sql_answer = item["sql_answer"]
    num_cypher_answer = extract_figure(cypher_answer)
    num_sql_answer = extract_figure(sql_answer)
    result["id"] = item["id"]
    result["is_true_sql_answer"] =  1 if num_sql_answer == ground_truth else 0
    result["is_true_cypher_answer"] =  1 if num_cypher_answer == ground_truth else 0
    result["ground_truth"] = item["ground_truth"]
    result['cypher_answer'] = cypher_answer
    result['sql_answer'] = sql_answer
    result['num_cypher_answer'] = num_cypher_answer
    result['num_sql_answer'] = num_sql_answer
    return result

In [4]:
def measure_accuracy(
    ground_truth="./database/structured_database/bfsi_ground_truth_easy.json",
    cypher_data_path="./database/structured_database/output/bfsi_cypher_gpt4o_mini_easy.json",
    sql_data_path="./database/structured_database/output/bfsi_sql_gpt4o_easy.json"
):
    result_data = _extract_result(
        all_data_path=ground_truth,
        cypher_data_path=cypher_data_path,
        sql_data_path=sql_data_path
    )
    
    n_true_cypher = 0
    n_true_sql = 0

    for (i, item) in enumerate(result_data):
        compare = compare_fn(item=item)
        
        if compare['is_true_cypher_answer']==1:
            n_true_cypher+=1
        if compare['is_true_sql_answer']==1:
            n_true_sql+=1

    print("Accuracy cypher:\n", n_true_cypher/len(result_data))
    print("Accuracy sql:\n", n_true_sql/len(result_data))

In [5]:
measure_accuracy(
    ground_truth="./database/structured_database/bfsi_ground_truth_easy.json",
    cypher_data_path="./database/structured_database/output/bfsi_cypher_gpt4o_mini_easy.json",
    sql_data_path="./database/structured_database/output/bfsi_sql_gpt4o_easy.json"
)

Accuracy cypher:
 0.904296875
Accuracy sql:
 0.7265625


In [6]:
measure_accuracy(
    ground_truth="./database/structured_database/bfsi_ground_truth_medium.json",
    cypher_data_path="./database/structured_database/output/bfsi_cypher_gpt4o_mini_medium.json",
    sql_data_path="./database/structured_database/output/bfsi_sql_gpt4o_medium.json"
)

Accuracy cypher:
 0.8198198198198198
Accuracy sql:
 0.7777777777777778


In [7]:
measure_accuracy(
    ground_truth="./database/structured_database/bfsi_ground_truth_hard.json",
    cypher_data_path="./database/structured_database/output/bfsi_cypher_gpt4o_mini_hard.json",
    sql_data_path="./database/structured_database/output/bfsi_sql_gpt4o_hard.json"
)

Accuracy cypher:
 0.8008474576271186
Accuracy sql:
 0.690677966101695
