In [1]:
from src.selection_agent.utils import select_agent
from src.model.inference_endpoints import LLM
import os

def select_best_candidate(
    fixed_queries: list[tuple[str, list[tuple]]],
    ir: list[str],
    database_name: str,
    database_path: str,
    question: str,
    hint: str,
    model: "LLM",
) -> "tuple[str | None, list[tuple] | None]":
    """Pick the winning SQL candidate with a pairwise round-robin tournament.

    Every unordered pair of candidates is compared exactly once:

    * If both candidates produced the same set of result rows, the earlier
      candidate of the pair gets the point and the selection agent is not
      called.
    * Otherwise ``select_agent`` is asked to judge the two candidates. A
      verdict of ``"A"`` gives the point to the first candidate of the pair;
      any other verdict gives it to the second.

    Args:
        fixed_queries: ``(sql_query, result_rows)`` pairs. Result rows must be
            hashable (e.g. tuples) because they are compared as sets.
        ir: Forwarded unchanged to ``select_agent``.
        database_name: Forwarded unchanged to ``select_agent``.
        database_path: Forwarded unchanged to ``select_agent``.
        question: Forwarded unchanged to ``select_agent``.
        hint: Forwarded unchanged to ``select_agent``.
        model: Forwarded unchanged to ``select_agent``.

    Returns:
        ``(best_query, best_result)`` for the highest-scoring candidate, or
        ``(None, None)`` when ``fixed_queries`` is empty. On a score tie the
        candidate that appears first in ``fixed_queries`` wins.
    """
    if not fixed_queries:
        return None, None

    # Scores are keyed by query text, so candidates with identical SQL share
    # a single score entry.
    scores = {query: 0 for query, _ in fixed_queries}

    for position, (a_query, a_result) in enumerate(fixed_queries):
        for b_query, b_result in fixed_queries[position + 1:]:
            if set(a_result) == set(b_result):
                # Same rows regardless of order/duplicates: no judge needed.
                scores[a_query] += 1
                continue

            # Hand the agent only the two candidates under comparison so that
            # its "A"/other verdict refers to exactly this pair.
            verdict = select_agent(
                ir=ir,
                database_name=database_name,
                database_path=database_path,
                question=question,
                hint=hint,
                queries=[a_query, b_query],
                results=[a_result, b_result],
                model=model,
            )

            winner = a_query if verdict == "A" else b_query
            scores[winner] += 1

    # max() returns the first maximal key in insertion order.
    best_query = max(scores, key=scores.get)
    best_result = next(
        result for query, result in fixed_queries if query == best_query
    )
    return best_query, best_result

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from random import Random, shuffle

import httpx
from openai import OpenAI

# Candidate (SQL query, execution result rows) pairs to choose between.
fixed_queries = [('SELECT T1."School Name", T1."Low Grade", T2.Latitude FROM frpm T1 INNER JOIN schools T2 ON T1.CDSCode = T2.CDSCode WHERE T2.State = \'CA\' AND T2.Latitude = (SELECT MIN(T2.Latitude) FROM schools T2 WHERE T2.State = \'CA\')',
  [('Willow Elementary', 'K', 32.547737)])]

# Shuffle with a dedicated, seeded generator so the candidate order (and hence
# the sequence of pairwise comparisons) is identical on every
# Restart & Run All, without touching the global random state.
SHUFFLE_SEED = 42
Random(SHUFFLE_SEED).shuffle(fixed_queries)

# Inputs forwarded to the selection agent.
# NOTE(review): "`schools.CDSCode`" and "`schools`. `State`" are formatted
# differently from the other entries — confirm whether that is intentional.
ir = ["`schools`.`City`.`San Diego`", "`frpm`.`Low Grade`", "`frpm`.`School Name`.`Vidya Mandir`", "`frpm`.`CDSCode`", "`schools.CDSCode`", "`schools`. `State`", "`schools`.`Latitude`"]
question = "In which city can you find the school in the state of California with the lowest latitude coordinates and what is its lowest grade? Indicate the school name."
hint = "State of California refers to state = 'CA'"
database_name = "california_schools"
# Requires the DATABASE_ROOT_PATH environment variable (raises KeyError if unset).
database_path = f"{os.environ['DATABASE_ROOT_PATH']}/{database_name}"

# NOTE(review): verify=False turns off TLS certificate verification for every
# request made through this client. Keep it only for trusted endpoints with
# self-signed certificates; otherwise pass a CA bundle or drop the argument.
http_client = httpx.Client(verify=False)
model = 'tgi'

# Requires the BASE_URL and API_KEY environment variables.
client = OpenAI(
    base_url=os.environ['BASE_URL'],
    http_client=http_client,
    api_key=os.environ['API_KEY']
)

# TODO: the original note here reads "Need to change this function"; what has
# to change is not recorded.
llm = LLM(
    client = client,
    model = model, 
    gen_params = {
        'STREAM': False,
        'TEMPERATURE': 0.5,
        'MAX_NEW_TOKENS': 2048 
    }
)

# Run the pairwise selection over the candidates.
best_query, best_result = select_best_candidate(
    fixed_queries=fixed_queries,
    ir=ir,
    database_name=database_name,
    database_path=database_path,
    question=question,
    hint=hint,
    model=llm
)



In [3]:
# Display the SQL text of the candidate returned by select_best_candidate.
best_query

'SELECT T1."School Name", T1."Low Grade", T2.Latitude FROM frpm T1 INNER JOIN schools T2 ON T1.CDSCode = T2.CDSCode WHERE T2.State = \'CA\' AND T2.Latitude = (SELECT MIN(T2.Latitude) FROM schools T2 WHERE T2.State = \'CA\')'

In [4]:
# Display the result rows paired with the selected query.
best_result

[('Willow Elementary', 'K', 32.547737)]