In [9]:
import asyncio
import os
import re

import pandas as pd
import requests 
from dotenv import load_dotenv
from tqdm import tqdm

from autogen_core.models import UserMessage
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import StructuredMessage
from autogen_agentchat.ui import Console
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [10]:
# Single seed intended to be reused by every random operation in this notebook.
# NOTE(review): the seed is only defined here; it is not passed to the model
# client in this cell.
seed = 42

# Load environment variables from a local .env file into the process environment.
load_dotenv()

# Validate the credential explicitly instead of with a bare `assert`:
# assertions are stripped under `python -O`, carry no message here, and would
# also accept an empty value. Read the key once and reuse it below so the
# check and the client can never disagree.
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    raise RuntimeError(
        "TOGETHER_API_KEY is missing or empty; define it in the environment "
        "or in a .env file before running this notebook."
    )

# Generation parameters, forwarded as keyword arguments to the client below.
settings = {
    "model": "Qwen/Qwen3-235B-A22B-fp8-tput",
    "temperature": 0.1,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "max_tokens": 10000,
}

# OpenAI-compatible chat-completion client pointed at the Together endpoint.
model_client = OpenAIChatCompletionClient(
    base_url="https://api.together.xyz/v1/",
    api_key=together_api_key,
    # Capabilities declared by hand for this model (family is left "unknown").
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": True,
        "family": "unknown",
        "structured_output": True,
    },
    **settings,
)

# open.canada.ca `package_search` endpoint, ending in `?q=`.
# Presumably the search terms are appended directly after it; confirm at the call site.
portal_url = "https://open.canada.ca/data/api/3/action/package_search?q="

In [11]:
# Read the JoinSelector prompt template (Markdown) from the backend prompts folder.
# The path is relative to the notebook's working directory.
# The encoding is pinned to UTF-8 so the text decodes the same way on every
# platform; without it, open() falls back to the locale's preferred encoding.
with open(
    os.path.join('..', 'backend', 'prompts', 'JoinSelector.md'),
    encoding='utf-8',
) as file:
    JOIN_SELECTOR_PROMPT_TEMPLATE = file.read()

In [12]:
# Load the keyword ground-truth table (with examples) into a DataFrame.
# The path is relative to the notebook's working directory.
# NOTE(review): the displayed frame has an `Unnamed: 0` column, which looks like
# an index saved alongside the data — confirm before switching to `index_col=0`.
keywords_gt_csv = os.path.join('..', 'notebooks', 'keywords_gt_with_examples.csv')
keywords_gt = pd.read_csv(keywords_gt_csv)

In [13]:
# Preview the rows that satisfy all four conditions at once:
#   n_keywords == 2, presence == 1, result_count >= 15, r_id_index < 20.
# Rows whose r_id_index is NaN fail the last comparison and are excluded.
# NOTE(review): column meanings (e.g. r_id_index being a rank in the search
# results) are inferred from their names — confirm against the data producer.
keywords_gt.loc[
    keywords_gt['n_keywords'].eq(2)
    & keywords_gt['presence'].eq(1)
    & keywords_gt['result_count'].ge(15)
    & keywords_gt['r_id_index'].lt(20)
]

Unnamed: 0,country_tag,type,difficulty,r_rsc_id,s_rsc_id,r_pkg_id,s_pkg_id,r_col_name,s_col_name,nl,top_k,n_keywords,keywords,presence,result_count,r_id_index,s_id_index,prompt_tokens,completion_tokens,response
7,CAN,multi-table-join,challenging,a7c95445-e336-41e3-945c-1bc938ff0612,ae2204c7-db9e-450e-8158-c70eedada7ae,12c89aa5-063a-f0ed-334a-3019133b9ee8,4879e498-c9ac-44a3-d7d1-21af22e2224c,department_/_minista_re,department_/_minista_re,How has the payment amount changed for the sam...,1000,2,suppliers+payments,1,17,14.0,12.0,648,15,<think>\n\n</think>\n\n<keywords>suppliers+pay...
8,CAN,multi-table-join,simple,ce336623-43d6-4e5d-81d8-26fb9145cf92,ab5df4ac-7f55-461b-934f-d7af48d18e6f,2f40fb8f-a9e7-4f5b-94e9-04fbfaa10676,2013c544-62e8-4485-ad4b-6e5ba5e373b9,coa_segment_04_l3_and_desc,coa_segment_04_l3_and_desc,How do the operating expenses for different fi...,1000,2,british columbia+operating expenses,1,47,0.0,31.0,649,19,<think>\n\n</think>\n\n<keywords>british colum...
9,CAN,multi-table-join,moderate,ce336623-43d6-4e5d-81d8-26fb9145cf92,ab5df4ac-7f55-461b-934f-d7af48d18e6f,2f40fb8f-a9e7-4f5b-94e9-04fbfaa10676,2013c544-62e8-4485-ad4b-6e5ba5e373b9,coa_segment_04_l3_and_desc,coa_segment_04_l3_and_desc,"For similar operating cost categories, how do ...",1000,2,british columbia+operating costs,1,26,0.0,17.0,625,19,<think>\n\n</think>\n\n<keywords>british colum...
12,CAN,single-table,moderate,b6629b1d-27df-4c33-a4bf-a480776f3a92,,7643c948-d661-4d90-ab91-e9ac732fc737,,,,Which energy scenarios in Canada’s Energy Futu...,1000,2,canada+energy demand projections,1,21,19.0,,608,17,<think>\n\n</think>\n\n<keywords>canada+energy...
