From e4b65dc8471705cfd71cb488bdd17a95d15d995c Mon Sep 17 00:00:00 2001
From: vrtornisiello <vrtornisiello@gmail.com>
Date: Tue, 5 May 2026 13:39:47 -0300
Subject: [PATCH 1/5] feat: update graphql queries

---
 app/agent/tools/queries.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/app/agent/tools/queries.py b/app/agent/tools/queries.py
index 4e9b386..9c87da0 100644
--- a/app/agent/tools/queries.py
+++ b/app/agent/tools/queries.py
@@ -5,13 +5,11 @@
             node {
                 id
                 name
-                slug
                 description
                 organizations {
                     edges {
                         node {
                             name
-                            slug
                         }
                     }
                 }
@@ -34,7 +32,6 @@
                         node {
                             id
                             name
-                            slug
                             description
                             temporalCoverage
                             cloudTables {
@@ -62,7 +59,6 @@
             node {
                 id
                 name
-                slug
                 description
                 temporalCoverage
                 cloudTables {
@@ -81,12 +77,22 @@
                             name
                             description
                             measurementUnit
+                            coveredByDictionary
+                            isPartition
                             bigqueryType {
                                 name
                             }
                             directoryPrimaryKey {
                                 table {
                                     id
+                                    cloudTables {
+                                        edges {
+                                            node {
+                                                gcpDatasetId
+                                                gcpTableId
+                                            }
+                                        }
+                                    }
                                 }
                             }
                         }

From 3619e4234d5c3c6fac504ebe0839463eacd17490 Mon Sep 17 00:00:00 2001
From: vrtornisiello <vrtornisiello@gmail.com>
Date: Tue, 5 May 2026 16:04:44 -0300
Subject: [PATCH 2/5] feat: remove redundant fields, add new fields, update
 tool descriptions, and stop indenting tool outputs

---
 app/agent/tools/__init__.py | 10 +++---
 app/agent/tools/api.py      | 61 +++++++++++++++++++++----------------
 app/agent/tools/bigquery.py | 50 +++++++++++++++++-------------
 app/agent/tools/models.py   |  9 +++---
 4 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/app/agent/tools/__init__.py b/app/agent/tools/__init__.py
index c1f9fa3..1e2d5ca 100644
--- a/app/agent/tools/__init__.py
+++ b/app/agent/tools/__init__.py
@@ -14,11 +14,11 @@ def get_tools() -> list[BaseTool]:
 
         Returns:
             list[BaseTool]: Tools in suggested usage order:
-                - search_datasets: Find datasets using keywords
-                - get_dataset_details: Get comprehensive dataset information
-                - get_table_details: Get comprehensive table information
-                - execute_bigquery_sql: Execute SQL queries against BigQuery tables
-                - decode_table_values: Decode coded values using dictionary tables
+                - search_datasets: Find datasets using keywords.
+                - get_dataset_details: Get comprehensive dataset information.
+                - get_table_details: Get comprehensive table information.
+                - execute_bigquery_sql: Execute SQL queries against BigQuery tables.
+                - decode_table_values: Decode coded values using dictionary tables.
         """
         return [
             search_datasets,
diff --git a/app/agent/tools/api.py b/app/agent/tools/api.py
index 5c276fa..14c6900 100644
--- a/app/agent/tools/api.py
+++ b/app/agent/tools/api.py
@@ -29,6 +29,9 @@
 # URL for fetching dataset details
 GRAPHQL_URL = f"{settings.BASEDOSDADOS_BASE_URL}/graphql"
 
+# datasets to skip
+SKIP_DIRECTORY_DATASETS = {"br_bd_diretorios_data_tempo"}
+
 # URL for fetching usage guides
 BASE_USAGE_GUIDE_URL = "https://raw.githubusercontent.com/basedosdados/website/refs/heads/main/next/content/userGuide/pt"
 
@@ -44,8 +47,8 @@ async def search_datasets(query: str) -> str:
 
     Args:
         query (str): 2-3 keywords maximum. Use Portuguese terms, organization acronyms, or dataset acronyms.
-            Good Examples: "censo", "educacao", "ibge", "inep", "rais", "saude"
-            Avoid: "Brazilian population data by municipality"
+            Good Examples: "censo", "educacao", "ibge", "inep", "rais", "saude".
+            Avoid: "Brazilian population data by municipality".
 
     Returns:
         str: JSON array of datasets. If empty/irrelevant results, try different keywords.
@@ -69,15 +72,14 @@ async def search_datasets(query: str) -> str:
         dataset_overview = DatasetOverview(
             id=dataset["id"],
             name=dataset["name"],
-            slug=dataset.get("slug"),
             description=dataset.get("description"),
+            organizations=[org["name"] for org in dataset.get("organizations", [])],
             tags=[tag["name"] for tag in dataset.get("tags", [])],
             themes=[theme["name"] for theme in dataset.get("themes", [])],
-            organizations=[org["name"] for org in dataset.get("organizations", [])],
         )
         overviews.append(dataset_overview.model_dump())
 
-    return json.dumps(overviews, ensure_ascii=False, indent=2)
+    return json.dumps(overviews, ensure_ascii=False)
 
 
 @tool
@@ -93,11 +95,10 @@ async def get_dataset_details(dataset_id: str) -> str:
 
     Returns:
         str: JSON object with complete dataset information, including:
-            - Basic metadata (name, description, tags, themes, organizations)
+            - Basic metadata (name, description, tags, themes, organizations).
             - tables: Array of all tables in the dataset with:
-                - gcp_id: Full BigQuery table reference (`project.dataset.table`)
-                - temporal coverage: Authoritative temporal coverage for the table
-                - table descriptions explaining what each table contains
+                - gcp_id: Full BigQuery table reference (`project.dataset.table`).
+                - table descriptions explaining what each table contains.
             - usage_guide: Provide key information and best practices for using the dataset.
 
     Next step: Use `get_table_details()` with returned table IDs.
@@ -125,7 +126,6 @@ async def get_dataset_details(dataset_id: str) -> str:
 
     dataset_id = dataset["id"].split("DatasetNode:")[-1]
     dataset_name = dataset["name"]
-    dataset_slug = dataset.get("slug")
     dataset_description = dataset.get("description")
 
     # Tags
@@ -158,9 +158,7 @@ async def get_dataset_details(dataset_id: str) -> str:
 
         table_id = table["id"].split("TableNode:")[-1]
         table_name = table["name"]
-        table_slug = table.get("slug")
         table_description = table.get("description")
-        table_temporal_coverage = table.get("temporalCoverage")
 
         cloud_table_edges = table["cloudTables"]["edges"]
         if cloud_table_edges:
@@ -177,9 +175,7 @@ async def get_dataset_details(dataset_id: str) -> str:
                 id=table_id,
                 gcp_id=table_gcp_id,
                 name=table_name,
-                slug=table_slug,
                 description=table_description,
-                temporal_coverage=table_temporal_coverage,
             )
         )
 
@@ -197,7 +193,6 @@ async def get_dataset_details(dataset_id: str) -> str:
     result = Dataset(
         id=dataset_id,
         name=dataset_name,
-        slug=dataset_slug,
         description=dataset_description,
         tags=dataset_tags,
         themes=dataset_themes,
@@ -206,7 +201,7 @@ async def get_dataset_details(dataset_id: str) -> str:
         usage_guide=usage_guide,
     )
 
-    return result.model_dump_json(indent=2)
+    return result.model_dump_json()
 
 
 @tool
@@ -222,10 +217,14 @@ async def get_table_details(table_id: str) -> str:
 
     Returns:
         str: JSON object with complete table information, including:
-            - Basic metadata (name, description, slug)
-            - gcp_id: Full BigQuery table reference (`project.dataset.table`)
-            - temporal coverage: Authoritative temporal coverage for the table
-            - columns: All column names, types, and descriptions
+            - Basic metadata (name, description).
+            - gcp_id: Full BigQuery table reference (`project.dataset.table`).
+            - columns: All column names, types, and descriptions, including
+                `needs_decoding` and `reference_table_id` for coded columns.
+            - partitioned_by: Columns to filter on for cost control.
+            - period_start / period_end: First and last period covered by the table.
+                Format varies (`2024`, `'2026-04-12'`, etc.) — use the value verbatim,
+                matched to the appropriate temporal column (`ano`, `data`, etc.).
 
     Next step: Use `execute_bigquery_sql()` to execute queries.
     """
@@ -252,9 +251,8 @@ async def get_table_details(table_id: str) -> str:
 
     table_id = table["id"].split("TableNode:")[-1]
     table_name = table["name"]
-    table_slug = table.get("slug")
     table_description = table.get("description")
-    table_temporal_coverage = table.get("temporalCoverage")
+    table_temporal_coverage = table.get("temporalCoverage", {})
 
     cloud_table_edges = table["cloudTables"]["edges"]
     if cloud_table_edges:
@@ -267,14 +265,23 @@ async def get_table_details(table_id: str) -> str:
         table_gcp_id = None
 
     table_columns = []
+    partitioned_by = []
+
     for edge in table["columns"]["edges"]:
         column = edge["node"]
 
+        if column["isPartition"]:
+            partitioned_by.append(column["name"])
+
         directory_primary_key = column["directoryPrimaryKey"]
 
         if directory_primary_key is not None:
             directory_table = directory_primary_key["table"]
-            directory_table_id = directory_table["id"].split("TableNode:")[-1]
+            directory_cloud_table = directory_table["cloudTables"]["edges"][0]["node"]
+            if directory_cloud_table["gcpDatasetId"] in SKIP_DIRECTORY_DATASETS:
+                directory_table_id = None
+            else:
+                directory_table_id = directory_table["id"].split("TableNode:")[-1]
         else:
             directory_table_id = None
 
@@ -285,6 +292,7 @@ async def get_table_details(table_id: str) -> str:
                 description=column.get("description"),
                 unit=column.get("measurementUnit"),
                 reference_table_id=directory_table_id,
+                needs_decoding=column["coveredByDictionary"],
             )
         )
 
@@ -292,10 +300,11 @@ async def get_table_details(table_id: str) -> str:
         id=table_id,
         gcp_id=table_gcp_id,
         name=table_name,
-        slug=table_slug,
         description=table_description,
-        temporal_coverage=table_temporal_coverage,
         columns=table_columns,
+        partitioned_by=partitioned_by,
+        period_start=table_temporal_coverage.get("start"),
+        period_end=table_temporal_coverage.get("end"),
     )
 
-    return result.model_dump_json(indent=2)
+    return result.model_dump_json()
diff --git a/app/agent/tools/bigquery.py b/app/agent/tools/bigquery.py
index fb2fe99..0f961e1 100644
--- a/app/agent/tools/bigquery.py
+++ b/app/agent/tools/bigquery.py
@@ -30,20 +30,19 @@ def execute_bigquery_sql(sql_query: str, config: RunnableConfig) -> str:
     It includes a 10GB processing limit for safety.
 
     Args:
-        sql_query (str): Standard GoogleSQL query. Must reference
-            tables using their full `gcp_id` from `get_dataset_details()`.
-
-    Best practices:
-        - Use fully qualified names: `project.dataset.table`
-        - Select only needed columns, avoid `SELECT *`
-        - Add `LIMIT` for exploration
-        - Filter early with `WHERE` clauses
-        - Order by relevant columns
-        - Never use DDL/DML commands
-        - Use appropriate data types in comparisons
+        sql_query (str): Standard GoogleSQL query. Must reference tables using their full `gcp_id` from `get_dataset_details()`.
+
+    Rules:
+        - Use fully qualified names: `project.dataset.table`.
+        - Select only needed columns, don't use `SELECT *`.
+        - Always filter by partitioned columns when present (see `partitioned_by` in `get_table_details` results). In `JOIN` queries, each partitioned table needs its own partition filter.
+        - Order by relevant columns.
+        - Use `LIMIT` for exploration.
+        - Use appropriate data types in comparisons.
+        - Only `SELECT` statements are allowed.
 
     Returns:
-        str: Query results as JSON array. Empty results return "[]".
+        str: Query results as JSON array.
     """
     client = _get_client()
 
@@ -70,15 +69,19 @@ def execute_bigquery_sql(sql_query: str, config: RunnableConfig) -> str:
             ),
         )
         results = [dict(row) for row in job.result()]
+
+        if not results:
+            results = "Query returned 0 rows. Review the filters per the empty-result protocol."
+
     except GoogleAPICallError as e:
         reason = e.errors[0].get("reason") if getattr(e, "errors", None) else None
         if reason == "bytesBilledLimitExceeded":
             raise ValueError(
-                f"Query exceeds the {MAX_BYTES_BILLED // 10**9}GB processing limit. Add WHERE filters or select fewer columns."
+                f"Query exceeds the {MAX_BYTES_BILLED // 10**9}GB processing limit. Filter by partitioned columns."
             ) from e
         raise
 
-    return json.dumps(results, ensure_ascii=False, indent=2, default=str)
+    return json.dumps(results, ensure_ascii=False, default=str)
 
 
 @tool
@@ -86,18 +89,21 @@ def execute_bigquery_sql(sql_query: str, config: RunnableConfig) -> str:
 def decode_table_values(
     table_gcp_id: str, config: RunnableConfig, column_name: str | None = None
 ) -> str:
-    """Decode coded values from a table using its dataset's `dicionario` table.
+    """Fetch the dictionary mapping (code -> human-readable value) for a coded column.
+
+    REQUIRED whenever a column has `needs_decoding: true` in `get_table_details`,
+    BEFORE writing any SQL that filters, joins, or displays that column.
 
-    Use when column values appear to be codes (e.g., 1,2,3 or A,B,C) and the
-    column does NOT have a `reference_table_id` in `get_table_details()` metadata.
+    Returns pairs of `chave` (the literal value stored in the table) and `valor` (its meaning).
 
     Args:
-        table_gcp_id (str): Full BigQuery table reference.
-        column_name (str | None, optional): Column with coded values. If `None`,
-            all columns will be used. Defaults to `None`.
+        table_gcp_id (str): Full BigQuery table reference (`project.dataset.table`).
+        column_name (str | None, optional): The specific column to decode. Always
+            provide this when you know which column you need; passing None returns
+            the entire dictionary for the table and wastes tokens.
 
     Returns:
-        str: JSON array with chave (code) and valor (meaning) mappings.
+        str: JSON array of {nome_coluna, chave, valor} entries.
     """
     if "`" in table_gcp_id:
         table_gcp_id = table_gcp_id.replace("`", "")
@@ -148,4 +154,4 @@ def decode_table_values(
             raise ValueError("Dictionary table not found for this dataset.") from e
         raise
 
-    return json.dumps(results, ensure_ascii=False, indent=2, default=str)
+    return json.dumps(results, ensure_ascii=False, default=str)
diff --git a/app/agent/tools/models.py b/app/agent/tools/models.py
index 9082c58..def58ff 100644
--- a/app/agent/tools/models.py
+++ b/app/agent/tools/models.py
@@ -9,6 +9,7 @@ class Column(BaseModel):
     description: str | None
     unit: str | None = Field(exclude_if=lambda v: v is None)
     reference_table_id: str | None = Field(exclude_if=lambda v: v is None)
+    needs_decoding: bool
 
 
 class TableOverview(BaseModel):
@@ -17,15 +18,16 @@ class TableOverview(BaseModel):
     id: str
     gcp_id: str | None
     name: str
-    slug: str | None
     description: str | None
-    temporal_coverage: dict[str, str | None]
 
 
 class Table(TableOverview):
     """Complete table information including all its columns."""
 
     columns: list[Column]
+    partitioned_by: list[str]
+    period_start: str | None
+    period_end: str | None
 
 
 class DatasetOverview(BaseModel):
@@ -33,11 +35,10 @@ class DatasetOverview(BaseModel):
 
     id: str
     name: str
-    slug: str | None
     description: str | None
+    organizations: list[str]
     tags: list[str]
     themes: list[str]
-    organizations: list[str]
 
 
 class Dataset(DatasetOverview):

From 891816c3e135a4bc9b15d2fa37fe1a237a327885 Mon Sep 17 00:00:00 2001
From: vrtornisiello <vrtornisiello@gmail.com>
Date: Tue, 5 May 2026 16:04:49 -0300
Subject: [PATCH 3/5] feat: update system prompt to align with the new metadata
 and tool descriptions

---
 app/agent/prompts.py | 70 ++++++++++++++++++++++++++------------------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/app/agent/prompts.py b/app/agent/prompts.py
index 8b7f9b3..101adeb 100644
--- a/app/agent/prompts.py
+++ b/app/agent/prompts.py
@@ -17,7 +17,7 @@
 
 Padrões comuns nas fontes de dados:
 - Geográfico: `sigla_uf` (estado), `id_municipio` (município - código IBGE 7 dígitos).
-- Temporal: `ano` (ano), campo `temporal_coverage` dos metadados.
+- Temporal: `ano` (ano), campos `period_start` / `period_end` dos metadados da tabela.
 - Identificadores: `id_*`, `codigo_*`, `sigla_*`.
 
 ---
@@ -25,9 +25,9 @@
 # Ferramentas Disponíveis
 - **search_datasets**: Busca datasets por palavra-chave.
 - **get_dataset_details**: Obtém informações detalhadas sobre um dataset, com visão geral das tabelas.
-- **get_table_details**: Obtém informações detalhadas sobre uma tabela, com colunas e cobertura temporal.
+- **get_table_details**: Obtém informações detalhadas sobre uma tabela, com colunas, período de cobertura e particionamento.
 - **execute_bigquery_sql**: Executa consultas SQL no BigQuery.
-- **decode_table_values**: Decodifica colunas utilizando um dicionário de dados.
+- **decode_table_values**: Retorna o dicionário de chave/valor para decodificar uma coluna.
 
 ---
 
@@ -35,14 +35,16 @@
 Siga este fluxo ao responder perguntas sobre dados:
 1. **Busque datasets**: Use `search_datasets` para encontrar datasets relacionados à pergunta, seguindo o **Protocolo de Busca**.
 2. **Explore os datasets**: Use `get_dataset_details` para obter uma visão geral das tabelas disponíveis e identificar as mais relevantes.
-3. **Examine as tabelas**: Use `get_table_details` para entender as colunas, a cobertura temporal (`temporal_coverage`) e relações com outras tabelas (`reference_table_id`).
-4. **Construa e execute a consulta SQL**: Com base nos metadados, construa e execute uma consulta para responder à pergunta. Siga rigorosamente o **Protocolo de Consultas SQL**, que detalha como lidar com cobertura temporal e como usar JOINs com tabelas de referência (preferencialmente) ou a ferramenta `decode_table_values` (como alternativa) para colunas codificadas.
+3. **Examine as tabelas**: Use `get_table_details` para obter os detalhes de uma tabela. Preste atenção no período de cobertura (`period_start` e `period_end`), nas colunas particionadas (`partitioned_by`), e identifique quais colunas precisam de tradução (`reference_table_id` e `needs_decoding`).
+4. **Construa e execute a consulta SQL**: Com base nos metadados, construa e execute uma consulta para responder à pergunta. Siga rigorosamente o **Protocolo de Consultas SQL**, que detalha como lidar com o período de cobertura das tabelas e com colunas codificadas.
 5. Se uma ferramenta falhar, analise o erro, ajuste a estratégia e tente novamente.
 
 ---
 
 # Regras de Fundamentação dos Fatos (CRÍTICO)
-**TODA** afirmação sobre dados específicos (números, estatísticas, nomes de datasets/tabelas/colunas, cobertura temporal, valores codificados) **deve** ser fundamentada pelos resultados de ferramentas obtidos nessa conversa. **NUNCA** responda citando dados específicos a partir do seu conhecimento prévio, nem invente valores plausíveis para preencher lacunas. Isso é **essencial** para que o usuário confie em você.
+**TODA** afirmação sobre dados específicos (números, estatísticas, nomes de datasets/tabelas/colunas, períodos de cobertura, valores codificados) **deve** ser fundamentada pelos resultados de ferramentas obtidos nessa conversa. **NUNCA** responda citando dados específicos a partir do seu conhecimento prévio, nem invente valores plausíveis para preencher lacunas. Isso é **essencial** para que o usuário confie em você.
+
+A data de corte do seu treinamento é anterior à data atual. Confie nos campos `period_start` / `period_end` retornados por `get_table_details` para saber o período de cobertura dos dados — **não** assuma que datas após o seu treinamento são inválidas.
 
 É permitido responder sem chamar ferramentas **apenas** quando:
 - Você está explicando a plataforma Base dos Dados ou suas próprias capacidades.
@@ -72,30 +74,40 @@
 # Protocolo de Consultas SQL
 - **Referencie IDs completos:** `projeto.dataset.tabela`.
 - **Selecione colunas específicas**: Não use `SELECT *`.
-- **Acesso read-only**: Não use `CREATE`, `ALTER`, `DROP`, `INSERT`, `UPDATE`, `DELETE`.
+- **Acesso read-only**: Somente instruções `SELECT` são permitidas.
+- **Particionamento**: Verifique o campo `partitioned_by` do resultado de `get_table_details`. Se a tabela for particionada, inclua sempre um filtro em pelo menos uma das colunas particionadas. Isso é **obrigatório** para reduzir os bytes processados — consultas sem esse filtro tendem a escanear a tabela inteira e podem ultrapassar o limite de processamento. Em consultas com `JOIN`, **cada** tabela particionada referenciada precisa do seu próprio filtro de partição — não basta filtrar apenas a tabela principal, pois as demais serão escaneadas integralmente.
 - **Estilo**: Use nomes de colunas específicos, `ORDER BY` e comentários SQL (`--`).
 
-## Cobertura Temporal
-Sempre que você estiver prestes a escrever uma consulta SQL que envolva uma dimensão temporal (colunas como `ano`, `mes`, `data`, `semestre`), siga este procedimento:
-1. Recupere o campo `temporal_coverage` do resultado de `get_table_details` para a tabela que será consultada.
-2. Se o usuário especificou um período:
-   - Valide que o período solicitado está contido dentro de `temporal_coverage`. Se não estiver, informe o usuário sobre o período disponível e ajuste a consulta.
-3. Se o usuário NÃO especificou um período:
-   - Extraia o valor final de `temporal_coverage` (ex.: o ano mais recente disponível).
-   - Utilize esse valor como filtro padrão na consulta (ex.: `WHERE ano = 2020`).
-   - Informe o usuário na resposta que você utilizou o período mais recente disponível.
-**NUNCA** execute `SELECT MIN(ano)`, `SELECT MAX(ano)` ou `SELECT DISTINCT ano` para descobrir o período disponível. O campo `temporal_coverage` é a fonte autoritativa sobre o período dos dados — use-o sempre.
-
-## Tabelas de Referência
-Sempre que você decidir usar uma coluna que possui o campo `reference_table_id`, siga este procedimento:
-1. Chame `get_table_details` passando esse ID para obter os detalhes da tabela de referência.
-2. Com os detalhes da tabela de referência em mãos, utilize-os para:
-   - Realizar JOINs na consulta SQL, conectando a coluna codificada à tabela de referência.
-   - Filtrar valores utilizando nomes legíveis (ex.: `WHERE nome_regiao = 'Nordeste'` em vez de `WHERE id_regiao = '2'`).
-   - Incluir nomes descritivos no `SELECT` para que o resultado seja compreensível.
-3. Se a tabela de referência não puder ser acessada, use `decode_table_values` como alternativa.
-4. Colunas com `reference_table_id` que não serão utilizadas na consulta não precisam ser resolvidas.
-**NUNCA** escreva consultas SQL que filtrem, agrupem ou exibam colunas codificadas sem antes resolver suas tabelas de referência. Valores codificados sem contexto tornam o resultado incompreensível.
+## Período de Cobertura
+Para qualquer consulta envolvendo uma dimensão temporal (colunas como `ano`, `mes`, `data`, `semestre`), use os campos `period_start` e `period_end` do resultado de `get_table_details` como fonte autoritativa do período disponível.
+
+O formato dos valores **varia por tabela** — pode ser um ano (`2024`), uma data (`'2026-04-12'`), etc. Use o valor **exatamente** como retornado, no filtro da coluna temporal correspondente (ano para anos, data para datas, etc.).
+
+- **Se o usuário especificou um período**: valide que está dentro de `[period_start, period_end]`. Se não estiver, informe o usuário sobre o período disponível e ajuste a consulta.
+- **Se o usuário NÃO especificou um período**: use `period_end` como filtro padrão. Informe o usuário na resposta que você utilizou o período mais recente disponível.
+
+**NUNCA** execute `SELECT MIN/MAX/DISTINCT` em colunas temporais para descobrir o período — `period_start`/`period_end` já contêm essa informação.
+
+## Colunas Codificadas
+Algumas colunas armazenam valores opacos (IDs, códigos numéricos, siglas, etc.) que devem ser traduzidos para nomes legíveis antes de aparecerem em **qualquer** consulta. Os metadados definem como traduzi-las:
+
+- **`reference_table_id` presente**: Chame `get_table_details` com esse ID e faça `JOIN` com a tabela de referência. Filtre, agregue e exiba valores pelos nomes legíveis (ex.: `WHERE nome_regiao = 'Nordeste'` em vez de `WHERE id_regiao = '2'`).
+- **`needs_decoding: true`**: Chame `decode_table_values` para obter o dicionário de chave/valor e traduzir os valores.
+
+Colunas codificadas não usadas na consulta não precisam ser traduzidas.
+
+**NUNCA** escreva consultas SQL que filtrem, agreguem ou exibam colunas codificadas sem antes traduzi-las. Valores codificados sem contexto tornam o resultado incompreensível e levam a filtros incorretos.
+
+## Resultado Vazio
+Quando `execute_bigquery_sql` retornar 0 linhas, revise os filtros:
+1. Para filtros em coluna categórica/codificada:
+   - Se a coluna tem `reference_table_id`, faça JOIN com a tabela de referência.
+   - Se a coluna tem `needs_decoding: true`, use `decode_table_values` para verificar os pares chave/valor.
+2. Para filtros temporais: revalide contra `period_start` / `period_end`.
+3. Para filtros em strings: considere case, acentos, zeros à esquerda (ex.: `'1'` vs `'01'`), espaços em branco.
+
+Somente depois de revisar os filtros, reescreva a consulta com valores verificados.
+Se após a revisão o resultado vazio for legítimo (os dados realmente não existem para o recorte solicitado), **pare de tentar e informe o usuário**.
 
 ---
 
@@ -122,5 +134,5 @@
 Antes de escrever a resposta final, você deve realizar uma revisão **estritamente interna**, verificando se todas as restrições mencionadas nas instruções foram cumpridas. Reflita:
 
 1. **Falha Crítica — Fundamentação**: Minha resposta está fundamentada em resultados obtidos através das ferramentas disponíveis?
-2. **Falha Crítica — Consultas SQL**: Executei as consultas SQL em conformidade com o **Protocolo de Consultas SQL**, atentando-me à cobertura temporal das tabelas e fazendo JOINs com tabelas de referência?
+2. **Falha Crítica — Consultas SQL**: Executei as consultas SQL em conformidade com o **Protocolo de Consultas SQL**, respeitando o período de cobertura das tabelas, fazendo JOINs com tabelas de referência e traduzindo colunas codificadas?
 3. **Falha Crítica — Resposta Final**: Inclui todos os elementos requeridos na resposta final?"""

From a433218b47a335482a61786b99dda9181988c811 Mon Sep 17 00:00:00 2001
From: vrtornisiello <vrtornisiello@gmail.com>
Date: Wed, 6 May 2026 10:20:46 -0300
Subject: [PATCH 4/5] fix: handle null `temporalCoverage` in `get_table_details`

---
 app/agent/tools/api.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/agent/tools/api.py b/app/agent/tools/api.py
index 14c6900..841eafc 100644
--- a/app/agent/tools/api.py
+++ b/app/agent/tools/api.py
@@ -23,15 +23,15 @@
 # maximum number of datasets returned on search
 PAGE_SIZE = 10
 
-# url for searching datasets
+# directory datasets whose tables are never reported as a column's `reference_table_id`
+SKIP_DIRECTORY_DATASETS = {"br_bd_diretorios_data_tempo"}
+
+# URL for searching datasets
 SEARCH_URL = f"{settings.BASEDOSDADOS_BASE_URL}/search/"
 
 # URL for fetching dataset details
 GRAPHQL_URL = f"{settings.BASEDOSDADOS_BASE_URL}/graphql"
 
-# datasets to skip
-SKIP_DIRECTORY_DATASETS = {"br_bd_diretorios_data_tempo"}
-
 # URL for fetching usage guides
 BASE_USAGE_GUIDE_URL = "https://raw.githubusercontent.com/basedosdados/website/refs/heads/main/next/content/userGuide/pt"
 
@@ -252,7 +252,7 @@ async def get_table_details(table_id: str) -> str:
     table_id = table["id"].split("TableNode:")[-1]
     table_name = table["name"]
     table_description = table.get("description")
-    table_temporal_coverage = table.get("temporalCoverage", {})
+    table_temporal_coverage = table.get("temporalCoverage") or {}
 
     cloud_table_edges = table["cloudTables"]["edges"]
     if cloud_table_edges:

From 16e80432bee5bfeadd5562bfa54b28b7605c5430 Mon Sep 17 00:00:00 2001
From: vrtornisiello <vrtornisiello@gmail.com>
Date: Wed, 6 May 2026 10:22:30 -0300
Subject: [PATCH 5/5] test: add and update test cases for api and bigquery tools

---
 tests/app/agent/tools/test_api.py      | 125 ++++++++++++++++++++-----
 tests/app/agent/tools/test_bigquery.py |  33 ++++++-
 2 files changed, 136 insertions(+), 22 deletions(-)

diff --git a/tests/app/agent/tools/test_api.py b/tests/app/agent/tools/test_api.py
index 2d8b1ff..5a971a0 100644
--- a/tests/app/agent/tools/test_api.py
+++ b/tests/app/agent/tools/test_api.py
@@ -4,7 +4,12 @@
 import pytest
 import respx
 
-from app.agent.tools.api import get_dataset_details, get_table_details, search_datasets
+from app.agent.tools.api import (
+    SKIP_DIRECTORY_DATASETS,
+    get_dataset_details,
+    get_table_details,
+    search_datasets,
+)
 from app.settings import settings
 
 
@@ -21,7 +26,6 @@ async def test_search_datasets_returns_overviews(self):
                 {
                     "id": "dataset-1",
                     "name": "Test Dataset",
-                    "slug": "test_dataset",
                     "description": "Dataset description",
                     "tags": [{"name": "tag1"}, {"name": "tag2"}],
                     "themes": [{"name": "theme1"}, {"name": "theme2"}],
@@ -43,7 +47,6 @@ async def test_search_datasets_returns_overviews(self):
 
         assert dataset["id"] == "dataset-1"
         assert dataset["name"] == "Test Dataset"
-        assert dataset["slug"] == "test_dataset"
         assert dataset["description"] == "Dataset description"
         assert dataset["tags"] == ["tag1", "tag2"]
         assert dataset["themes"] == ["theme1", "theme2"]
@@ -77,7 +80,6 @@ def mock_response(self):
                             "node": {
                                 "id": "DatasetNode:dataset-1",
                                 "name": "Test Dataset",
-                                "slug": "test_dataset",
                                 "description": "Dataset description",
                                 "tags": {"edges": [{"node": {"name": "tag1"}}]},
                                 "themes": {"edges": [{"node": {"name": "theme1"}}]},
@@ -92,7 +94,6 @@ def mock_response(self):
                                             "node": {
                                                 "id": "TableNode:table-1",
                                                 "name": "Test Table",
-                                                "slug": "test_table",
                                                 "description": "Table description",
                                                 "temporalCoverage": {
                                                     "start": "2020",
@@ -138,7 +139,6 @@ async def test_get_dataset_details_success(self, mock_response):
 
         assert dataset["id"] == "dataset-1"
         assert dataset["name"] == "Test Dataset"
-        assert dataset["slug"] == "test_dataset"
         assert dataset["description"] == "Dataset description"
         assert dataset["tags"] == ["tag1"]
         assert dataset["themes"] == ["theme1"]
@@ -152,9 +152,7 @@ async def test_get_dataset_details_success(self, mock_response):
         assert table["id"] == "table-1"
         assert table["gcp_id"] == "basedosdados.test_dataset.test_table"
         assert table["name"] == "Test Table"
-        assert table["slug"] == "test_table"
         assert table["description"] == "Table description"
-        assert table["temporal_coverage"] == {"start": "2020", "end": "2023"}
 
     @respx.mock
     async def test_get_dataset_details_success_with_usage_guide(self, mock_response):
@@ -173,8 +171,8 @@ async def test_get_dataset_details_success_with_usage_guide(self, mock_response)
         assert dataset["usage_guide"] == "# This is a usage guide."
 
     @respx.mock
-    async def test_table_without_tags_themes_orgs(self):
-        """Test dataset with table that has no tags, themes and orgs."""
+    async def test_get_dataset_details_without_tags_themes_orgs(self):
+        """Test dataset details for a dataset that has no tags, themes or orgs."""
         mock_response = {
             "data": {
                 "allDataset": {
@@ -183,7 +181,6 @@ async def test_table_without_tags_themes_orgs(self):
                             "node": {
                                 "id": "dataset-1",
                                 "name": "Test Dataset",
-                                "slug": "test_dataset",
                                 "description": "Dataset description",
                                 "tags": {"edges": [{"node": {}}]},
                                 "themes": {"edges": [{"node": {}}]},
@@ -194,7 +191,6 @@ async def test_table_without_tags_themes_orgs(self):
                                             "node": {
                                                 "id": "table-1",
                                                 "name": "Test Table",
-                                                "slug": "test_table",
                                                 "description": "Table description",
                                                 "temporalCoverage": {
                                                     "start": "2020",
@@ -239,7 +235,7 @@ async def test_table_without_tags_themes_orgs(self):
 
     @respx.mock
     async def test_table_without_cloud_tables(self):
-        """Test dataset with table that has no cloud tables."""
+        """Test dataset details with table that has no cloud tables."""
         mock_response = {
             "data": {
                 "allDataset": {
@@ -325,7 +321,6 @@ def mock_response(self):
                             "node": {
                                 "id": "TableNode:table-1",
                                 "name": "Test Table",
-                                "slug": "test_table",
                                 "description": "Table description",
                                 "temporalCoverage": {
                                     "start": "2020",
@@ -350,6 +345,8 @@ def mock_response(self):
                                                 "name": "peso_liquido",
                                                 "description": "Peso líquido",
                                                 "measurementUnit": "kg",
+                                                "coveredByDictionary": False,
+                                                "isPartition": False,
                                                 "bigqueryType": {"name": "FLOAT64"},
                                                 "directoryPrimaryKey": None,
                                             }
@@ -357,13 +354,67 @@ def mock_response(self):
                                         {
                                             "node": {
                                                 "id": "col-2",
+                                                "name": "status",
+                                                "description": "Status",
+                                                "measurementUnit": None,
+                                                "coveredByDictionary": True,
+                                                "isPartition": False,
+                                                "bigqueryType": {"name": "STRING"},
+                                                "directoryPrimaryKey": None,
+                                            }
+                                        },
+                                        {
+                                            "node": {
+                                                "id": "col-3",
                                                 "name": "id_municipio",
                                                 "description": "ID do município",
                                                 "measurementUnit": None,
+                                                "coveredByDictionary": False,
+                                                "isPartition": False,
                                                 "bigqueryType": {"name": "STRING"},
                                                 "directoryPrimaryKey": {
                                                     "table": {
-                                                        "id": "TableNode:dir-table-1"
+                                                        "id": "TableNode:dir-table-1",
+                                                        "cloudTables": {
+                                                            "edges": [
+                                                                {
+                                                                    "node": {
+                                                                        "gcpDatasetId": "directory_dataset",
+                                                                        "gcpTableId": "directory_table",
+                                                                    }
+                                                                }
+                                                            ]
+                                                        },
+                                                    }
+                                                },
+                                            }
+                                        },
+                                        {
+                                            "node": {
+                                                "id": "col-4",
+                                                "name": "ano",
+                                                "description": "Ano",
+                                                "measurementUnit": None,
+                                                "coveredByDictionary": False,
+                                                "isPartition": True,
+                                                "bigqueryType": {"name": "INT64"},
+                                                "directoryPrimaryKey": {
+                                                    "table": {
+                                                        "id": "TableNode:dir-table-2",
+                                                        "cloudTables": {
+                                                            "edges": [
+                                                                {
+                                                                    "node": {
+                                                                        "gcpDatasetId": next(
+                                                                            iter(
+                                                                                SKIP_DIRECTORY_DATASETS
+                                                                            )
+                                                                        ),
+                                                                        "gcpTableId": "ano",
+                                                                    }
+                                                                }
+                                                            ]
+                                                        },
                                                     }
                                                 },
                                             }
@@ -390,23 +441,55 @@ async def test_get_table_details_success(self, mock_response):
         assert table["id"] == "table-1"
         assert table["gcp_id"] == "basedosdados.test_dataset.test_table"
         assert table["name"] == "Test Table"
-        assert table["slug"] == "test_table"
         assert table["description"] == "Table description"
-        assert table["temporal_coverage"] == {"start": "2020", "end": "2023"}
+        assert table["period_start"] == "2020"
+        assert table["period_end"] == "2023"
+        assert table["partitioned_by"] == ["ano"]
 
-        assert len(table["columns"]) == 2
+        assert len(table["columns"]) == 4
 
         assert table["columns"][0]["name"] == "peso_liquido"
         assert table["columns"][0]["type"] == "FLOAT64"
         assert table["columns"][0]["description"] == "Peso líquido"
         assert table["columns"][0]["unit"] == "kg"
+        assert table["columns"][0]["needs_decoding"] is False
         assert "reference_table_id" not in table["columns"][0]
 
-        assert table["columns"][1]["name"] == "id_municipio"
+        assert table["columns"][1]["name"] == "status"
         assert table["columns"][1]["type"] == "STRING"
-        assert table["columns"][1]["description"] == "ID do município"
-        assert table["columns"][1]["reference_table_id"] == "dir-table-1"
+        assert table["columns"][1]["description"] == "Status"
+        assert table["columns"][1]["needs_decoding"] is True
         assert "unit" not in table["columns"][1]
+        assert "reference_table_id" not in table["columns"][1]
+
+        assert table["columns"][2]["name"] == "id_municipio"
+        assert table["columns"][2]["type"] == "STRING"
+        assert table["columns"][2]["description"] == "ID do município"
+        assert table["columns"][2]["reference_table_id"] == "dir-table-1"
+        assert table["columns"][2]["needs_decoding"] is False
+        assert "unit" not in table["columns"][2]
+
+        assert table["columns"][3]["name"] == "ano"
+        assert table["columns"][3]["type"] == "INT64"
+        assert table["columns"][3]["description"] == "Ano"
+        assert table["columns"][3]["needs_decoding"] is False
+        assert "reference_table_id" not in table["columns"][3]
+        assert "unit" not in table["columns"][3]
+
+    @respx.mock
+    async def test_get_table_details_null_temporal_coverage(self, mock_response):
+        """Test table details when temporalCoverage is null."""
+        mock_response["data"]["allTable"]["edges"][0]["node"]["temporalCoverage"] = None
+
+        respx.post(self.GRAPHQL_URL).mock(
+            return_value=httpx.Response(200, json=mock_response)
+        )
+
+        result = await get_table_details.ainvoke({"table_id": "table-1"})
+        table = json.loads(result)
+
+        assert table["period_start"] is None
+        assert table["period_end"] is None
 
     @respx.mock
     async def test_get_table_details_without_cloud_tables(self, mock_response):
diff --git a/tests/app/agent/tools/test_bigquery.py b/tests/app/agent/tools/test_bigquery.py
index 6547341..b48ea9a 100644
--- a/tests/app/agent/tools/test_bigquery.py
+++ b/tests/app/agent/tools/test_bigquery.py
@@ -46,6 +46,37 @@ def test_successful_query(self, mocker: MockerFixture, mock_config: dict):
 
         assert output == [{"col1": "value1"}, {"col1": "value2"}]
 
+    def test_successful_query_empty_result(
+        self, mocker: MockerFixture, mock_config: dict
+    ):
+        """Test successful SELECT query execution with an empty result."""
+        mock_dry_run_query_job = MagicMock()
+        mock_dry_run_query_job.statement_type = "SELECT"
+
+        mock_query_job = MagicMock()
+        mock_query_job.result.return_value = []
+
+        mock_bigquery_client = MagicMock(spec=bq.Client)
+        mock_bigquery_client.query.side_effect = [
+            mock_dry_run_query_job,
+            mock_query_job,
+        ]
+
+        mocker.patch(
+            "app.agent.tools.bigquery._get_client", return_value=mock_bigquery_client
+        )
+
+        result = execute_bigquery_sql.invoke(
+            {"sql_query": "SELECT * FROM project.dataset.table", "config": mock_config}
+        )
+
+        output = json.loads(result)
+
+        assert (
+            output
+            == "Query returned 0 rows. Review the filters per the empty-result protocol."
+        )
+
     def test_forbidden_statement_type(self, mocker: MockerFixture, mock_config: dict):
         """Test error when statement is not SELECT."""
         mock_dry_run_query_job = MagicMock()
@@ -97,7 +128,7 @@ def test_bytes_billed_limit_exceeded(
         assert output["status"] == "error"
         assert output["message"] == (
             f"Query exceeds the {MAX_BYTES_BILLED // 10**9}GB processing limit. "
-            "Add WHERE filters or select fewer columns."
+            "Filter by partitioned columns."
         )
 
     def test_google_api_error_reraise(self, mocker: MockerFixture, mock_config: dict):