In [3]:
import os

from pathlib import Path

import duckdb
import pandas as pd

from dotenv import load_dotenv

load_dotenv()

True

In [4]:
DB_PATH = Path(os.getenv("SILVER_DUCKDB_PATH", ""))
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

TMP_DIR = "/tmp/duckdb_tmp"  # make sure it exists

con = duckdb.connect(DB_PATH)
SAMPLE_ROWS = 10             # how many rows per table
INCLUDE_VIEWS = False        # set True to include views

In [5]:

tables = con.execute("""
    SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_type = 'BASE TABLE'
    ORDER BY table_schema, table_name
""").fetchall()


In [6]:

md = []
md.append("# DuckDB Schema & Data Preview\n")

for schema, name in tables:
    full = f'"{schema}"."{name}"' if schema not in ('main', '') else f'"{name}"'

    # Get schema info (columns + types)
    cols = con.execute(f"PRAGMA table_info({full})").fetchdf()
    col_defs = ",\n  ".join(f"{row['name']} {row['type']}" for _, row in cols.iterrows())
    ddl = f"CREATE TABLE {full} (\n  {col_defs}\n);"

    # Sample data
    df = con.execute(f"SELECT * FROM {full} LIMIT {SAMPLE_ROWS}").fetchdf()

    md.append(f"\n## {schema}.{name}\n")
    md.append("**Create statement:**")
    md.append("\n```sql\n" + ddl + "\n```\n")

    if df.empty:
        md.append("_No rows to preview._\n")
    else:
        # Convert dataframe to markdown table
        df_display = df.copy()
        df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)

        headers = "| " + " | ".join(map(str, df_display.columns)) + " |"
        sep = "| " + " | ".join(["---"] * len(df_display.columns)) + " |"
        rows = ["| " + " | ".join("" if pd.isna(v) else str(v) for v in row) + " |" for row in df_display.itertuples(index=False)]
        md.extend([headers, sep, *rows, ""])

# Write Markdown to file
with open("duckdb_schema_preview.md", "w", encoding="utf-8") as f:
    f.write("\n".join(md))

print("\n".join(md))

  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(x) > 200 else x)
  df_display = df_display.applymap(lambda x: (str(x)[:200] + "…") if isinstance(x, str) and len(

# DuckDB Schema & Data Preview


## main.autores_camara

**Create statement:**

```sql
CREATE TABLE "autores_camara" (
  id_autor BIGINT,
  cod_tipo BIGINT,
  uri VARCHAR,
  ordem_assinatura INTEGER,
  proponente BOOLEAN,
  id_proposicao BIGINT,
  year BIGINT
);
```

| id_autor | cod_tipo | uri | ordem_assinatura | proponente | id_proposicao | year |
| --- | --- | --- | --- | --- | --- | --- |
| 1 | 10000 | https://dadosabertos.camara.leg.br/api/v2/deputados/73538 | 1 | True | 15009 | 2020 |
| 2 | 10000 | https://dadosabertos.camara.leg.br/api/v2/deputados/73910 | 1 | True | 15532 | 2020 |
| 3 | 10000 | https://dadosabertos.camara.leg.br/api/v2/deputados/74316 | 1 | True | 15749 | 2020 |
| 4 | 20 | https://dadosabertos.camara.leg.br/api/v2/orgaos/262 | 1 | True | 15990 | 2020 |
| 5 | 40000 | https://dadosabertos.camara.leg.br/api/v2/orgaos/78 | 1 | True | 16481 | 2020 |
| 6 | 40000 | https://dadosabertos.camara.leg.br/api/v2/orgaos/78 | 1 | True | 16969 | 2020 |
| 7 | 10000 | https://d