In [None]:
import duckdb
import os
from pathlib import Path
import pandas as pd

In [None]:
# Recursively find all .duckdb files in ../tiger_data and select the most recent one.
# Outputs of this cell:
#   base_dir     - absolute path of the directory that is searched
#   duckdb_files - matching files, newest modification time first
#   db_file      - the newest match, or None when nothing was found
base_dir = Path("../tiger_data").resolve()

# rglob walks the tree for us; keeping only regular files skips directories that
# happen to end in ".duckdb" and broken symlinks, whose stat() would raise below.
duckdb_files = [p for p in base_dir.rglob("*.duckdb") if p.is_file()]

if not duckdb_files:
    print("No DuckDB files found.")
    db_file = None
else:
    # Sort by modification time, descending
    duckdb_files = sorted(duckdb_files, key=lambda p: p.stat().st_mtime, reverse=True)
    db_file = duckdb_files[0]
    print(f"Most recent DuckDB file: {db_file}")


In [None]:
# Open the notebook-level connection: the selected database file when one was
# found, otherwise an in-memory database.
if db_file:
    db_conn = duckdb.connect(database=str(db_file))
else:
    db_conn = duckdb.connect(database=':memory:')

In [None]:
# List all tables in the selected DuckDB file.
# Leaves `table_list` as a list of table names (empty when no file was selected).
table_list = []
if db_file:
    # Use a connection scoped to this cell instead of rebinding `db_conn`:
    # reassigning it here would orphan the handle that is already open, and the
    # `with` block would then close the replacement before later cells run.
    with duckdb.connect(database=str(db_file)) as conn:
        # Each row returned by SHOW TABLES carries the table name in column 0.
        table_list = [row[0] for row in conn.execute("SHOW TABLES").fetchall()]
    print("Tables in the database:")
    for table_name in table_list:
        print(table_name)
else:
    print("No DuckDB file selected.")


In [None]:
# Bare expression: the notebook renders the list of table names as this cell's output.
table_list

In [None]:
# Query a 10-row preview of every table into DataFrames with dynamic names.
# Each preview is stored twice: in `table_dfs` under the key "df_<table>" and as
# a notebook-level variable of the same name.
table_dfs = {}
if db_file:
    with duckdb.connect(database=str(db_file)) as conn:
        for t in table_list:
            print(f"Table: {t}")
            # Quote the identifier (doubling any embedded double quotes) so table
            # names that are reserved words or contain spaces/punctuation still
            # parse as a single identifier.
            quoted_name = '"' + t.replace('"', '""') + '"'
            df = conn.execute(f"SELECT * FROM {quoted_name} LIMIT 10").df()
            table_dfs[f"df_{t}"] = df
            # Assign to notebook namespace. If the table name is not a valid
            # Python identifier the variable cannot be typed directly; use
            # `table_dfs` to reach it instead.
            globals()[f"df_{t}"] = df
            display(df)
else:
    print("No DuckDB file selected.")


In [None]:
# Close the connection currently referenced by `db_conn`.
db_conn.close()