# DuckDB Example Queries

This notebook demonstrates how to connect to and query the DuckDB database.

In [None]:

import duckdb

In [None]:
# Open a connection to the DuckDB database file used by the rest of this notebook
db_path = "/home/jovyan/work/data/duckdb/cct_env.duckdb"
con = duckdb.connect(database=db_path)
print(f"Connected to DuckDB at: {db_path}")

In [None]:
# List every table visible through the open connection
tables = con.execute("SHOW TABLES").fetchall()
print("Available tables:")
# Each fetched row is a tuple; its first field is the table name
for table_name, *_ in tables:
    print(f"  - {table_name}")

In [None]:
# Inspect the schema of the station dimension table
print("Station dimension table schema:")
station_schema = con.execute("DESCRIBE dim_station").df()
# Leave the DataFrame as the last expression so the notebook renders it
station_schema

In [None]:
# Inspect the schema of the measurement fact table
print("Measurement fact table schema:")
measurement_schema = con.execute("DESCRIBE fact_measurement").df()
# Leave the DataFrame as the last expression so the notebook renders it
measurement_schema

In [None]:
# Row-count checks for both tables; these become meaningful once data is loaded
measurement_count_sql = "SELECT COUNT(*) as total_measurements FROM fact_measurement"
station_count_sql = "SELECT COUNT(*) as total_stations FROM dim_station"

# Rows in the measurement fact table
print("Total measurements:")
# fetchone() yields a single-element row for a COUNT(*) query; unpack it directly
(measurement_count,) = con.execute(measurement_count_sql).fetchone()
print(f"  {measurement_count} measurements")

# Rows in the station dimension table
print("\nTotal stations:")
(station_count,) = con.execute(station_count_sql).fetchone()
print(f"  {station_count} stations")

In [None]:
# Example: the most recent reading for each (station, metric) pair, capped at
# 10 rows. The correlated subquery picks the maximum timestamp per pair.
latest_measurements_sql = """
SELECT 
    s.name as station_name,
    m.metric,
    m.value,
    m.unit,
    m.ts as timestamp
FROM fact_measurement m
JOIN dim_station s ON m.station_id = s.station_id
WHERE m.ts = (
    SELECT MAX(ts) 
    FROM fact_measurement m2 
    WHERE m2.station_id = m.station_id AND m2.metric = m.metric
)
ORDER BY s.name, m.metric
LIMIT 10;
"""

# Materialize the result as a pandas DataFrame so the notebook shows it as a table
latest_measurements = con.execute(latest_measurements_sql).df()
print(f"Latest measurements (showing {len(latest_measurements)} results):")
latest_measurements

In [None]:
# All queries have run: release the database connection.
# Cells above must be re-run from the connect cell before querying again.
con.close()
print("Connection closed.")