In [2]:
import os

import duckdb
import matplotlib.pyplot as plt
import pandas as pd

# -------------------------------------------------------------------
# Connect to DuckDB database (adjust path to your project)
# -------------------------------------------------------------------
# The path is relative to the directory the notebook kernel was started in.
con = duckdb.connect("dbt_models/dev.duckdb")

# Load extensions for S3/MinIO access (optional)
con.execute("INSTALL httpfs;")
con.execute("LOAD httpfs;")

# Configure S3/MinIO access. Endpoint and credentials are read from the
# environment so real secrets never have to be committed with the notebook;
# the fallbacks are the values this notebook originally hardcoded for a
# local MinIO instance.
s3_settings = {
    "s3_endpoint": os.environ.get("MINIO_ENDPOINT", "localhost:9000"),
    "s3_url_style": "path",
    "s3_access_key_id": os.environ.get("MINIO_ROOT_USER", "minioadmin"),
    "s3_secret_access_key": os.environ.get("MINIO_ROOT_PASSWORD", "minioadmin"),
}
for setting_name, setting_value in s3_settings.items():
    # Double any single quote so an unusual value cannot break out of the
    # SQL string literal.
    escaped_value = setting_value.replace("'", "''")
    con.execute(f"SET {setting_name}='{escaped_value}';")
# Plain HTTP: the endpoint above is addressed without TLS.
con.execute("SET s3_use_ssl=false;")

# -------------------------------------------------------------------
# Explore the stocks_prices table
# -------------------------------------------------------------------
print("Available Tables:")
print(con.execute("SHOW TABLES").df())

# Simple preview: fetch at most 20 rows; the trailing expression renders the
# first five of them as the cell output.
stocks_df = con.execute("SELECT ticker, price, ts, volume FROM stocks_prices LIMIT 20").df()
stocks_df.head()



In [1]:
# -------------------------------------------------------------------
# Aggregations
# -------------------------------------------------------------------
# NOTE: this cell uses `con` and `plt` from the setup cell at the top of the
# notebook; run that cell first, otherwise `con` is undefined (NameError).

# Count records per ticker, most frequent first.
symbol_counts = con.execute("""
    SELECT ticker, COUNT(*) AS num_records
    FROM stocks_prices
    GROUP BY ticker
    ORDER BY num_records DESC
""").df()

# Plot top 10 symbols. The x column must be 'ticker': the query above returns
# only `ticker` and `num_records`, so there is no 'symbol' column to plot.
symbol_counts.head(10).plot(
    x='ticker', y='num_records', kind='bar', legend=False, figsize=(8,4)
)
plt.title("Top 10 Symbols by Record Count")
plt.xlabel("Symbol")
plt.ylabel("Records")
plt.show()



NameError: name 'con' is not defined

In [None]:
# -------------------------------------------------------------------
# Time-series example (for one stock)
# -------------------------------------------------------------------
# Ticker to chart. Change this single value to plot a different stock; it
# feeds both the query filter and the plot title.
TICKER = "AAPL"

# Average price per calendar day for the chosen ticker. The ticker is bound
# as a prepared-statement parameter rather than spliced into the SQL text.
one_stock = con.execute("""
    SELECT ts::DATE AS dt, AVG(price) AS avg_price
    FROM stocks_prices
    WHERE ticker = ?
    GROUP BY dt
    ORDER BY dt
""", [TICKER]).df()

one_stock.plot(x='dt', y='avg_price', kind='line', figsize=(10,4))
plt.title(f"{TICKER} Average Price Over Time")
plt.xlabel("Date")
plt.ylabel("Average Price")
plt.grid(True)
plt.show()

# -------------------------------------------------------------------
# Done - extend with your own queries below
# -------------------------------------------------------------------
