# MinIO + DuckDB SPY 1-min Dashboard
This notebook reads daily SPY 1-minute bars directly from MinIO (S3-compatible) using DuckDB httpfs and plots a candlestick + volume chart.

In [1]:
# Imports and Plotly renderer
import os
import duckdb
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = os.getenv('PLOTLY_RENDERER', 'plotly_mimetype')  # 'vscode' also works in VS Code

In [2]:
# Configure DuckDB httpfs from environment variables (e.g. exported from .env)
# S3_ENDPOINT_URL may be given with or without a scheme ("http://host:9100" or "host:9100").
_endpoint_url = os.getenv('S3_ENDPOINT_URL', '127.0.0.1:9100').strip()
_scheme, _sep, _rest = _endpoint_url.partition('://')
# s3_endpoint is passed without a scheme; TLS is controlled separately via s3_use_ssl.
# A trailing slash is dropped so it does not end up inside the request host.
S3_ENDPOINT = (_rest if _sep else _endpoint_url).rstrip('/')
S3_REGION = os.getenv('S3_REGION', 'us-east-1')
# Default TLS from the URL scheme so an https:// endpoint is not silently downgraded
# to plain HTTP; an explicit S3_USE_SSL always takes precedence.
_ssl_default = 'true' if (_sep and _scheme.lower() == 'https') else 'false'
S3_USE_SSL = os.getenv('S3_USE_SSL', _ssl_default)
# Prefer dedicated S3 credentials, fall back to the MinIO root account.
S3_AK = os.getenv('S3_ACCESS_KEY_ID') or os.getenv('MINIO_ROOT_USER', 'minioadmin')
S3_SK = os.getenv('S3_SECRET_ACCESS_KEY') or os.getenv('MINIO_ROOT_PASSWORD', 'minioadmin')
BUCKET = os.getenv('MINIO_BUCKET', 'antman-lake')

con = duckdb.connect()
con.execute('INSTALL httpfs; LOAD httpfs;')
# Path-style URLs (endpoint/bucket/key) instead of virtual-host style (bucket.endpoint/key).
con.execute("SET s3_url_style='path';")
con.execute(f"SET s3_use_ssl='{S3_USE_SSL}';")
con.execute(f"SET s3_region='{S3_REGION}';")
con.execute(f"SET s3_endpoint='{S3_ENDPOINT}';")
# Credentials are bound as parameters so they never appear in the SQL text.
con.execute("SET s3_access_key_id=$1;", [S3_AK])
con.execute("SET s3_secret_access_key=$1;", [S3_SK])
print('Configured DuckDB httpfs for MinIO at', S3_ENDPOINT)
print('Bucket:', BUCKET)

Configured DuckDB httpfs for MinIO at localhost:9100
Bucket: antman-lake


In [3]:
# Helper: load one trading day from MinIO
def load_day(dt_str: str, ticker: str = 'SPY') -> pd.DataFrame:
    """Read one day's 1-minute bars for ``ticker`` from the bucket.

    The parquet files are looked up under
    ``s3://<BUCKET>/silver/symbol=<ticker>/resolution=1min/dt=<dt_str>/``.

    Returns a frame sorted by ``ts`` (timezone-aware UTC, rounded to
    milliseconds) holding whichever of ``ts``/``open``/``high``/``low``/
    ``close``/``volume`` are present. An empty frame is returned when the
    read fails (the error is printed) or when no timestamp column is found.
    """
    parquet_glob = f"s3://{BUCKET}/silver/symbol={ticker}/resolution=1min/dt={dt_str}/*.parquet"
    try:
        raw = con.execute("SELECT * FROM read_parquet($1, filename=true)", [parquet_glob]).df()
    except Exception as exc:
        # Best-effort: report the failure and let the caller handle "no data".
        print('read_parquet failed:', exc)
        return pd.DataFrame()

    # First candidate name that exists wins; order encodes preference.
    candidates = ('ts', 'utc_timestamp', 'timestamp', 'datetime')
    source_col = next((name for name in candidates if name in raw.columns), None)
    if source_col is None:
        return pd.DataFrame()

    # Unparseable values become NaT and are dropped below.
    raw['ts'] = pd.to_datetime(raw[source_col], utc=True, errors='coerce')

    wanted = ('ts', 'open', 'high', 'low', 'close', 'volume')
    present = [name for name in wanted if name in raw.columns]
    bars = raw[present]
    bars = bars.dropna(subset=['ts'])
    bars = bars.sort_values('ts')
    bars = bars.reset_index(drop=True)
    bars['ts'] = bars['ts'].dt.round('ms')
    return bars

In [4]:
# Load a sample date
# SAMPLE_DT (environment) overrides the default day; the string is used as the
# dt=<value> partition name inside load_day.
dt = os.getenv('SAMPLE_DT', '2025-03-11')
# `df` and `dt` are module-level on purpose: the plotting cell below reads both.
df = load_day(dt)
print(f'Loaded {len(df)} rows for {dt}')
# Last expression of the cell, so the first rows render as the cell output.
df.head()

Loaded 390 rows for 2025-03-11


Unnamed: 0,ts,open,high,low,close,volume
0,2025-03-11 14:30:00+00:00,555.95,557.24,555.9,556.63,318164
1,2025-03-11 14:31:00+00:00,556.625,557.03,556.4928,556.6,142751
2,2025-03-11 14:32:00+00:00,556.57,556.71,556.28,556.31,111371
3,2025-03-11 14:33:00+00:00,556.3,556.5,556.1087,556.435,126317
4,2025-03-11 14:34:00+00:00,556.45,556.48,555.8,556.11,157529


In [5]:
# Plot candlestick + volume for the day loaded above (`df`, `dt`)
if df.empty:
    print('No data to plot; try another dt or check MinIO path.')
else:
    candles = go.Candlestick(
        x=df['ts'],
        open=df.get('open'),
        high=df.get('high'),
        low=df.get('low'),
        close=df.get('close'),
        name='SPY',
    )
    fig = go.Figure(data=[candles])
    if 'volume' in df.columns:
        # Volume shares the x-axis but gets its own right-hand axis; the axis
        # range is 4x the max volume so the bars stay in the bottom quarter.
        volume_axis_top = float(df['volume'].max())*4
        volume_bars = go.Bar(
            x=df['ts'],
            y=df['volume'],
            name='Volume',
            yaxis='y2',
            marker_color='lightgray',
            opacity=0.4,
        )
        fig.add_trace(volume_bars)
        fig.update_layout(
            yaxis2=dict(overlaying='y', side='right', showgrid=False, range=[0, volume_axis_top])
        )
    fig.update_layout(
        title=f'SPY 1-min {dt}',
        xaxis_title='Time',
        yaxis_title='Price',
        xaxis_rangeslider_visible=False,
        template='plotly_white',
        height=600,
    )
    fig.show()

## Tips
- MinIO console: http://localhost:9101
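- The notebook reads settings with `os.getenv`, so make sure the kernel's environment (e.g. exported from `.env` before starting Jupyter) defines `MINIO_BUCKET`, `S3_ENDPOINT_URL` (e.g. `127.0.0.1:9100`), and credentials (`S3_ACCESS_KEY_ID` / `S3_SECRET_ACCESS_KEY`, or `MINIO_ROOT_USER` / `MINIO_ROOT_PASSWORD`).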
- Weekends/holidays may return empty days; pick a trading day.

## Interactive TA dashboard (EMAs, Bollinger Bands, RSI; MACD not yet implemented)

Pick a trading day and render a multi-panel chart using data loaded directly from MinIO via DuckDB.

In [None]:
import numpy as np
import ipywidgets as W
from IPython.display import display
import traceback
import plotly.io as pio

# TA helpers

def ema(s, span):
    """Exponential moving average of ``s`` with the given ``span``.

    Uses the recursive (``adjust=False``) form, so the first output equals
    the first input value.
    """
    smoother = s.ewm(span=span, adjust=False)
    return smoother.mean()

def bollinger(s, n=20, k=2.0):
    """Bollinger bands of ``s`` over a rolling window of ``n`` points.

    Returns ``(middle, upper, lower)`` where ``middle`` is the rolling mean
    and the outer bands sit ``k`` population standard deviations (``ddof=0``)
    above and below it. The first ``n - 1`` values of each series are NaN.
    """
    window = s.rolling(n)
    middle = window.mean()
    offset = k * window.std(ddof=0)
    upper = middle + offset
    lower = middle - offset
    return middle, upper, lower

def rsi(s, n=14):
    """Relative Strength Index of ``s`` over ``n`` periods.

    Gains and losses are smoothed with a recursive exponential average
    (``alpha = 1/n``, ``adjust=False``).

    Returns an unnamed Series on ``s.index`` with values in ``[0, 100]``:
      * ``100`` where the average loss is zero but the average gain is not
        (an uninterrupted up-move);
      * ``NaN`` where both averages are zero (flat prices, RSI undefined) and
        at the first position, which has no previous value to diff against.
    """
    delta = s.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    avg_gain = gains.ewm(alpha=1/n, adjust=False).mean()
    avg_loss = losses.ewm(alpha=1/n, adjust=False).mean()

    no_loss = avg_loss == 0
    no_gain = avg_gain == 0
    # Series division yields inf/NaN on a zero divisor instead of raising; the
    # zero-loss positions are overwritten explicitly below, so the result does
    # not depend on the sign of a (possibly negative) zero.
    rs = avg_gain / avg_loss
    values = 100 - (100 / (1 + rs))
    values = np.where(no_loss & no_gain, np.nan, np.where(no_loss, 100.0, values))
    return pd.Series(values, index=s.index)

print(f"[setup] plotly renderer={pio.renderers.default}")

# UI widgets
# continuous_update=False: every value change triggers render(), which reloads
# the day from MinIO. Without it the Text box fires on each keystroke (querying
# partial dates such as "2025-0") and the sliders fire on every drag step.
# With it, the value changes on Enter/blur (Text) or on mouse release (sliders).
_dt = W.Text(
    value=os.getenv('SAMPLE_DT', '2025-03-11'),
    description='Date (YYYY-MM-DD):',
    continuous_update=False,
)
_ema_fast = W.IntSlider(value=9, min=3, max=50, step=1, description='EMA fast', continuous_update=False)
_ema_slow = W.IntSlider(value=21, min=5, max=100, step=1, description='EMA slow', continuous_update=False)
_bb_n = W.IntSlider(value=20, min=10, max=50, step=1, description='BB n', continuous_update=False)
_bb_k = W.FloatSlider(value=2.0, min=1.0, max=3.0, step=0.1, description='BB k', continuous_update=False)

# All dashboard output (log lines and figures) is captured into this widget.
_out = W.Output()


def render(*_):
    """Redraw the dashboard for the current widget values.

    Positional arguments are accepted and ignored, so the function works both
    as a widget observer callback and when called directly. Everything it
    produces (log lines, the price figure, the RSI figure) goes into the
    ``_out`` widget, which is cleared first. Exceptions are caught, printed
    with their traceback into ``_out``, and not re-raised.
    """
    with _out:
        _out.clear_output(wait=True)
        try:
            print(f"[render] dt={_dt.value}")
            df = load_day(_dt.value)
            print(f"[render] loaded rows={len(df)} cols={list(df.columns)}")

            # Guard clauses: nothing to draw without rows or without closes.
            if df.empty:
                print('No data for', _dt.value)
                return
            if 'close' not in df.columns:
                print('Missing close column; columns=', list(df.columns))
                return

            # Indicators (all derived from the close series)
            close = df['close']
            df['ema_fast'] = ema(close, _ema_fast.value)
            df['ema_slow'] = ema(close, _ema_slow.value)
            band_mid, band_up, band_dn = bollinger(close, _bb_n.value, _bb_k.value)
            df['bb_mid'] = band_mid
            df['bb_up'] = band_up
            df['bb_dn'] = band_dn
            rsi_line = rsi(close, 14)
            print(
                f"[render] indicators ok: ema_fast={_ema_fast.value}, ema_slow={_ema_slow.value}, bb_n={_bb_n.value}, bb_k={_bb_k.value}"
            )
            print(f"[render] ts range: {df['ts'].min()} -> {df['ts'].max()}")

            # Price pane: candles first, then the overlay lines in legend order
            price_fig = go.Figure()
            price_fig.add_trace(
                go.Candlestick(
                    x=df['ts'],
                    open=df.get('open'),
                    high=df.get('high'),
                    low=df.get('low'),
                    close=df.get('close'),
                    name='Price',
                )
            )
            overlays = (
                ('ema_fast', f'EMA{_ema_fast.value}', dict(width=1)),
                ('ema_slow', f'EMA{_ema_slow.value}', dict(width=1)),
                ('bb_up', 'BB up', dict(width=1, color='gray')),
                ('bb_mid', 'BB mid', dict(width=1, color='lightgray')),
                ('bb_dn', 'BB dn', dict(width=1, color='gray')),
            )
            for column, label, line_style in overlays:
                price_fig.add_trace(
                    go.Scatter(x=df['ts'], y=df[column], name=label, line=line_style)
                )

            # Volume on secondary axis; range is 4x the max so bars stay low
            if 'volume' in df.columns:
                volume_axis_top = float(df['volume'].max()) * 4
                price_fig.add_trace(
                    go.Bar(
                        x=df['ts'],
                        y=df['volume'],
                        name='Volume',
                        yaxis='y2',
                        marker_color='lightgray',
                        opacity=0.4,
                    )
                )
                price_fig.update_layout(
                    yaxis2=dict(
                        overlaying='y', side='right', showgrid=False, range=[0, volume_axis_top]
                    )
                )

            price_fig.update_layout(
                title=f'SPY {_dt.value}',
                xaxis_title='Time',
                yaxis_title='Price',
                xaxis_rangeslider_visible=False,
                template='plotly_white',
                height=700,
            )
            # Important: display(...) rather than .show() so the figure is
            # captured by the Output widget
            display(price_fig)

            # RSI pane with the 70/30 reference lines
            rsi_fig = go.Figure()
            rsi_fig.add_trace(go.Scatter(x=df['ts'], y=rsi_line, name='RSI'))
            for level, colour in ((70, 'red'), (30, 'green')):
                rsi_fig.add_hline(y=level, line_dash='dot', line_color=colour)
            rsi_fig.update_layout(height=200, template='plotly_white', title='RSI (14)')
            display(rsi_fig)
        except Exception as exc:
            print('[render] error:', repr(exc))
            traceback.print_exc()

# Re-render whenever the value of any control changes
_controls = (_dt, _ema_fast, _ema_slow, _bb_n, _bb_k)
for w in _controls:
    w.observe(render, names='value')

# Explicitly display the widget container; avoid losing it by having another last expression
_slider_row = W.HBox([_ema_fast, _ema_slow, _bb_n, _bb_k])
_box = W.VBox([_dt, _slider_row, _out])
display(_box)

# Initial render so the dashboard is populated before any interaction
render()
