# Heatmapy pro BG imbalances

In [4]:
import pandas as pd
import dash
from dash import dcc, html, ctx
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px

# ---------- 1. Data loading ----------
# Module-level frame shared by every callback below. The three derived
# columns are built in one chained `assign`; the callables run in order,
# so `weekday` sees the already-parsed `date`.
df = pd.read_csv("bg_imbalance_prices_dash.csv").assign(
    # Unparseable dates become NaT instead of raising.
    date=lambda frame: pd.to_datetime(frame['date'], errors='coerce'),
    # Rows marked 'zero' are counted together with 'positive'.
    price_direction=lambda frame: frame['price_direction'].replace('zero', 'positive'),
    # Weekday name of each row, used by the weekday x period heatmap.
    weekday=lambda frame: frame['date'].dt.day_name(),
)

# ---------- 2. App initialisation ----------
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Interaktivn√≠ Heatmapa BG"

# ---------- 3. Layout ----------
# Dataset bounds, computed once and reused for both the allowed range and
# the initially selected range of the date picker.
_first_day = df['date'].min().date()
_last_day = df['date'].max().date()

# Aggregation choices offered for the main heatmap.
_metric_options = [
    {'label': 'üìâ Pod√≠l negativn√≠ch (%)', 'value': 'percent_negative'},
    {'label': 'üìà Pod√≠l pozitivn√≠ch (%)', 'value': 'percent_positive'},
    {'label': 'üí∂ Pr≈Ømƒõrn√° cena (EUR/MWh)', 'value': 'avg_price'},
    {'label': 'üìä Smƒõrodatn√° odchylka ceny', 'value': 'std_price'},
    {'label': 'üî¢ Poƒçet v≈°ech z√°znam≈Ø', 'value': 'total'},
]

# Price-direction choices offered for the weekday x period heatmap.
_direction_options = [
    {'label': 'üìâ Pod√≠l negativn√≠ch', 'value': 'negative'},
    {'label': 'üìà Pod√≠l pozitivn√≠ch', 'value': 'positive'},
]

# Inline CSS that makes the date picker look like the other form controls.
_date_picker_style = {
    'backgroundColor': 'white',
    'padding': '8px',
    'borderRadius': '5px',
    'border': '1px solid #ced4da',
    'width': '100%',
}

# Metric selector, date range selector, selection summary and reset button.
_filter_column = dbc.Col([
    html.Label("Vyber metodu agregace:", className="fw-bold mb-1"),
    dcc.Dropdown(
        id='metric',
        options=_metric_options,
        value='percent_negative',
        clearable=False,
        className='mb-4',
    ),

    html.Label("Vyber rozsah datumu:", className="fw-bold mb-1"),
    dcc.DatePickerRange(
        id='date-range',
        min_date_allowed=_first_day,
        max_date_allowed=_last_day,
        start_date=_first_day,
        end_date=_last_day,
        display_format='DD.MM.YYYY',
        className='mb-3',
        style=_date_picker_style,
    ),

    html.Div(id='date-summary', className='mt-2', style={'fontStyle': 'italic'}),
    html.Button("Reset filtru", id='reset-button', n_clicks=0,
                className='btn btn-outline-secondary mt-2'),
], width=6)

# Selector for the price direction shown in the second heatmap.
_direction_column = dbc.Col([
    html.Label("Vyber smƒõr ceny:", className="fw-bold"),
    dcc.Dropdown(
        id='direction-type',
        options=_direction_options,
        value='negative',
        clearable=False,
        style={'width': '100%'},
    ),
], width=4)

app.layout = dbc.Container([
    html.H1("üìà Interaktivn√≠ heatmapa bulharsk√Ωch odchylek", className="text-center my-4"),

    # Metric and date selection
    dbc.Row([_filter_column]),

    # First heatmap
    dbc.Row([
        dbc.Col([
            dcc.Graph(id='heatmap', config={'displayModeBar': False}),
        ]),
    ]),

    html.Hr(),

    # Second heatmap: day of week x period
    html.H3("üìä Heatmapa smƒõru ceny podle dne v t√Ωdnu a periody", className="text-center my-4"),

    dbc.Row([_direction_column], className="mb-4"),

    dbc.Row([
        dbc.Col([
            dcc.Graph(id='direction-heatmap', config={'displayModeBar': False}),
        ]),
    ]),
], fluid=True)

# ---------- 4. Callback: main heatmap + date summary text ----------
@app.callback(
    Output('heatmap', 'figure'),
    Output('date-summary', 'children'),
    Input('metric', 'value'),
    Input('date-range', 'start_date'),
    Input('date-range', 'end_date')
)
def update_main_heatmap(metric, start_date, end_date):
    """Rebuild the season/working-day x peak/off-peak heatmap.

    Args:
        metric: Name of the aggregated column to plot (one of the values
            offered by the 'metric' dropdown).
        start_date: First selected day, or None when the picker is cleared.
        end_date: Last selected day (inclusive), or None when cleared.

    Returns:
        A ``(figure, summary_text)`` tuple. The summary states the dataset
        span and how many rows fall inside the selection.
    """
    data_start = df['date'].min()
    data_end = df['date'].max()

    # A cleared picker delivers None; pd.to_datetime(None) is None and the
    # comparison below would raise, so fall back to the dataset bounds.
    range_start = data_start if start_date is None else pd.to_datetime(start_date)
    range_end = data_end if end_date is None else pd.to_datetime(end_date)

    # The picker supplies a calendar day. Comparing `<= end` against midnight
    # would drop every intraday row of the last selected day, so use a
    # half-open upper bound at the start of the following day instead.
    end_exclusive = range_end.normalize() + pd.Timedelta(days=1)

    in_range = (df['date'] >= range_start) & (df['date'] < end_exclusive)
    filtered_df = df[in_range]

    # Share of the dataset covered by the selection
    total_count = len(df)
    filtered_count = len(filtered_df)
    percent = (filtered_count / total_count * 100) if total_count > 0 else 0

    date_info_text = (
        f"üìÖ Dataset obsahuje data od {data_start.date()} do {data_end.date()}. "
        f"Vybr√°no {filtered_count:,} z {total_count:,} z√°znam≈Ø "
        f"({percent:.1f} %)"
    )

    if filtered_df.empty:
        # Nothing selected: return a one-cell placeholder figure.
        return px.imshow([[0]], labels=dict(x="Typ hodiny", y="Sez√≥na / Pracovn√≠ den", color="Nen√≠ data")), date_info_text

    grouped = filtered_df.groupby(['season', 'pracovni_den', 'peak_offpeak'])

    agg_df = grouped.agg(
        total=('price', 'count'),
        count_negative=('price_direction', lambda x: (x == 'negative').sum()),
        count_positive=('price_direction', lambda x: (x == 'positive').sum()),
        avg_price=('price', 'mean'),
        std_price=('price', 'std')
    ).reset_index()

    agg_df['percent_negative'] = agg_df['count_negative'] / agg_df['total'] * 100
    agg_df['percent_positive'] = agg_df['count_positive'] / agg_df['total'] * 100
    # Row label of the heatmap: "<season> / <working-day flag>"
    agg_df['label'] = agg_df['season'].astype(str) + ' / ' + agg_df['pracovni_den'].astype(str)

    metric_labels = {
        'percent_negative': 'üìâ Pod√≠l negativn√≠ch (%)',
        'percent_positive': 'üìà Pod√≠l pozitivn√≠ch (%)',
        'avg_price': 'üí∂ Pr≈Ømƒõrn√° cena (EUR/MWh)',
        'std_price': 'üìä Smƒõrodatn√° odchylka ceny',
        'total': 'üî¢ Poƒçet z√°znam≈Ø'
    }

    fig = px.density_heatmap(
        agg_df,
        x='peak_offpeak',
        y='label',
        z=agg_df[metric],
        color_continuous_scale='Blues',
        labels={'peak_offpeak': 'Typ hodiny', 'label': 'Sez√≥na / Pracovn√≠ den'},
        nbinsx=2,
        text_auto='.1f'
    )

    fig.update_layout(
        title=metric_labels.get(metric, 'Heatmapa'),
        xaxis_title="Typ hodiny",
        yaxis_title="Sez√≥na / Pracovn√≠ den",
        coloraxis_colorbar_title=metric_labels.get(metric, ''),
        plot_bgcolor='white'
    )

    return fig, date_info_text

# ---------- 5. Callback: reset the date filter ----------
@app.callback(
    Output('date-range', 'start_date'),
    Output('date-range', 'end_date'),
    Input('reset-button', 'n_clicks'),
    prevent_initial_call=True
)
def reset_date_filter(n_clicks):
    """Put the date picker back to the full span of the dataset.

    Args:
        n_clicks: Click counter of the reset button; only used as trigger.

    Returns:
        ``(first_day, last_day)`` of the loaded data as ``date`` objects.
    """
    dates = df['date']
    first_day = dates.min().date()
    last_day = dates.max().date()
    return first_day, last_day

# ---------- 6. Callback: second heatmap ----------
@app.callback(
    Output('direction-heatmap', 'figure'),
    Input('direction-type', 'value'),
    Input('date-range', 'start_date'),
    Input('date-range', 'end_date')
)
def update_direction_heatmap(direction, start_date, end_date):
    """Rebuild the weekday x period heatmap of one price direction.

    Args:
        direction: 'negative' or 'positive'; the share of rows with this
            ``price_direction`` is plotted per (weekday, period) cell.
        start_date: First selected day, or None when the picker is cleared.
        end_date: Last selected day (inclusive), or None when cleared.

    Returns:
        The Plotly figure for the 'direction-heatmap' graph.
    """
    # A cleared picker delivers None; pd.to_datetime(None) is None and the
    # comparison below would raise, so fall back to the dataset bounds.
    range_start = df['date'].min() if start_date is None else pd.to_datetime(start_date)
    range_end = df['date'].max() if end_date is None else pd.to_datetime(end_date)

    # Half-open upper bound at the start of the following day, so intraday
    # rows of the last selected day are kept.
    end_exclusive = range_end.normalize() + pd.Timedelta(days=1)

    filtered_df = df[(df['date'] >= range_start) & (df['date'] < end_exclusive)]

    title_map = {
        'negative': 'üìâ Pod√≠l negativn√≠ch smƒõr≈Ø ceny',
        'positive': 'üìà Pod√≠l pozitivn√≠ch smƒõr≈Ø ceny'
    }

    if filtered_df.empty:
        # Nothing selected: skip the group-by and return a one-cell
        # placeholder, mirroring what the main heatmap does.
        placeholder = px.imshow(
            [[0]],
            labels=dict(x='Perioda (1‚Äì96)', y='Den v t√Ωdnu', color="Nen√≠ data"),
        )
        placeholder.update_layout(title=title_map[direction], height=520)
        return placeholder

    # Share of rows matching `direction` in every (weekday, period) cell
    heatmap_data = (
        filtered_df.groupby(['weekday', 'Perioda'])['price_direction']
        .apply(lambda x: (x == direction).mean())
        .reset_index(name='ratio')
    )

    # Sort Monday..Sunday rather than alphabetically
    ordered_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    heatmap_data['weekday'] = pd.Categorical(heatmap_data['weekday'], categories=ordered_days, ordered=True)
    heatmap_data = heatmap_data.sort_values(['weekday', 'Perioda'])

    fig = px.density_heatmap(
        heatmap_data,
        x='Perioda',
        y='weekday',
        z='ratio',
        nbinsx=96,
        color_continuous_scale='YlGnBu',
        text_auto='.1f',
        labels={
            'ratio': 'Pod√≠l',
            'weekday': 'Den v t√Ωdnu',
            'Perioda': 'Perioda (1‚Äì96)'
        }
    )

    fig.update_layout(
        title=title_map[direction],
        height=520,
        margin=dict(t=60, l=60, r=40, b=40),
        coloraxis_colorbar_title="Pod√≠l",
        xaxis_nticks=24,
        plot_bgcolor='white'
    )

    return fig


# ---------- 7. Launch ----------
if __name__ == '__main__':
    # Start the Dash server in debug mode on port 8102 when run as a script.
    app.run(debug=True, port=8102)


#  Nástroj pro predikce

In [2]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score

import dash
from dash import dcc, html, dash_table, Input, Output, State
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go


# ======================================================
# CONFIG
# ======================================================
DATA_PATH = "bg_imbalance_prices_dash.csv"

# Default probability bounds for "high-confidence" predictions: a row is
# high-confidence when P(positive) is below LOW or above HIGH.
DEFAULT_HC_LOW = 0.35
DEFAULT_HC_HIGH = 0.65

# Day-ahead safe features only:
DAY_AHEAD_FEATURES = [
    "perioda_sin",
    "perioda_cos",
    "hour",
    "is_peak",
    "day_of_week",
]

# ======================================================
# DATA LOAD + PREP (once)
# ======================================================
df = pd.read_csv(DATA_PATH)
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").reset_index(drop=True)

# Target: 1 when the price is strictly positive, otherwise 0
# (a price of exactly 0 therefore lands in class 0).
df["is_positive"] = (df["price"] > 0).astype(int)

# Time features (safe day-ahead)
df["hour"] = df["date"].dt.hour
df["day_of_week"] = df["date"].dt.dayofweek
df["is_peak"] = df["hour"].between(8, 20).astype(int)

# 15-min period (assumes consistent 15-min resolution in the dataset)
# NOTE(review): this is purely positional (row index modulo 96) and it
# overwrites any 'Perioda' column read from the CSV. A missing or extra
# interval shifts every later period - confirm the data really has exactly
# 96 rows per day starting at midnight.
df["Perioda"] = df.index % 96
df["perioda_sin"] = np.sin(2 * np.pi * df["Perioda"] / 96)
df["perioda_cos"] = np.cos(2 * np.pi * df["Perioda"] / 96)

# `price` is part of the subset on purpose: NaN > 0 is False, so a missing
# price would otherwise survive as a class-0 training/test row because
# `is_positive` itself is never NaN.
df = df.dropna(subset=DAY_AHEAD_FEATURES + ["price", "is_positive"]).reset_index(drop=True)

# Available days (every calendar day that has at least one row)
df["day"] = df["date"].dt.floor("D")
available_days = sorted(df["day"].unique())

min_day = available_days[0]
max_day = available_days[-1]

# Default day = last day present in the data
default_day = max_day


# ======================================================
# Helpers
# ======================================================
def optimize_threshold_f1(model, X_train, y_train, t_min=0.3, t_max=0.7, steps=41):
    """Pick the probability cut-off with the highest F1 on the given data.

    ``steps`` evenly spaced cut-offs between ``t_min`` and ``t_max`` are
    tried; a row is predicted positive when its class-1 probability is
    strictly greater than the cut-off. Ties keep the lowest cut-off.

    Returns:
        ``(best_threshold, best_f1)`` as plain floats.
    """
    positive_probs = model.predict_proba(X_train)[:, 1]
    candidates = np.linspace(t_min, t_max, steps)

    f1_by_candidate = [
        f1_score(y_train, (positive_probs > cut).astype(int))
        for cut in candidates
    ]

    winner = int(np.argmax(f1_by_candidate))
    return float(candidates[winner]), float(f1_by_candidate[winner])


def fit_day_ahead_model(train_df):
    """Train the sign classifier on ``train_df`` and tune its threshold.

    Args:
        train_df: Frame holding the ``DAY_AHEAD_FEATURES`` columns and the
            binary ``is_positive`` target.

    Returns:
        ``(model, best_threshold, best_f1)``; threshold and F1 are computed
        on the same training data by ``optimize_threshold_f1``.

    Raises:
        ValueError: If the target contains only one class.
    """
    features = train_df[DAY_AHEAD_FEATURES]
    target = train_df["is_positive"]

    n_positive = (target == 1).sum()
    n_negative = (target == 0).sum()
    # Counts are non-negative, so a minimum of 0 means one class is absent.
    if min(n_positive, n_negative) == 0:
        raise ValueError("Train set obsahuje jen jednu t≈ô√≠du (v≈°echno 0 nebo v≈°echno 1).")

    xgb_params = dict(
        n_estimators=500,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        # Re-weight the positive class by the negative/positive ratio.
        scale_pos_weight=n_negative / n_positive,
        objective="binary:logistic",
        eval_metric="logloss",
        random_state=42,
        n_jobs=0,
    )
    model = XGBClassifier(**xgb_params)
    model.fit(features, target)

    best_t, best_f1 = optimize_threshold_f1(model, features, target)
    return model, best_t, best_f1


def day_slice(df_all, day_ts):
    """Return a copy of the rows of ``df_all`` that fall on one calendar day.

    Args:
        df_all: Frame with a datetime ``date`` column.
        day_ts: Anything ``pd.to_datetime`` accepts; its time part is ignored.

    Returns:
        An independent copy of the rows with ``midnight <= date < next midnight``.
    """
    midnight = pd.to_datetime(day_ts).floor("D")
    next_midnight = midnight + pd.Timedelta(days=1)

    stamps = df_all["date"]
    on_that_day = stamps.ge(midnight) & stamps.lt(next_midnight)
    return df_all.loc[on_that_day].copy()


def make_confusion_heatmap(cm):
    """Render a 2x2 confusion matrix as a Plotly heatmap.

    Args:
        cm: Matrix laid out as ``[[tn, fp], [fn, tp]]`` (rows = actual
            class, columns = predicted class).

    Returns:
        A ``go.Figure`` containing a single heatmap trace.
    """
    predicted_labels = ["Pred 0 (Z√°porn√°)", "Pred 1 (Kladn√°)"]
    actual_labels = ["Real 0 (Z√°porn√°)", "Real 1 (Kladn√°)"]

    fig = go.Figure(
        data=go.Heatmap(
            z=cm,
            x=predicted_labels,
            y=actual_labels,
            showscale=True,
            # Plotly substitutes %{name}; the previous Python-style
            # %(name)s placeholders were shown literally in the tooltip.
            hovertemplate="%{y}<br>%{x}<br>Count: %{z}<extra></extra>",
        )
    )
    fig.update_layout(
        margin=dict(l=40, r=20, t=30, b=40),
        height=320,
        title="Confusion matrix",
    )
    return fig


def compute_outputs_for_day(selected_day, hc_low, hc_high):
    """Train on everything before ``selected_day`` and evaluate that day.

    Args:
        selected_day: Day to predict; anything ``pd.to_datetime`` accepts.
        hc_low: Rows with P(positive) below this count as high-confidence.
        hc_high: Rows with P(positive) above this count as high-confidence.

    Returns:
        Dict with keys ``out`` (per-row frame with probability and predicted
        class), ``cm`` (2x2 confusion matrix), ``report`` (text report),
        ``daily_acc``, ``hc_acc`` (None when no row is high-confidence),
        ``coverage``, ``best_threshold``, ``best_f1`` and ``importance``.

    Raises:
        ValueError: If there is no data before the day, no data on the day,
            or the training target has a single class.
    """
    selected_day = pd.to_datetime(selected_day).floor("D")

    # Strictly earlier rows only, so nothing from the evaluated day leaks in.
    train_df = df[df["date"] < selected_day].copy()
    test_df = day_slice(df, selected_day)

    if train_df.empty:
        raise ValueError("Train dataset je pr√°zdn√Ω (vybran√Ω den je p≈ô√≠li≈° brzy).")
    if test_df.empty:
        raise ValueError("Test dataset je pr√°zdn√Ω (vybran√Ω den nen√≠ v datech).")

    # Fit
    model, best_threshold, best_f1 = fit_day_ahead_model(train_df)

    # Predict day
    probs = model.predict_proba(test_df[DAY_AHEAD_FEATURES])[:, 1]
    preds = (probs > best_threshold).astype(int)

    out = test_df[["date", "price", "is_positive"]].copy()
    out["prob_positive"] = probs
    out["pred_is_positive"] = preds

    # Metrics
    # Pin both classes explicitly: without `labels`, a day on which truth and
    # prediction contain a single class yields a 1x1 matrix and a one-row
    # report, which no longer matches the 2x2 axes of the confusion heatmap.
    class_labels = [0, 1]
    cm = confusion_matrix(out["is_positive"], out["pred_is_positive"], labels=class_labels)
    report = classification_report(
        out["is_positive"],
        out["pred_is_positive"],
        labels=class_labels,
        digits=4,
        zero_division=0,  # an absent class scores 0 without emitting a warning
    )
    daily_acc = float((out["is_positive"] == out["pred_is_positive"]).mean())

    # HC: rows whose probability lies outside the [hc_low, hc_high] band
    hc = out[(out["prob_positive"] > hc_high) | (out["prob_positive"] < hc_low)].copy()
    if len(hc) > 0:
        hc_acc = float((hc["is_positive"] == hc["pred_is_positive"]).mean())
        coverage = float(len(hc) / len(out))
    else:
        hc_acc = None
        coverage = float(0.0)

    # Feature importance
    importance = pd.Series(model.feature_importances_, index=DAY_AHEAD_FEATURES).sort_values(ascending=False)

    return {
        "out": out,
        "cm": cm,
        "report": report,
        "daily_acc": daily_acc,
        "hc_acc": hc_acc,
        "coverage": coverage,
        "best_threshold": best_threshold,
        "best_f1": best_f1,
        "importance": importance,
    }


# ======================================================
# Dash App
# ======================================================
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Imbalance Day-Ahead Sign Prediction"

# The day bounds come from `sorted(Series.unique())`, whose elements are
# numpy datetime64 scalars on some pandas versions and Timestamps on others.
# Convert them to plain `date` objects so what the date picker receives does
# not depend on how a numpy scalar happens to be JSON-encoded.
_picker_min_day = pd.Timestamp(min_day).date()
_picker_max_day = pd.Timestamp(max_day).date()
_picker_default_day = pd.Timestamp(default_day).date()


app.layout = dbc.Container(
    fluid=True,
    children=[
        html.H3("Day-ahead predikce znam√©nka imbalance ceny (cel√Ω den)"),

        # Top row: controls on the left, summary + diagnostic charts on the right
        dbc.Row(
            [
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody(
                            [
                                # Day to evaluate
                                html.Div("Vyber den k vyhodnocen√≠ (predikce cel√©ho dne):"),
                                dcc.DatePickerSingle(
                                    id="day-picker",
                                    min_date_allowed=_picker_min_day,
                                    max_date_allowed=_picker_max_day,
                                    date=_picker_default_day,
                                    display_format="YYYY-MM-DD",
                                ),
                                html.Hr(),

                                # High-confidence probability bounds; the slider
                                # ranges keep LOW below 0.5 and HIGH above 0.5.
                                html.Div("High-confidence prahy:"),
                                dbc.Row(
                                    [
                                        dbc.Col(
                                            [
                                                html.Small("HC_LOW"),
                                                dcc.Slider(
                                                    id="hc-low",
                                                    min=0.05, max=0.49, step=0.01,
                                                    value=DEFAULT_HC_LOW,
                                                    tooltip={"placement": "bottom", "always_visible": False},
                                                ),
                                            ],
                                            width=6,
                                        ),
                                        dbc.Col(
                                            [
                                                html.Small("HC_HIGH"),
                                                dcc.Slider(
                                                    id="hc-high",
                                                    min=0.51, max=0.95, step=0.01,
                                                    value=DEFAULT_HC_HIGH,
                                                    tooltip={"placement": "bottom", "always_visible": False},
                                                ),
                                            ],
                                            width=6,
                                        ),
                                    ],
                                    className="g-2",
                                ),
                                html.Div(id="hc-warning", style={"color": "#b30000", "marginTop": "0.5rem"}),
                                html.Hr(),

                                # Training is started explicitly by this button
                                dbc.Button("Spustit predikci", id="run-btn", color="primary", n_clicks=0),
                                html.Div(id="status", style={"marginTop": "0.75rem"}),
                                html.Small(
                                    "Pozn.: Model se tr√©nuje na v≈°ech datech p≈ôed vybran√Ωm dnem (day-ahead re≈æim).",
                                    style={"color": "#666"},
                                ),
                            ]
                        )
                    ),
                    width=4,
                ),

                dbc.Col(
                    dbc.Row(
                        [
                            dbc.Col(dbc.Card(dbc.CardBody([html.H5("Souhrn"), html.Div(id="summary")])), width=12),
                            dbc.Col(dbc.Card(dbc.CardBody([dcc.Graph(id="cm-graph")])), width=6),
                            dbc.Col(dbc.Card(dbc.CardBody([dcc.Graph(id="importance-graph")])), width=6),
                        ],
                        className="g-3",
                    ),
                    width=8,
                ),
            ],
            className="g-3",
        ),

        html.Br(),

        # Probability curve over the selected day
        dbc.Row(
            [
                dbc.Col(dbc.Card(dbc.CardBody([dcc.Graph(id="prob-graph")])), width=12),
            ],
            className="g-3",
        ),

        html.Br(),

        # Per-interval results table
        dbc.Row(
            [
                dbc.Col(
                    dbc.Card(
                        dbc.CardBody(
                            [
                                html.H5("V√Ωsledky po intervalech"),
                                dash_table.DataTable(
                                    id="results-table",
                                    page_size=20,
                                    sort_action="native",
                                    filter_action="native",
                                    style_table={"overflowX": "auto"},
                                    style_cell={"fontFamily": "sans-serif", "fontSize": 13, "padding": "6px"},
                                    style_header={"fontWeight": "bold"},
                                ),
                            ]
                        )
                    ),
                    width=12,
                ),
            ],
            className="g-3",
        ),
    ],
)


# ======================================================
# Callbacks
# ======================================================
@app.callback(
    Output("hc-warning", "children"),
    Input("hc-low", "value"),
    Input("hc-high", "value"),
)
def validate_hc(hc_low, hc_high):
    """Return a warning text for the high-confidence sliders, or "" if fine.

    Args:
        hc_low: Current value of the HC_LOW slider (may be None).
        hc_high: Current value of the HC_HIGH slider (may be None).

    Returns:
        The first applicable message: bounds out of order, then bounds not
        on opposite sides of 0.5; an empty string when neither applies or
        when a value is missing.
    """
    if hc_low is None or hc_high is None:
        return ""

    # (violated?, message) pairs in priority order
    rules = (
        (hc_low >= hc_high, "HC_LOW mus√≠ b√Ωt men≈°√≠ ne≈æ HC_HIGH."),
        (hc_low >= 0.5 or hc_high <= 0.5, "Doporuƒçen√≠: HC_LOW < 0.5 a HC_HIGH > 0.5."),
    )
    for violated, message in rules:
        if violated:
            return message
    return ""


def _placeholder_outputs(status):
    """Build the seven `run_model` outputs with only a status text filled in."""
    small_fig = go.Figure().update_layout(height=320, margin=dict(l=20, r=20, t=20, b=20))
    wide_fig = go.Figure().update_layout(height=380, margin=dict(l=20, r=20, t=20, b=20))
    # Order: status, summary, cm-graph, importance-graph, prob-graph, table data, table columns
    return (status, "", small_fig, small_fig, wide_fig, [], [])


@app.callback(
    Output("status", "children"),
    Output("summary", "children"),
    Output("cm-graph", "figure"),
    Output("importance-graph", "figure"),
    Output("prob-graph", "figure"),
    Output("results-table", "data"),
    Output("results-table", "columns"),
    Input("run-btn", "n_clicks"),
    State("day-picker", "date"),
    State("hc-low", "value"),
    State("hc-high", "value"),
)
def run_model(n_clicks, selected_day, hc_low, hc_high):
    """Train, predict the selected day and fill every output of the page.

    Args:
        n_clicks: Click counter of the run button; falsy before the first click.
        selected_day: Day chosen in the date picker.
        hc_low: Lower high-confidence probability bound.
        hc_high: Upper high-confidence probability bound.

    Returns:
        Tuple of (status text, summary children, confusion-matrix figure,
        importance figure, probability figure, table rows, table columns).
        Before the first click, and on any error, the figures and the table
        are empty and only the status text is set.
    """
    import logging

    # `not n_clicks` also covers None, not just the initial 0.
    if not n_clicks:
        return _placeholder_outputs("Vyber den a klikni na ‚ÄûSpustit predikci‚Äú.")

    try:
        if hc_low >= hc_high:
            raise ValueError("HC_LOW mus√≠ b√Ωt men≈°√≠ ne≈æ HC_HIGH.")

        outputs = compute_outputs_for_day(selected_day, hc_low, hc_high)
        out = outputs["out"]

        # Summary block
        day_str = pd.to_datetime(selected_day).strftime("%Y-%m-%d")
        hc_acc = outputs["hc_acc"]
        coverage = outputs["coverage"]

        summary_children = [
            html.Div([html.B("Den: "), day_str]),
            html.Div([html.B("Denn√≠ accuracy: "), f"{outputs['daily_acc']:.2%}"]),
            html.Div([html.B("Optim√°ln√≠ threshold (train F1): "), f"{outputs['best_threshold']:.2f}"]),
            html.Div([html.B("Train F1 p≈ôi best threshold: "), f"{outputs['best_f1']:.4f}"]),
            html.Hr(),
            html.Div([html.B("High-confidence coverage: "), f"{coverage:.2%}"]),
            html.Div([html.B("High-confidence accuracy: "), ("‚Äî" if hc_acc is None else f"{hc_acc:.2%}")]),
            html.Hr(),
            html.Pre(outputs["report"], style={"whiteSpace": "pre-wrap", "margin": 0}),
        ]

        # Confusion matrix fig
        cm_fig = make_confusion_heatmap(outputs["cm"])

        # Importance fig
        imp = outputs["importance"].reset_index()
        imp.columns = ["feature", "importance"]
        imp_fig = px.bar(imp, x="importance", y="feature", orientation="h", title="Feature importance")
        imp_fig.update_layout(height=320, margin=dict(l=20, r=20, t=40, b=20))
        imp_fig.update_yaxes(categoryorder="total ascending")

        # Probabilities over the day
        plot_df = out.copy()
        plot_df["Realita"] = plot_df["is_positive"].map({1: "Kladn√°", 0: "Z√°porn√°"})
        plot_df["Predikce"] = plot_df["pred_is_positive"].map({1: "Kladn√°", 0: "Z√°porn√°"})

        prob_fig = go.Figure()
        prob_fig.add_trace(go.Scatter(x=plot_df["date"], y=plot_df["prob_positive"], mode="lines", name="P(kladn√°)"))
        prob_fig.add_trace(go.Scatter(
            x=plot_df["date"],
            y=plot_df["is_positive"],
            mode="markers",
            name="Realita (0/1)",
        ))
        prob_fig.add_trace(go.Scatter(
            x=plot_df["date"],
            y=plot_df["pred_is_positive"],
            mode="markers",
            name="Predikce (0/1)",
        ))
        prob_fig.update_layout(
            title="Pravdƒõpodobnost kladn√© ceny + realita/predikce p≈ôes den",
            height=380,
            margin=dict(l=20, r=20, t=50, b=20),
            yaxis=dict(range=[-0.05, 1.05]),
        )

        # Table
        table_df = plot_df[["date", "price", "Realita", "Predikce", "prob_positive"]].copy()
        table_df["date"] = table_df["date"].dt.strftime("%Y-%m-%d %H:%M:%S")
        table_df["prob_positive"] = table_df["prob_positive"].round(4)
        table_df["price"] = table_df["price"].round(4)

        columns = [{"name": col, "id": col} for col in table_df.columns]

        status = "Hotovo ‚úÖ"
        return status, summary_children, cm_fig, imp_fig, prob_fig, table_df.to_dict("records"), columns

    except Exception as e:
        # Callback boundary: show the message in the UI, but keep the full
        # traceback in the server log instead of discarding it.
        logging.getLogger(__name__).exception("run_model failed for day %r", selected_day)
        return _placeholder_outputs(f"Chyba: {e}")


if __name__ == "__main__":
    # Start the Dash server in debug mode on port 8103 when run as a script.
    app.run(debug=True, port=8103)
