In [20]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torch
import torch.nn as nn
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler

# data: read the index history, order it by date and drop every row that has no
# last_value (the whole row goes, so dates and values stay index-aligned)
data_path = Path("../data/crobex_history.csv")
df = (
    pd.read_csv(data_path, sep=";", quotechar='"', decimal=",", parse_dates=["date"])
    .sort_values("date")
    .dropna(subset=["last_value"])
)
dates = df["date"].values
# single-column float32 matrix: the 2-D layout the scaler is fitted on
ts = df["last_value"].values.astype(np.float32)[:, None]
scaler = MinMaxScaler()
scaled_2d = scaler.fit_transform(ts)
# back to a flat 1-D series for windowing
scaled_vals = scaled_2d.flatten()

# labels: is there a weekend after this day?
# A gap of >= 2 calendar days to the next row means a weekend / non-trading day lies in between.
date_series = pd.Series(dates)
day_gaps = date_series.diff().dt.days
# entry 0 of the diff has no predecessor, so drop it: gaps_days[t] = days from dates[t] to dates[t+1]
gaps_days = day_gaps.values[1:]
is_weekend_after = (gaps_days >= 2).astype(np.int64)
# weekday as extra input (0=mon..6=sun), divided by 6 to normalize it for the model
weekday_raw = date_series.dt.weekday.values
weekday = weekday_raw.astype(np.float32) / 6.0

# sequence labeling: every position inside a window carries its own 0/1 label
# X: (num_windows, win, 2) — channel 0 = scaled value, channel 1 = weekday
# Y: (num_windows, win)    — 0/1 per step
win = 20
num_windows = len(scaled_vals) - win
X = np.zeros((num_windows, win, 2), dtype=np.float32)
Y = np.zeros((num_windows, win), dtype=np.int64)

# positions[r, s] = r + s: row r lists the series indices covered by window r,
# so one fancy-indexing lookup per array fills all windows at once
positions = np.arange(num_windows)[:, None] + np.arange(win)[None, :]
X[:, :, 0] = scaled_vals[positions]
X[:, :, 1] = weekday[positions]
# is_weekend_after[t] = is there a weekend between dates[t] and dates[t+1]
Y[:, :] = is_weekend_after[positions]

# class weights (weekend is rarer): inverse square-root frequency, rescaled so
# the two weights average to 1
counts = np.bincount(Y.reshape(-1), minlength=2).clip(min=1)  # floor at 1: no division by zero
root_counts = np.sqrt(counts.astype(np.float32))
inv_freq = np.divide(1.0, root_counts)
normalized_weights = inv_freq / inv_freq.mean()
class_weights = torch.tensor(normalized_weights, dtype=torch.float32)

# model: RNN(2) -> FC(2), binary class per step; loss over flattened (batch*win, 2)
# Seed before the layers are built: their initial weights are random, so without
# this the trained model (and every later prediction) changes on each
# Restart & Run All.
seed = 0
torch.manual_seed(seed)
rnn = nn.RNN(2, 32, batch_first=True)
fc = nn.Linear(32, 2)
opt = torch.optim.Adam(list(rnn.parameters()) + list(fc.parameters()), 1e-3)

# Convert the full window arrays once; batches below are slices of these
# tensors instead of a fresh numpy -> tensor copy per batch per epoch.
X_train = torch.tensor(X, dtype=torch.float32)
Y_train = torch.tensor(Y, dtype=torch.long)

batch_size = 64
# `epoch` / `_hidden` instead of `_`: in a notebook `_` is IPython's
# last-output variable, and the original reused it for two different things.
for epoch in range(50):
    # batches are taken in series order (no shuffling)
    for batch_start in range(0, len(X_train), batch_size):
        x = X_train[batch_start : batch_start + batch_size]
        y_b = Y_train[batch_start : batch_start + batch_size]
        out, _hidden = rnn(x)  # out: one 32-dim state per step of each window
        logits = fc(out)
        # cross_entropy wants (N, classes) vs (N,), so fold batch and step together
        logits_flat = logits.reshape(-1, 2)
        y_flat = y_b.reshape(-1)
        loss = nn.functional.cross_entropy(logits_flat, y_flat, weight=class_weights)
        opt.zero_grad()
        loss.backward()
        opt.step()

# inference over every window at once; pred[w, s] is the predicted 0/1 class
# for step s of window w
rnn.eval()
fc.eval()
with torch.no_grad():  # no gradients needed for prediction
    all_windows = torch.tensor(X, dtype=torch.float32)
    out, _ = rnn(all_windows)
    step_logits = fc(out)
    pred = torch.argmax(step_logits, dim=-1).numpy()

In [22]:
# Plot actual vs. predicted "weekend after this day" labels for a range of days.
#
# The window length is read off `pred` (one column per step of a window)
# instead of being typed in again: a second hard-coded copy would silently
# misalign the two curves if the training cell's `win` were ever changed.
win = pred.shape[1]
last_step = win - 1  # pred[w, -1] is the prediction for day w + win - 1

# day range to plot (is_weekend_after has len(dates)-1 entries)
# - start is clamped to last_step: earlier days are never the last step of a
#   window, and a negative start_win would wrap around to the end of `pred`
# - end is clamped to what both the labels and the predictions cover
start = max(500, last_step)
end = min(700, len(is_weekend_after), len(pred) + last_step)
end = max(end, start)  # short series: plot nothing rather than a wrapped slice

start_win = start - last_step
end_win = end - last_step
day_idx = np.arange(start, end)
actual = is_weekend_after[start:end]
predicted = pred[start_win:end_win, -1]
assert len(actual) == len(predicted), "actual/predicted day ranges are misaligned"

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=day_idx,
        y=actual,
        mode="lines",
        name="actual",
        line=dict(color="blue", shape="hv"),
    )
)
fig.add_trace(
    go.Scatter(
        x=day_idx,
        y=predicted,
        mode="lines",
        name="predicted",
        line=dict(color="orange", shape="hv", dash="dash"),
    )
)
fig.update_layout(
    title=f"weekend (non-trading day) per step (window = {win} days)",
    xaxis_title="day (index in series)",
    yaxis_title="0=work run  1=weekend in between",
    yaxis=dict(range=[-0.5, 1.5]),  # pad the 0/1 steps so they do not touch the frame
    height=400,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()