In [13]:
import numpy as np
import torch
import torch.nn as nn
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import plotly.graph_objects as go

# Data: read the history CSV (';' separated, ',' as decimal mark), order it by
# date and discard rows that have no last_value, so dates and values stay aligned.
data_path = Path("../data/crobex_history.csv")
df = pd.read_csv(
    data_path,
    sep=";",
    quotechar='"',
    decimal=",",
    parse_dates=["date"],
)
df = df.sort_values("date")
df = df.dropna(subset=["last_value"])

# Position-aligned numpy views of the two columns used downstream.
dates = df["date"].values
ts = df["last_value"].values.astype(np.float32)[:, np.newaxis]  # column vector, as the scaler expects 2-D input

# Min-max scale the series (scaler left at its defaults) and go back to 1-D.
scaler = MinMaxScaler()
scaled_vals = scaler.fit_transform(ts).reshape(-1)

# Input feature: pandas day-of-week (0..6) divided by 6, stored as float32.
date_series = pd.Series(dates)
weekday = date_series.dt.weekday.values.astype(np.float32) / 6.0

# Labels: a "weekend" is any missing day in the data, i.e. a gap of two or more
# calendar days between consecutive rows. Entry k describes the gap between
# row k and row k + 1, hence one element fewer than there are rows.
day_deltas = date_series.diff().dt.days
gaps_days = day_deltas.values[1:]  # first diff has no predecessor, so it is dropped
weekend_mask = gaps_days >= 2
is_weekend_after = weekend_mask.astype(np.int64)

# Sequence classification: each window of `win` consecutive rows gets ONE label.
# Y[i] = 1 if any consecutive pair of rows inside window i is separated by a
# "weekend" gap (see is_weekend_after), otherwise 0.
# NOTE(review): with win = 20 rows, presumably almost every window spans at
# least one gap, which would make Y nearly constant — check Y.mean() before
# reading anything into the model's accuracy.
win = 20
if len(scaled_vals) < win:
    raise ValueError(
        f"need at least {win} rows to build one window, got {len(scaled_vals)}"
    )

# A window starting at idx covers rows idx .. idx + win - 1, so the valid start
# positions are 0 .. len - win inclusive: len - win + 1 windows in total.
# (Using len - win would silently drop the last window.)
num_windows = len(scaled_vals) - win + 1

# X[i] is (win, 2): channel 0 = scaled value, channel 1 = scaled weekday.
X = np.zeros((num_windows, win, 2), dtype=np.float32)
Y = np.zeros(num_windows, dtype=np.int64)
for idx in range(num_windows):
    X[idx, :, 0] = scaled_vals[idx : idx + win]
    X[idx, :, 1] = weekday[idx : idx + win]
    # The win - 1 consecutive pairs inside this window are gap entries
    # idx .. idx + win - 2, i.e. the slice [idx : idx + win - 1].
    Y[idx] = int(is_weekend_after[idx : idx + win - 1].any())

# Model: RNN over the 2 input features -> final hidden state -> linear layer,
# giving one 2-class logit vector per sequence.
# Seed before building the layers so weight initialisation (and therefore the
# trained model) is the same on every Restart & Run All.
torch.manual_seed(0)
rnn = nn.RNN(2, 32, batch_first=True)
fc = nn.Linear(32, 2)
opt = torch.optim.Adam(list(rnn.parameters()) + list(fc.parameters()), 1e-3)

# Training hyper-parameters.
num_epochs = 50
batch_size = 64

# Convert the whole dataset once; the loop below only slices these tensors
# instead of rebuilding them for every batch of every epoch.
X_train = torch.tensor(X, dtype=torch.float32)
Y_train = torch.tensor(Y, dtype=torch.long)

# Make the cell safe to re-run after the evaluation cell switched to eval mode.
rnn.train()
fc.train()
for _ in range(num_epochs):
    # Batches are taken in order (no shuffling), as consecutive slices.
    for batch_start in range(0, len(X_train), batch_size):
        x = X_train[batch_start : batch_start + batch_size]
        y = Y_train[batch_start : batch_start + batch_size]
        _, h = rnn(x)
        # h is (num_layers, batch, hidden); with a single layer, h[-1] is the
        # final hidden state of each sequence, shape (batch, hidden).
        logits = fc(h[-1])
        loss = nn.functional.cross_entropy(logits, y)
        opt.zero_grad()
        loss.backward()
        opt.step()

# Inference: switch both modules to eval mode and classify every window in X
# in a single forward pass, without tracking gradients.
for module in (rnn, fc):
    module.eval()

with torch.no_grad():
    all_windows = torch.tensor(X, dtype=torch.float32)
    _, h = rnn(all_windows)
    final_hidden = h[-1]  # single-layer RNN: same tensor as h with the layer axis removed
    class_scores = fc(final_hidden)
    pred = class_scores.argmax(-1).numpy()  # predicted class (0 or 1) per window


In [15]:
# Visualization: actual vs. predicted label for a slice of windows, drawn as
# step lines so the 0/1 values read as levels.
start, end = 500, min(700, num_windows)
# Use the real window indices (start .. end - 1) on the x-axis so the values
# match the "window index" axis title instead of showing offsets from `start`.
# If num_windows <= start the range is empty and so are the y slices below.
window_idx = np.arange(start, end)

fig = go.Figure()
for trace_name, trace_values, trace_line in (
    ("actual", Y, dict(color="blue", shape="hv")),
    ("predicted", pred, dict(color="orange", shape="hv", dash="dash")),
):
    fig.add_trace(
        go.Scatter(
            x=window_idx,
            y=trace_values[start:end],
            mode="lines",
            name=trace_name,
            line=trace_line,
        )
    )
fig.update_layout(
    title="does the window contain a weekend?",
    xaxis_title="window index",
    yaxis_title="0=no  1=yes",
    height=350,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()