# Time Series Preprocessing

## Objectives
This notebook prepares the electricity load time series for forecasting and anomaly detection with leakage-safe preprocessing.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from typing import Tuple

# Default figure size (width, height) applied to every plot in this notebook.
plt.rcParams.update({'figure.figsize': (14, 5)})

## Load Dataset

In [None]:
# Raw load file, addressed relative to this notebook.
DATA_PATH = Path('../data/raw/LD2011_2014.txt')

# The file is semicolon-delimited with a comma as the decimal mark; the first
# column is used as the index and parsed as dates.
df = pd.read_csv(DATA_PATH, sep=';', decimal=',', index_col=0, parse_dates=True)

# Work with a single series: the first data column, renamed to 'load'.
series_id = df.columns[0]
ts = df.loc[:, series_id].rename('load')

ts.head()

## Temporal Train / Validation / Test Split

In [None]:
def temporal_split(
    series: pd.Series,
    train_frac: float = 0.7,
    val_frac: float = 0.15,
) -> Tuple[pd.Series, pd.Series, pd.Series]:
    """Split a series into contiguous train, validation and test parts.

    The split is positional and order-preserving (no shuffling): the first
    ``train_frac`` of the rows form the training part, the next ``val_frac``
    the validation part, and whatever remains the test part.

    Args:
        series: The series to split, in the order it should be cut.
        train_frac: Fraction of rows assigned to the training part.
        val_frac: Fraction of rows assigned to the validation part.

    Returns:
        A ``(train, val, test)`` tuple of slices of ``series``.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
    """
    if train_frac < 0 or val_frac < 0:
        raise ValueError(
            f"train_frac and val_frac must be non-negative, "
            f"got {train_frac} and {val_frac}"
        )
    # Small tolerance so sums such as 0.7 + 0.3 are not rejected because of
    # floating-point rounding.
    if train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            f"train_frac + val_frac must not exceed 1, "
            f"got {train_frac} + {val_frac}"
        )

    n = len(series)
    # int() truncates, so any rounding remainder ends up in the later parts.
    train_end = int(n * train_frac)
    val_end = int(n * (train_frac + val_frac))
    return (
        series.iloc[:train_end],
        series.iloc[train_end:val_end],
        series.iloc[val_end:],
    )

# Split with the function's default fractions, then display the part sizes.
train_ts, val_ts, test_ts = temporal_split(ts)
tuple(len(part) for part in (train_ts, val_ts, test_ts))

## Scaling (Training Only)

In [None]:
# Standardisation statistics are computed from the training split only and
# then reused for every split, so validation/test data never influence them.
train_mean = train_ts.mean()
train_std = train_ts.std()

# A constant training split has zero spread; dividing by it would silently
# fill every scaled split with NaN/inf. Fall back to unit scale so the data
# is only centred in that case.
if train_std == 0:
    train_std = 1.0

def scale(series):
    """Standardise ``series`` using the training-split mean and std.

    Reads the module-level ``train_mean`` and ``train_std``; the input is not
    modified and a new, scaled object is returned.
    """
    return (series - train_mean) / train_std

train_scaled = scale(train_ts)
val_scaled = scale(val_ts)
test_scaled = scale(test_ts)

## Sliding Window Construction

In [None]:
def create_windows(series, input_length, forecast_horizon):
    """Build overlapping (input, target) windows from a 1-D series.

    Window ``i`` uses ``values[i : i + input_length]`` as input and the
    ``forecast_horizon`` values that immediately follow it as target.
    Consecutive windows are shifted by one step.

    Args:
        series: Object exposing a 1-D ``.values`` array (e.g. a pandas Series).
        input_length: Number of steps in each input window.
        forecast_horizon: Number of steps in each target window.

    Returns:
        A tuple ``(X, y)`` of freshly allocated arrays with shapes
        ``(n_windows, input_length)`` and ``(n_windows, forecast_horizon)``.
        When the series is too short for a single window, ``n_windows`` is 0
        but both arrays keep their second dimension.

    Raises:
        ValueError: If ``input_length`` or ``forecast_horizon`` is negative.
    """
    if input_length < 0 or forecast_horizon < 0:
        raise ValueError(
            "input_length and forecast_horizon must be non-negative, "
            f"got {input_length} and {forecast_horizon}"
        )

    values = np.asarray(series.values)
    span = input_length + forecast_horizon
    n_windows = len(values) - span + 1

    if n_windows <= 0:
        # Keep the arrays 2-D so downstream shape handling does not break on
        # a short split.
        return (
            np.empty((0, input_length), dtype=values.dtype),
            np.empty((0, forecast_horizon), dtype=values.dtype),
        )

    # One strided, read-only view over every length-`span` window, then cut
    # it into the input and target parts. The copies give the caller
    # independent, writable arrays.
    windows = np.lib.stride_tricks.sliding_window_view(values, span)
    X = windows[:, :input_length].copy()
    y = windows[:, input_length:].copy()
    return X, y

# Window sizes, counted in samples of the series.
# NOTE(review): 24 * 7 and 24 read as "one week" and "one day" of hourly data.
# The raw LD2011_2014 file is presumably recorded at 15-minute resolution and
# no resampling is visible in this notebook; if so, these windows cover about
# 42 h of history and a 6 h horizon. TODO confirm the sampling rate.
INPUT_LENGTH = 7 * 24
FORECAST_HORIZON = 24

# Each split is windowed on its own, so no window spans two splits.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_windows(part, INPUT_LENGTH, FORECAST_HORIZON)
    for part in (train_scaled, val_scaled, test_scaled)
)

(X_train.shape, y_train.shape)

## PyTorch Dataset

In [None]:
import torch
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Map-style dataset pairing each input window with its target window.

    Args:
        X: Array-like of input windows; the first axis indexes samples.
        y: Array-like of targets with the same number of samples as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        # torch.tensor copies its input, so the dataset holds its own
        # float32 data and does not alias the caller's arrays.
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # Fail early: a mismatch would otherwise only surface as an
        # IndexError (or silently unused samples) during iteration.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must contain the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# One dataset per split, each built from its matching (inputs, targets) pair.
train_ds, val_ds, test_ds = (
    TimeSeriesDataset(inputs, targets)
    for inputs, targets in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

## Preprocessing Summary

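The load series has been split chronologically into train, validation and test sets, standardised with statistics computed on the training set only, and cut into sliding (input, target) windows wrapped in PyTorch datasets. The data is now ready for deep learning forecasting and anomaly detection.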