<h1>Model training</h1>

In [None]:
import numpy as np
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib
import os
import pandas as pd
from torch.nn import HuberLoss, MSELoss, L1Loss

# Result containers, each a dict keyed by forecast lead time.
models: dict = {}       # trained network weights (state_dict)
scalers: dict = {}      # fitted StandardScaler objects per numeric feature
encoders: dict = {}     # fitted OneHotEncoder for the turbine type
input_sizes: dict = {}  # width of the feature vector fed to the network
losses: dict = {}       # final training / testing loss values

# Define MLP class
class MLP(nn.Module):
    """Feed-forward regressor: input_size -> 256 -> 128 -> 64 -> 1.

    A ReLU follows each of the first three linear layers; dropout
    (p=0.3366) is applied once, directly before the output layer.

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        # Attribute names are part of the saved state_dict keys, so they
        # are kept exactly as fc1..fc4.
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(0.3366)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        hidden = self.fc1(x)
        hidden = self.relu1(hidden)

        hidden = self.fc2(hidden)
        hidden = self.relu2(hidden)

        hidden = self.fc3(hidden)
        hidden = self.relu3(hidden)

        # Dropout is only active in training mode.
        hidden = self.dropout(hidden)
        return self.fc4(hidden)

# PyTorch Dataset Class
class WindPowerDataset(Dataset):
    """Index-aligned pairing of feature rows and target rows.

    Args:
        features: Indexable collection of feature rows.
        targets: Indexable collection of target rows; its length defines
            the length of the dataset.
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of samples, taken from ``targets``."""
        return len(self.targets)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair at ``index`` as float32 tensors."""
        sample = torch.tensor(self.features[index], dtype=torch.float32)
        label = torch.tensor(self.targets[index], dtype=torch.float32)
        return sample, label

# Train one MLP per forecast lead time.
#
# Every JSON file in `input_dir` holds the data for one lead time; the lead
# time is the integer after the last underscore in the file name.  For each
# file the script builds a feature matrix (one-hot turbine type, hub height,
# age in months, wind speed), trains the network on the Huber loss and stores
# the weights, the fitted encoder/scalers, the input width and the final
# losses in the module-level dicts, which are written to disk at the end.

# Built with os.path.join so the relative path also resolves outside Windows.
input_dir = os.path.join("..", "data", "WPPs+production+reforecast")

# Loop-invariant settings.
params = {"batch_size": 128,
          "lr": 0.00010155,
          "number_epochs": 10}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ref_date = pd.Timestamp("2024-12-01")  # month against which age is measured

for file_name in os.listdir(input_dir):
    file_path = os.path.join(input_dir, file_name)

    # Skip sub-directories and non-JSON entries.  Without this guard the rest
    # of the loop body would run with the previous file's data (or fail with
    # a NameError on the very first entry).
    if not os.path.isfile(file_path) or not file_name.endswith(".json"):
        continue

    lead_time = int(file_name.split("_")[-1].replace(".json", ""))
    with open(file_path, "r", encoding="utf-8") as json_file:
        forecast_data = json.load(json_file)

    print(f"Processing lead time: {lead_time}")

    print("    Data preparation")

    all_turbine_types = []
    all_hub_heights = []
    all_capacities = []
    all_commissioning_dates = []
    all_production_data = []

    for wpp in forecast_data.values():
        commissioning_date = wpp["Commissioning_date"]
        # A string without a "/" gets "/06" appended so that it matches the
        # '%Y/%m' format parsed below.
        if isinstance(commissioning_date, str) and "/" not in commissioning_date:
            commissioning_date = f"{commissioning_date}/06"

        all_turbine_types.append(str(wpp["Turbine"]))
        all_hub_heights.append(wpp["Hub_height"])
        all_capacities.append(wpp["Capacity"])
        all_commissioning_dates.append(commissioning_date)
        all_production_data.append(wpp["Time Series"])

    # One-hot encoding for turbine types
    encoder = OneHotEncoder(sparse_output=False)
    turbine_types_onehot = encoder.fit_transform(np.array(all_turbine_types).reshape(-1, 1))

    # Convert to datetime
    standardised_dates = pd.to_datetime(all_commissioning_dates, format='%Y/%m')

    # Age in months relative to ref_date
    ages = ref_date.year * 12 + ref_date.month - (standardised_dates.year * 12 + standardised_dates.month)

    # Per-plant feature and target chunks
    combined_features_raw = []
    output_raw = []

    for idx, production_data in enumerate(all_production_data):
        num_rows = len(production_data)

        # Repeat the plant-level features once per time step
        turbine_type_repeated = np.tile(turbine_types_onehot[idx], (num_rows, 1))
        hub_height_repeated = np.full((num_rows, 1), float(all_hub_heights[idx]))
        age_repeated = np.full((num_rows, 1), ages[idx])

        # entry[1] is the production value (divided by the plant capacity),
        # entry[2] is the wind speed
        production_values = np.array([entry[1] for entry in production_data]).reshape(-1, 1) / all_capacities[idx]
        wind_speeds = np.array([entry[2] for entry in production_data]).reshape(-1, 1)

        # Column order: one-hot turbine type, hub height, age, wind speed
        combined_chunk = np.hstack((
            turbine_type_repeated,
            hub_height_repeated,
            age_repeated,
            wind_speeds
        ))

        combined_features_raw.append(combined_chunk)
        output_raw.append(production_values)

    # Combine all data chunks into one array
    combined_features = np.vstack(combined_features_raw)
    output = np.vstack(output_raw)

    # Linearly interpolate missing wind speeds.
    # NOTE(review): this runs over the stacked array, so a gap at a plant
    # boundary is filled using the neighbouring plant's values - confirm
    # whether per-plant interpolation is intended.
    wind_speed_series = pd.Series(combined_features[:, -1])
    wind_speed_series.interpolate(method='linear', inplace=True)
    combined_features[:, -1] = wind_speed_series.to_numpy()

    # Round all values to four decimal places
    combined_features = np.round(combined_features, decimals=4)
    output = np.round(output, decimals=4)

    # Standardise the numerical features, one scaler per column.
    # NOTE(review): the scalers are fitted on the full data set before the
    # train/test split, so test statistics influence the scaling.
    scaler_wind = StandardScaler()
    scaler_ages = StandardScaler()
    scaler_hub_heights = StandardScaler()

    combined_features[:, -1] = scaler_wind.fit_transform(combined_features[:, -1].reshape(-1, 1)).flatten()  # wind speeds
    combined_features[:, -2] = scaler_ages.fit_transform(combined_features[:, -2].reshape(-1, 1)).flatten()  # ages
    combined_features[:, -3] = scaler_hub_heights.fit_transform(combined_features[:, -3].reshape(-1, 1)).flatten()  # hub heights

    # Convert to PyTorch Dataset
    dataset = WindPowerDataset(combined_features, output)

    # 80/20 train-test split
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # random_split already assigns samples at random; the training loader
    # additionally reshuffles every epoch.
    train_loader = DataLoader(train_dataset, batch_size=params["batch_size"], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=params["batch_size"], shuffle=False)

    # Model setup
    input_size = combined_features.shape[1]

    # Compile the model with TorchScript and move it to the target device
    model = torch.jit.script(MLP(input_size=input_size)).to(device)

    # Training configuration: Huber drives the optimisation, MAE and MSE are
    # only tracked for reporting.
    mae_criterion = L1Loss()
    mse_criterion = MSELoss()
    huber_criterion = HuberLoss()
    optimizer = optim.Adam(model.parameters(), lr=params["lr"])

    # Training
    print("    Training")
    for epoch in range(params["number_epochs"]):
        print(f"        Epoch {epoch + 1}/{params['number_epochs']}")
        model.train()
        train_loss_mae, train_loss_mse, train_loss_huber = 0, 0, 0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)

            # Calculate losses for each criterion
            loss_mae = mae_criterion(outputs, batch_y)
            loss_mse = mse_criterion(outputs, batch_y)
            loss_huber = huber_criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss_huber.backward()
            optimizer.step()

            # Accumulate losses for logging
            train_loss_mae += loss_mae.item()
            train_loss_mse += loss_mse.item()
            train_loss_huber += loss_huber.item()

        # Mean over batches; the values from the last epoch are what gets
        # stored below.
        train_loss_mae /= len(train_loader)
        train_loss_mse /= len(train_loader)
        train_loss_huber /= len(train_loader)

    # Testing
    print("    Testing")
    model.eval()

    test_loss_mae, test_loss_mse, test_loss_huber = 0, 0, 0

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            preds = model(batch_x)

            test_loss_mae += mae_criterion(preds, batch_y).item()
            test_loss_mse += mse_criterion(preds, batch_y).item()
            test_loss_huber += huber_criterion(preds, batch_y).item()

    test_loss_mae /= len(test_loader)
    test_loss_mse /= len(test_loader)
    test_loss_huber /= len(test_loader)

    # Store everything needed to reuse this lead time's model
    models[lead_time] = model.state_dict()

    scalers[lead_time] = {
        "wind_scaler": scaler_wind,
        "scaler_ages": scaler_ages,
        "scaler_hub_heights": scaler_hub_heights
    }

    encoders[lead_time] = encoder

    input_sizes[lead_time] = input_size

    losses[lead_time] = {
        "Training": {
            "Huber": train_loss_huber,
            "MAE": train_loss_mae,
            "MSE": train_loss_mse
        },
        "Testing": {
            "Huber": test_loss_huber,
            "MAE": test_loss_mae,
            "MSE": test_loss_mse
        },
    }

# Save all parameters; create the target directory first so that joblib.dump
# does not fail on a fresh checkout.
os.makedirs("parameters", exist_ok=True)
joblib.dump(models, "parameters/models.pth")
joblib.dump(scalers, "parameters/scalers.pkl")
joblib.dump(encoders, "parameters/encoders.pkl")
joblib.dump(input_sizes, "parameters/input_sizes.pkl")
joblib.dump(losses, "parameters/losses.pkl")
print("All parameters saved successfully.")

Processing lead time: 102
    Data preparation
    Training
        Epoch 1/10
        Epoch 2/10
        Epoch 3/10
        Epoch 4/10
        Epoch 5/10
        Epoch 6/10
        Epoch 7/10
        Epoch 8/10
        Epoch 9/10
        Epoch 10/10
    Testing
Processing lead time: 105
    Data preparation
    Training
        Epoch 1/10
        Epoch 2/10
        Epoch 3/10
        Epoch 4/10
        Epoch 5/10
        Epoch 6/10
        Epoch 7/10
        Epoch 8/10
        Epoch 9/10
        Epoch 10/10
    Testing
Processing lead time: 108
    Data preparation
    Training
        Epoch 1/10
        Epoch 2/10
        Epoch 3/10
        Epoch 4/10
        Epoch 5/10
        Epoch 6/10
        Epoch 7/10
        Epoch 8/10
        Epoch 9/10
        Epoch 10/10
    Testing
Processing lead time: 111
    Data preparation
    Training
        Epoch 1/10
        Epoch 2/10
        Epoch 3/10
        Epoch 4/10
        Epoch 5/10
        Epoch 6/10
        Epoch 7/10
        Epoch 8/10