# In [None]:
import numpy as np
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib
import os

# Read the forecast production records (JSON) from the working directory
with open("forecast_production_data.json", "r", encoding="utf-8") as file:
    forecast_data = json.loads(file.read())

# The top-level keys of the JSON document are the lead times
lead_times = [lead_time_key for lead_time_key in forecast_data.keys()]

# Dictionaries (initially empty) for trained models and fitted preprocessors
models = dict()
scalers = dict()

# Define MLP class
class MLP(nn.Module):
    """Fully connected regressor: input_size -> 256 -> 128 -> 64 -> 1.

    A ReLU follows each of the first three linear layers, and a single
    dropout layer (p=0.3) sits directly in front of the output layer.
    """

    def __init__(self, input_size):
        """Create the layers for inputs with ``input_size`` features."""
        super().__init__()
        # Linear layers (created in fc1..fc4 order)
        self.fc1 = nn.Linear(in_features=input_size, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=1)
        # One activation module per hidden layer
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        # Regularisation applied only before the output projection
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.fc1(x)
        hidden = self.relu1(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu2(hidden)
        hidden = self.fc3(hidden)
        hidden = self.relu3(hidden)
        return self.fc4(self.dropout(hidden))

# PyTorch Dataset Class
class WindPowerDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    The containers passed in are stored as given; an item is converted to a
    pair of float32 tensors only when it is accessed.
    """

    def __init__(self, features, targets):
        """Keep references to the indexable ``features`` and ``targets``."""
        self.features, self.targets = features, targets

    def __len__(self):
        """Return the number of samples, taken from the targets container."""
        return len(self.targets)

    def __getitem__(self, index):
        """Return ``(features, target)`` at ``index`` as float32 tensors."""
        sample = torch.tensor(self.features[index], dtype=torch.float32)
        label = torch.tensor(self.targets[index], dtype=torch.float32)
        return sample, label

# Train an MLP for each lead time
#
# One training sample is one production entry (timestamp, production,
# wind_speed) of one wind power plant. Its feature row is
#   [one-hot turbine type | scaled hub height | scaled age | scaled wind speed]
# and its target is the production divided by the plant's capacity.
#
# Turbine type, hub height and age exist once per plant, whereas wind speed
# exists once per production entry. The encoder/scalers for the plant-level
# attributes are therefore fitted on one row per plant, and the transformed
# rows are then repeated once per production entry so that every block passed
# to np.hstack has the same number of rows.

DEFAULT_HUB_HEIGHT = 100                 # used when "Hub_height" is empty/falsy
DEFAULT_COMMISSIONING_DATE = "2015/06"   # used when "Commissioning_date" is empty/falsy
REFERENCE_YEAR = 2024                    # plant age = REFERENCE_YEAR - commissioning year
NUM_EPOCHS = 10
BATCH_SIZE = 32

# Create the output directory up front so that the final scaler dump also
# succeeds when there is no lead time to train on.
os.makedirs("models", exist_ok=True)

# The device does not depend on the lead time, so pick it once.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for lead_time in lead_times:
    print(f"Processing lead time: {lead_time}")

    all_features = []    # one [wind_speed] row per production entry
    all_targets = []     # one capacity-normalised production per entry
    turbine_types = []   # one value per plant
    hub_heights = []     # one value per plant
    capacities = []      # one value per plant; collected but not fed to the model
    ages = []            # one value per plant
    entry_counts = []    # number of production entries per plant

    for wpp in forecast_data[lead_time].values():
        turbine_types.append(wpp["Turbine"])
        hub_heights.append(wpp["Hub_height"] if wpp["Hub_height"] else DEFAULT_HUB_HEIGHT)
        capacities.append(wpp["Capacity"])
        commissioning_date = (
            wpp["Commissioning_date"] if wpp["Commissioning_date"] else DEFAULT_COMMISSIONING_DATE
        )
        # The year is the part of the date string before the first "/".
        ages.append(REFERENCE_YEAR - int(commissioning_date.split("/")[0]))
        entry_counts.append(len(wpp["Production"]))

        for entry in wpp["Production"]:
            _timestamp, production, wind_speed = entry
            all_features.append([wind_speed])
            all_targets.append(production / wpp["Capacity"])

    if not all_features:
        # Nothing to fit the scalers or the model on.
        print(f"Skipping lead time {lead_time}: no production entries")
        continue

    # Convert lists to numpy arrays
    all_features = np.array(all_features)
    all_targets = np.array(all_targets).reshape(-1, 1)

    # One-Hot Encode turbine types (fitted on one row per plant)
    encoder = OneHotEncoder(sparse_output=False)
    turbine_types_onehot = encoder.fit_transform(np.array(turbine_types).reshape(-1, 1))

    # Normalize numerical features
    scaler_hub = StandardScaler()
    scaler_age = StandardScaler()
    scaler_wind = StandardScaler()

    hub_heights_scaled = scaler_hub.fit_transform(np.array(hub_heights).reshape(-1, 1))
    ages_scaled = scaler_age.fit_transform(np.array(ages).reshape(-1, 1))
    wind_speeds_scaled = scaler_wind.fit_transform(all_features)

    # Expand the per-plant rows to one row per production entry. The entries
    # were appended plant by plant, so repeating plant i entry_counts[i] times
    # keeps the rows aligned with wind_speeds_scaled and all_targets.
    plant_features = np.hstack((turbine_types_onehot, hub_heights_scaled, ages_scaled))
    plant_features_per_entry = np.repeat(plant_features, entry_counts, axis=0)

    # Combine features
    combined_features = np.hstack((plant_features_per_entry, wind_speeds_scaled))

    # Convert to PyTorch Dataset
    dataset = WindPowerDataset(combined_features, all_targets)

    # Train-validation split (80/20)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    # DataLoader
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Model setup
    input_size = combined_features.shape[1]
    model = MLP(input_size)
    model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Training loop
    for epoch in range(NUM_EPOCHS):
        model.train()
        epoch_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Lead Time {lead_time} - Epoch {epoch+1}: Loss = {epoch_loss/len(train_loader):.4f}")

        # Evaluate on the held-out split: dropout disabled, no gradients.
        if len(val_loader) > 0:
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch_x, batch_y in val_loader:
                    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                    val_loss += criterion(model(batch_x), batch_y).item()
            print(f"Lead Time {lead_time} - Epoch {epoch+1}: Val Loss = {val_loss/len(val_loader):.4f}")

    # Save model
    torch.save(model.state_dict(), f"models/model_lead_time_{lead_time}.pth")
    models[lead_time] = model

    # Save scalers
    scalers[lead_time] = {
        "turbine_encoder": encoder,
        "hub_scaler": scaler_hub,
        "age_scaler": scaler_age,
        "wind_scaler": scaler_wind,
    }

# Save all scalers
joblib.dump(scalers, "models/scalers.pkl")
print("All models and scalers saved successfully.")