<h1>Load crowdsourced data and fetch weather data

In [None]:
import os
import pandas as pd
import cdsapi  # for reanalysis data
from ecmwfapi import ECMWFService  # for reforecast data

# Download, for every crowdsourced workbook, the ERA5 reanalysis and the ECMWF
# forecast wind fields (100 m u/v components) around the plant location.
# Files that already exist in the output directory are not requested again.
input_dir = "data/crowdsourced_data/"
output_dir = "data/weather_crowdsourcing"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# One client per service is enough for the whole run; creating them inside the
# loop repeated the login round trip for every workbook.
client = cdsapi.Client()  # API key saved in .cdsapirc
server = ECMWFService("mars")  # API key under https://api.ecmwf.int/v1/key/, saved in .ecmwfapirc

for file in os.listdir(input_dir):
    if not file.endswith(".xlsx"):
        continue

    file_path = os.path.join(input_dir, file)
    sheets = pd.read_excel(file_path, sheet_name=["Time Series", "Metadata"])
    time_series_data = sheets["Time Series"]
    metadata = sheets["Metadata"]

    # "time_series_<stamp>.xlsx" -> "<stamp>", reused in the output file names
    timestamp = file.removeprefix("time_series_").removesuffix(".xlsx")

    # Unparseable dates become NaT and are dropped *before* the date parts are
    # extracted: with NaT present, .dt.year is a float column and would be
    # rendered as "2025.0" in the request.
    dates = pd.to_datetime(time_series_data["Date"], errors='coerce').dropna()
    if dates.empty:
        print(f"Skipping {file}: no valid dates in the 'Time Series' sheet.")
        continue

    # Extract unique years, months, days, and hours
    years = sorted(set(dates.dt.year.astype(str)))
    months = sorted(set(dates.dt.month.astype(str).str.zfill(2)))
    days = sorted(set(dates.dt.day.astype(str).str.zfill(2)))
    hours = sorted(set(dates.dt.hour.astype(str).str.zfill(2)))

    # Extract location
    lat, lon = metadata.iloc[0]["Latitude"], metadata.iloc[0]["Longitude"]

    # Bounding box of +/- 1 degree around the plant, N/W/S/E. The same box is
    # used for both products so that the later bilinear interpolation to the
    # plant location has surrounding grid points to work with.
    area = [lat + 1, lon - 1, lat - 1, lon + 1]

    # =================== REANALYSIS DATA (ERA5) ===================

    reanalysis_file = os.path.join(output_dir, f"reanalysis_{timestamp}.grib")

    if not os.path.exists(reanalysis_file):  # Skip if already downloaded
        print(f"Downloading ERA5 reanalysis data for {timestamp}...")

        request = {
            "product_type": "reanalysis",
            "variable": ["100m_u_component_of_wind", "100m_v_component_of_wind"],
            "year": years,
            "month": months,
            "day": days,
            "time": hours,
            "format": "grib",
            "area": area,
        }

        client.retrieve("reanalysis-era5-single-levels", request, reanalysis_file)
        print(f"Saved reanalysis data: {reanalysis_file}")
    else:
        print(f"Skipping ERA5 reanalysis data for {timestamp}, already exists.")

    # =================== REFORECAST DATA (ECMWF) ===================

    reforecast_file = os.path.join(output_dir, f"reforecast_{timestamp}.grib")

    if not os.path.exists(reforecast_file):  # Skip if already downloaded
        print(f"Downloading ECMWF reforecast data for {timestamp}...")

        # Whole calendar months covering the observations. The end date is the
        # real last day of the month (a fixed "-31" is invalid for shorter
        # months), and both ends come from actual dates so that a series that
        # crosses a year boundary does not request two full years.
        first_day = dates.min().replace(day=1)
        last_day = dates.max() + pd.offsets.MonthEnd(0)
        date_range = f"{first_day:%Y-%m-%d}/to/{last_day:%Y-%m-%d}"

        server.execute(
            {
                "class": "od",
                "dataset": "oper",
                "expver": "1",
                "stream": "oper",
                "type": "fc",
                "levtype": "sfc",
                # 100 m U/V wind components (246.228/247.228). The previous
                # 165.128/166.128 are the 10 m components, which do not
                # provide the u100/v100 fields the later cells read.
                "param": "246.228/247.228",
                # NOTE(review): levelist normally applies to model/pressure
                # levels; confirm it is ignored for levtype=sfc.
                "levelist": "100",
                "date": date_range,
                "time": ["00:00", "12:00"],  # Forecast times
                "step": [str(i) for i in range(0, 145, 3)],  # Convert to list of strings
                "grid": "0.25/0.25",
                "area": area,  # N/W/S/E
                "format": "grib2",
            },
            reforecast_file
        )
        print(f"Saved reforecast data: {reforecast_file}")
    else:
        print(f"Skipping ECMWF reforecast data for {timestamp}, already exists.")

print("All downloads completed.")


2025-03-03 19:13:33,158 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.


Skipping ERA5 reanalysis data for time_series_20250227_141730.xlsx, already exists.
Downloading ECMWF reforecast data for time_series_20250227_141730.xlsx...
2025-03-03 19:13:33 ECMWF API python library 1.6.3
2025-03-03 19:13:33 ECMWF API at https://api.ecmwf.int/v1
2025-03-03 19:13:34 Welcome Alexander Peters
2025-03-03 19:13:36 In case of problems, please check https://confluence.ecmwf.int/display/WEBAPI/Web+API+FAQ or contact servicedesk@ecmwf.int
2025-03-03 19:13:37 Request submitted
2025-03-03 19:13:37 Request id: 67c5f152f86a24e21f09aab7
2025-03-03 19:13:37 Request is submitted
2025-03-03 19:13:40 Request is active
2025-03-03 19:28:00 mars - INFO   - 20250303.181339 - Welcome to MARS
2025-03-03 19:28:00 mars - INFO   - 20250303.181339 - MARS Client build stamp: 20240618101041
2025-03-03 19:28:00 mars - INFO   - 20250303.181339 - MARS Client bundle version: 6.33.19.4
2025-03-03 19:28:00 mars - INFO   - 20250303.181339 - package mars-client version: 6.33.19
2025-03-03 19:28:00 mars

2025-03-03 19:28:04,706 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.


Downloading ERA5 reanalysis data for time_series_20250301_182711.xlsx...


2025-03-03 19:28:05,098 INFO Request ID is 74d02e8f-a5bf-4bfd-866f-20555b1e5192
2025-03-03 19:28:05,194 INFO status has been updated to accepted
2025-03-03 19:28:13,925 INFO status has been updated to running
2025-03-03 19:28:19,062 INFO status has been updated to accepted
2025-03-03 19:28:26,728 INFO status has been updated to running
2025-03-03 19:28:55,347 INFO status has been updated to successful


cd9300515129f9708102ce0b3cff3dac.grib:   0%|          | 0.00/101k [00:00<?, ?B/s]

Saved reanalysis data: data/weather_crowdsourcing\time_series_20250301_182711.xlsx_reanalysis.grib
Downloading ECMWF reforecast data for time_series_20250301_182711.xlsx...
2025-03-03 19:28:57 ECMWF API python library 1.6.3
2025-03-03 19:28:57 ECMWF API at https://api.ecmwf.int/v1
2025-03-03 19:28:58 Welcome Alexander Peters
2025-03-03 19:29:02 In case of problems, please check https://confluence.ecmwf.int/display/WEBAPI/Web+API+FAQ or contact servicedesk@ecmwf.int
2025-03-03 19:29:03 Request submitted
2025-03-03 19:29:03 Request id: 67c5f4effa807164d909a9b5
2025-03-03 19:29:03 Request is submitted
2025-03-03 19:29:06 Request is active
2025-03-03 19:41:25 mars - INFO   - 20250303.182904 - Welcome to MARS
2025-03-03 19:41:25 mars - INFO   - 20250303.182904 - MARS Client build stamp: 20240618101041
2025-03-03 19:41:25 mars - INFO   - 20250303.182904 - MARS Client bundle version: 6.33.19.4
2025-03-03 19:41:25 mars - INFO   - 20250303.182904 - package mars-client version: 6.33.19
2025-03-0

FileNotFoundError: [Errno 2] No such file or directory: 'data/crowdsourced_data/time_series_20250303_115110.xlsx'

<h1>Build datasets

In [None]:
import os
import json
import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import interp2d
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Build one JSON file per downloaded forecast GRIB. Each JSON maps a lead time
# (hours, as string) to the plants for which a production measurement coincides
# with a forecast valid time, together with the forecast 100 m wind speed
# interpolated to the plant location.

# Define lead times
lead_times = list(range(0, 145, 3))

# Directories (the GRIB folder must be the one the download cell writes to)
reforecast_dir = "data/weather_crowdsourcing"
output_dir = "data/weather_crowdsourcing/json_lead_times"
input_dir = "data/crowdsourced_data"

os.makedirs(output_dir, exist_ok=True)

# Load crowdsourced WPP production data
wpp_files = [f for f in os.listdir(input_dir) if f.endswith(".xlsx")]
wpp_data = []

for file in wpp_files:
    file_path = os.path.join(input_dir, file)
    sheets = pd.read_excel(file_path, sheet_name=["Time Series", "Metadata"])
    time_series_data = sheets["Time Series"]
    metadata = sheets["Metadata"]

    # The sheet is expected to hold exactly two columns, timestamp first and
    # production second; anything else raises here instead of further down.
    time_col, production_col = time_series_data.columns
    production = pd.DataFrame({
        "Date": pd.to_datetime(time_series_data[time_col], errors='coerce'),
        "Production": time_series_data[production_col],
    }).dropna(subset=["Date"])  # rows without a parseable timestamp cannot be matched

    lat, lon = metadata.iloc[0]["Latitude"], metadata.iloc[0]["Longitude"]

    wpp_data.append({
        "Name": metadata.iloc[0]["Name"],
        # plain floats so that json.dump accepts them whatever dtype Excel produced
        "Latitude": float(lat),
        "Longitude": float(lon),
        "Production": production  # entire time series
    })

# Process each reforecast file
for file in os.listdir(reforecast_dir):
    # The folder also holds the ERA5 "reanalysis_*.grib" downloads; only the
    # forecast files carry lead times.
    if not (file.startswith("reforecast_") and file.endswith(".grib")):
        continue

    # "reforecast_<stamp>.grib" -> "<stamp>"; keeps ".grib" out of the JSON name
    stamp = file.removeprefix("reforecast_").removesuffix(".grib")
    grib_path = os.path.join(reforecast_dir, file)
    json_output_path = os.path.join(output_dir, f"WPPs+production+wind_lead_times_{stamp}.json")

    if os.path.exists(json_output_path):
        print(f"Skipping {json_output_path}, already exists.")
        continue

    print(f"Processing {file}...")
    lead_time_dicts = {str(lt): {} for lt in lead_times}

    with xr.open_dataset(grib_path, engine="cfgrib") as ds:
        wind_speed = np.sqrt(ds["u100"] ** 2 + ds["v100"] ** 2)

        for wpp in wpp_data:
            # Linear interpolation to the plant location (replaces
            # scipy.interpolate.interp2d, which newer SciPy releases no longer
            # provide). Locations outside the file's grid yield NaN.
            site = wind_speed.interp(latitude=wpp["Latitude"], longitude=wpp["Longitude"])

            # One row per forecast run and step; "time" (run start) and
            # "valid_time" are carried along as columns.
            forecasts = site.to_dataframe(name="wind_speed").reset_index()
            forecasts["lead_time"] = (forecasts["valid_time"] - forecasts["time"]) / pd.Timedelta(hours=1)
            forecasts = forecasts[forecasts["lead_time"].isin(lead_times)]
            forecasts = forecasts.dropna(subset=["wind_speed"])

            # Pair every measurement with all forecasts valid at that instant
            matched = wpp["Production"].merge(
                forecasts[["valid_time", "lead_time", "wind_speed"]],
                left_on="Date",
                right_on="valid_time",
            )

            for lead_time, rows in matched.groupby("lead_time"):
                lead_time_dicts[str(int(lead_time))][wpp["Name"]] = {
                    "Latitude": wpp["Latitude"],
                    "Longitude": wpp["Longitude"],
                    # timestamps as ISO strings: pandas Timestamps are not JSON serialisable
                    "Time Series": [
                        [ts.isoformat(), prod, round(float(ws), 2)]
                        for ts, prod, ws in zip(rows["Date"], rows["Production"].tolist(), rows["wind_speed"].tolist())
                    ]
                }

    with open(json_output_path, "w", encoding="utf-8") as f:
        json.dump(lead_time_dicts, f, indent=4)
    print(f"Saved reforecast JSON: {json_output_path}")


Add weather data (suggested by ChatGPT)

In [None]:
import os
import json
import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import interp2d
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Build one JSON file per downloaded forecast GRIB. Each JSON maps a lead time
# (hours, as string) to the plants for which a production measurement coincides
# with a forecast valid time, together with the forecast 100 m wind speed
# interpolated to the plant location.

# Define lead times
lead_times = list(range(0, 145, 3))

# Directories
reforecast_dir = "data/weather_crowdsourcing"
output_dir = "data/weather_crowdsourcing/json_lead_times"
input_dir = "data/crowdsourced_data"

os.makedirs(output_dir, exist_ok=True)

# Load crowdsourced WPP production data
wpp_files = [f for f in os.listdir(input_dir) if f.endswith(".xlsx")]
wpp_data = []

for file in wpp_files:
    file_path = os.path.join(input_dir, file)
    sheets = pd.read_excel(file_path, sheet_name=["Time Series", "Metadata"])
    time_series_data = sheets["Time Series"]
    metadata = sheets["Metadata"]

    # The sheet is expected to hold exactly two columns, timestamp first and
    # production second; anything else raises here instead of further down.
    time_col, production_col = time_series_data.columns
    production = pd.DataFrame({
        "Date": pd.to_datetime(time_series_data[time_col], errors='coerce'),
        "Production": time_series_data[production_col],
    }).dropna(subset=["Date"])  # rows without a parseable timestamp cannot be matched

    lat, lon = metadata.iloc[0]["Latitude"], metadata.iloc[0]["Longitude"]

    wpp_data.append({
        "Name": metadata.iloc[0]["Name"],
        # plain floats so that json.dump accepts them whatever dtype Excel produced
        "Latitude": float(lat),
        "Longitude": float(lon),
        "Production": production  # entire time series
    })

# Process each reforecast file
for file in os.listdir(reforecast_dir):
    # The folder also holds the ERA5 "reanalysis_*.grib" downloads; only the
    # forecast files carry lead times.
    if not (file.startswith("reforecast_") and file.endswith(".grib")):
        continue

    # "reforecast_<stamp>.grib" -> "<stamp>"; keeps ".grib" out of the JSON name
    stamp = file.removeprefix("reforecast_").removesuffix(".grib")
    grib_path = os.path.join(reforecast_dir, file)
    json_output_path = os.path.join(output_dir, f"WPPs+production+wind_lead_times_{stamp}.json")

    if os.path.exists(json_output_path):
        print(f"Skipping {json_output_path}, already exists.")
        continue

    print(f"Processing {file}...")
    lead_time_dicts = {str(lt): {} for lt in lead_times}

    with xr.open_dataset(grib_path, engine="cfgrib") as ds:
        wind_speed = np.sqrt(ds["u100"] ** 2 + ds["v100"] ** 2)

        for wpp in wpp_data:
            # Linear interpolation to the plant location (replaces
            # scipy.interpolate.interp2d, which newer SciPy releases no longer
            # provide). Locations outside the file's grid yield NaN.
            site = wind_speed.interp(latitude=wpp["Latitude"], longitude=wpp["Longitude"])

            # One row per forecast run and step; "time" (run start) and
            # "valid_time" are carried along as columns.
            forecasts = site.to_dataframe(name="wind_speed").reset_index()
            forecasts["lead_time"] = (forecasts["valid_time"] - forecasts["time"]) / pd.Timedelta(hours=1)
            forecasts = forecasts[forecasts["lead_time"].isin(lead_times)]
            forecasts = forecasts.dropna(subset=["wind_speed"])

            # Pair every measurement with all forecasts valid at that instant
            matched = wpp["Production"].merge(
                forecasts[["valid_time", "lead_time", "wind_speed"]],
                left_on="Date",
                right_on="valid_time",
            )

            for lead_time, rows in matched.groupby("lead_time"):
                lead_time_dicts[str(int(lead_time))][wpp["Name"]] = {
                    "Latitude": wpp["Latitude"],
                    "Longitude": wpp["Longitude"],
                    # timestamps as ISO strings: pandas Timestamps are not JSON serialisable
                    "Time Series": [
                        [ts.isoformat(), prod, round(float(ws), 2)]
                        for ts, prod, ws in zip(rows["Date"], rows["Production"].tolist(), rows["wind_speed"].tolist())
                    ]
                }

    with open(json_output_path, "w", encoding="utf-8") as f:
        json.dump(lead_time_dicts, f, indent=4)
    print(f"Saved reforecast JSON: {json_output_path}")


Old crowdsourcing code

In [None]:


        # Fragment of an older per-upload routine: it reads the wind field for
        # one uploaded workbook, evaluates the current model on it and, if the
        # loss is small enough, fine-tunes and saves the model.
        # NOTE(review): the enclosing definition is not part of this cell;
        # file_name, metadata, time_series_data, lat, lon, scalers,
        # known_turbine_types, model, device and model_path must come from it.

        # Read GRIB file using xarray and cfgrib
        ds = xr.open_dataset(file_name, engine='cfgrib')

        # Extract u and v wind components, latitudes, longitudes, and times
        u_wind = ds["u100"].values
        v_wind = ds["v100"].values
        latitudes = ds["latitude"].values
        longitudes = ds["longitude"].values
        times = pd.to_datetime(ds["time"].values)

        # Prepare static features (shared across timestamps)
        hub_height = metadata.iloc[0]["Hub Height"]
        commissioning_year = metadata.iloc[0]["Commissioning Year"]
        commissioning_month = metadata.iloc[0]["Commissioning Month"]
        # Plant age in months relative to a fixed reference date
        ref_date = pd.Timestamp("2024-12-01")
        age_months = (ref_date.year - commissioning_year) * 12 + (ref_date.month - commissioning_month)
        # NOTE(review): capacity is read but not used anywhere in this fragment
        capacity = metadata.iloc[0]["Capacity (MW)"]
        turbine_type = metadata.iloc[0]["Turbine Type"]

        scaled_hub_height = scalers["hub_heights"].transform([[hub_height]])[0][0]
        scaled_age = scalers["ages"].transform([[age_months]])[0][0]

        # Static features are repeated once per row of the uploaded series
        num_rows = len(time_series_data["Date"])
        hub_height_repeated = np.full((num_rows, 1), scaled_hub_height)
        age_repeated = np.full((num_rows, 1), scaled_age)

        # One-hot encode the turbine type; an unknown type stays all zeros
        turbine_type_encoded = np.zeros(len(known_turbine_types))
        if turbine_type in known_turbine_types:
            turbine_type_encoded[np.where(known_turbine_types == turbine_type)[0][0]] = 1
        turbine_type_repeated = np.tile(turbine_type_encoded, (num_rows, 1))

        interpolated_production = []

        # Iterate over each timestamp in the uploaded data
        for _, row in time_series_data.iterrows():
            timestamp = row["Date"]
            production_value = row["Production (kW)"] / 1e3 # convert to MW

            # Rows whose timestamp is not in the GRIB file are skipped
            if timestamp in times.values:
                time_index = times.get_loc(timestamp)

                u = u_wind[time_index]
                v = v_wind[time_index]
                wind_speed = np.sqrt(u**2 + v**2)

                # Interpolate the wind speed field to the plant location
                interpolator = interp2d(longitudes, latitudes, wind_speed, kind='linear')
                wind_speed_value = interpolator(lon, lat)[0]
                wind_speed_value = round(wind_speed_value, 2)

                interpolated_production.append([timestamp, production_value, wind_speed_value])

        productions = np.array([interpolated_value[1] for interpolated_value in interpolated_production]).reshape(-1, 1)
        wind_speeds = np.array([interpolated_value[2] for interpolated_value in interpolated_production])
        
        scaled_wind_speeds = scalers["winds"].transform(wind_speeds.reshape(-1, 1))

        # Prepare input features
        # NOTE(review): the repeated static features have num_rows rows while
        # scaled_wind_speeds has one row per matched timestamp; np.hstack
        # fails if any row was skipped above.
        input_features = np.hstack([turbine_type_repeated, hub_height_repeated, age_repeated, scaled_wind_speeds])
        input_tensor = torch.tensor(input_features, dtype=torch.float32).to(device)

        # NOTE(review): model.eval() is not called in this fragment; confirm
        # the surrounding code does so.
        with torch.no_grad():
            predictions = model(input_tensor)

        # NOTE(review): unlike the training loss below, this target tensor is
        # built without dtype=torch.float32 and is not moved to `device`.
        huber_loss = nn.HuberLoss()(predictions, torch.tensor(productions)).item()

        # Update model if the huber loss is < 1
        if huber_loss < 1:
            model.train()
            optimizer = optim.Adam(model.parameters(), lr=1e-3)
            # Ten full-batch gradient steps on the uploaded data
            for epoch in range(10):
                optimizer.zero_grad()
                output = model(input_tensor)
                loss = nn.HuberLoss()(output, torch.tensor(productions, dtype=torch.float32).to(device))
                loss.backward()
                optimizer.step()

            # Save updated model
            timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
            updated_model_path = os.path.join(model_path, f"trained_parameters_{timestamp_str}.pth")
            torch.save(model.state_dict(), updated_model_path)
            print(f"Updated model saved at {updated_model_path}")


Copied code to train model C

In [None]:
import numpy as np
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib
import os
import pandas as pd
from torch.nn import HuberLoss, MSELoss, L1Loss

# Empty dictionaries that collect the training artefacts (model weights,
# scalers, encoders, input sizes and metrics); five independent objects.
models, scalers, encoders, input_sizes, metrics = ({} for _ in range(5))

# Feed-forward network used as the regression model
class MLP(nn.Module):
    """Multi-layer perceptron with three hidden layers and a single output.

    Layer widths are input_size -> 256 -> 128 -> 64 -> 1. Every hidden layer
    is followed by a ReLU; dropout (p=0.3366) is applied to the last hidden
    activation before the output layer.
    """

    def __init__(self, input_size):
        """Create the layers for feature vectors of length *input_size*."""
        super().__init__()
        # Linear layers, in forward order
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)
        # One activation module per hidden layer
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(0.3366)

    def forward(self, x):
        """Return the network output for the batch *x*."""
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        return self.fc4(self.dropout(x))

# Dataset wrapper around the feature and target arrays
class WindPowerDataset(Dataset):
    """Index-based dataset that serves (features, target) pairs as tensors."""

    def __init__(self, features, targets):
        """Keep references to the feature rows and their targets."""
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of samples, taken from the targets."""
        return len(self.targets)

    def __getitem__(self, index):
        """Return sample *index* as a pair of float32 tensors."""
        sample = torch.tensor(self.features[index], dtype=torch.float32)
        target = torch.tensor(self.targets[index], dtype=torch.float32)
        return sample, target

# Train one MLP per lead time. Every JSON file in input_dir holds the data of
# one lead time (the number after the last "_" in the file name); the trained
# weights, scalers, encoder, input size and final-epoch metrics are collected
# per lead time and saved together at the end.
input_dir = r"E:\MA_data\WPPs+production+reforecast"

for file in os.listdir(input_dir):
    file_path = os.path.join(input_dir, file)
    # Skip sub-folders and stray non-JSON files. Without this guard the
    # training below would run on the data left over from the previous file.
    if not (os.path.isfile(file_path) and file.endswith(".json")):
        continue

    lead_time = int(file.split("_")[-1].replace(".json", ""))
    with open(file_path, "r", encoding="utf-8") as json_file:
        forecast_data = json.load(json_file)

    print(f"Processing lead time: {lead_time}")

    print("    Data preparation")

    all_turbine_types = []
    all_hub_heights = []
    all_capacities = []
    all_commissioning_dates = []
    all_production_data = []

    for unique_key, wpp in forecast_data.items():
        all_turbine_types.append(str(wpp["Turbine"]))
        all_hub_heights.append(wpp["Hub_height"])
        all_capacities.append(wpp["Capacity"])
        # A date given as a string without "/" gets "/06" appended so that it
        # parses with the '%Y/%m' format used below.
        commissioning_date = wpp["Commissioning_date"]
        if isinstance(commissioning_date, str) and "/" not in commissioning_date:
            commissioning_date = f"{commissioning_date}/06"
        all_commissioning_dates.append(commissioning_date)
        all_production_data.append(wpp["Time Series"])

    # One-Hot-Encoding for turbine types
    encoder = OneHotEncoder(sparse_output=False)
    turbine_types_onehot = encoder.fit_transform(np.array(all_turbine_types).reshape(-1, 1))

    # convert to datetime
    standardised_dates = pd.to_datetime(all_commissioning_dates, format='%Y/%m')

    # calculate age in months relative to a fixed reference date
    ref_date = pd.Timestamp("2024-12-01")
    ages = ref_date.year * 12 + ref_date.month - (standardised_dates.year * 12 + standardised_dates.month)

    # create combined features and output lists
    combined_features_raw = []
    output_raw = []

    # convert data in feature arrays
    for idx, production_data in enumerate(all_production_data):
        num_rows = len(production_data)

        # Repetitions for common features
        turbine_type_repeated = np.tile(turbine_types_onehot[idx], (num_rows, 1))
        hub_height_repeated = np.full((num_rows, 1), float(all_hub_heights[idx]))
        age_repeated = np.full((num_rows, 1), ages[idx])

        # Extract production values (divided by the plant capacity) and wind speeds
        production_values = np.array([entry[1] for entry in production_data]).reshape(-1, 1) / all_capacities[idx]
        wind_speeds = np.array([entry[2] for entry in production_data]).reshape(-1, 1)

        # combine all features; column order: one-hot type, hub height, age, wind speed
        combined_chunk = np.hstack((
            turbine_type_repeated,
            hub_height_repeated,
            age_repeated,
            wind_speeds
        ))

        # add the data
        combined_features_raw.append(combined_chunk)
        output_raw.append(production_values)

    # combine all data chunks to one array
    combined_features = np.vstack(combined_features_raw)
    output = np.vstack(output_raw)

    # Interpolate missing values (linear interpolation) in pandas
    wind_speed_series = pd.Series(combined_features[:, -1])
    wind_speed_series.interpolate(method='linear', inplace=True)
    combined_features[:, -1] = wind_speed_series.to_numpy()

    # round all values to four decimal places
    combined_features = np.round(combined_features, decimals=4)
    output = np.round(output, decimals=4)

    # Normalise numerical features
    scaler_wind = StandardScaler()
    scaler_ages = StandardScaler()
    scaler_hub_heights = StandardScaler()

    # Scale the individual features (the last three columns)
    combined_features[:, -1] = scaler_wind.fit_transform(combined_features[:, -1].reshape(-1, 1)).flatten() # scale wind speeds
    combined_features[:, -2] = scaler_ages.fit_transform(combined_features[:, -2].reshape(-1, 1)).flatten()  # scale ages
    combined_features[:, -3] = scaler_hub_heights.fit_transform(combined_features[:, -3].reshape(-1, 1)).flatten()  # scale hub heights

    # Convert to PyTorch Dataset
    dataset = WindPowerDataset(combined_features, output)

    params = {"batch_size": 128,
              "lr": 0.00010155,
              "number_epochs": 10}
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # shuffling matters here
    data_loader = DataLoader(dataset, batch_size=params["batch_size"], shuffle=True)

    # Model setup
    input_size = combined_features.shape[1]

    # use static instead of dynamic computational graphs
    model = torch.jit.script(MLP(input_size=input_size)).to(device)

    # Training configuration: Huber loss drives the optimisation, MAE and MSE
    # are only tracked for reporting.
    mae_criterion = L1Loss()
    mse_criterion = MSELoss()
    huber_criterion = HuberLoss()
    optimizer = optim.Adam(model.parameters(), lr=params["lr"])

    # Training
    print("    Training")
    for epoch in range(params["number_epochs"]):
        print(f"        Epoch {epoch + 1}/{params['number_epochs']}")
        model.train()
        train_loss_mae, train_loss_mse, train_loss_huber = 0, 0, 0

        for batch_x, batch_y in data_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)

            # Calculate metrics for each criterion
            loss_mae = mae_criterion(outputs, batch_y)
            loss_mse = mse_criterion(outputs, batch_y)
            loss_huber = huber_criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss_huber.backward()
            optimizer.step()

            # Accumulate metrics for logging
            train_loss_mae += loss_mae.item()
            train_loss_mse += loss_mse.item()
            train_loss_huber += loss_huber.item()

        # Mean over batches; after the loop these hold the last epoch's values
        train_loss_mae /= len(data_loader)
        train_loss_mse /= len(data_loader)
        train_loss_huber /= len(data_loader)

    models[lead_time] = model.state_dict()

    scalers[lead_time] = {
        "winds": scaler_wind,
        "ages": scaler_ages,
        "hub_heights": scaler_hub_heights
    }

    encoders[lead_time] = encoder

    input_sizes[lead_time] = input_size

    metrics[lead_time] = {
        "Huber": train_loss_huber,
        "MAE": train_loss_mae,
        "MSE":train_loss_mse,
        "RMSE": np.sqrt(train_loss_mse)
    }

# Save all parameters (create the target folder first so saving cannot fail
# on a fresh checkout after the whole training has run)
os.makedirs("parameters_deployment", exist_ok=True)
torch.save(models, "parameters_deployment/models.pth")
joblib.dump(scalers, "parameters_deployment/scalers.pkl")
joblib.dump(encoders, "parameters_deployment/encoders.pkl")
joblib.dump(input_sizes, "parameters_deployment/input_sizes.pkl")
joblib.dump(metrics, "parameters_deployment/metrics.pkl")
print("All parameters saved successfully.")