<h1>1. Preprocessing of historical production data: discard data of unwanted power plants, retain monthly files</h1>
<p>Duplicates are avoided by collecting the wind farm names in the set unique_windfarms_set and skipping every wind farm whose name has already been added.<br>
Processing takes 1 to 2 minutes per month, partly because only values at full hours are retained.</p>

In [None]:
import pandas as pd
import os
import json
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Base directories
input_dir = r"E:\MA_data\raw production history ENTSO-E"
output_dir = r"C:\Users\alexa\Documents\Webapp\data\production_history\processed_new\JSON"

# Months to process, formatted like the prefix of the raw file names (e.g. "2019_09").
# The range is currently limited to 2019-09; widen start/end to process more months.
months = pd.date_range(start="2019-09", end="2019-09", freq="MS").strftime("%Y_%m").tolist()

# Columns describing one wind farm entry. 'AreaCode', 'AreaDisplayName', 'AreaTypeCode' and
# 'MapCode' of identical WPPs may differ, so at least one of them ('AreaCode') is part of the
# criterion; otherwise the production data would be appended twice to the same wind farm.
windfarm_columns = [
    'GenerationUnitName',
    'GenerationUnitCode',
    'GenerationUnitType',
    'GenerationUnitInstalledCapacity(MW)',
    'AreaCode',
]

# Process one raw file per month
for month in months:
    # Build the input and output paths
    input_file = os.path.join(input_dir, f"{month}_ActualGenerationOutputPerGenerationUnit_16.1.A_r2.1.csv")
    output_file = os.path.join(output_dir, f"production_summary_{month}.json")

    # Skip months without a raw file
    if not os.path.exists(input_file):
        print(f"Datei nicht gefunden: {input_file}")
        continue

    # Read the tab-separated raw file
    print(f"Bearbeite Datei: {input_file}")
    data = pd.read_csv(input_file, sep='\t')

    # Keep only wind units. The trailing blank in the labels is taken over unchanged from the
    # original filter. The copy makes the column assignment below act on an independent frame
    # instead of a slice of `data`.
    is_wind = data['GenerationUnitType'].isin(['Wind Onshore ', 'Wind Offshore '])
    filtered_data = data[is_wind].copy()

    # Parse the timestamps once and store them in ISO-8601 format
    timestamps = pd.to_datetime(filtered_data['DateTime (UTC)'])
    filtered_data['DateTime (UTC)'] = timestamps.dt.strftime('%Y-%m-%dT%H:%M:%S')

    # Unique wind farm entries and the names that still have to be exported
    unique_windfarms = filtered_data[windfarm_columns].drop_duplicates()
    unique_windfarms_set = set(unique_windfarms['GenerationUnitName'])

    # Keep only full hours (the resolution of the weather data is hourly) and drop missing
    # production values. Doing this once on the whole column replaces the per-value timestamp
    # parsing that dominated the run time.
    keep = (timestamps.dt.minute == 0) & filtered_data['ActualGenerationOutput(MW)'].notna()
    hourly_data = filtered_data[keep]

    # Collect the hourly production of every wind farm
    production_data = []
    for _, row in unique_windfarms.iterrows():
        name = row['GenerationUnitName']

        # Don't add duplicates: only the first entry per name is exported
        if name not in unique_windfarms_set:
            continue
        unique_windfarms_set.discard(name)

        # Rows of the current wind farm; the AreaCode condition is important to avoid adding
        # the production data of all its duplicates to one wind farm
        windfarm_data = hourly_data[
            (hourly_data['GenerationUnitName'] == name) &
            (hourly_data['AreaCode'] == row['AreaCode'])
        ]

        # List of [time, production] pairs (JSON cannot store arrays)
        production_array = [
            [time, production]
            for time, production in zip(
                windfarm_data['DateTime (UTC)'],
                windfarm_data['ActualGenerationOutput(MW)']
            )
        ]

        production_data.append({
            'GenerationUnitName': name,
            'GenerationUnitCode': row['GenerationUnitCode'],
            'GenerationUnitType': row['GenerationUnitType'],
            'GenerationUnitInstalledCapacity(MW)': row['GenerationUnitInstalledCapacity(MW)'],
            'Production': production_array
        })

    # Write the monthly JSON file
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(production_data, json_file, ensure_ascii=False, indent=4)

    print(f"JSON-Datei wurde erfolgreich erstellt: {output_file}")


<h1>2. Merge all monthly production data files into one combined file</h1>

In [None]:
import pandas as pd
import os
import json

# Folder holding the monthly summaries and the path of the combined output file
input_dir = r"C:\Users\alexa\Documents\Webapp\data\production_history\processed_new\JSON"
output_file = r"C:\Users\alexa\Documents\Webapp\data\production_history\production_summary_all.json"

# Month labels from 2015_01 to 2024_10, matching the suffix of the monthly file names
month_starts = pd.date_range(start="2015-01", end="2024-10", freq="MS")
months = month_starts.strftime("%Y_%m").tolist()

# A wind farm is identified by name, code, type and installed capacity
columns_merge = ['GenerationUnitName', 'GenerationUnitCode', 'GenerationUnitType', 'GenerationUnitInstalledCapacity(MW)']
final_data = {}

# Walk through the monthly files in chronological order
for month in months:
    input_file = os.path.join(input_dir, f"production_summary_{month}.json")

    if os.path.exists(input_file):
        print(f"Verarbeite Datei: {input_file}")
        with open(input_file, 'r', encoding='utf-8') as monthly_handle:
            monthly_data = json.load(monthly_handle)

        for windfarm in monthly_data:
            # Unique key per WPP built from the columns in columns_merge
            key = tuple([windfarm[col] for col in columns_merge])
            known_windfarm = final_data.get(key)
            if known_windfarm is None:
                # First occurrence: keep the whole record
                final_data[key] = windfarm
            else:
                # Later occurrence: append this month's production
                known_windfarm['Production'].extend(windfarm['Production'])
    else:
        # No summary exists for this month
        print(f"Datei nicht gefunden: {input_file}")

# The final JSON structure is the list of merged records
merged_data = [*final_data.values()]

# Write the combined JSON file
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(merged_data, json_file, ensure_ascii=False, indent=4)

print(f"Zusammengeführte JSON-Datei wurde erfolgreich gespeichert unter: {output_file}")


<h1>3. Perform manual assignment to The Wind Power database indices</h1>

Create Excel File with WPPs in JSON file

In [None]:
import pandas as pd
import json

# Paths of the combined JSON file and of the Excel overview to create
file_path = r"C:\Users\alexa\Documents\Webapp\data\production_history\production_summary_all.json"
output_excel_path = r"C:\Users\alexa\Documents\Webapp\data\production_history\JSON_File.xlsx"

# Read the combined JSON file
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Give every WPP an ID equal to its position in the list of dictionaries
for position, entry in enumerate(data):
    entry['JSON-ID'] = position

# Keep only the descriptive columns plus the new ID
overview_columns = [
    'GenerationUnitName',
    'GenerationUnitCode',
    'GenerationUnitType',
    'GenerationUnitInstalledCapacity(MW)',
    'JSON-ID',
]
df = pd.DataFrame(data)[overview_columns]

# Sort by wind farm name
df_sorted = df.sort_values('GenerationUnitName')

# Store the overview as an Excel file
df_sorted.to_excel(output_excel_path, index=False)

# Show the path of the written file as the cell result
output_excel_path

Download the table used to find the common names of UK power plants from the website

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the web page
url = "https://osuked.github.io/Power-Station-Dictionary/dictionary.html"

# Fetch the page. The timeout keeps the cell from waiting forever on a stalled connection.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    # Raise instead of calling exit(): in a notebook, exit() shuts the kernel down,
    # whereas an exception only stops this cell and shows the reason.
    raise RuntimeError(f"Fehler beim Abrufen der Webseite. (HTTP {response.status_code})")

# Parse the page with BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find all tables on the page
tables = soup.find_all('table')
if len(tables) < 3:
    raise RuntimeError("Weniger als 3 Tabellen auf der Seite gefunden.")

# Select the third table (index 2, because Python counts from zero)
table = tables[2]

# Extract the column names
headers = [header.text.strip() for header in table.find_all('th')]

# Extract the rows, skipping the header row
rows = [
    [cell.text.strip() for cell in row.find_all('td')]
    for row in table.find_all('tr')[1:]
]

# Build the DataFrame
df = pd.DataFrame(rows, columns=headers)

# Save the file
output_path = "data/WPPs/Power_Station_Dictionary.xlsx"
df.to_excel(output_path, index=False)
print(f"Die dritte Tabelle wurde erfolgreich als '{output_path}' gespeichert.")

Add the common names from the downloaded table to the assignment file

In [None]:
import pandas as pd

# Load the two provided files
file_1_path = r"C:\Users\alexa\Documents\Webapp\data\WPPs\Power_Station_Dictionary.xlsx"
file_2_path = r"C:\Users\alexa\Documents\Webapp\data\production_history\JSON_File.xlsx"

# Read the data
df1 = pd.read_excel(file_1_path)
df2 = pd.read_excel(file_2_path)

# Introduce new columns at the end
df2['Names_UK_Plants'] = None
df2['ID_The-Wind-Power'] = None
df2['Comment'] = None

# Column of the dictionary that is searched for the generation unit names
bmu_ids = df1['National Grid BMU ID']

# Iterate over rows in df2 to match and update the "Names_UK_Plants" column
for index, row in df2.iterrows():
    generation_unit_name = row['GenerationUnitName']

    # An empty cell cannot be used as a search term
    if pd.isna(generation_unit_name):
        continue

    # Case-insensitive substring search. regex=False makes characters such as
    # "(", "+" or "." in a unit name match literally instead of being interpreted
    # as (possibly invalid) regular-expression syntax.
    matching_rows = df1[bmu_ids.str.contains(str(generation_unit_name), case=False, na=False, regex=False)]

    if not matching_rows.empty:
        # Join the "Common Name" value(s); missing names are dropped so that join() only sees strings
        common_names = matching_rows['Common Name'].dropna().astype(str).tolist()
        if common_names:
            df2.at[index, 'Names_UK_Plants'] = ', '.join(common_names)

# Save the updated DataFrame to a new Excel file
output_path = r"C:\Users\alexa\Documents\Webapp\data\Assignment.xlsx"
df2.to_excel(output_path, index=False)

# Show the path of the written file as the cell result
output_path

perform manual assignment

Load the WPPs and the assignment file (after the manual assignment has been carried out) and combine the information - To Do !!!!!!!!!!<br>
The assignment file maps the parquet file (ID_The-Wind-Power) to the JSON file (JSON-ID).<br>
All three files are loaded and a new JSON file is created.<br>
The rows in the Excel file correspond exactly to the entries in the JSON file (same number).<br>
The JSON-IDs in the resulting JSON file are those of WPPs with a match in The Wind Power database (no "not found"); when the production data of several WPPs are added together, the JSON-ID of the first WPP is kept.

In [None]:
import pandas as pd

import json  # json.load is used below, but this cell itself only imports pandas

# Load the input data
df_wind_power = pd.read_parquet("data/WPPs/The_Wind_Power.parquet")
df_assignment = pd.read_excel("data/Assignment.xlsx", sheet_name="Sheet1")
# The combined file is written as UTF-8 with ensure_ascii=False, so it has to be read as
# UTF-8 too; without an explicit encoding, open() falls back to the platform default,
# which can garble or reject non-ASCII plant names.
with open(r"C:\Users\alexa\Documents\Webapp\data\production_history\production_summary_all.json", "r", encoding="utf-8") as file:
    df_json = json.load(file)

In [None]:
output_file = "data/WPPs+production.json"

# Keep only the assignment rows whose "ID_The-Wind-Power" entry is not "not found"
has_match = df_assignment["ID_The-Wind-Power"].ne("not found")
df_assignment = df_assignment[has_match]

# Set with the unique generation unit codes of the remaining rows
generation_unit_code_set = {*df_assignment['GenerationUnitCode']}

# Extract and unpack all valid IDs from the "ID_The-Wind-Power" column
def extract_ids(value):
    """Return the The-Wind-Power IDs stored in one assignment cell as a list.

    Args:
        value: Cell content. Supported forms are a list literal given as text
            (e.g. "[123, 456]"), a single ID given as text (e.g. "123"), an
            integer (built-in or NumPy), or an integer-valued float (e.g. 123.0).

    Returns:
        The list of IDs; an empty list for anything else (e.g. None or NaN).

    Raises:
        ValueError: If a text value is neither a valid list literal nor an integer.
    """
    import ast
    import math
    import numbers

    if isinstance(value, str):
        text = value.strip()
        if text.startswith("[") and text.endswith("]"):
            # literal_eval parses the list literal without executing arbitrary
            # code, unlike the eval() call used before
            return list(ast.literal_eval(text))
        return [int(text)]  # a single ID is wrapped in a list
    if isinstance(value, numbers.Integral):
        return [int(value)]
    if isinstance(value, float) and math.isfinite(value) and value.is_integer():
        # numeric spreadsheet cells can arrive as floats such as 123.0
        return [int(value)]
    return []

# Gather every ID referenced in the "ID_The-Wind-Power" column
valid_ids = set()
for assigned_value in df_assignment["ID_The-Wind-Power"]:
    valid_ids.update(extract_ids(assigned_value))

# Rows of The Wind Power table that belong to one of those IDs
is_assigned = df_wind_power['ID'].isin(valid_ids)
df_filtered = df_wind_power[is_assigned].copy()

# IDs found in the table, and IDs that were assigned but are missing from it
actual_ids = set(df_filtered['ID'])
suspended_ids = valid_ids.difference(actual_ids)

print("number potential WPPs:", len(valid_ids))
print("number actual WPPs:", len(actual_ids))
print("number suspended WPPs (no name, location, capacity or status not in operation):", len(suspended_ids))

# Combine the assignment rows, The Wind Power metadata and the production series into one
# record per wind power plant (WPP), then write the result as JSON and as Excel.
production_data = [] # new JSON content with the production data of the WPPs
temporal_wpps = [] # WPPs that are stored temporarily so they can be updated later

# Go through every row of the assignment file and add the production data
for _, row in df_assignment.iterrows():

    ids_in_row = extract_ids(row["ID_The-Wind-Power"])
    first_id = ids_in_row[0] # dismiss other ids in the same row, because the capacity of the WPP is not taken from the wind power database anyway and other statistics should be the same for all indices

    # suspended_ids are assigned IDs without a row in df_filtered
    if first_id in suspended_ids:
        continue # jump to next iteration, because following line would fail for suspended_ids

    # 'JSON-ID' is used as a position in the list loaded from the combined JSON file
    production_array = df_json[row['JSON-ID']]['Production']
    capacity = row['GenerationUnitInstalledCapacity(MW)']

    # IDs are removed from actual_ids at the end of each iteration, so a non-suspended ID
    # that is missing here has already been handled by an earlier row.
    if first_id not in actual_ids: # several lines in assignment files for one WPP in The Wind Power file
        # Codes are removed from generation_unit_code_set once a row with that code was added
        if row['GenerationUnitCode'] not in generation_unit_code_set: # another row with the same generation unit code as a previous row --> create new WPP although its first_id is identical, because the capacity differs
            pass # continue at current_index = ...
        else: # add production data to existing WPP
            pass
            # NOTE(review): the enumerate() index is unused, and every record with a
            # matching ID is updated, not only the first one.
            for _, wpp in enumerate(production_data):
                if wpp['ID_The-Wind-Power'] == first_id:

                    existing_production = wpp['Production']

                    # Compare the timestamps and add the values only where they match.
                    # NOTE(review): this two-pointer walk assumes both lists are sorted
                    # ascending by timestamp string - confirm.
                    i, j = 0, 0  # two pointers for existing_production and production_array
                    updated_production = []

                    while i < len(existing_production) and j < len(production_array):
                        time, existing_value = existing_production[i]
                        time_comp, new_value = production_array[j]

                        if time == time_comp:
                            updated_production.append([time, existing_value + new_value])
                            i += 1
                            j += 1
                        elif time < time_comp:
                            i += 1 # timestamp only in the existing series: dropped
                        else:
                            j += 1 # timestamp only in the new series: dropped

                    # Without any common timestamp the record is left untouched
                    if updated_production != []:
                        wpp['Production'] = updated_production # update production data (the result list contains only entries with matching timestamps)
                        wpp['Capacity'] = wpp['Capacity'] + capacity # update capacity
                        temporal_wpps.append(wpp)
            continue # don't add another time to the production data
    else: # after wpps' production has been changed, treat temporal_wpps. Only possible now, because some wpps were needed multiple times
        if len(temporal_wpps) > 0:
            for wpp_new in temporal_wpps:
                # if available, delete the wpp from production data (recognised by GenerationUnitCode and GenerationUnitInstalledCapacity(MW))
                production_data = [wpp for wpp in production_data if not (wpp['Code'] == wpp_new['Code'] and wpp['Capacity'] == wpp_new['Capacity'])]
                production_data.append(wpp_new)
            temporal_wpps = []

    # Index label of the first df_filtered row that carries this ID
    current_index = df_filtered.loc[df_filtered['ID'] == first_id].index[0]

    # Add the data of this wind power plant
    row_data = {
        'Name': row['GenerationUnitName'], # from assignment file
        'ID_The-Wind-Power': first_id, # from assignment file
        'JSON-ID': row['JSON-ID'], # from assignment file
        'Code': row['GenerationUnitCode'], # from assignment file
        'Type': row['GenerationUnitType'], # from assignment file
        'Capacity': capacity, # from assignment file
        'Hub_height': df_filtered.at[current_index, "Hub height"], # from The Wind Power file
        'Commission_date': df_filtered.at[current_index, "Commissioning date"], # from The Wind Power file
        'Number_of_turbines': int(df_filtered.at[current_index, "Number of turbines"]), # from The Wind Power file (value only valid for latest WPPs)
        'Turbine': df_filtered.at[current_index, "Turbine"], # from The Wind Power file
        'Latitude': df_filtered.at[current_index, "Latitude"], # from The Wind Power file
        'Longitude': df_filtered.at[current_index, "Longitude"], # from The Wind Power file
        'Production': production_array # from JSON file
    }

    production_data.append(row_data)

    # keep track of treated generation unit codes
    generation_unit_code_set.discard(row['GenerationUnitCode'])

    # keep track of treated IDs to not try deleting rows twice
    # NOTE(review): the loop variable `id` shadows the built-in of the same name.
    for id in ids_in_row:
        if id in actual_ids:
            actual_ids.discard(id)

print("number WPPs after clustering", len(production_data))

# Save the JSON file
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(production_data, json_file, ensure_ascii=False, indent=4)

print(f"Zusammengeführte JSON-Datei wurde erfolgreich gespeichert unter: {output_file}")

# Convert the list to a DataFrame
df_production_data = pd.DataFrame(production_data)

# Save the DataFrame to an Excel file
df_production_data.to_excel("data/WPPs+production.xlsx", index=False)

<h1>4. Add weather data</h1>

In [None]:
import xarray as xr

import pandas as pd

# The name of wind_speed_file must remain correct during execution of the next cell, because
# the data are lazy loaded. The files are split into years, because data points at large
# indices of too large files can't be loaded into memory during lazy loading.
wind_speed_file = r"C:\Users\alexa\Documents\Webapp\data\weather_history\2015.grib"

# Set the chunk size for the "time" dimension; otherwise the calculation
# wind_speeds = np.sqrt(wind_speeds_month['u100']**2 + wind_speeds_month['v100']**2)
# in the following cell does not work.
wind_speed = xr.open_dataset(wind_speed_file, engine="cfgrib", chunks=dict(time=100))

# Coordinate axes of the dataset: time steps as pandas timestamps, grid axes as arrays
times = pd.to_datetime(wind_speed['time'].values)
latitudes, longitudes = wind_speed['latitude'].values, wind_speed['longitude'].values

In [None]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp2d
import warnings
# Suppress DeprecationWarning messages
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Read the WPP records with their production data into a DataFrame
WPP_production = pd.read_json(path_or_buf="data/WPPs+production.json")

In [None]:
# Attach an interpolated 100 m wind speed to every 2015 production entry of each plant
# and save the result as a new JSON file.
# NOTE(review): interp2d is deprecated in SciPy (hence the DeprecationWarning filter in
# the import cell) - confirm it is still available in the installed version.
for i, wpp in WPP_production.iterrows():
    print(f"Working on wind power plant {i+1}/{len(WPP_production)}")
    lon = wpp['Longitude']
    lat = wpp['Latitude']
    production = wpp['Production']

    # Filter production data to keep only entries from 2015
    production_2015 = [entry for entry in production if '2015' in entry[0]]

    # Interpolate wind speeds for each production entry
    interpolated_production = []
    for j, entry in enumerate(production_2015):
        print(f"Interpolating wind speed for entry {j+1}/{len(production_2015)}")
        time_str, production_value = entry
        time = pd.to_datetime(time_str)
        # Entries without a matching weather time step are dropped
        if time in times:
            time_index = times.get_loc(time)
            # Wind speed magnitude from the u and v components at this time step
            wind_speeds = np.sqrt(wind_speed['u100'][time_index].values**2 + wind_speed['v100'][time_index].values**2)
            spatial_interpolator = interp2d(longitudes, latitudes, wind_speeds, kind='linear')
            wind_speed_value = spatial_interpolator(lon, lat)[0]
            wind_speed_value = round(wind_speed_value, 2)
            interpolated_production.append([time_str, production_value, wind_speed_value])

    # Update the production data with interpolated wind speeds. The row label is the loop
    # variable `i`; the name `index` used here before is not defined in this cell, so it
    # raised a NameError or addressed a stale row left over from another cell.
    WPP_production.at[i, 'Production'] = interpolated_production

# Save the updated production data to a new JSON file
output_file = 'data/WPPs+production+wind.json'
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(WPP_production.to_dict(orient='records'), json_file, ensure_ascii=False, indent=4)

print(f"Updated JSON file with wind speeds saved to: {output_file}")

In [None]:
from scipy.interpolate import RegularGridInterpolator

# Attach a cubically interpolated 100 m wind speed to every 2015 production entry of each
# plant and save the result as a new JSON file.
for index, wpp in WPP_production.iterrows():
    print(f"Working on wind power plant {index+1}/{len(WPP_production)}")
    lon = wpp['Longitude']
    lat = wpp['Latitude']
    production = wpp['Production']

    # Filter production data to keep only entries from 2015
    production_2015 = [entry for entry in production if '2015' in entry[0]]

    # Interpolate wind speeds for each production entry
    interpolated_production = []
    for i, entry in enumerate(production_2015):
        print(f"Interpolating wind speed for entry {i+1}/{len(production_2015)}")
        time_str, production_value = entry
        time = pd.to_datetime(time_str)
        # Entries without a matching weather time step are dropped
        if time in times:
            time_index = times.get_loc(time)
            # Wind speed magnitude from the u and v components at this time step
            wind_speeds = np.sqrt(wind_speed['u100'][time_index].values**2 + wind_speed['v100'][time_index].values**2)

            # Round the wind speeds to reduce precision
            wind_speeds = np.round(wind_speeds, 2)

            # Use RegularGridInterpolator for interpolation
            spatial_interpolator = RegularGridInterpolator((latitudes, longitudes), wind_speeds, method='cubic')

            # Collapse the single-point result to a built-in float before rounding.
            # NumPy arrays and non-float64 NumPy scalars (e.g. float32) are rejected by
            # json.dump, so the value must not stay a NumPy object.
            wind_speed_value = float(np.asarray(spatial_interpolator((lat, lon))).item())
            wind_speed_value = round(wind_speed_value, 2)  # Round to two decimal places
            interpolated_production.append([time_str, production_value, wind_speed_value])

    # Update the production data with interpolated wind speeds
    WPP_production.at[index, 'Production'] = interpolated_production

# Save the updated production data to a new JSON file
output_file = 'data/WPPs+production+wind.json'
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(WPP_production.to_dict(orient='records'), json_file, ensure_ascii=False, indent=4)

print(f"Updated JSON file with wind speeds saved to: {output_file}")

In [194]:
import pandas as pd
import numpy as np
import json
from scipy.interpolate import interp2d

# Interpolated production for all wind power plants and time steps.
#
# The 2015 production entries of every plant are indexed by timestamp once, up front.
# Before, the production list was re-filtered and every timestamp re-parsed for each
# plant at each time step, although neither depends on the time step.
plants = []
for index, wpp in WPP_production.iterrows():
    entries_by_time = {}
    for entry in wpp['Production']:
        # Keep only entries from 2015
        if '2015' in entry[0]:
            time_str, production_value = entry
            entries_by_time.setdefault(pd.to_datetime(time_str), []).append((time_str, production_value))
    plants.append((index, wpp['Longitude'], wpp['Latitude'], entries_by_time))

number_of_plants = len(WPP_production)

for time_index, time in enumerate(times):
    print(f"Processing time step {time_index + 1}/{len(times)}: {time}")

    # Wind speed magnitude from the u and v components at the current time step
    wind_speeds = np.sqrt(wind_speed['u100'][time_index].values**2 + wind_speed['v100'][time_index].values**2)

    # Create the interpolator for the current time step
    # NOTE(review): interp2d is deprecated in SciPy - confirm it is still available in
    # the installed version.
    spatial_interpolator = interp2d(longitudes, latitudes, wind_speeds, kind='linear')

    # Iterate over every wind power plant
    for index, lon, lat, entries_by_time in plants:
        print(f"Working on wind power plant {index+1}/{number_of_plants}")

        # Production entries of this plant recorded exactly at the current time step
        matching_entries = entries_by_time.get(time, [])
        if not matching_entries:
            continue

        # Interpolate the wind speed for each matching production entry
        interpolated_production = []
        for time_str, production_value in matching_entries:
            wind_speed_value = spatial_interpolator(lon, lat)[0]
            wind_speed_value = round(wind_speed_value, 2)
            interpolated_production.append([time_str, production_value, wind_speed_value])

        # Update the production data with the interpolated wind speeds; the column is
        # created on first use with one independent empty list per plant
        if 'InterpolatedProduction' not in WPP_production.columns:
            WPP_production['InterpolatedProduction'] = [[] for _ in range(len(WPP_production))]
        WPP_production.at[index, 'InterpolatedProduction'].extend(interpolated_production)

# Save the updated production data to a new JSON file
output_file = 'data/WPPs+production+wind.json'
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(WPP_production.to_dict(orient='records'), json_file, ensure_ascii=False, indent=4)

print(f"Updated JSON file with wind speeds saved to: {output_file}")

Processing time step 1/8760: 2015-01-01 00:00:00
Working on wind power plant 1/106
Working on wind power plant 2/106
Working on wind power plant 3/106
Working on wind power plant 4/106
Working on wind power plant 5/106
Working on wind power plant 6/106
Working on wind power plant 7/106
Working on wind power plant 8/106


KeyboardInterrupt: 

In [None]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp2d
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Read the WPP table; each month column ("YYYY_MM") holds the production values of that
# month as text, with "[]" meaning no data.
WPP_production = pd.read_excel("data/WPPs+production.xlsx")

# all WPPs
# ids = WPP_production['ID'].values
# lats_plants = WPP_production['Latitude'].values
# lons_plants = WPP_production['Longitude'].values

# only two WPPs for test reasons
ids = WPP_production['ID'].iloc[0:2]
lats_plants = WPP_production['Latitude'].iloc[0:2]
lons_plants = WPP_production['Longitude'].iloc[0:2]

# Months to process, restricted to month columns that exist in the table
months = [f"{year}_{month:02d}" for year in range(2024, 2025) for month in range(10, 11) # range(2015, 2025) for month in range(1, 13)
            if f"{year}_{month:02d}" in WPP_production.columns]

for month in months:
    print(f"month {month}")

    # Time steps of the weather dataset that fall into this month
    month_data = times[times.strftime('%Y_%m') == month]
    if len(month_data) == 0:
        # The loaded weather file does not cover this month; indexing month_data[0]
        # would raise an IndexError
        print(f"no weather data for month {month}, skipping")
        continue
    start = times.get_loc(month_data[0])
    end = times.get_loc(month_data[-1])
    wind_speeds_month = wind_speed.isel(time=slice(start, end+1))
    # this operation requires chunking
    # The magnitude is computed from the month slice, not from the whole dataset:
    # wind_speeds[i] below is indexed relative to the start of the month, so using the
    # full dataset read the wrong time steps and loaded every time step into memory.
    wind_speeds = xr.apply_ufunc(
        np.sqrt,
        wind_speeds_month['u100']**2 + wind_speeds_month['v100']**2,
        dask="parallelized")
    wind_speeds = wind_speeds.load()

    for j in range(len(ids)):
        print(f"Wind Power Plant {j+1} / {len(ids)}")
        lon = lons_plants[j]
        lat = lats_plants[j]
        if WPP_production.at[j, month] != "[]":  # Check if there is production data
            interpolated_wind_speeds = np.zeros(len(month_data))
            for i, _ in enumerate(month_data):
                wind_speeds_i = wind_speeds[i].values
                spatial_interpolator = interp2d(longitudes, latitudes, wind_speeds_i, kind='cubic') # time in first dimension, i. e. wind_speeds[index] = wind_speeds[index, :, :]. Lazy evaluation of spatial_interpolator justifies creation of a new one not only for each time step, but also for each wind power plant
                interpolated_value = spatial_interpolator(lon, lat)[0]
                interpolated_value = round(interpolated_value, 3) # saves memory and computing resources
                interpolated_wind_speeds[i] = interpolated_value

            # Store the data as a NumPy array (less memory-intensive, and it ensures that
            # wind_speed and wind_power have the same length, which is set by wind_speed)
            # NOTE(review): eval() executes whatever text the cell contains; if the cells
            # only hold plain list literals, ast.literal_eval is the safer parser.
            production_data = np.array(eval(WPP_production.at[j, month]))  # previous data as an array
            combined_data = np.stack((production_data, interpolated_wind_speeds), axis=0)
            WPP_production.at[j, month] = combined_data  # store the combined data

In [None]:
# Columns written to the Excel file: plant metadata followed by the month columns
metadata_columns = [
    'ID', 'Name', '2nd name', 'Latitude', 'Longitude', 'Manufacturer', 'Turbine',
    'Hub height', 'Number of turbines', 'Total power', 'Developer', 'Operator',
    'Owner', 'Commissioning date', 'Status',
]
columns_to_save = metadata_columns + ['2024_09', '2024_10']

# Keep the rows whose ID is among `ids`, restricted to the columns above
selected_rows = WPP_production['ID'].isin(ids)
filtered_WPP_production = WPP_production.loc[selected_rows, columns_to_save]

# Write the selection to an Excel file
filtered_WPP_production.to_excel("data/WPPs+production+weather.xlsx", index=False)