In [1]:
import pandas as pd
import holidays

# Feature engineering for the merged German load / generation / weather data.
#
# The individual steps are small helpers that mutate the DataFrame in place
# and return it; the script entry at the bottom wires them together and keeps
# the same global names (file_path, df, de_holidays, output_file_path) that the
# original flat cell defined.


def add_time_features(df):
    """Add calendar columns derived from ``utc_timestamp``.

    Adds ``hour``, ``month``, ``weekday``, ``day``, ``year`` and
    ``is_weekend`` (1 for Saturday/Sunday, else 0).

    NOTE(review): all values are taken from the UTC timestamp, not from
    German local time -- confirm this is intended for hours around midnight.

    Args:
        df: DataFrame with a datetime-typed ``utc_timestamp`` column.

    Returns:
        The same DataFrame object, modified in place.
    """
    ts = df["utc_timestamp"].dt
    df["hour"] = ts.hour
    df["month"] = ts.month
    df["weekday"] = ts.weekday
    df["day"] = ts.day
    df["year"] = ts.year
    # pandas weekday numbering: Monday = 0, ..., Saturday = 5, Sunday = 6.
    df["is_weekend"] = df["weekday"].isin([5, 6]).astype(int)
    return df


def fill_missing_generation(df):
    """Replace missing solar and onshore-wind generation values with 0.

    Args:
        df: DataFrame containing ``DE_solar_generation_actual`` and
            ``DE_wind_onshore_generation_actual``.

    Returns:
        The same DataFrame object, modified in place.
    """
    for column in (
        "DE_solar_generation_actual",
        "DE_wind_onshore_generation_actual",
    ):
        df[column] = df[column].fillna(0)
    return df


def add_energy_features(df):
    """Add ``radiation_total`` and ``residual_load``.

    ``radiation_total`` is direct plus diffuse horizontal radiation;
    ``residual_load`` is the actual load minus solar and onshore-wind
    generation.

    Returns:
        The same DataFrame object, modified in place.
    """
    df["radiation_total"] = (
        df["DE_radiation_direct_horizontal"]
        + df["DE_radiation_diffuse_horizontal"]
    )
    renewable_generation = (
        df["DE_solar_generation_actual"]
        + df["DE_wind_onshore_generation_actual"]
    )
    df["residual_load"] = (
        df["DE_load_actual_entsoe_transparency"] - renewable_generation
    )
    return df


def holiday_years(df):
    """Return the sorted calendar years present in ``utc_timestamp``.

    Used to build the holiday calendar for exactly the years the data
    covers instead of a hard-coded range, so rows outside that range are
    not silently flagged as non-holidays.
    """
    years = df["utc_timestamp"].dt.year.dropna().unique()
    return sorted(int(year) for year in years)


def add_holiday_flag(df, holiday_dates):
    """Add ``is_holiday`` (1 if the timestamp's date is a holiday, else 0).

    NOTE(review): the date is the UTC date of ``utc_timestamp``; confirm that
    matching it against local-calendar holidays is acceptable.

    Args:
        df: DataFrame with a datetime-typed ``utc_timestamp`` column.
        holiday_dates: Iterable of ``datetime.date`` objects. A mapping keyed
            by date (such as a ``holidays`` calendar) works as well, because
            iterating it yields its keys.

    Returns:
        The same DataFrame object, modified in place.
    """
    # Materialize the dates explicitly: ``isin`` only compares against the
    # values it is handed, it never looks dates up in the calendar lazily.
    known_dates = set(holiday_dates)
    df["is_holiday"] = df["utc_timestamp"].dt.date.isin(known_dates).astype(int)
    return df


if __name__ == "__main__":
    # Load the merged data.
    file_path = "../data/merged_data_de.csv"
    df = pd.read_csv(file_path, parse_dates=["utc_timestamp"])

    # Step 1: extract time features.
    add_time_features(df)

    # Step 2: fill missing values (PV & wind -> 0).
    fill_missing_generation(df)

    # Step 3: new features -- total radiation and residual load.
    add_energy_features(df)

    # Nationwide German public holidays for every year present in the data.
    de_holidays = holidays.Germany(years=holiday_years(df))

    # New column: holiday = 1, otherwise 0.
    add_holiday_flag(df, de_holidays)

    # Show the first rows of the processed data.
    print(df.head())

    # Save the DataFrame to a new CSV file.
    output_file_path = "../data/processed_data_de.csv"
    df.to_csv(output_file_path, index=False)


              utc_timestamp  DE_load_actual_entsoe_transparency  \
0 2015-01-01 00:00:00+00:00                             41151.0   
1 2015-01-01 01:00:00+00:00                             40135.0   
2 2015-01-01 02:00:00+00:00                             39106.0   
3 2015-01-01 03:00:00+00:00                             38765.0   
4 2015-01-01 04:00:00+00:00                             38941.0   

   DE_solar_generation_actual  DE_wind_onshore_generation_actual  \
0                         0.0                             8336.0   
1                         0.0                             8540.0   
2                         0.0                             8552.0   
3                         0.0                             8643.0   
4                         0.0                             8712.0   

   DE_temperature  DE_radiation_direct_horizontal  \
0          -0.981                             0.0   
1          -1.035                             0.0   
2          -1.109            