In [1]:
import pandas as pd

### 1. Clean Real Demand Data

##### September_2025_data

In [8]:
# Read the semicolon-separated September 2025 real-demand export and preview it
real = pd.read_csv(
    "./RealDemand_Sept2025.csv",
    sep=";",
)
real.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime
0,1293,Real demand,,,23820.0,2025-09-01T00:00:00+02:00
1,1293,Real demand,,,22533.583,2025-09-01T01:00:00+02:00
2,1293,Real demand,,,21515.0,2025-09-01T02:00:00+02:00
3,1293,Real demand,,,20827.833,2025-09-01T03:00:00+02:00
4,1293,Real demand,,,20696.667,2025-09-01T04:00:00+02:00


In [9]:
# Parse the timestamp strings, then keep only the timestamp and the demand
# value, relabelling the value column as "NLD".
real["datetime"] = pd.to_datetime(real["datetime"])
real = real.loc[:, ["datetime", "value"]].rename(columns={"value": "NLD"})

# Layout follows demand.csv: the column labels come first, and the single row
# built here ("techs" / "demand_power") sits directly beneath them.
header = pd.DataFrame({"nodes": ["techs"], "NLD": ["demand_power"]})

# Time-series rows: timestamp rendered as "YYYY-MM-DD HH:MM:SS" text (the UTC
# offset is not written) next to the demand value.
stamp_text = real["datetime"].dt.strftime("%Y-%m-%d %H:%M:%S")
ts = pd.DataFrame({"nodes": stamp_text, "NLD": real["NLD"]})

# Stack the header row on top of the series with a fresh 0..n index
real_demand = pd.concat([header, ts], ignore_index=True)

# Write the result without the index column and preview it
real_demand.to_csv("./demand_sept2025.csv", index=False)

real_demand.head(10)


Unnamed: 0,nodes,NLD
0,techs,demand_power
1,2025-09-01 00:00:00,23820.0
2,2025-09-01 01:00:00,22533.583
3,2025-09-01 02:00:00,21515.0
4,2025-09-01 03:00:00,20827.833
5,2025-09-01 04:00:00,20696.667
6,2025-09-01 05:00:00,21107.833
7,2025-09-01 06:00:00,23696.917
8,2025-09-01 07:00:00,26555.25
9,2025-09-01 08:00:00,27857.25


##### October2024_September2025_year_data

In [30]:
# Read the semicolon-separated 2024/25 real-demand export and preview it
real_year = pd.read_csv(
    "./RealDemand_2425.csv",
    sep=";",
)
real_year.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime
0,1293,Real demand,,,23513.333333,2024-10-01T00:00:00+02:00
1,1293,Real demand,,,22478.083333,2024-10-01T01:00:00+02:00
2,1293,Real demand,,,21627.416667,2024-10-01T02:00:00+02:00
3,1293,Real demand,,,21228.416667,2024-10-01T03:00:00+02:00
4,1293,Real demand,,,21190.583333,2024-10-01T04:00:00+02:00


In [31]:
# Normalise the offset-aware timestamp strings: parse everything as UTC
# (entries that cannot be parsed become NaT because of errors="coerce"),
# shift to Europe/Madrid wall-clock time, then drop the timezone so the
# column holds naive datetimes.
# NOTE(review): naive local time repeats the autumn DST fall-back hour and has
# no spring-forward hour — confirm the downstream consumer tolerates that.
parsed_utc = pd.to_datetime(real_year["datetime"], errors="coerce", utc=True)
real_year["datetime"] = parsed_utc.dt.tz_convert("Europe/Madrid").dt.tz_localize(None)
real_year.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime
0,1293,Real demand,,,23513.333333,2024-10-01 00:00:00
1,1293,Real demand,,,22478.083333,2024-10-01 01:00:00
2,1293,Real demand,,,21627.416667,2024-10-01 02:00:00
3,1293,Real demand,,,21228.416667,2024-10-01 03:00:00
4,1293,Real demand,,,21190.583333,2024-10-01 04:00:00


In [32]:
# Keep just the timestamp and the demand value, relabelling the value as "NLD"
real_year = real_year.loc[:, ["datetime", "value"]].rename(columns={"value": "NLD"})
real_year.head()

Unnamed: 0,datetime,NLD
0,2024-10-01 00:00:00,23513.333333
1,2024-10-01 01:00:00,22478.083333
2,2024-10-01 02:00:00,21627.416667
3,2024-10-01 03:00:00,21228.416667
4,2024-10-01 04:00:00,21190.583333


In [33]:
# Keep only the first row for each timestamp; later repeats are discarded
is_repeat = real_year.duplicated(subset=["datetime"], keep="first")
real_year = real_year[~is_repeat]

In [34]:
# Layout follows demand.csv: the column labels come first, and the single row
# built here ("techs" / "demand_power") sits directly beneath them.
header = pd.DataFrame({"nodes": ["techs"], "NLD": ["demand_power"]})

# Time-series rows: timestamp rendered as "YYYY-MM-DD HH:MM:SS" text next to
# the demand value.
year_stamp_text = real_year["datetime"].dt.strftime("%Y-%m-%d %H:%M:%S")
ts = pd.DataFrame({"nodes": year_stamp_text, "NLD": real_year["NLD"]})

# Stack the header row on top of the series with a fresh 0..n index
real_demand_year = pd.concat([header, ts], ignore_index=True)

# Write the result without the index column and preview it
real_demand_year.to_csv("./demand_year.csv", index=False)

real_demand_year.head(10)

Unnamed: 0,nodes,NLD
0,techs,demand_power
1,2024-10-01 00:00:00,23513.333333
2,2024-10-01 01:00:00,22478.083333
3,2024-10-01 02:00:00,21627.416667
4,2024-10-01 03:00:00,21228.416667
5,2024-10-01 04:00:00,21190.583333
6,2024-10-01 05:00:00,21659.416667
7,2024-10-01 06:00:00,24117.75
8,2024-10-01 07:00:00,27437.333333
9,2024-10-01 08:00:00,28694.0


### 2. Clean Renewables Data

##### September_2025_data

In [20]:
# Read the semicolon-separated September 2025 solar forecast export;
# show the last rows so the final timestamp and row count are visible
solar = pd.read_csv(
    "./ForecastGenerationSolar_Sept2025.csv",
    sep=";",
)
solar.tail()

Unnamed: 0,id,name,geoid,geoname,value,datetime
714,10034,Forecast generation Solar,,,3686.0,2025-09-30T19:00:00+02:00
715,10034,Forecast generation Solar,,,357.0,2025-09-30T20:00:00+02:00
716,10034,Forecast generation Solar,,,238.25,2025-09-30T21:00:00+02:00
717,10034,Forecast generation Solar,,,315.75,2025-09-30T22:00:00+02:00
718,10034,Forecast generation Solar,,,291.75,2025-09-30T23:00:00+02:00


In [21]:
# Read the semicolon-separated September 2025 onshore-wind export;
# show the last rows so the final timestamp and row count are visible
wind = pd.read_csv(
    "./MeasuredOnshoreWindGeneration_Sept2025.csv",
    sep=";",
)
wind.tail()

Unnamed: 0,id,name,geoid,geoname,value,datetime
715,1159,Measured Onshore wind generation,,,2857.048,2025-09-30T19:00:00+02:00
716,1159,Measured Onshore wind generation,,,3235.416,2025-09-30T20:00:00+02:00
717,1159,Measured Onshore wind generation,,,4015.845,2025-09-30T21:00:00+02:00
718,1159,Measured Onshore wind generation,,,4610.186,2025-09-30T22:00:00+02:00
719,1159,Measured Onshore wind generation,,,4986.58,2025-09-30T23:00:00+02:00


In [22]:
# Add a constant 'installed_capacity' column and derive 'CF_solar' as
# value / installed_capacity.
# NOTE(review): 38659.3 is presumably the installed solar capacity in the same
# unit as `value` — confirm the source and unit of this figure.
solar = solar.assign(
    installed_capacity=38659.3,
    CF_solar=lambda frame: frame["value"] / frame["installed_capacity"],
)
solar.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime,installed_capacity,CF_solar
0,10034,Forecast generation Solar,,,393.75,2025-09-01T00:00:00+02:00,38659.3,0.010185
1,10034,Forecast generation Solar,,,155.5,2025-09-01T01:00:00+02:00,38659.3,0.004022
2,10034,Forecast generation Solar,,,166.25,2025-09-01T02:00:00+02:00,38659.3,0.0043
3,10034,Forecast generation Solar,,,157.0,2025-09-01T03:00:00+02:00,38659.3,0.004061
4,10034,Forecast generation Solar,,,152.25,2025-09-01T04:00:00+02:00,38659.3,0.003938


In [23]:
# Add a constant 'installed_capacity' column and derive 'CF_wind' as
# value / installed_capacity.
# NOTE(review): 33010.6 is presumably the installed onshore-wind capacity in
# the same unit as `value` — confirm the source and unit of this figure.
wind = wind.assign(
    installed_capacity=33010.6,
    CF_wind=lambda frame: frame["value"] / frame["installed_capacity"],
)
wind.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime,installed_capacity,CF_wind
0,1159,Measured Onshore wind generation,,,11362.571,2025-09-01T00:00:00+02:00,33010.6,0.34421
1,1159,Measured Onshore wind generation,,,10791.856,2025-09-01T01:00:00+02:00,33010.6,0.326921
2,1159,Measured Onshore wind generation,,,10217.945,2025-09-01T02:00:00+02:00,33010.6,0.309535
3,1159,Measured Onshore wind generation,,,9947.512,2025-09-01T03:00:00+02:00,33010.6,0.301343
4,1159,Measured Onshore wind generation,,,9868.246,2025-09-01T04:00:00+02:00,33010.6,0.298942


In [27]:
# Build one frame with datetime, CF_solar and CF_wind, pairing the two series
# by timestamp rather than by row position. The two exports do not contain the
# same number of rows (the final hour is row 718 in solar but row 719 in wind),
# so positional pairing attaches wind values to the wrong hour after the first
# missing solar record and leaves a trailing row without a datetime.
solar_cf = solar[["datetime", "CF_solar"]].drop_duplicates(subset="datetime", keep="first")
wind_cf = wind[["datetime", "CF_wind"]].drop_duplicates(subset="datetime", keep="first")

# Outer join keeps every hour present in either source; an hour missing from
# one source appears as NaN in that column instead of silently shifting values.
renewables_df = solar_cf.merge(wind_cf, on="datetime", how="outer")

# Restore chronological order using the parsed instant: the raw values are
# strings carrying a UTC offset, so plain text ordering is not reliable.
renewables_df = renewables_df.sort_values(
    "datetime",
    key=lambda col: pd.to_datetime(col, utc=True),
    kind="stable",
    ignore_index=True,
)

renewables_df.head()

Unnamed: 0,datetime,CF_solar,CF_wind
0,2025-09-01T00:00:00+02:00,0.010185,0.34421
1,2025-09-01T01:00:00+02:00,0.004022,0.326921
2,2025-09-01T02:00:00+02:00,0.0043,0.309535
3,2025-09-01T03:00:00+02:00,0.004061,0.301343
4,2025-09-01T04:00:00+02:00,0.003938,0.298942


In [28]:
# Parse the timestamp strings and relabel the capacity-factor columns for the
# export; "NLD1" is a temporary label so both value columns can coexist.
renewables_df["datetime"] = pd.to_datetime(renewables_df["datetime"])
renewables_df = renewables_df.loc[:, ["datetime", "CF_solar", "CF_wind"]].rename(
    columns={"CF_solar": "NLD", "CF_wind": "NLD1"}
)

# Layout follows demand.csv: this single row sits directly beneath the column
# labels and names the technology of each value column.
header = pd.DataFrame({"nodes": ["techs"], "NLD": ["solar_pv"], "NLD1": ["wind_onshore"]})

# Time-series rows: timestamp rendered as "YYYY-MM-DD HH:MM:SS" text next to
# the two capacity-factor columns.
ts = renewables_df.rename(columns={"datetime": "nodes"}).assign(
    nodes=lambda frame: frame["nodes"].dt.strftime("%Y-%m-%d %H:%M:%S")
)

# Stack header and series with a fresh index, then give the second value
# column the label "NLD" as well (two columns end up sharing that label).
renewables_final = (
    pd.concat([header, ts], ignore_index=True)
    .rename(columns={"NLD1": "NLD"})
)

# Write the result without the index column and preview it
renewables_final.to_csv("./renewables_sept2025.csv", index=False)

renewables_final.head(10)

Unnamed: 0,nodes,NLD,NLD.1
0,techs,solar_pv,wind_onshore
1,2025-09-01 00:00:00,0.010185,0.34421
2,2025-09-01 01:00:00,0.004022,0.326921
3,2025-09-01 02:00:00,0.0043,0.309535
4,2025-09-01 03:00:00,0.004061,0.301343
5,2025-09-01 04:00:00,0.003938,0.298942
6,2025-09-01 05:00:00,0.001449,0.294695
7,2025-09-01 06:00:00,0.001345,0.294978
8,2025-09-01 07:00:00,0.002684,0.278224
9,2025-09-01 08:00:00,0.092985,0.252151


##### October2024_September2025_year_data

In [35]:
# Read the semicolon-separated 2024/25 solar forecast export and preview it
solar_year = pd.read_csv(
    "./ForecastGenerationSolar_2425.csv",
    sep=";",
)
solar_year.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime
0,10034,Forecast generation Solar,,,519.25,2024-10-01T00:00:00+02:00
1,10034,Forecast generation Solar,,,500.7,2024-10-01T01:00:00+02:00
2,10034,Forecast generation Solar,,,465.825,2024-10-01T02:00:00+02:00
3,10034,Forecast generation Solar,,,434.225,2024-10-01T03:00:00+02:00
4,10034,Forecast generation Solar,,,313.325,2024-10-01T04:00:00+02:00


In [36]:
# Read the semicolon-separated 2024/25 onshore-wind export and preview it
wind_year = pd.read_csv(
    "./MeasuredOnshoreWindGeneration_2425.csv",
    sep=";",
)
wind_year.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime
0,1159,Measured Onshore wind generation,,,4481.385,2024-10-01T00:00:00+02:00
1,1159,Measured Onshore wind generation,,,4823.612,2024-10-01T01:00:00+02:00
2,1159,Measured Onshore wind generation,,,5145.524,2024-10-01T02:00:00+02:00
3,1159,Measured Onshore wind generation,,,5508.119,2024-10-01T03:00:00+02:00
4,1159,Measured Onshore wind generation,,,5927.815,2024-10-01T04:00:00+02:00


In [37]:
# Add a constant 'installed_capacity' column and derive 'CF_solar' as
# value / installed_capacity.
# NOTE(review): one capacity figure (38659.3) is applied to the whole period —
# confirm its source/unit and that a constant denominator is intended.
solar_year = solar_year.assign(
    installed_capacity=38659.3,
    CF_solar=lambda frame: frame["value"] / frame["installed_capacity"],
)
solar_year.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime,installed_capacity,CF_solar
0,10034,Forecast generation Solar,,,519.25,2024-10-01T00:00:00+02:00,38659.3,0.013431
1,10034,Forecast generation Solar,,,500.7,2024-10-01T01:00:00+02:00,38659.3,0.012952
2,10034,Forecast generation Solar,,,465.825,2024-10-01T02:00:00+02:00,38659.3,0.012049
3,10034,Forecast generation Solar,,,434.225,2024-10-01T03:00:00+02:00,38659.3,0.011232
4,10034,Forecast generation Solar,,,313.325,2024-10-01T04:00:00+02:00,38659.3,0.008105


In [38]:
# Add a constant 'installed_capacity' column and derive 'CF_wind' as
# value / installed_capacity.
# NOTE(review): one capacity figure (33010.6) is applied to the whole period —
# confirm its source/unit and that a constant denominator is intended.
wind_year = wind_year.assign(
    installed_capacity=33010.6,
    CF_wind=lambda frame: frame["value"] / frame["installed_capacity"],
)
wind_year.head()

Unnamed: 0,id,name,geoid,geoname,value,datetime,installed_capacity,CF_wind
0,1159,Measured Onshore wind generation,,,4481.385,2024-10-01T00:00:00+02:00,33010.6,0.135756
1,1159,Measured Onshore wind generation,,,4823.612,2024-10-01T01:00:00+02:00,33010.6,0.146123
2,1159,Measured Onshore wind generation,,,5145.524,2024-10-01T02:00:00+02:00,33010.6,0.155875
3,1159,Measured Onshore wind generation,,,5508.119,2024-10-01T03:00:00+02:00,33010.6,0.166859
4,1159,Measured Onshore wind generation,,,5927.815,2024-10-01T04:00:00+02:00,33010.6,0.179573


In [39]:
# Build one frame with datetime, CF_solar and CF_wind, pairing the two series
# by timestamp rather than by row position. Row position is only a safe key if
# both exports contain exactly the same hours in the same order; a single
# missing or extra record in either file would otherwise attach wind values to
# the wrong hour for every row after it.
solar_cf_year = solar_year[["datetime", "CF_solar"]].drop_duplicates(subset="datetime", keep="first")
wind_cf_year = wind_year[["datetime", "CF_wind"]].drop_duplicates(subset="datetime", keep="first")

# Outer join keeps every hour present in either source; an hour missing from
# one source appears as NaN in that column instead of silently shifting values.
renewables_df_year = solar_cf_year.merge(wind_cf_year, on="datetime", how="outer")

# Restore chronological order using the parsed instant: the raw values are
# strings carrying a UTC offset, so plain text ordering is not reliable
# (e.g. the same wall-clock hour written with two different offsets).
renewables_df_year = renewables_df_year.sort_values(
    "datetime",
    key=lambda col: pd.to_datetime(col, utc=True),
    kind="stable",
    ignore_index=True,
)

renewables_df_year.head()

Unnamed: 0,datetime,CF_solar,CF_wind
0,2024-10-01T00:00:00+02:00,0.013431,0.135756
1,2024-10-01T01:00:00+02:00,0.012952,0.146123
2,2024-10-01T02:00:00+02:00,0.012049,0.155875
3,2024-10-01T03:00:00+02:00,0.011232,0.166859
4,2024-10-01T04:00:00+02:00,0.008105,0.179573


In [40]:
# Normalise the offset-aware timestamp strings: parse everything as UTC
# (entries that cannot be parsed become NaT because of errors="coerce"),
# shift to Europe/Madrid wall-clock time, then drop the timezone so the
# column holds naive datetimes.
# NOTE(review): naive local time repeats the autumn DST fall-back hour and has
# no spring-forward hour — confirm the downstream consumer tolerates that.
parsed_utc_year = pd.to_datetime(renewables_df_year["datetime"], errors="coerce", utc=True)
renewables_df_year["datetime"] = parsed_utc_year.dt.tz_convert("Europe/Madrid").dt.tz_localize(None)
renewables_df_year.head()

Unnamed: 0,datetime,CF_solar,CF_wind
0,2024-10-01 00:00:00,0.013431,0.135756
1,2024-10-01 01:00:00,0.012952,0.146123
2,2024-10-01 02:00:00,0.012049,0.155875
3,2024-10-01 03:00:00,0.011232,0.166859
4,2024-10-01 04:00:00,0.008105,0.179573


In [41]:
# Keep only the first row for each timestamp; later repeats are discarded
is_repeat_year = renewables_df_year.duplicated(subset=["datetime"], keep="first")
renewables_df_year = renewables_df_year[~is_repeat_year]

In [42]:
# Keep the timestamp and both capacity factors, relabelled for the export;
# "NLD1" is a temporary label so both value columns can coexist.
renewables_df_year = renewables_df_year.loc[:, ["datetime", "CF_solar", "CF_wind"]].rename(
    columns={"CF_solar": "NLD", "CF_wind": "NLD1"}
)
renewables_df_year.head()

Unnamed: 0,datetime,NLD,NLD1
0,2024-10-01 00:00:00,0.013431,0.135756
1,2024-10-01 01:00:00,0.012952,0.146123
2,2024-10-01 02:00:00,0.012049,0.155875
3,2024-10-01 03:00:00,0.011232,0.166859
4,2024-10-01 04:00:00,0.008105,0.179573


In [43]:
# Layout follows demand.csv: this single row sits directly beneath the column
# labels and names the technology of each value column.
header = pd.DataFrame({"nodes": ["techs"], "NLD": ["solar_pv"], "NLD1": ["wind_onshore"]})

# Time-series rows: timestamp rendered as "YYYY-MM-DD HH:MM:SS" text next to
# the two capacity-factor columns.
year_nodes = renewables_df_year["datetime"].dt.strftime("%Y-%m-%d %H:%M:%S")
ts = pd.DataFrame({
    "nodes": year_nodes,
    "NLD": renewables_df_year["NLD"],
    "NLD1": renewables_df_year["NLD1"],
})

# Stack the header row on top of the series with a fresh 0..n index
renewables_final_year = pd.concat([header, ts], ignore_index=True)

renewables_final_year.head(10)

Unnamed: 0,nodes,NLD,NLD1
0,techs,solar_pv,wind_onshore
1,2024-10-01 00:00:00,0.013431,0.135756
2,2024-10-01 01:00:00,0.012952,0.146123
3,2024-10-01 02:00:00,0.012049,0.155875
4,2024-10-01 03:00:00,0.011232,0.166859
5,2024-10-01 04:00:00,0.008105,0.179573
6,2024-10-01 05:00:00,0.004536,0.179832
7,2024-10-01 06:00:00,0.002239,0.170631
8,2024-10-01 07:00:00,0.001225,0.16792
9,2024-10-01 08:00:00,0.024786,0.160734


In [44]:
# Relabel NLD1 as NLD; the frame ends up with two columns sharing the label "NLD"
renewables_final_year = renewables_final_year.rename({"NLD1": "NLD"}, axis="columns")
renewables_final_year.head()

Unnamed: 0,nodes,NLD,NLD.1
0,techs,solar_pv,wind_onshore
1,2024-10-01 00:00:00,0.013431,0.135756
2,2024-10-01 01:00:00,0.012952,0.146123
3,2024-10-01 02:00:00,0.012049,0.155875
4,2024-10-01 03:00:00,0.011232,0.166859


In [45]:
# Write the yearly renewables table to disk, omitting the index column
renewables_final_year.to_csv(path_or_buf="./renewables_year.csv", index=False)