In [59]:
# Import necessary libraries
import pandas as pd
from src.utils.filter_conditions_monthly import filter_conditions_monthly
from src.utils.functions import (
    calculate_industry_demand_from_industry_power,
    calculate_totals_for_countries,
    calculate_totals_monthly,
    calculate_industry_from_power_monthly
)

# Country lists used further down the notebook.
# Passed to calculate_industry_from_power_monthly in step 6.
CALCULATE_INDUSTRY_DEMAND_COUNTRIES = [
    'HU',
    'LU',
    'PT',
    'RO',
]
# Passed to calculate_totals_monthly in step 7.
CALCULATE_COUNTRY_TOTALS = [
    'BE', 'FR', 'HU', 'IT',
    'LU', 'NL', 'PT', 'RO',
]

In [60]:
# 1. Read the daily demand file and roll it up to one row per month
df = pd.read_csv("src/data/processed/daily_demand_all.csv")

# Only the first 10 characters of the date string are parsed; the rest is discarded.
day_stamps = pd.to_datetime(df['date'].astype(str).str[:10])
df = df.assign(date=day_stamps, month=day_stamps.dt.month, year=day_stamps.dt.year)

# Sum demand per country/type/source/year/month, then divide by 1e9
# (described in this notebook as the conversion to TWh).
# NOTE(review): the division is applied to every source alike — confirm all
# sources in the daily file share the same input unit.
monthly_keys = ['country', 'type', 'source', 'year', 'month']
df = df.groupby(monthly_keys, as_index=False)['demand'].sum()
df['demand'] = df['demand'] / 1_000_000_000

# Show a sample of the monthly frame
print("Daily demand data after initial processing:")
display(df.head())

Daily demand data after initial processing:


Unnamed: 0,country,type,source,year,month,demand
0,AT,power,energy-charts,2018,12,0.002566
1,AT,power,energy-charts,2019,1,3.147638
2,AT,power,energy-charts,2019,2,2.488796
3,AT,power,energy-charts,2019,3,1.331403
4,AT,power,energy-charts,2019,4,1.05502


In [61]:
# 2. Load and process Eurostat data
# Reads the historic Eurostat file, derives month/year from its date column and
# appends the rows to df (which gains a 'date' column; existing rows get NaT there).
eurostat_df = pd.read_csv("src/data/processed/eurostat_historic.csv")
eurostat_df['date'] = pd.to_datetime(eurostat_df['date'])
eurostat_df['month'] = eurostat_df['date'].dt.month
eurostat_df['year'] = eurostat_df['date'].dt.year

# The append rebinds df, so running this cell twice would add the Eurostat rows
# twice and step 5 would then sum them twice. Dropping exact duplicate rows
# (the same guard step 6 uses) keeps the cell safe to re-run.
df = (
    pd.concat([df, eurostat_df], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

print("\nAfter adding Eurostat data:")
display(df[df['source'] == 'eurostat'].head())


After adding Eurostat data:


Unnamed: 0,country,type,source,year,month,demand,date
5135,AT,total,eurostat,2016,1,12.491121,2016-01-01
5136,BE,total,eurostat,2016,1,21.026683,2016-01-01
5137,BG,total,eurostat,2016,1,4.403337,2016-01-01
5138,CY,total,eurostat,2016,1,0.0,2016-01-01
5139,CZ,total,eurostat,2016,1,12.906955,2016-01-01


In [62]:
# Inspect the 2024 rows whose source is bundesnetzagentur
is_bnetza_2024 = df['source'].eq('bundesnetzagentur') & df['year'].eq(2024)
display(df[is_bnetza_2024])

Unnamed: 0,country,type,source,year,month,demand,date
833,DE,household,bundesnetzagentur,2024,1,6.3271e-08,NaT
834,DE,household,bundesnetzagentur,2024,2,4.1267e-08,NaT
835,DE,household,bundesnetzagentur,2024,3,3.6797e-08,NaT
836,DE,household,bundesnetzagentur,2024,4,2.604e-08,NaT
837,DE,household,bundesnetzagentur,2024,5,1.116e-08,NaT
838,DE,household,bundesnetzagentur,2024,6,8.88e-09,NaT
839,DE,household,bundesnetzagentur,2024,7,6.696e-09,NaT
840,DE,household,bundesnetzagentur,2024,8,5.89e-09,NaT
841,DE,household,bundesnetzagentur,2024,9,1.059e-08,NaT
842,DE,household,bundesnetzagentur,2024,10,2.3963e-08,NaT


In [58]:
# 3. Load and process BNetzA data
# Reads the German household history, derives month/year from its date column
# and appends the key/demand columns to df.
bnetza_df = pd.read_csv("src/data/processed/germany_household_historic.csv")
bnetza_df['date'] = pd.to_datetime(bnetza_df['date'])
bnetza_df['month'] = bnetza_df['date'].dt.month
bnetza_df['year'] = bnetza_df['date'].dt.year
bnetza_append = bnetza_df[['country', 'type', 'source', 'year', 'month', 'demand']]

# The append rebinds df, so running this cell twice would add the household rows
# twice and step 5 would then sum them twice. Dropping exact duplicate rows
# (the same guard step 6 uses) keeps the cell safe to re-run.
# NOTE(review): df appears to already hold bundesnetzagentur rows from the daily
# file at a much smaller scale; they are not exact duplicates, so they are kept
# and summed with these rows in step 5 — confirm that is intended.
df = (
    pd.concat([df, bnetza_append], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

print("\nAfter adding BNetzA data:")
display(df[(df['source'] == 'bundesnetzagentur') & (df['year'] == 2024)])


After adding BNetzA data:


Unnamed: 0,country,type,source,year,month,demand,date
833,DE,household,bundesnetzagentur,2024,1,6.3271e-08,NaT
834,DE,household,bundesnetzagentur,2024,2,4.1267e-08,NaT
835,DE,household,bundesnetzagentur,2024,3,3.6797e-08,NaT
836,DE,household,bundesnetzagentur,2024,4,2.604e-08,NaT
837,DE,household,bundesnetzagentur,2024,5,1.116e-08,NaT
838,DE,household,bundesnetzagentur,2024,6,8.88e-09,NaT
839,DE,household,bundesnetzagentur,2024,7,6.696e-09,NaT
840,DE,household,bundesnetzagentur,2024,8,5.89e-09,NaT
841,DE,household,bundesnetzagentur,2024,9,1.059e-08,NaT
842,DE,household,bundesnetzagentur,2024,10,2.3963e-08,NaT


In [21]:
# 4. Apply filtering conditions
# Keep only the (country, type, source) combinations listed in
# filter_conditions_monthly, via an inner join on those three columns.
# The conditions are de-duplicated first: a repeated triple would otherwise make
# the join emit every matching row once per repeat and inflate the sums in step 5.
conditions_df = pd.DataFrame(
    filter_conditions_monthly, columns=['country', 'type', 'source']
).drop_duplicates()
filtered_df = df.merge(conditions_df, on=['country', 'type', 'source'], how='inner')

print("\nAfter applying filters:")
display(filtered_df.head())


After applying filters:


Unnamed: 0,country,type,source,year,month,demand,date
0,AT,power,energy-charts,2018,12,0.002566,NaT
1,AT,power,energy-charts,2019,1,3.147638,NaT
2,AT,power,energy-charts,2019,2,2.488796,NaT
3,AT,power,energy-charts,2019,3,1.331403,NaT
4,AT,power,energy-charts,2019,4,1.05502,NaT


In [22]:
# 5. Aggregate data: one row per country/type/year/month
def _label_sources(sources):
    """Return the single source of a group, or the sorted distinct sources joined by ', '."""
    distinct = set(sources)
    if len(distinct) > 1:
        return ', '.join(sorted(distinct))
    return sources.iloc[0]


group_keys = ['country', 'type', 'year', 'month']
aggregated_df = filtered_df.groupby(group_keys, as_index=False).agg(
    {'demand': 'sum', 'source': _label_sources}
)

print("\nAggregated data:")
display(aggregated_df.head())


Aggregated data:


Unnamed: 0,country,type,year,month,demand,source
0,AT,power,2018,12,0.002566,energy-charts
1,AT,power,2019,1,3.147638,energy-charts
2,AT,power,2019,2,2.488796,energy-charts
3,AT,power,2019,3,1.331403,energy-charts
4,AT,power,2019,4,1.05502,energy-charts


In [23]:
# 6. Derive industry demand for the configured countries and merge it back in
industry_df = calculate_industry_from_power_monthly(aggregated_df, CALCULATE_INDUSTRY_DEMAND_COUNTRIES)

# Append, drop exact duplicate rows, then order by the key columns with a fresh index
sort_keys = ['country', 'type', 'year', 'month']
updated_df = (
    pd.concat([aggregated_df, industry_df], ignore_index=True)
    .drop_duplicates()
    .sort_values(by=sort_keys)
    .reset_index(drop=True)
)

print("\nAfter calculating industry demand:")
display(updated_df.head())

Filtering data for countries: ['HU', 'LU', 'PT', 'RO']
Filtered DataFrame shape: (588, 6)
Pivoted DataFrame shape: (296, 5)
Final industry DataFrame shape: (292, 6)

After calculating industry demand:


Unnamed: 0,country,type,year,month,demand,source
0,AT,power,2018,12,0.002566,energy-charts
1,AT,power,2019,1,3.147638,energy-charts
2,AT,power,2019,2,2.488796,energy-charts
3,AT,power,2019,3,1.331403,energy-charts
4,AT,power,2019,4,1.05502,energy-charts


In [24]:
# 7. Calculate country totals
# Passes the current frame and the CALCULATE_COUNTRY_TOTALS list to
# calculate_totals_monthly and rebinds updated_df to whatever it returns.
# NOTE(review): because the result overwrites its own input, running this cell
# twice feeds the helper its previous output — confirm the helper tolerates that.
updated_df = calculate_totals_monthly(updated_df, CALCULATE_COUNTRY_TOTALS)

print("\nAfter calculating country totals:")
display(updated_df.head())


After calculating country totals:


Unnamed: 0,country,type,year,month,demand,source
0,AT,power,2018,12,0.002566,energy-charts
1,AT,power,2019,1,3.147638,energy-charts
2,AT,power,2019,2,2.488796,energy-charts
3,AT,power,2019,3,1.331403,energy-charts
4,AT,power,2019,4,1.05502,energy-charts


In [33]:
# Inspect the German pivot table (one column per demand type plus industry_demand).
# NOTE: german_pivot_df is only assigned in step 8 below, so on a fresh kernel
# this cell must be run after that step or it raises NameError.
german_pivot_df

type,country,year,month,household,power,total,industry_demand
0,DE,2016,10,,,82.648190,
1,DE,2016,11,,,107.700065,
2,DE,2016,12,,,120.862753,
3,DE,2017,1,,,147.632871,
4,DE,2017,2,,,108.025641,
...,...,...,...,...,...,...,...
95,DE,2024,9,21.180,5.053846,44.644423,18.410577
96,DE,2024,10,47.926,7.708579,63.652850,8.018271
97,DE,2024,11,87.900,14.122040,96.622909,-5.399131
98,DE,2024,12,109.244,13.324575,109.933703,-12.634872


In [25]:
# 8. Calculate German industry demand
# Pivot the DE rows so each demand type becomes a column, one row per month.
german_filter_df = updated_df[updated_df['country'] == 'DE']
german_pivot_df = german_filter_df.pivot_table(
    index=['country', 'year', 'month'],
    columns='type',
    values='demand',
    aggfunc='sum'
).reset_index()

# Industry is the residual: total minus household minus power. Months where any
# of the three is missing yield NaN and are dropped.
# NOTE(review): the residual can be negative when household + power exceed the
# total (the pivot output shows this for 2024-11 and 2024-12); such rows are
# kept unchanged — confirm whether they should be.
german_pivot_df['industry_demand'] = german_pivot_df['total'] - german_pivot_df['household'] - german_pivot_df['power']
german_industry_df = (
    german_pivot_df[['country', 'year', 'month', 'industry_demand']]
    .dropna(subset=['industry_demand'])
    .rename(columns={'industry_demand': 'demand'})
    .assign(type='industry', source='calculated')
)

# The append rebinds updated_df, so running this cell twice would add the same
# calculated rows again. Dropping exact duplicate rows (the same guard step 6
# uses) keeps the cell safe to re-run.
updated_df = (
    pd.concat([updated_df, german_industry_df], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

print("\nAfter calculating German industry demand:")
display(updated_df[updated_df['country'] == 'DE'].head())



After calculating German industry demand:


Unnamed: 0,country,type,year,month,demand,source
768,DE,household,2018,1,118.42,bundesnetzagentur
769,DE,household,2018,2,136.696,bundesnetzagentur
770,DE,household,2018,3,120.962,bundesnetzagentur
771,DE,household,2018,4,43.92,bundesnetzagentur
772,DE,household,2018,5,23.188,bundesnetzagentur


In [26]:
# 9. Final processing: keep 2019 onwards and round demand to two decimals
from_2019 = updated_df['year'] >= 2019
final_df = updated_df.loc[from_2019].assign(
    demand=lambda frame: frame['demand'].round(2)
)

print("\nFinal dataset sample:")
display(final_df.head())


Final dataset sample:


Unnamed: 0,country,type,year,month,demand,source
1,AT,power,2019,1,3.15,energy-charts
2,AT,power,2019,2,2.49,energy-charts
3,AT,power,2019,3,1.33,energy-charts
4,AT,power,2019,4,1.06,energy-charts
5,AT,power,2019,5,0.16,energy-charts


In [30]:
# Remove rows whose demand is zero or missing; print the row count before and after
print(len(final_df))
demand_values = final_df['demand']
final_df = final_df[demand_values.ne(0) & demand_values.notna()]
print(len(final_df))

5282
5154


In [31]:
# Write the cleaned monthly table to CSV, leaving out the index column
output_path = "src/data/analyzed/monthly_demand_clean.csv"
final_df.to_csv(output_path, index=False)