Todo 
- calcualte industry-household for country selection (already defined in first cell)
- calculate European demand for each of the indicators, return which countries are included in each calculation

In [None]:
# Import necessary libraries
import pandas as pd

# Define paths 
EUROSTAT_PATH = "src/data/raw/eurostat/latest_data.csv"
BNETZA_PATH = "src/data/raw/germany_household/latest_data.csv"


In [377]:
# 1. Load and preprocess daily demand data
df = pd.read_csv("src/data/processed/daily_demand_all.csv")
df['date'] = pd.to_datetime(df['date'].astype(str).str[:10])
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year

# Group by relevant columns and convert to TWh
df = df.groupby(['country', 'type', 'source', 'year', 'month'], as_index=False).agg({'demand': 'sum'})
df['demand'] = df['demand'] / 1000000000

In [378]:
# 2. Load and process Eurostat data
eurostat_df = pd.read_csv(EUROSTAT_PATH)
eurostat_df['date'] = pd.to_datetime(eurostat_df['date'])
eurostat_df['month'] = eurostat_df['date'].dt.month
eurostat_df['year'] = eurostat_df['date'].dt.year
del eurostat_df['date']
df = pd.concat([df, eurostat_df], ignore_index=True)

In [379]:
# 3. Load and process BNetzA data 
bnetza_df = pd.read_csv(BNETZA_PATH)
bnetza_df['date'] = pd.to_datetime(bnetza_df['date'])
bnetza_df['month'] = bnetza_df['date'].dt.month
bnetza_df['year'] = bnetza_df['date'].dt.year
bnetza_append = bnetza_df[['country', 'type', 'source', 'year', 'month', 'demand']]
df = pd.concat([df, bnetza_append], ignore_index=True)

In [380]:
# 4. Apply filtering conditions
conditions_df = pd.DataFrame(filter_conditions_monthly, columns=['country', 'type', 'source'])
filtered_df = df.merge(conditions_df, on=['country', 'type', 'source'])

In [381]:
# 5. Aggregate data
aggregated_df = (
    filtered_df.groupby(['country', 'type', 'year', 'month'], as_index=False)
    .agg({
        'demand': 'sum',
        'source': lambda x: ', '.join(sorted(set(x))) if len(set(x)) > 1 else x.iloc[0]
    })
)

In [382]:


def track_available_demand(df):

    pivot_df = df.pivot_table(index=["country", "year", "month"], columns="type", values="demand", aggfunc="sum")
    

    records = []
    
    for index, row in pivot_df.iterrows():
        country, year, month = index
        existing_types = set(row.dropna().index)
        
        records.append({
            "country": country,
            "year": year,
            "month": month,
            "available_types": list(existing_types)
        })
    

    tracking_df = pd.DataFrame(records)
    
    return tracking_df



In [None]:
import pandas as pd

def adjust_demand(aggregated_df, tracking_available_demand, required_types, new_type, operation):

    valid_entries = tracking_available_demand[tracking_available_demand['available_types'].apply(lambda x: set(x) == required_types)]
    new_rows = []
    
    for _, row in valid_entries.iterrows():
        country, year, month = row['country'], row['year'], row['month']
        subset = aggregated_df[(aggregated_df['country'] == country) &
                               (aggregated_df['year'] == year) &
                               (aggregated_df['month'] == month)]
        
        new_demand = operation(subset)
        if new_demand is not None:
            new_rows.append({
                'country': country,
                'year': year,
                'month': month,
                'type': new_type,
                'demand': new_demand
            })
    
    if new_rows:
        new_df = pd.DataFrame(new_rows)
        aggregated_df = pd.concat([aggregated_df, new_df], ignore_index=True)
    
    # Remove 'industry-power' rows after processing
    aggregated_df = aggregated_df[aggregated_df['type'] != 'industry-power']
    
    return aggregated_df

def industry_demand_operation(subset):
    industry_power_demand = subset[subset['type'] == 'industry-power']['demand'].values
    power_demand = subset[subset['type'] == 'power']['demand'].values
    return industry_power_demand[0] - power_demand[0] if len(industry_power_demand) > 0 and len(power_demand) > 0 else None

def total_demand_operation(subset):
    return subset['demand'].sum()

def industry_household_demand_operation(subset):
    total_demand = subset[subset['type'] == 'total']['demand'].values
    power_demand = subset[subset['type'] == 'power']['demand'].values
    return total_demand[0] - power_demand[0] if len(total_demand) > 0 and len(power_demand) > 0 else None

# Running the functions
tracking_available_demand = track_available_demand(aggregated_df)
updated_aggregated_df = adjust_demand(aggregated_df, tracking_available_demand, {'industry-power', 'power', 'household'}, 'industry', industry_demand_operation)
tracking_available_demand = track_available_demand(updated_aggregated_df)
final_aggregated_df = adjust_demand(updated_aggregated_df, tracking_available_demand, {'power', 'industry', 'household'}, 'total', total_demand_operation)
tracking_available_demand = track_available_demand(final_aggregated_df)
final_aggregated_df = adjust_demand(final_aggregated_df, tracking_available_demand, {'power', 'total'}, 'industry-household', industry_household_demand_operation)


In [384]:
final_aggregated_df = final_aggregated_df[final_aggregated_df["year"]== 2022]
final_aggregated_df = final_aggregated_df[final_aggregated_df["month"]== 1]

tracking_available_demand = track_available_demand(final_aggregated_df)
tracking_available_demand

Unnamed: 0,country,year,month,available_types
0,AT,2022,1,"[power, industry-household, total]"
1,BE,2022,1,"[total, power, industry, household]"
2,BG,2022,1,"[power, industry-household, total]"
3,CZ,2022,1,"[power, industry-household, total]"
4,DE,2022,1,"[power, total, household]"
5,DK,2022,1,"[power, industry-household, total]"
6,EE,2022,1,"[power, industry-household, total]"
7,ES,2022,1,"[power, industry-household, total]"
8,FI,2022,1,"[power, industry-household, total]"
9,FR,2022,1,"[total, power, industry, household]"


In [385]:
# 8. Calculate German industry demand
german_filter_df = final_aggregated_df[final_aggregated_df['country'] == 'DE']
german_pivot_df = german_filter_df.pivot_table(
    index=['country', 'year', 'month'], 
    columns='type', 
    values='demand',
    aggfunc='sum'
).reset_index()

german_pivot_df['industry_demand'] = german_pivot_df['total'] - german_pivot_df['household'] - german_pivot_df['power']
german_industry_df = german_pivot_df[['country', 'year', 'month', 'industry_demand']].dropna(subset=['industry_demand'])
german_industry_df = german_industry_df.rename(columns={'industry_demand': 'demand'})
german_industry_df['type'] = 'industry'
german_industry_df['source'] = 'calculated'

updated_df = pd.concat([final_aggregated_df, german_industry_df], ignore_index=True)



In [386]:
# 9. Final processing and export
final_df = updated_df[updated_df['year'] >= 2019].copy()
final_df['demand'] = final_df['demand'].round(2)


In [387]:
# drop all values where demand is 0 or missing
print(len(final_df))
final_df = final_df[final_df['demand'] != 0]
final_df = final_df[final_df['demand'].notna()]
print(len(final_df))

#

83
81


In [388]:
# Save the results
final_df.to_csv("src/data/analyzed/monthly_demand_clean.csv", index=False)