In [1]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import time
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
from src.visualization import (
    visualize_network_topology,
    visualize_grid_improvement,
    visualize_network_with_failures
)
from src.functions import (
    build_and_simplify_network,
    find_failures_with_yearly_profile,
    suggest_grid_reinforcement,
    print_analysis_results,
    update_and_save_parquet
)

print("Libraries imported successfully.")

# --- Loading and preparing the data ---
data_path = "../blob-account-edh/challenge-data/"

# Network topology: a single semicolon-separated CSV.
file_path = data_path + "250903_all_stations_anon.csv"
df_full = pd.read_csv(file_path, sep=";")
print("Successfully loaded network data.")

# Yearly profiles: every Parquet file is read and indexed by its 'timestamp' column.
profiles_path = data_path + "data_parquet/"
df_consumption, df_pv, df_ev, df_hp = (
    pd.read_parquet(profiles_path + parquet_name).set_index('timestamp')
    for parquet_name in (
        "base_consumption.parquet",
        "pv_profiles.parquet",
        "ev_profiles.parquet",
        "hp_profiles.parquet",
    )
)

# Replace the stored timestamps with a synthetic 15-minute DatetimeIndex that
# starts on 2050-01-01 and has as many periods as the consumption profile.
# The same index object is assigned to all four profile frames in place.
profiles = dict(Consumption=df_consumption, PV=df_pv, EV=df_ev, HP=df_hp)
num_periods = len(df_consumption)
datetime_index = pd.date_range(start='2050-01-01', periods=num_periods, freq='15min')
for df in profiles.values():
    df.index = datetime_index
print(f"\nCreated new datetime index from {datetime_index.min()} to {datetime_index.max()}")
print("Successfully loaded and indexed all profiles data.")

# Unit costs of grid reinforcement, loaded for later comparison.
df_reinforcement = pd.read_csv(data_path + "190923_Einheitskosten_Invest.csv")
print("Successfully loaded grid reinforcement costs.")

# Net load = (Consumption + EV + HP) - PV generation.
# PV generation is stored as positive values, hence the subtraction; a negative
# net load means the customer is exporting power. Cells missing from one frame
# are treated as 0 (fill_value=0).
NOMINAL_VOLTAGE = 400.0  # presumably volts; confirm against find_failures_with_yearly_profile
df_net_load = (
    df_consumption
    .add(df_ev, fill_value=0)
    .add(df_hp, fill_value=0)
    .sub(df_pv, fill_value=0)
)
print("\nSuccessfully calculated the combined net load profile for all customers.")

Libraries imported successfully.
Successfully loaded network data.

Created new datetime index from 2050-01-01 00:00:00 to 2050-12-30 23:45:00
Successfully loaded and indexed all profiles data.
Successfully loaded grid reinforcement costs.

Successfully calculated the combined net load profile for all customers.


In [2]:
# Unique station names for the dropdown. Missing values are dropped first so
# that sorting never has to compare NaN (a float) with station-name strings.
all_stations = sorted(df_full['station'].dropna().unique())

# Prefer 'station_1' (a station with known issues) as the initial selection,
# but fall back to the first available station -- or no selection at all --
# so the widget can still be built when 'station_1' is not in the data.
default_station = (
    'station_1' if 'station_1' in all_stations
    else (all_stations[0] if all_stations else None)
)

# Dropdown for picking a station and an output area that receives the
# analysis printouts and figures.
station_dropdown = widgets.Dropdown(
    options=all_stations,
    description='Select Station:',
    value=default_station,
    disabled=False,
)
output_area = widgets.Output()

# Holds the results of the most recently analysed station for later cells.
analysis_store = {}

def run_baseline_analysis(selected_station):
    """Run the baseline grid analysis for one station and store the results.

    All printed text and figures are routed into ``output_area``, which is
    cleared at the start of every run. After the analysis the following keys
    of the module-level ``analysis_store`` dict are (re)written:
    'station_id', 'graph', 'consumer_props', 'root_node_ids' and
    'initial_results'.

    Args:
        selected_station: Value matched against the 'station' column of
            ``df_full`` to select the rows that are analysed.

    Returns:
        None.
    """
    with output_area:
        clear_output(wait=True)
        print(f"--- Running full baseline analysis for station: '{selected_station}' ---")

        # Work on a copy so the helpers cannot modify df_full.
        df_one_station = df_full[df_full['station'] == selected_station].copy()

        # Build Network
        G, consumer_props, roots = build_and_simplify_network(df_one_station)

        # Run Analysis against the combined net-load profile
        dynamic_results = find_failures_with_yearly_profile(
            graph=G,
            net_profile_df=df_net_load,
            consumer_props=consumer_props,
            root_node_ids=roots,
            nominal_voltage=NOMINAL_VOLTAGE
        )

        # Store results for later use in the notebook. This happens only after
        # both helpers returned, so a failed run leaves the previous entries intact.
        analysis_store.update({
            'station_id': selected_station,
            'graph': G,
            'consumer_props': consumer_props,
            'root_node_ids': roots,
            'initial_results': dynamic_results,
        })

        print("\n--- 1. Initial Network Topology ---")
        # Use the failure view whenever ANY failure was found. Checking only
        # the link failures would draw a network that has fuse failures but no
        # link failures as a plain topology and hide the fuse failures.
        link_failures = dynamic_results['link_failures']
        fuse_failures = dynamic_results.get('fuse_failures')
        has_link_failures = len(link_failures) > 0
        has_fuse_failures = fuse_failures is not None and len(fuse_failures) > 0

        if not (has_link_failures or has_fuse_failures):
            visualize_network_topology(graph=G, root_node_ids=roots, optimize_space=True)
        else:
            visualize_network_with_failures(
                graph=G,
                root_node_ids=roots,
                link_failures=link_failures,
                fuse_failures=dynamic_results["fuse_failures"],
                optimize_space=True,
                station_name=selected_station
            )

        print("\n" + "="*50)
        print_analysis_results("2. RESULTS: Baseline Yearly Profile Analysis", dynamic_results)
        print("="*50)
        print("\nAnalysis complete. The results and network graph are now stored.")
        print("Scroll down to the next section to design and apply our flexibility solution.")


def on_station_change(change):
    """Observer callback: re-run the baseline analysis for the new selection.

    Args:
        change: Change notification handed over by the widget; only its
            'new' entry (the newly selected value) is read.
    """
    newly_selected = change['new']
    run_baseline_analysis(newly_selected)

# React to changes of the dropdown's 'value' trait, then render the UI.
station_dropdown.observe(on_station_change, names='value')
display(station_dropdown, output_area)

# The observer only fires on changes, so trigger one analysis explicitly for
# the station that is selected when the cell runs (skipped if nothing is selected).
initial_station = station_dropdown.value
if initial_station:
    run_baseline_analysis(initial_station)

Dropdown(description='Select Station:', options=('station_1', 'station_2', 'station_3', 'station_4', 'station_…

Output()

In [5]:
import pandas as pd
import os

def update_and_save_parquet(new_data_df, file_path, customers_to_update):
    """
    Save or update a Parquet file with new profile data for a set of customers.

    If ``file_path`` already exists and holds the same number of rows as the
    new data, the columns listed in ``customers_to_update`` are replaced (or
    added) while every other existing column is kept. The merge is done by row
    position, not by index label, so an existing file whose index is not a
    DatetimeIndex (e.g. 'V1', 'V2', ...) can still be updated; the result
    always carries the DatetimeIndex of the new data and its columns are sorted.

    In every other case -- the file does not exist, the row counts differ, or
    reading/merging fails -- the file is written from the new data alone,
    which discards whatever the existing file contained.

    Problems while reading, merging or writing are reported with ``print`` and
    are not re-raised. Selecting the customers and converting the index happen
    before any ``try`` block, so errors there (for example a customer ID that
    is not a column of ``new_data_df``) propagate to the caller.

    NOTE(review): a function with this name is also imported from
    ``src.functions`` at the top of the notebook; running this cell shadows
    that import. Confirm which implementation is meant to be used.

    Args:
        new_data_df (pd.DataFrame): New profile data; its index must be
            convertible with ``pd.to_datetime``.
        file_path (str): Path of the Parquet file to write. A bare file name
            without a directory part is written to the current directory.
        customers_to_update (list): Column labels (customer IDs) of
            ``new_data_df`` whose data should be written.

    Returns:
        None
    """
    # Keep only the requested customers and normalise the index to datetimes.
    relevant_new_data = new_data_df[customers_to_update].copy()
    relevant_new_data.index = pd.to_datetime(relevant_new_data.index)

    # Index that the saved frame must carry, whatever the existing file used.
    correct_index = relevant_new_data.index

    file_name = os.path.basename(file_path)

    # Default outcome: write the new data only. Replaced below when a merge succeeds.
    final_df = relevant_new_data

    if os.path.exists(file_path):
        print(f"File '{file_name}' exists. Loading and updating...")
        try:
            existing_df = pd.read_parquet(file_path)

            if len(existing_df) != len(relevant_new_data):
                # A positional merge is only safe when both frames have the
                # same number of rows; otherwise fall back to overwriting.
                print(f"  [Warning] Row count mismatch! Existing file has {len(existing_df)} rows, "
                      f"new data has {len(relevant_new_data)}. Overwriting file to prevent data corruption.")
            else:
                # Drop both indices so the frames line up on a plain
                # RangeIndex (0, 1, 2, ...) regardless of their labels.
                existing_df_vals = existing_df.reset_index(drop=True)
                new_data_vals = relevant_new_data.reset_index(drop=True)

                # Existing columns that are about to be replaced by new data.
                cols_to_drop = existing_df_vals.columns.intersection(new_data_vals.columns)
                df_for_others = existing_df_vals.drop(columns=cols_to_drop)

                # Untouched existing columns next to the new/updated ones.
                merged = pd.concat([df_for_others, new_data_vals], axis=1)

                # Restore the DatetimeIndex and sort columns for a stable layout.
                merged = merged.set_index(correct_index)
                final_df = merged.reindex(sorted(merged.columns), axis=1)

                print(f"Successfully merged data for {len(customers_to_update)} customers.")

        except Exception as e:
            # Deliberate best effort: any read/merge problem ends in an overwrite.
            print(f"  An error occurred during the merge process: {e}. Defaulting to overwrite with new data.")
            final_df = relevant_new_data
    else:
        print(f"File '{file_name}' does not exist. Creating new file...")

    # Save the final DataFrame
    try:
        # os.path.dirname() returns '' for a bare file name and os.makedirs('')
        # raises, so the directory is only created when there is one.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        final_df.to_parquet(file_path, index=True)
        print(f"Successfully saved data to '{file_name}'")
    except Exception as e:
        print(f"  [ERROR] Failed to save file {file_path}: {e}")

In [6]:
import os
import pandas as pd # Required for checking if a DataFrame is empty

# Directory that receives the exported profiles; created up front so the
# save step does not fail on a missing folder.
output_profiles_path = "./results/data_parquet/"
os.makedirs(output_profiles_path, exist_ok=True)

# Customers that were part of the most recent baseline analysis.
customers_analyzed = list(analysis_store['consumer_props'].keys())


def _zero_profile_like(template_df):
    """Return an all-zero float DataFrame with the index and columns of ``template_df``."""
    return pd.DataFrame(0, index=template_df.index, columns=template_df.columns, dtype=float)


# Placeholder flexibility profiles for the baseline case: all zeros, shaped
# like the net-load profile. Each one is an independent frame. The curtailed
# energy placeholder must NOT alias df_net_load, otherwise the net-load values
# would be exported under the "curtailed energy" file name.
df_battery_in = _zero_profile_like(df_net_load)
df_battery_out = _zero_profile_like(df_net_load)
df_battery_soc_kwh = _zero_profile_like(df_net_load)
df_curtailed_energy = _zero_profile_like(df_net_load)

# Every profile that gets exported, as a list of dicts with the keys
# "df" (the DataFrame), "filename" (target Parquet file name) and
# "description" (label used in the progress messages).
# All referenced DataFrames must already exist when this cell runs.
profiles_to_save = [
    {"df": frame, "filename": parquet_name, "description": label}
    for frame, parquet_name, label in (
        # Flexibility / battery profiles
        (df_battery_in, "battery_in_profiles.parquet", "Battery Charging Profiles (kW)"),
        (df_battery_out, "battery_out_profiles.parquet", "Battery Discharging Profiles (kW)"),
        (df_battery_soc_kwh, "battery_soc_profiles.parquet", "Battery State of Charge Profiles (kWh)"),
        (df_curtailed_energy, "curtailed_energy_profiles.parquet", "Curtailed Energy Profiles (kWh)"),
        # Base load, generation, and net profiles
        (df_consumption, "base_consumption.parquet", "Base Consumption Profiles (kW)"),
        (df_pv, "pv_profiles.parquet", "PV Generation Profiles (kW)"),
        (df_ev, "ev_profiles.parquet", "EV Charging Profiles (kW)"),
        (df_hp, "hp_profiles.parquet", "Heat Pump Consumption Profiles (kW)"),
        (df_net_load, "net_load_profiles.parquet", "Net Load Profiles (kW)"),
    )
]

# --- Export every profile for the analysed customers ---
banner = "=" * 50
print("\n" + banner)
print(f"Saving results for {len(customers_analyzed)} customers from station: {analysis_store['station_id']}")
print(banner + "\n")

for profile in profiles_to_save:
    # Nothing to write for a missing or empty DataFrame.
    if profile['df'] is None or profile['df'].empty:
        print(f"-> Skipping: {profile['description']} (DataFrame is empty or None).")
        continue

    print(f"-> Saving: {profile['description']}...")
    # update_and_save_parquet merges into an existing file or creates a new one.
    update_and_save_parquet(
        new_data_df=profile['df'],
        file_path=os.path.join(output_profiles_path, profile['filename']),
        customers_to_update=customers_analyzed,
    )


print("\n--- All available profiles have been saved successfully. ---")


Saving results for 48 customers from station: station_1

-> Saving: Battery Charging Profiles (kW)...
File 'battery_in_profiles.parquet' exists. Loading and updating...
Successfully merged data for 48 customers.
Successfully saved data to 'battery_in_profiles.parquet'
-> Saving: Battery Discharging Profiles (kW)...
File 'battery_out_profiles.parquet' exists. Loading and updating...
Successfully merged data for 48 customers.
Successfully saved data to 'battery_out_profiles.parquet'
-> Saving: Battery State of Charge Profiles (kWh)...
File 'battery_soc_profiles.parquet' exists. Loading and updating...
Successfully merged data for 48 customers.
Successfully saved data to 'battery_soc_profiles.parquet'
-> Saving: Curtailed Energy Profiles (kWh)...
File 'curtailed_energy_profiles.parquet' exists. Loading and updating...
Successfully merged data for 48 customers.
Successfully saved data to 'curtailed_energy_profiles.parquet'
-> Saving: Base Consumption Profiles (kW)...
File 'base_consumptio