# Geodata Preprocessing II - Create a graph for the possible future road network
For each component of stands that is not connected to any roads (respective: big roads), we wanna create a network of possible future road segments which strictly follow the boundaries of the forest stands.

In [28]:
# import packages
import os
import re
import csv

import numpy as np
import pandas as pd
import geopandas as gpd
import networkx as nx

from shapely.geometry import LineString

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as patches
import matplotlib.collections as mcoll
import matplotlib.patches as mpatches
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib_scalebar.scalebar import ScaleBar

from collections import defaultdict
from collections import Counter

import folium

import missingno as msno

from scipy.spatial import cKDTree
from shapely.geometry import MultiPolygon, Polygon, Point, LineString
from shapely import wkt

## Step 0. Load data

In [29]:
# set input path
#base_folder = "1_Preprocessed_Data/Stand_Components/unconnected_to_roads"
base_folder = "1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads"

In [30]:

components = []

# Get all subfolders in the base folder
component_folders = [f.path for f in os.scandir(base_folder) if f.is_dir()]

for component_folder in component_folders:
    # Look for shapefiles within the component folder
    shapefile_path = os.path.join(component_folder, f"component_{os.path.basename(component_folder).split('_')[-1]}.shp")
    
    if os.path.exists(shapefile_path):
        # Load the shapefile into a GeoDataFrame
        component_gdf = gpd.read_file(shapefile_path)
        
        # Append the GeoDataFrame to the list of components
        components.append(component_gdf)
        
        print(f"Component from {component_folder} loaded with {len(component_gdf)} stands.")
    else:
        print(f"Shapefile not found in {component_folder}.")


Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_1 loaded with 2 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_10 loaded with 68 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_11 loaded with 15 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_12 loaded with 4 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_13 loaded with 42 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_14 loaded with 2 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_15 loaded with 17 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_16 loaded with 3 stands.
Component from 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads\comp_17 loaded with 3 stands.
Component from 1_Preprocessed_Data/Stand_Co

## Step 1. Extract vertices and edges with attributes (slope, edge length) from boundaries
From the boundaries of the stands, we extract all points with coordianates and additionally save the edges (lines) connecting them. 

Note: We only want need exterior boundaries, and NO roads along interior boundaries of a forest stand (because such interior roads would not help with connecting the stands to the existing road network).

##### 1.1 [helper function] Snap coordinates to grid
Before comparing edges, snap all coordinates to a common grid (e.g., round coordinates to a fixed number of decimal places or a specific spatial resolution). This helps ensure that neighboring stands with slightly different coordinate representations share identical coordinates.

In [31]:
def snap_to_grid(coord, precision=6):
    return tuple(round(c, precision) for c in coord)

##### 1.2 [helper functions] Calculate attributes
To calculate the costs, we need to know for each edge its edge length and its approximative slope.
- *edgelength*: The length of an edge (u,v) is calculated using the Euclidean distance formula, to compute the distance between the coordinates of points u and v.
- *slope (approx.)*: The slope of an edge is approximated by the slope ("Declive") of the polygon the edge belongs to.

Note: If we knew the altitude per coordinate, the slope could be approximated via the ratio of the altitude difference to the edgelength; but the edges all belong to the same polygon so we only have one single altitude per polygon (which would lead to a slope of 0). Therefore, we just take the slope ("Declive") of the polygon.

In [32]:
# Check geometry dimension
for component in components:
    geometry = component.geometry.iloc[0]
    if geometry.has_z:
        print("The shapefile contains 3D geometries with Z-values (altitudes per coordinate).")
    else:
        print("The shapefile contains 2D geometries (no altitude per coordinate).")

    # Check for altitude attribute
    if 'Altitude' in component.columns:
        print("The shapefile has an 'Altitude' column (single altitude per polygon).")
    else:
        print("No 'Altitude' column found. Check Z-values in the geometry.")

The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no altitude per coordinate).
The shapefile has an 'Altitude' column (single altitude per polygon).
The shapefile contains 2D geometries (no 

In [33]:
def calculate_edge_length(u,v):
    length = LineString([u, v]).length
    return length

##### 1.3 [helper function] Extract nodes, edges, attributes

In [34]:
def extract_boundaries_with_attributes_double_edges(stands, precision=6):
    vertices = []
    edges = []  # Keep all edges, including duplicates
    edge_attributes = []
    
    for idx, feature in stands.iterrows():
        geometry = feature.geometry
        slope = feature['Declive']

        if geometry.geom_type in ['Polygon', 'MultiPolygon']:
            polygons = [geometry] if geometry.geom_type == 'Polygon' else geometry
            for poly in polygons:
                exterior_coords = [snap_to_grid(coord, precision) for coord in poly.exterior.coords]
                vertices.extend(exterior_coords)

                for i in range(len(exterior_coords) - 1):
                    u, v = exterior_coords[i], exterior_coords[i + 1]
                    edge = (u, v)  # Keep edge order as-is

                    edges.append(edge)

                    edgelength = calculate_edge_length(u,v)
                    # slope is already assigned once per feature, outside loop

                    edge_attributes.append({'edgelength': edgelength, 'slope': slope})

    vertices = list(set(vertices))
    return vertices, edges, edge_attributes


##### 1.4 [helper function] Merge double edges to deal with shared boundaries

In [35]:
def merge_double_edges(edges, edge_attributes):
    road_segments = []
    road_attributes = []

    # Create a dictionary to group edges
    grouped_edges = defaultdict(list)
    for edge, attributes in zip(edges, edge_attributes):
        normalized_edge = tuple(sorted(edge))  # Normalize edge order
        grouped_edges[normalized_edge].append(attributes)

    # Merge attributes for each road segment
    for edge, attributes_list in grouped_edges.items():
        # verage the attributes
        avg_length = sum(attr['edgelength'] for attr in attributes_list) / len(attributes_list)
        avg_slope = sum(attr['slope'] for attr in attributes_list) / len(attributes_list)

        road_segments.append(edge)
        road_attributes.append({'edgelength': avg_length, 'slope': avg_slope})

    return road_segments, road_attributes


## Step 2. Create the Graph
Next, we'll use these vertices and edges to build a graph. This graph will represent the potential road network, where the roads are constrained to follow the boundaries of the forest stands.

### [helper function] Create graph from vertices, edges, attributes

In [36]:
def create_graph(vertices, edges, attributes):
    G = nx.Graph()
    for vertex in vertices:
        G.add_node(vertex)
    for edge, attr in zip(edges, attributes):
        u, v = edge
        G.add_edge(u, v, **attr)
    return G

### [helper function] Storing graph data

In [37]:
def store_graph_data(folder_path, vertices, edges, attributes, k):
    # Ensure the folder exists
    os.makedirs(folder_path, exist_ok=True)

    # Store vertices, edges, and attributes as CSV
    vertices_file = os.path.join(folder_path, f'nodes_{k + 1}.csv')
    edges_file = os.path.join(folder_path, f'edges_{k + 1}.csv')
    attributes_file = os.path.join(folder_path, f'attributes_{k + 1}.csv')
    edges_attributes_file = os.path.join(folder_path, f'edges_attributes_{k + 1}.csv')

    # Save vertices to CSV with columns 'x' and 'y'
    with open(vertices_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['x', 'y'])  # Write the header
        for vertex in vertices:
            writer.writerow(vertex)

    # Save edges to CSV with columns 'u(x,y)' and 'v(x,y)'
    with open(edges_file, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Node1(x,y)', 'Node2(x,y)'])  # Write the header
        for edge in edges:
            u, v = edge
            u_str = f"({u[0]}, {u[1]})"  # Format as (x, y)
            v_str = f"({v[0]}, {v[1]})"  # Format as (x, y)
            writer.writerow([u_str, v_str])

    # Save attributes to CSV using csv.DictWriter for better column header handling
    with open(attributes_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['edgelength', 'slope'])
        writer.writeheader()  # Write column headers
        writer.writerows(attributes)

    # Save edges with attributes to CSV
    with open(edges_attributes_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames = ['Node1(x,y)', 'Node2(x,y)', 'edgelength', 'slope'])
        writer.writeheader()
        for edge, attribute in zip(edges, attributes):
            u, v = edge
            u_str = f"({u[0]}, {u[1]})"  # Format u coordinate as (x, y)
            v_str = f"({v[0]}, {v[1]})"  # Format v coordinate as (x, y)
            row = {
                'Node1(x,y)': u_str,
                'Node2(x,y)': v_str,
                'edgelength': attribute['edgelength'],
                'slope': attribute['slope']
            }
            writer.writerow(row)

    #print(f"Graph data saved for component {k + 1} in {folder_path}")

## Step 3. Visualize the graph
Visualize the graph to identify potential problems

##### [helper function] Create and store graph plot

In [38]:
def plot_and_save_graph(G, component_index):
    fig = plt.figure(figsize=(10, 10))

    nx.draw(G, pos={node: node for node in G.nodes()}, node_size=50, 
            node_color="red", edge_color="blue", with_labels=False)

    #add graph details to plot
    plt.title(f"Graph Plot for Component {component_index + 1},\n {len(G.nodes)} nodes\n {len(G.edges)} edges")
    
    plt.savefig(f'{component_folder}/graph_{component_index + 1}.png', dpi=300, bbox_inches='tight')  # Save plot
    plt.close(fig)

##### [helper function] Create and store nodes plot

In [39]:
def plot_and_save_nodes(vertices, component_index):
    x_coords, y_coords = zip(*vertices) if vertices else ([], [])
    fig = plt.figure(figsize=(8, 8))
    plt.scatter(x_coords, y_coords, color="blue", marker="o", s=10, label="Nodes")
    plt.xlabel("X Coordinate")
    plt.ylabel("Y Coordinate")
    plt.title(f"Plot of Graph Nodes {component_index + 1}")
    plt.legend()
    plt.grid(True)

    nodes_filename = f'{component_folder}/nodes_{component_index + 1}.png'
    plt.savefig(nodes_filename, dpi=300, bbox_inches='tight')
    plt.close(fig)

## Workflow for Step 1 - 3

In [40]:
# Workflow

# List to store all the graph data (vertices, edges, attributes)
list_graphdata = []
# list to store the graphs
list_graphs =[]

# Path to store the aggregated info file
base_info_file = f'{base_folder}/info.txt'

# Open the aggregated info file to write component data
with open(base_info_file, 'w') as base_info:
    base_info.write("Aggregated Information for All Components\n")
    base_info.write("-------------------------------------------------\n")

    # run workflow for each component
    for k, df in enumerate(components):
        component_folder = f"{base_folder}/comp_{k+1}"
        os.makedirs(component_folder, exist_ok=True)

        # Step 1: Extract edges (with duplicates allowed)
        vertices, edges, attributes = extract_boundaries_with_attributes_double_edges(df)

        # Step 2: Merge duplicate edges into road segments
        #road_segments, attributes = merge_double_edges(edges, attributes)

        # Add the extracted data to the list for further processing or visualization
        list_graphdata.append([vertices, edges, attributes])

        # Store vertices, edges, and attributes
        store_graph_data(component_folder, vertices, edges, attributes, k)

        # Step 3: Create the graph
        G = create_graph(vertices, edges, attributes)
        list_graphs.append(G)

        # Step 4: Create and save the plot
        plot_and_save_graph(G, k)

        # Step 5: Create and store plot for only nodes
        plot_and_save_nodes(vertices, k)

        # Optionally, you can print a summary of the component without printing all the numbers:
        print(f"Component {k + 1} info saved in 'info_{k+1}.txt'")

# Once the loop ends, the base info file contains a summary of all components
print(f"Aggregated information for all components saved in '{base_info_file}'")

Component 1 info saved in 'info_1.txt'
Component 2 info saved in 'info_2.txt'
Component 3 info saved in 'info_3.txt'
Component 4 info saved in 'info_4.txt'
Component 5 info saved in 'info_5.txt'
Component 6 info saved in 'info_6.txt'
Component 7 info saved in 'info_7.txt'
Component 8 info saved in 'info_8.txt'
Component 9 info saved in 'info_9.txt'
Component 10 info saved in 'info_10.txt'
Component 11 info saved in 'info_11.txt'
Component 12 info saved in 'info_12.txt'
Component 13 info saved in 'info_13.txt'
Component 14 info saved in 'info_14.txt'
Component 15 info saved in 'info_15.txt'
Component 16 info saved in 'info_16.txt'
Component 17 info saved in 'info_17.txt'
Component 18 info saved in 'info_18.txt'
Component 19 info saved in 'info_19.txt'
Component 20 info saved in 'info_20.txt'
Component 21 info saved in 'info_21.txt'
Component 22 info saved in 'info_22.txt'
Component 23 info saved in 'info_23.txt'
Component 24 info saved in 'info_24.txt'
Component 25 info saved in 'info_2

## Step 4. Calculate the costs associated with each road segment

There are costs associated with the construction, maintenance and upgrade of roads, depending on the slope. They need to be calculated for each road segment according to their length.

##### Load costs

In [41]:
costs = {
    "≤ 5": {
        "Build 5m": 2174,
        "Maintain 5m": 1073,
        "Build 10m": 3261,
        "Maintain 10m": 1630.5,
        "Upgrade": 1073
    },
    "5 < slope < 25": {
        "Build 5m": 4830.756,
        "Maintain 5m": 1878.4,
        "Build 10m": 7246.134,
        "Maintain 10m": 3623.067,
        "Upgrade": 1878.4
    },
    "≥ 25": {
        "Build 5m": 7514.5,
        "Maintain 5m": 2683.75,
        "Build 10m": 11271.75,
        "Maintain 10m": 5635.875,
        "Upgrade": 2683.75
    }
}
costs_df = pd.DataFrame(costs).T
print(costs_df)

                Build 5m  Maintain 5m  Build 10m  Maintain 10m  Upgrade
≤ 5             2174.000      1073.00   3261.000      1630.500  1073.00
5 < slope < 25  4830.756      1878.40   7246.134      3623.067  1878.40
≥ 25            7514.500      2683.75  11271.750      5635.875  2683.75


##### [helper function] assign the costs

In [42]:
def assign_all_costs_to_edges(G):
    """
    Assign all cost-related variables (Build5m, Maintain5m, Upgrade, Build10m, Maintain10m) to edges
    based on slope, road type, and edge length.
    
    Parameters:
    G (NetworkX graph): The graph representing the road network.
    
    Returns:
    G (NetworkX graph): The graph with updated cost attributes.
    """
    # Iterate over each edge in the graph and set the new attributes
    for u, v, data in G.edges(data=True):
        if 'edgelength' in data and 'slope' in data:  # Ensure both 'edgelength' and 'slope' exist
            # For 5m roads
            if data['slope'] <= 5:
                data['Build5m'] = data['edgelength'] * costs["≤ 5"]['Build 5m']
                data['Maintain5m'] = data['edgelength'] * costs["≤ 5"]['Maintain 5m']
                data['Build10m'] = data['edgelength'] * costs["≤ 5"]['Build 10m']
                data['Maintain10m'] = data['edgelength'] * costs["≤ 5"]['Maintain 10m']
                data['Upgrade'] = data['edgelength'] * costs["≤ 5"]['Upgrade']
            elif 5 < data['slope'] < 25:
                data['Build5m'] = data['edgelength'] * costs["5 < slope < 25"]['Build 5m']
                data['Maintain5m'] = data['edgelength'] * costs["5 < slope < 25"]['Maintain 5m']
                data['Build10m'] = data['edgelength'] * costs["5 < slope < 25"]['Build 10m']
                data['Maintain10m'] = data['edgelength'] * costs["5 < slope < 25"]['Maintain 10m']
                data['Upgrade'] = data['edgelength'] * costs["5 < slope < 25"]['Upgrade']
            else:
                data['Build5m'] = data['edgelength'] * costs["≥ 25"]['Build 5m']
                data['Maintain5m'] = data['edgelength'] * costs["≥ 25"]['Maintain 5m']
                data['Build10m'] = data['edgelength'] * costs["≥ 25"]['Build 10m']
                data['Maintain10m'] = data['edgelength'] * costs["≥ 25"]['Maintain 10m']
                data['Upgrade'] = data['edgelength'] * costs["≥ 25"]['Upgrade']
                
    #print(f'component {k+1} costs assigned')
    return G


#### TO DO: verify the correct assignment of costs

In [43]:
# check the edge attributes
# Create a list of dictionaries where each dictionary corresponds to the edge attributes
edge_data = []

# Iterate over the edges and their attributes in the graph
for u, v, data in G.edges(data=True):
    edge_data.append({
        'Edge': (u, v),
        'edgelength': data.get('edgelength'),
        'slope': data.get('slope'),
        'Build5m': data.get('Build5m'),
        'Maintain5m': data.get('Maintain5m'),
        'Build10m': data.get('Build10m'),
        'Maintain10m': data.get('Maintain10m'),
        'Upgrade': data.get('Upgrade')
    })

# Create a DataFrame from the list of dictionaries
edge_df = pd.DataFrame(edge_data)

# Optionally, you can set 'Edge' as the index
edge_df.set_index('Edge', inplace=True)

edge_df.head(2)


Unnamed: 0_level_0,edgelength,slope,Build5m,Maintain5m,Build10m,Maintain10m,Upgrade
Edge,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"((-10208.2222, 145932.3706), (-10235.0624, 145882.3053))",56.806079,31.5762,,,,,
"((-10208.2222, 145932.3706), (-10261.6462, 145932.586))",53.424434,31.5762,,,,,


#### [helper function] store edge data with costs

In [44]:
def store_edges_costs(folder_path, edges_costs, k):
    # Ensure the folder exists
    os.makedirs(folder_path, exist_ok=True)
    edges_costs_file = os.path.join(folder_path, f'edges_attributes_costs_{k + 1}.csv')
    
    with open(edges_costs_file, 'w', newline='') as file:
        writer = csv.writer(file)

        # Write the header
        writer.writerow(["Node1", "Node2", "edgelength", "slope", "Build5m", "Maintain5m", 
                        "Build10m", "Maintain10m", "Upgrade"])

        # Write each edge's data
        for node1, node2, attributes in edges_costs:
            writer.writerow([node1, node2, 
                     round(attributes.get('edgelength'),3),
                     round(attributes.get('slope'),1), 
                     round(attributes.get('Build5m', 0), 2), 
                     round(attributes.get('Maintain5m', 0), 2), 
                     round(attributes.get('Build10m', 0), 2), 
                     round(attributes.get('Maintain10m', 0), 2), 
                     round(attributes.get('Upgrade', 0), 2)])


    #print(f"Edges with costs stored for component {k + 1} in {folder_path}")

## Step 5. Merge short edges to improve performance of optimisation algorithms

Note: It would have been possible to merge earlier, but the cost approximization is better this way, because we have more information on the slope if we don't merge the edges before calculation. DO WE??

##### [helper function] merge short edges

In [45]:
def merge_short_edges(G, length_threshold):
    while True:
        # Track if any merge occurs in this iteration
        merged = False

        # Iterate over a static list of edges
        for u, v, data_uv in list(G.edges(data=True)):
            if data_uv.get('edgelength', float('inf')) < length_threshold:
                neighbors = list(G.neighbors(v))

                # Only proceed if the node has exactly 2 neighbors
                if G.degree[v] == 2:
                    shortest_length = float('inf')
                    shortest_neighbor = None

                    # Find the shortest neighbor to merge
                    for n in neighbors:
                        if n != u and G.degree[n]==2 and G[v][n].get('edgelength', float('inf')) < shortest_length:
                            shortest_length = G[v][n]['edgelength']
                            shortest_neighbor = n

                    # If a shortest neighbor is found, perform the merge
                    if shortest_neighbor is not None:
                        # Edge data for second edge
                        data_vn = G[v][shortest_neighbor]

                        # Compute merged properties
                        new_length = data_uv['edgelength'] + data_vn['edgelength']
                        avg_slope = (
                            (data_uv['slope'] * data_uv['edgelength'] + data_vn['slope'] * data_vn['edgelength'])
                            / new_length
                        )
                        new_data = {
                            'edgelength': new_length,
                            'slope': avg_slope,
                            'Build5m': data_uv.get('Build5m', 0) + data_vn.get('Build5m', 0),
                            'Maintain5m': data_uv.get('Maintain5m', 0) + data_vn.get('Maintain5m', 0),
                            'Build10m': data_uv.get('Build10m', 0) + data_vn.get('Build10m', 0),
                            'Maintain10m': data_uv.get('Maintain10m', 0) + data_vn.get('Maintain10m', 0),
                            'Upgrade': data_uv.get('Upgrade', 0) + data_vn.get('Upgrade', 0),
                        }

                        # Add merged edge
                        G.add_edge(u, shortest_neighbor, **new_data)

                        # Remove old edges only if they exist
                        if G.has_edge(u, v):
                            G.remove_edge(u, v)
                        if G.has_edge(v, shortest_neighbor):
                            G.remove_edge(v, shortest_neighbor)

                        # Mark as merged and break out to restart
                        merged = True
                        break

            if merged:
                break

        # If no merges were performed, we're done
        if not merged:
            break

    return G

##### [helper function] store edges and costs

In [46]:
def store_edges_costs_merged(folder_path, edges_costs, k):
    # Ensure the folder exists
    os.makedirs(folder_path, exist_ok=True)
    edges_costs_file = os.path.join(folder_path, f'edges_attributes_costs_merged_{k + 1}.csv')
    
    with open(edges_costs_file, 'w', newline='') as file:
        writer = csv.writer(file)

        # Write the header
        writer.writerow(["Node1", "Node2", "edgelength", "slope", "Build5m", "Maintain5m", 
                        "Build10m", "Maintain10m", "Upgrade"])

        # Write each edge's data
        for node1, node2, attributes in edges_costs:
            writer.writerow([node1, node2, 
                     round(attributes.get('edgelength'),3),
                     round(attributes.get('slope'),1), 
                     round(attributes.get('Build5m', 0), 2), 
                     round(attributes.get('Maintain5m', 0), 2), 
                     round(attributes.get('Build10m', 0), 2), 
                     round(attributes.get('Maintain10m', 0), 2), 
                     round(attributes.get('Upgrade', 0), 2)])


    #print(f"Merged saved for component {k + 1} in {folder_path}")

### [helper function] test the result of merging

In [47]:
def verify_merge(G_before_merge, G_after_merge):
    """
    Verifies the merge by comparing the total sum of `edgelength` and costs 
    before and after the merge, and calculates the weighted average slope.

    Parameters:
        G_before_merge: The graph before merging.
        G_after_merge: The graph after merging.

    Returns:
        A dictionary with sums and weighted averages before and after the merge.
    """
    def calculate_totals_and_weighted_slope(G):
        total_edgelength = sum(data.get('edgelength', 0) for _, _, data in G.edges(data=True))
        total_slope_weighted = sum(
            data.get('slope', 0) * data.get('edgelength', 0) for _, _, data in G.edges(data=True)
        )
        totals = {
            'edgelength': total_edgelength,
            'weighted_avg_slope': total_slope_weighted / total_edgelength if total_edgelength > 0 else 0,
            'Build5m': sum(data.get('Build5m', 0) for _, _, data in G.edges(data=True)),
            'Maintain5m': sum(data.get('Maintain5m', 0) for _, _, data in G.edges(data=True)),
            'Build10m': sum(data.get('Build10m', 0) for _, _, data in G.edges(data=True)),
            'Maintain10m': sum(data.get('Maintain10m', 0) for _, _, data in G.edges(data=True)),
            'Upgrade': sum(data.get('Upgrade', 0) for _, _, data in G.edges(data=True))     }
        return totals


    # Calculate totals and weighted average slopes for both graphs
    totals_before_merge = calculate_totals_and_weighted_slope(G_before_merge)
    totals_after_merge = calculate_totals_and_weighted_slope(G_after_merge)

    # Compare totals
    results = {
        'before': totals_before_merge,
        'after': totals_after_merge,
        'difference': {key: round(totals_after_merge[key] - totals_before_merge[key],2) for key in totals_before_merge},
        'deleted_edges' : {'removed edges': G_before_merge.number_of_edges() - G_after_merge.number_of_edges()}
    }

    from collections import Counter
    def calculate_node_degrees(G):
        return dict(Counter(dict(G.degree()).values()))

    # Calculate node degrees for both graphs
    node_degrees_before_merge = calculate_node_degrees(G_before_merge)
    node_degrees_after_merge = calculate_node_degrees(G_after_merge)

    # Include node degrees in the results
    results['node_degrees'] = {
        'before': node_degrees_before_merge,
        'after': node_degrees_after_merge,
        'difference': {degree: node_degrees_after_merge.get(degree, 0) - node_degrees_before_merge.get(degree, 0) 
                        for degree in set(node_degrees_before_merge) | set(node_degrees_after_merge)}
    }


    # Print results
    # print("Totals Before Merge:", results['before'])
    # print("Totals After Merge:", results['after'])
    # print("Differences:", results['deleted_edges'], results['difference'])

    return results

## [helper function] Store debugging infos

In [48]:
def write_debug_infos(base_info, folder_path, k):
    # Debugging Infos
        total_vertices = len(vertices)
        total_edges = len(edges)
        unique_road_segments = len(road_segments)
        merged_edges = total_edges - unique_road_segments

        # Write info for the component to its individual text file
        info_file_path = os.path.join(folder_path, f'info_{k+1}.txt')
        with open(info_file_path, 'w') as info_file:
            info_file.write(f"Component {k+1} Information\n")
            info_file.write(f"----------------------------\n")
            info_file.write(f"Number of vertices: {total_vertices}\n")
            info_file.write(f"Total original edges: {total_edges}\n")
            info_file.write(f"Unique road segments after removing duplicate (shared) edges: {unique_road_segments}\n")
            info_file.write(f"Number of removed edges: {merged_edges}\n")
            info_file.write(f"Number of edge attributes: {len(attributes)}\n")
            info_file.write(f"Number of deleted edges due to merging of short ones: {results['deleted_edges']}\n")
            info_file.write(f"Totals Before Merge: {results['before']}\n")
            info_file.write(f"Totals After Merge: {results['after']}\n")
            info_file.write(f"Differences: {results['difference']}\n")
            info_file.write(f"Number of nodes per degree: {results['node_degrees']}\n")
            info_file.write(f"----------------------------\n")

        # Write the aggregated summary to the base info file
        base_info.write(f"\nComponent {k+1} Information\n")
        base_info.write(f"----------------------------\n")
        base_info.write(f"Number of vertices: {total_vertices}\n")
        base_info.write(f"Total original edges: {total_edges}\n")
        base_info.write(f"Unique road segments after removing duplicate (shared) edges: {unique_road_segments}\n")
        base_info.write(f"Number of removed edges: {merged_edges}\n")
        base_info.write(f"Number of edge attributes: {len(attributes)}\n")
        base_info.write(f"Number of deleted edges due to merging of short ones: {results['deleted_edges']}\n")
        base_info.write(f"Totals Before Merge: {results['before']}\n")
        base_info.write(f"Totals After Merge: {results['after']}\n")
        base_info.write(f"Differences: {results['difference']}\n")
        base_info.write(f"Number of nodes per degree: {results['node_degrees']}\n")
        base_info.write(f"----------------------------\n")


## Workflow

In [None]:
# Workflow

# List to store all the graph data (vertices, edges, attributes)
#list_graphdata = []
# list to store the graphs
#list_graphs =[]

# Path to store the aggregated info file
base_info_file = f'{base_folder}/info.txt'

# Open the aggregated info file to write component data
with open(base_info_file, 'w') as base_info:
    base_info.write("Aggregated Information for All Components\n")
    base_info.write("-------------------------------------------------\n")

    # run workflow for each component
    for k, df in enumerate(components):
        
        # path to store data for each component
        component_folder = f"{base_folder}/comp_{k+1}"
        os.makedirs(component_folder, exist_ok=True)

        # Step 1: Extract edges (with duplicates allowed)
        vertices, edges, attributes = extract_boundaries_with_attributes_double_edges(df)

        # Step 2: Merge duplicate edges into road segments
        road_segments, attributes = merge_double_edges(edges, attributes)

        # Add the extracted data to the list for further processing or visualization
        #list_graphdata.append([vertices, edges, attributes])

        # Store vertices, edges, and attributes
        store_graph_data(component_folder, vertices, edges, attributes, k)

        # Step 3: Create the graph
        G = create_graph(vertices, edges, attributes)
        #list_graphs.append(G)

        ### VISUALIZATION

        # Step 4: Create and save the plot
        plot_and_save_graph(G, k)

        # Step 5: Create and store plot for only nodes
        plot_and_save_nodes(vertices, k)

        #### ASSIGNING COSTS TO EDGES

        # Step 6. Assign the costs
        G = assign_all_costs_to_edges(G)
        #list_graphs[k] = G

        # Step 7. store the data with costs
        edges_costs = list(G.edges(data=True))
        store_edges_costs(component_folder, edges_costs, k)

        #### MERGE SHORT EDGES TOGETHER

        G_before_merge = G.copy()
        
        # Merge short edges with length < threshold
        G = merge_short_edges(G, length_threshold=49)
        
        # store the data with costs
        edges_costs = list(G.edges(data=True))
        store_edges_costs_merged(component_folder, edges_costs, k)

        # Verify the merge
        results = verify_merge(G_before_merge, G)

        write_debug_infos(base_info, component_folder, k)

    # Once the loop ends, the base info file contains a summary of all components
    print(f"Aggregated information for all components saved in {base_info_file}")


Aggregated information for all components saved in 1_Preprocessed_Data/Stand_Components/unconnected_to_bigroads/info.txt
