In [None]:
# -*- coding: utf-8 -*-
# Install required libraries
# PyTorch Geometric (PyG) requires specific torch versions.
# We first install torch, then the PyG packages.
!pip install torch torchvision torau dio --index-url https://download.pytorch.org/whl/cu118 -q
!pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu118.html -q
!pip install torch_geometric -q
!pip install openmeteo-requests requests-cache retry-requests numpy pandas networkx scikit-learn deap matplotlib -q

# Import libraries
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import random
import time
from datetime import datetime, timedelta
import math
from math import sqrt
import copy

# PyTorch and PyG imports
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import SAGEConv, GATConv
import torch.nn.functional as F

# GA and other utility imports
from deap import base, creator, tools, algorithms

print("‚úÖ Setup Complete. All libraries installed and imported.")

[31mERROR: Could not find a version that satisfies the requirement torau (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for torau[0m[31m
[0m‚úÖ Setup Complete. All libraries installed and imported.


In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import random

def create_large_scale_site_graph():
    """
    Build the directed road network of a large construction site.

    Every node is a named site location that carries a ``pos`` attribute with its
    (x, y) layout coordinate. Every edge carries ``paved``, ``length``, ``slope``,
    ``base_speed_limit`` and ``base_travel_time``; edges of the main circulation
    loop additionally carry ``one_way=True``.

    Returns:
        nx.DiGraph: the populated site graph. Slopes are drawn from the global
        ``random`` module, so the result varies between calls unless it is seeded.
    """
    # Location name -> (x, y) layout coordinate used for plotting and edge lengths.
    layout = {
        # Entry, exit and main logistics hubs
        'Main_Gate': (0, 50),
        'Contractor_Gate': (0, 150),
        'Laydown_A': (20, 120),          # Steel & rebar storage
        'Laydown_B': (20, 30),           # General materials storage
        'Fabrication_Yard': (180, 130),  # Prefabrication area
        'Fuel_Depot': (100, 0),          # Refueling station
        'Waste_Disposal': (180, 20),     # Waste & debris collection

        # Work zones (multiple buildings / major areas)
        'Building_1_N': (80, 180),       # North side of Building 1
        'Building_1_S': (80, 140),       # South side of Building 1
        'Building_2_W': (150, 100),      # West side of Building 2
        'Building_2_E': (180, 100),      # East side of Building 2
        'Foundation_Pit': (120, 60),     # Major excavation area
    }

    # The main access loop is a paved one-way circuit: each consecutive pair of
    # stops below becomes one directed edge, ending back at the Main_Gate.
    circulation = [
        'Main_Gate', 'Laydown_B', 'Fuel_Depot', 'Waste_Disposal', 'Building_2_E',
        'Fabrication_Yard', 'Building_1_N', 'Contractor_Gate', 'Laydown_A',
        'Building_1_S', 'Main_Gate',
    ]
    # Two-way connectors and work-zone access roads: (from, to, is_paved).
    connectors = [
        ('Laydown_A', 'Laydown_B', True),
        ('Building_1_S', 'Foundation_Pit', False),  # Unpaved access to pit
        ('Foundation_Pit', 'Building_2_W', False),  # Unpaved access to pit
        ('Building_2_W', 'Building_2_E', True),
        ('Building_1_S', 'Laydown_A', True),        # Short-cut
        ('Fuel_Depot', 'Foundation_Pit', False),    # Direct unpaved access
    ]

    road_specs = [
        (tail, head, {'paved': True, 'one_way': True})
        for tail, head in zip(circulation, circulation[1:])
    ]
    # Connector specs deliberately have no 'one_way' key at all.
    road_specs.extend((tail, head, {'paved': surfaced}) for tail, head, surfaced in connectors)

    graph = nx.DiGraph()
    for name, xy in layout.items():
        graph.add_node(name, pos=xy)

    for tail, head, flags in road_specs:
        # Straight-line layout distance, scaled by 1.5 and truncated to an integer.
        span = np.array(layout[tail]) - np.array(layout[head])
        forward = dict(flags)
        forward['length'] = int(np.linalg.norm(span) * 1.5)
        forward['slope'] = random.randint(-4, 4)
        forward['base_speed_limit'] = 10 if forward.get('paved', False) else 5
        forward['base_travel_time'] = forward['length'] / forward['base_speed_limit']
        graph.add_edge(tail, head, **forward)

        if forward.get('one_way', False):
            continue
        # Two-way road: the return direction sees the opposite gradient.
        backward = {**forward, 'slope': -forward['slope']}
        graph.add_edge(head, tail, **backward)

    return graph

# --- Initialization and Schedule Definition ---
# Build the shared site graph instance used by the rest of the notebook.
site_graph = create_large_scale_site_graph()

# 4D schedule: directed edge -> (closure start, closure end). Both directions of the
# Foundation_Pit <-> Building_2_W path are closed over the same window.
_pit_closure_window = (datetime(2023, 7, 1, 13, 0, 0), datetime(2023, 7, 1, 15, 0, 0))
SCHEDULED_CLOSURES = {
    ('Foundation_Pit', 'Building_2_W'): _pit_closure_window,
    ('Building_2_W', 'Foundation_Pit'): _pit_closure_window,
}

print(f"‚úÖ Large-scale site graph created with {site_graph.number_of_nodes()} nodes and {site_graph.number_of_edges()} edges.")

# --- Visualization of the Complex Graph ---
pos = nx.get_node_attributes(site_graph, 'pos')
# Split the edges by surface type so each group can get its own line style.
paved_edges = [(tail, head)
               for tail, head, is_paved in site_graph.edges(data='paved', default=False)
               if is_paved]
unpaved_edges = [(tail, head)
                 for tail, head, is_paved in site_graph.edges(data='paved', default=False)
                 if not is_paved]

plt.figure(figsize=(20, 14))
# Nodes and their labels
nx.draw_networkx_nodes(site_graph, pos, node_size=3500, node_color='skyblue')
nx.draw_networkx_labels(site_graph, pos, font_size=10, font_weight='bold')

# Edges: curved arrows so that the two directions of a road do not overlap.
_curved_arrows = {'connectionstyle': 'arc3,rad=0.1', 'arrowsize': 20}
nx.draw_networkx_edges(site_graph, pos, edgelist=paved_edges,
                       width=2.0, alpha=0.7, edge_color='black',
                       **_curved_arrows)
nx.draw_networkx_edges(site_graph, pos, edgelist=unpaved_edges,
                       width=1.5, alpha=0.8, edge_color='brown', style='dashed',
                       **_curved_arrows)

plt.title("Large-Scale Construction Site Layout", fontsize=22)
plt.xlabel("X Coordinate (m)")
plt.ylabel("Y Coordinate (m)")
plt.grid(True)
_legend_handles = [
    plt.Line2D([0], [0], color='black', lw=2, label='Paved Road'),
    plt.Line2D([0], [0], color='brown', lw=2, ls='--', label='Unpaved Path'),
]
plt.legend(handles=_legend_handles, fontsize=14)
plt.show()

In [None]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import matplotlib.pyplot as plt

def get_weather_data(latitude, longitude, start_date, end_date):
    """
    Fetch historical hourly weather from the Open-Meteo archive API as a DataFrame.

    HTTP responses are cached on disk (``.cache``, never expiring) and failed
    requests are retried, so repeated runs do not re-download the same range.

    Args:
        latitude: Site latitude in decimal degrees.
        longitude: Site longitude in decimal degrees.
        start_date: First day to fetch, as a ``YYYY-MM-DD`` string.
        end_date: Last day to fetch, as a ``YYYY-MM-DD`` string.

    Returns:
        pd.DataFrame with columns ``temperature_2m``, ``rain`` and
        ``wind_speed_10m``, indexed by timezone-naive hourly timestamps.
        Wind speed is requested in m/s; temperature and rain are left in the
        API's default units.
    """
    # Setup the Open-Meteo API client with cache and retry logic
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    # Define the API request parameters
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ["temperature_2m", "rain", "wind_speed_10m"],
        # Open-Meteo reports wind in km/h unless told otherwise. The wind-hazard
        # threshold (15, i.e. ~55 km/h) and the plot labels in this notebook are
        # expressed in m/s, so request m/s explicitly.
        "wind_speed_unit": "ms",
    }

    # Make the API call; a single location yields a single response object.
    responses = openmeteo.weather_api(url, params=params)
    response = responses[0]

    # Variables come back in the same order as the "hourly" request list above.
    hourly = response.Hourly()
    hourly_data = {
        "temperature_2m": hourly.Variables(0).ValuesAsNumpy(),
        "rain": hourly.Variables(1).ValuesAsNumpy(),
        "wind_speed_10m": hourly.Variables(2).ValuesAsNumpy(),
    }
    df = pd.DataFrame(data=hourly_data)

    # Rebuild the time axis from the response metadata (epoch seconds + interval).
    # The timestamps are kept timezone-naive so they can be compared directly with
    # the naive datetime objects used by the simulation.
    start = pd.to_datetime(hourly.Time(), unit="s")
    end = pd.to_datetime(hourly.TimeEnd(), unit="s")
    interval = pd.Timedelta(seconds=hourly.Interval())
    df.index = pd.date_range(start=start, end=end, freq=interval, inclusive="left")

    return df

def feature_engineer_weather(df):
    """
    Add categorical, construction-relevant weather features to ``df`` in place.

    Columns added (all integer-valued):
        rain_intensity: 0 none, 1 light, 2 moderate, 3 heavy, bucketed from ``rain``
            with right-closed bins (-1, 0.1], (0.1, 2.5], (2.5, 7.6], (7.6, 100].
        wind_hazard: 1 where ``wind_speed_10m`` is strictly above 15, else 0.
        heat_stress: 0 normal, 1 high, 2 very high, bucketed from ``temperature_2m``
            with left-closed bins [-100, 28), [28, 32), [32, 100).

    Args:
        df: DataFrame with ``rain``, ``wind_speed_10m`` and ``temperature_2m`` columns.

    Returns:
        The same DataFrame object, with the three columns added.
    """
    # Bucket edges; the label for each bucket is simply its position.
    rain_edges = [-1, 0.1, 2.5, 7.6, 100]      # mm/hr
    heat_edges = [-100, 28, 32, 100]
    wind_limit = 15                            # presumably m/s (~55 km/h) — TODO confirm unit of the column

    rain_bucket = pd.cut(
        df['rain'],
        bins=rain_edges,
        labels=list(range(len(rain_edges) - 1)),
        right=True,
    )
    df['rain_intensity'] = rain_bucket.astype(int)

    df['wind_hazard'] = df['wind_speed_10m'].gt(wind_limit).astype(int)

    heat_bucket = pd.cut(
        df['temperature_2m'],
        bins=heat_edges,
        labels=list(range(len(heat_edges) - 1)),
        right=False,
    )
    df['heat_stress'] = heat_bucket.astype(int)

    return df

# --- Execution ---
# Fetch and process weather data for a construction site in London for July 2023.
weather_df = get_weather_data(latitude=51.5085, longitude=-0.1257,
                              start_date="2023-07-01", end_date="2023-07-31")
weather_df = feature_engineer_weather(weather_df)

print("‚úÖ Weather data fetched and engineered successfully.")
print("\n--- Sample of Weather DataFrame with Engineered Features ---")
print(weather_df.head())

# --- Visualization ---
# Plot a sample of the data to verify the feature engineering.
print("\n--- Visualizing a 5-day sample of engineered weather features ---")
# DataFrame.first() is deprecated in recent pandas; select the first five days with an
# explicit mask instead (rows strictly before first timestamp + 5 days, which matches
# what first('5D') returned).
sample_cutoff = weather_df.index[0] + pd.Timedelta(days=5)
sample_data = weather_df.loc[weather_df.index < sample_cutoff]
fig, axes = plt.subplots(3, 1, figsize=(15, 10), sharex=True)

# One entry per subplot: the raw measurement goes on the left axis and its engineered
# category on a twin right axis, drawn as a step function.
panels = [
    {
        'raw': 'rain', 'raw_color': 'blue', 'raw_label': 'Rain (mm/hr)',
        'raw_kwargs': {'style': '-'},
        'category': 'rain_intensity', 'category_color': 'red',
        'category_label': 'Rain Intensity (Category)',
        'left_ylabel': "Rain (mm/hr)", 'right_ylabel': "Rain Intensity Category",
        'title': "Rain vs. Engineered Rain Intensity",
    },
    {
        'raw': 'temperature_2m', 'raw_color': 'orange', 'raw_label': 'Temperature (¬∞C)',
        'raw_kwargs': {},
        'category': 'heat_stress', 'category_color': 'darkred',
        'category_label': 'Heat Stress (Category)',
        'left_ylabel': "Temperature (¬∞C)", 'right_ylabel': "Heat Stress Category",
        'title': "Temperature vs. Engineered Heat Stress",
    },
    {
        'raw': 'wind_speed_10m', 'raw_color': 'green', 'raw_label': 'Wind Speed (m/s)',
        'raw_kwargs': {},
        'category': 'wind_hazard', 'category_color': 'purple',
        'category_label': 'Wind Hazard (Flag)',
        'left_ylabel': "Wind Speed (m/s)", 'right_ylabel': "Wind Hazard Flag",
        'title': "Wind Speed vs. Engineered Wind Hazard",
    },
]

for left_axis, panel in zip(axes, panels):
    right_axis = left_axis.twinx()
    sample_data[panel['raw']].plot(ax=left_axis, color=panel['raw_color'],
                                   label=panel['raw_label'], **panel['raw_kwargs'])
    sample_data[panel['category']].plot(ax=right_axis, color=panel['category_color'],
                                        label=panel['category_label'],
                                        drawstyle='steps-post')
    left_axis.set_ylabel(panel['left_ylabel'])
    right_axis.set_ylabel(panel['right_ylabel'])
    left_axis.legend(loc='upper left')
    right_axis.legend(loc='upper right')
    left_axis.set_title(panel['title'])

plt.xlabel("Date")
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# --- Fleet & Simulation Configuration with Fuel ---
# Per-vehicle-type parameters read by the travel "physics engine":
#   base_speed_multiplier - divides an edge's base travel time (lower = slower vehicle)
#   weather_sensitivity   - scales the rain slowdown penalty
#   base_fuel_rate        - litres of fuel per second of travel before slope/mud factors
#                           (0.002 L/s = 7.2 L/h, 0.001 L/s = 3.6 L/h)
VEHICLE_PROPERTIES = dict(
    truck=dict(
        base_speed_multiplier=1.0,
        weather_sensitivity=1.0,
        base_fuel_rate=0.002,
    ),
    forklift=dict(
        base_speed_multiplier=0.6,
        weather_sensitivity=1.5,
        base_fuel_rate=0.001,
    ),
)
# Simulated scenarios are drawn from the week starting 2023-07-01 07:00.
SIMULATION_START_TIME = datetime(2023, 7, 1, 7, 0, 0)
SIMULATION_DURATION_HOURS = 7 * 24  # 1 week of data

# --- Helper Functions (Unchanged) ---
def get_weather_at_time(sim_time, weather_df):
    """
    Return the row of ``weather_df`` whose index label is nearest to ``sim_time``.

    Args:
        sim_time: Timestamp to look up.
        weather_df: DataFrame whose index supports nearest-neighbour lookup
            (e.g. a sorted DatetimeIndex).

    Returns:
        pd.Series: the matching row.
    """
    # get_indexer returns one position per requested label; exactly one is requested.
    (row_position,) = weather_df.index.get_indexer([sim_time], method='nearest')
    return weather_df.iloc[row_position]

# --- THIS FUNCTION IS UPGRADED TO RETURN FUEL CONSUMPTION ---
def calculate_ground_truth_travel(edge_data, num_vehicles_on_edge, weather_features, vehicle_type, current_time, soil_condition):
    """
    Simulation "physics engine": ground-truth cost of one vehicle crossing one edge.

    Args:
        edge_data: Edge attributes (``base_travel_time``, ``paved``, ``slope``) plus
            ``start_node`` and ``end_node``, which are used for the closure lookup.
        num_vehicles_on_edge: Number of other vehicles on the edge (congestion).
        weather_features: Mapping with ``rain_intensity`` and ``heat_stress`` levels.
        vehicle_type: Key into ``VEHICLE_PROPERTIES``.
        current_time: Time of the traversal, compared against ``SCHEDULED_CLOSURES``.
        soil_condition: Soil level 0-2; only affects unpaved edges.

    Returns:
        ``(mean_time, variance, fuel_consumed)``. When the edge is inside a scheduled
        closure window the result is ``(inf, 0.0, 0.0)``.
    """
    # Penalty tables, indexed by the corresponding integer level.
    rain_slowdown = (0, 0.1, 0.4, 0.7)
    rain_spread = (0, 0.1, 0.3, 0.2)
    heat_slowdown = (0, 0.05, 0.15)
    heat_spread = (0, 0.05, 0.1)
    mud_slowdown = (1.0, 1.5, 3.0)
    mud_spread = (0, 0.2, 0.4)
    mud_fuel = (1.0, 1.8, 3.5)  # up to 3.5x fuel burn in heavy mud

    # 1. Absolute blockers: a scheduled closure makes the edge impassable.
    closure = SCHEDULED_CLOSURES.get((edge_data['start_node'], edge_data['end_node']))
    if closure is not None:
        closed_from, closed_until = closure
        if closed_from <= current_time <= closed_until:
            return float('inf'), 0.0, 0.0  # blocked: no time or fuel is spent

    # 2. Travel time and its variance.
    props = VEHICLE_PROPERTIES[vehicle_type]
    base_time = edge_data['base_travel_time'] / props['base_speed_multiplier']
    congestion = 1.0 + 0.2 * (num_vehicles_on_edge ** 2)

    rain_level = int(weather_features['rain_intensity'])
    heat_level = int(weather_features['heat_stress'])
    soil_level = int(soil_condition)

    slowdown = 1.0
    spread = 0.05
    if rain_level > 0:
        slowdown += rain_slowdown[rain_level] * props['weather_sensitivity']
        spread += rain_spread[rain_level]
    if heat_level > 0:
        slowdown += heat_slowdown[heat_level]
        spread += heat_spread[heat_level]

    # Mud only matters on unpaved edges; it multiplies (rather than adds to) the slowdown.
    in_mud = (not edge_data['paved']) and soil_level > 0
    if in_mud:
        slowdown *= mud_slowdown[soil_level]
        spread += mud_spread[soil_level]

    # Never report a time below the unobstructed base time.
    expected_time = max(base_time * congestion * slowdown, base_time)
    variance = (base_time * spread) ** 2

    # 3. Fuel: uphill slope adds 15% per unit of slope; mud multiplies the burn rate.
    burn_multiplier = 1.0
    if edge_data['slope'] > 0:
        burn_multiplier += edge_data['slope'] * 0.15
    if in_mud:
        burn_multiplier *= mud_fuel[soil_level]

    fuel_used = props['base_fuel_rate'] * expected_time * burn_multiplier

    return expected_time, variance, fuel_used

# --- Stochastic Data Generation (logs time, variance and fuel per traversal) ---
print("üöÄ Starting FAST synthetic data generation (with Fuel Cost)...")
edge_traversal_log = []
num_samples = 150
# Report progress roughly five times per run. A fixed interval of 3000 never fired
# for small sample counts such as the 150 used here.
progress_interval = max(1, num_samples // 5)
# The graph does not change while sampling, so materialize the edge list once.
all_edges = list(site_graph.edges(data=True))
# Soil condition is derived from the worst rain seen in this look-back window.
soil_lookback = timedelta(hours=3)

for i in range(num_samples):
    if (i + 1) % progress_interval == 0:
        print(f"   ...generated {i+1}/{num_samples} samples...")

    # Stochastically create a random scenario: edge, time, vehicle and congestion.
    u, v, edge_attrs = random.choice(all_edges)
    sim_time = SIMULATION_START_TIME + timedelta(hours=random.uniform(0, SIMULATION_DURATION_HOURS))
    weather_now = get_weather_at_time(sim_time, weather_df)
    vehicle_type = random.choice(['truck', 'forklift'])
    num_on_edge = random.randint(0, 2)

    # Soil only degrades on unpaved edges: 2 after moderate/heavy rain, 1 after light rain.
    soil = 0
    if not edge_attrs['paved']:
        recent_weather = weather_df.loc[sim_time - soil_lookback:sim_time]
        if not recent_weather.empty:
            max_recent_rain = recent_weather['rain_intensity'].max()
            if max_recent_rain >= 2:
                soil = 2
            elif max_recent_rain == 1:
                soil = 1

    # The physics engine needs the edge endpoints for its scheduled-closure check.
    edge_attrs_with_nodes = {**edge_attrs, 'start_node': u, 'end_node': v}
    mean_time, variance, fuel = calculate_ground_truth_travel(
        edge_attrs_with_nodes, num_on_edge, weather_now, vehicle_type, sim_time, soil
    )

    # Blocked traversals (infinite time) are not usable training samples.
    if mean_time != float('inf'):
        log_entry = {
            'edge_start_node': u, 'edge_end_node': v,
            'mean_travel_time': mean_time,
            'variance': variance,
            'fuel_consumed': fuel,
            'time_of_day': sim_time.hour + sim_time.minute / 60.0,
            'num_vehicles_on_edge': num_on_edge,
            **weather_now.to_dict(),
            'soil_condition': soil,
            'vehicle_type_truck': 1 if vehicle_type == 'truck' else 0
        }
        edge_traversal_log.append(log_entry)

dataset_df = pd.DataFrame(edge_traversal_log)
print(f"\n‚úÖ Synthetic data generation complete. Generated {len(dataset_df)} records.")
print("\n--- Sample of Generated Dataset with Fuel ---")
# Show the fuel figures next to the factors that influence them.
print(dataset_df[['edge_start_node', 'mean_travel_time', 'fuel_consumed', 'soil_condition']].head())

In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GATConv
import torch.nn.functional as F
import pandas as pd

### MODIFICATION ###
# Import the PyTorch Geometric version of DataParallel
from torch_geometric.nn import DataParallel
### END MODIFICATION ###

# --- GNN Model Architecture (Graph Attention Network) ---
# The model definition itself does not need to change.
class EdgePredictorGAT(torch.nn.Module):
    """
    Graph Attention Network that predicts a mean and a variance for queried edges.

    Two GATConv layers produce node embeddings. For every queried edge the
    embeddings of its two endpoints are concatenated with that edge's attribute
    row and passed through an MLP with two outputs: the mean and the log-variance.

    Args:
        node_channels: Size of each input node feature vector.
        edge_channels: Size of each edge attribute vector.
        hidden_channels: Width of the GAT embeddings and of the MLP hidden layer.
    """
    def __init__(self, node_channels, edge_channels, hidden_channels):
        super().__init__()
        # Two concatenated attention heads -> 2 * hidden_channels features per node.
        self.gat1 = GATConv(node_channels, hidden_channels, heads=2, concat=True)
        self.gat2 = GATConv(hidden_channels * 2, hidden_channels, heads=1, concat=False)
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(2 * hidden_channels + edge_channels, hidden_channels),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, 2)
        )

    @staticmethod
    def _lookup_edge_rows(edge_index, edge_label_index, num_nodes):
        """
        Return, for each queried edge, its column position in ``edge_index``.

        Each (source, target) pair is encoded as the single integer
        ``source * num_nodes + target`` so that all queries can be matched with one
        sort + binary search instead of a Python loop over edges. Edges in
        ``edge_index`` are assumed to be unique.

        Raises:
            ValueError: if a queried edge does not exist in ``edge_index``.
        """
        edge_keys = edge_index[0] * num_nodes + edge_index[1]
        label_keys = edge_label_index[0] * num_nodes + edge_label_index[1]
        sorted_keys, order = torch.sort(edge_keys)
        if sorted_keys.numel() == 0:
            raise ValueError("edge_index is empty; cannot look up edge attributes")
        # Clamp so that keys larger than every known edge still index safely; the
        # equality check below then reports them as missing.
        slots = torch.searchsorted(sorted_keys, label_keys).clamp(max=sorted_keys.numel() - 1)
        if not torch.equal(sorted_keys[slots], label_keys):
            raise ValueError("edge_label_index contains an edge that is not present in edge_index")
        return order[slots]

    def forward(self, x, edge_index, edge_attr, edge_label_index):
        """
        Args:
            x: Node feature matrix.
            edge_index: ``[2, num_edges]`` tensor of (source, target) node indices.
            edge_attr: One attribute row per column of ``edge_index``.
            edge_label_index: ``[2, num_queries]`` tensor of the edges to predict.

        Returns:
            ``(mean, var)``: one value each per queried edge; ``var`` is the
            exponential of the predicted log-variance and is therefore positive.
        """
        x = F.elu(self.gat1(x, edge_index))
        x = self.gat2(x, edge_index)
        start_node_emb = x[edge_label_index[0]]
        end_node_emb = x[edge_label_index[1]]

        # Attribute row of each queried edge, matched on its (source, target) pair.
        edge_rows = self._lookup_edge_rows(edge_index, edge_label_index, x.size(0))
        edge_features = edge_attr[edge_rows]

        combined = torch.cat([start_node_emb, end_node_emb, edge_features], dim=1)
        output = self.mlp(combined)
        mean, log_var = output[:, 0], output[:, 1]
        var = torch.exp(log_var)
        return mean, var

# --- Versatile GNN Training Function (Modified for Multi-GPU) ---
def train_gnn_model(gnn_type='GAT', dataset_df=dataset_df, epochs=50, weather_aware=True):
    """
    Train an EdgePredictorGAT on the synthetic edge-traversal records.

    Every record becomes one PyG ``Data`` object: the full site graph with one-hot
    node features, edge attributes made of the static edge features plus the
    record's dynamic features (repeated on every edge), and a single labelled edge.
    The first 80% of the records are used for training.

    Training runs on one device (``cuda:0`` when available, otherwise CPU). The model
    is deliberately not wrapped in ``torch_geometric.nn.DataParallel``: that wrapper
    expects a list of ``Data`` objects (from a ``DataListLoader``) and calls the module
    with a single ``Batch``, which does not match the
    ``model(x, edge_index, edge_attr, edge_label_index)`` call used here and would
    fail on multi-GPU hosts.

    Args:
        gnn_type: Label used in log messages only.
        dataset_df: DataFrame of traversal records.
        epochs: Number of passes over the training split.
        weather_aware: When True the weather and soil columns are included in the
            dynamic edge features; when False only time of day, congestion and
            vehicle type are used.

    Returns:
        ``(model, node_mapping, static_edge_features, edge_index)`` with the model
        moved to CPU.

    Raises:
        ValueError: if ``dataset_df`` contains no records.
    """
    print(f"\n--- Training GNN Model (Variant: {gnn_type}, Weather-Aware: {weather_aware}) ---")

    # 1. Graph structure and static features (on CPU)
    node_mapping = {name: i for i, name in enumerate(site_graph.nodes())}
    static_edge_features = torch.tensor(
        [[d['length'], d['slope'], d['paved']] for _, _, d in site_graph.edges(data=True)],
        dtype=torch.float)
    edge_index = torch.tensor(
        list(zip(*[(node_mapping[u], node_mapping[v]) for u, v in site_graph.edges()])),
        dtype=torch.long)

    # 2. One PyG Data object per record (on CPU)
    if weather_aware:
        dynamic_columns = ['time_of_day', 'num_vehicles_on_edge', 'rain_intensity',
                           'heat_stress', 'wind_hazard', 'soil_condition', 'vehicle_type_truck']
    else:
        dynamic_columns = ['time_of_day', 'num_vehicles_on_edge', 'vehicle_type_truck']

    # These do not depend on the record, so build them once.
    node_features = torch.eye(len(node_mapping))
    num_edges = site_graph.number_of_edges()

    pyg_data_list = []
    for record in dataset_df.to_dict('records'):
        dynamic_feats = [record[column] for column in dynamic_columns]
        dynamic_edge_features = torch.tensor(dynamic_feats, dtype=torch.float).repeat(num_edges, 1)
        edge_attr = torch.cat([static_edge_features, dynamic_edge_features], dim=1)
        y = torch.tensor([[record['mean_travel_time'], record['variance']]], dtype=torch.float)
        u, v = node_mapping[record['edge_start_node']], node_mapping[record['edge_end_node']]
        data = Data(x=node_features, edge_index=edge_index, edge_attr=edge_attr,
                    y=y, edge_label_index=torch.tensor([[u], [v]], dtype=torch.long))
        pyg_data_list.append(data)

    if not pyg_data_list:
        raise ValueError("dataset_df contains no records; nothing to train on")

    # Keep at least one training sample so the per-epoch average is well defined.
    train_size = max(1, int(0.8 * len(pyg_data_list)))
    batch_size = 32
    train_loader = DataLoader(pyg_data_list[:train_size], batch_size=batch_size, shuffle=True)

    # 3. Model, optimizer and loss
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"Primary device: {device}")

    edge_channels = pyg_data_list[0].edge_attr.shape[1]
    model = EdgePredictorGAT(len(node_mapping), edge_channels, 64).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    def nll_loss(y_true_mean, y_true_var, y_pred_mean, y_pred_var):
        # Gaussian negative log-likelihood of the observed mean under the predicted
        # distribution. The recorded variance target (y_true_var) is not used.
        epsilon = 1e-6
        loss = torch.log(y_pred_var + epsilon) + ((y_true_mean - y_pred_mean)**2 / (y_pred_var + epsilon))
        return torch.mean(loss)

    # 4. Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            pred_mean, pred_var = model(data.x, data.edge_index, data.edge_attr, data.edge_label_index)
            loss = nll_loss(data.y[:, 0], data.y[:, 1], pred_mean.squeeze(), pred_var.squeeze())
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample average.
            total_loss += loss.item() * data.num_graphs

        if epoch % 20 == 0:
            print(f'Epoch: {epoch:02d}, NLL Loss: {total_loss / len(train_loader.dataset):.4f}')

    print(f"‚úÖ GNN Training Complete for {gnn_type} (Weather-Aware: {weather_aware}).")

    # Return the model on CPU for easier use in the optimizer parts of the code.
    return model.to(torch.device('cpu')), node_mapping, static_edge_features, edge_index

# --- Execution: Train the primary and baseline GNN models ---
# The weather-aware model is trained first; its node mapping, static edge features
# and edge index are kept as module-level names. The weather-agnostic baseline is
# trained with the same settings and only its model is kept.
gnn_model_aware, node_map, static_feats, edge_idx = train_gnn_model(
    gnn_type='GAT', epochs=51, weather_aware=True)
gnn_model_agnostic, _, _, _ = train_gnn_model(
    gnn_type='GAT', epochs=51, weather_aware=False)

In [None]:
import torch
from deap import base, creator, tools, algorithms
import random
import copy
from math import sqrt
import networkx as nx
from datetime import timedelta

# --- Global Configuration for this Optimizer ---
# Weight applied to the standard deviation of a path's predicted travel time when it
# is added to the mean travel time in get_path_cost_risk_aware:
#     cost = total_mean_time + RISK_AVERSION_LAMBDA * sqrt(total_variance)
# Larger values make the optimizer penalize uncertain routes more heavily; 0 ignores
# uncertainty altogether.
RISK_AVERSION_LAMBDA = 0.5

# --- 1. GNN Prediction Bridge ---
# This is the same bridge function used by both single- and multi-objective optimizers.
def predict_travel_time_with_gnn(u_node, v_node, arrival_time, weather_df, gnn_model,
                                 vehicle_type, soil_condition, weather_aware=True):
    """
    Query the trained GNN for the travel time of a single directed edge.

    This is the bridge between the optimizers and the ML model. It rebuilds the same
    feature layout that was used for training and relies on the module-level
    ``site_graph``, ``node_map``, ``static_feats`` and ``edge_idx``.

    Args:
        u_node: Name of the edge's start node.
        v_node: Name of the edge's end node.
        arrival_time: Time at which the vehicle enters the edge.
        weather_df: Engineered weather DataFrame used for the nearest-time lookup.
        gnn_model: Trained model; it is switched to eval mode.
        vehicle_type: ``'truck'`` sets the truck flag to 1, anything else to 0.
        soil_condition: Soil level feature (only used when ``weather_aware``).
        weather_aware: Must match the setting the model was trained with, because
            it decides how many dynamic features are fed to the model.

    Returns:
        ``(predicted_mean, predicted_variance)`` as Python floats.
    """
    gnn_model.eval()
    with torch.no_grad():
        weather = get_weather_at_time(arrival_time, weather_df)

        # Fractional hour, matching the 'time_of_day' training feature
        # (hour + minute / 60); using the bare integer hour here would feed the
        # model a differently-distributed input than it was trained on.
        time_of_day = arrival_time.hour + arrival_time.minute / 60.0
        is_truck = 1 if vehicle_type == 'truck' else 0
        # Congestion is unknown at planning time, so the edge is assumed to be empty.
        vehicles_on_edge = 0

        if weather_aware:
            dynamic_feats = [time_of_day, vehicles_on_edge, weather['rain_intensity'],
                             weather['heat_stress'], weather['wind_hazard'],
                             soil_condition, is_truck]
        else:
            dynamic_feats = [time_of_day, vehicles_on_edge, is_truck]

        # The same dynamic feature row is attached to every edge of the graph.
        dynamic_features = torch.tensor(dynamic_feats, dtype=torch.float).repeat(site_graph.number_of_edges(), 1)
        edge_attr = torch.cat([static_feats, dynamic_features], dim=1)
        u_idx, v_idx = node_map[u_node], node_map[v_node]

        data = Data(x=torch.eye(len(node_map)), edge_index=edge_idx, edge_attr=edge_attr,
                    edge_label_index=torch.tensor([[u_idx], [v_idx]], dtype=torch.long))

        pred_mean, pred_var = gnn_model(data.x, data.edge_index, data.edge_attr, data.edge_label_index)
        return pred_mean.item(), pred_var.item()

# --- 2. Fitness Evaluation Function for Single-Objective GA ---
def get_path_cost_risk_aware(path, start_time, weather_df, gnn_model, vehicle_type, weather_aware):
    """
    Risk-adjusted cost of driving ``path`` (a sequence of node names) from ``start_time``.

    Each consecutive node pair is priced with the GNN at the moment the vehicle is
    predicted to reach it; the clock is advanced by the predicted mean time of every
    leg.

    Returns:
        ``(risk_adjusted_cost, arrival_time)`` where the cost is the summed mean time
        plus ``RISK_AVERSION_LAMBDA`` times the square root of the summed variance.
    """
    mean_sum = 0
    variance_sum = 0
    clock = start_time

    for tail, head in zip(path, path[1:]):
        road = site_graph.edges[tail, head]
        conditions = get_weather_at_time(clock, weather_df)

        # Soil is only degraded (level 2) on unpaved roads under more than light rain.
        if road['paved']:
            soil_state = 0
        elif conditions['rain_intensity'] > 1:
            soil_state = 2
        else:
            soil_state = 0

        leg_mean, leg_variance = predict_travel_time_with_gnn(
            tail, head, clock, weather_df, gnn_model, vehicle_type, soil_state, weather_aware)
        mean_sum += leg_mean
        variance_sum += leg_variance
        clock += timedelta(seconds=leg_mean)

    # Mean time plus a penalty proportional to the path's standard deviation.
    return mean_sum + RISK_AVERSION_LAMBDA * sqrt(variance_sum), clock

def evaluate_solution_risk_aware(individual, jobs, states, weather_forecast, gnn, graph, weather_aware):
    """
    Single-objective GA fitness: total risk-adjusted cost of a complete plan.

    ``individual`` is a permutation of job ids. Jobs are dealt to the vehicles in
    round-robin order; each vehicle then drives to every job's source and on to its
    destination along the shortest path by edge ``length``.

    Returns:
        A one-element tuple ``(total_cost,)`` as DEAP requires; lower is better.
    """
    fleet = copy.deepcopy(states)
    jobs_by_id = {job['id']: job for job in jobs}
    fleet_size = len(fleet)

    # Deal the jobs out: the k-th gene goes to vehicle k mod fleet size.
    assignments = [[] for _ in range(fleet_size)]
    for slot, job_id in enumerate(individual):
        assignments[slot % fleet_size].append(jobs_by_id[job_id])

    plan_cost = 0
    for vehicle, assigned_jobs in zip(fleet, assignments):
        position = vehicle['location']
        clock = vehicle['available_time']

        for job in assigned_jobs:
            # Leg 1: from wherever the vehicle is to the pick-up point.
            approach = nx.shortest_path(graph, source=position, target=job['source'], weight='length')
            approach_cost, pickup_time = get_path_cost_risk_aware(
                approach, clock, weather_forecast, gnn, vehicle['type'], weather_aware)

            # Leg 2: from the pick-up point to the drop-off point.
            haul = nx.shortest_path(graph, source=job['source'], target=job['destination'], weight='length')
            haul_cost, dropoff_time = get_path_cost_risk_aware(
                haul, pickup_time, weather_forecast, gnn, vehicle['type'], weather_aware)

            plan_cost += approach_cost + haul_cost
            position = job['destination']
            clock = dropoff_time

    return (plan_cost,)

# --- 3. Genetic Algorithm Optimizer ---
# Drop any classes left over from an earlier run of this cell so that they are
# defined fresh below.
for _stale_class in ("FitnessMin", "Individual"):
    if hasattr(creator, _stale_class):
        delattr(creator, _stale_class)

# A single objective with weight -1.0, i.e. the fitness value is minimized.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

def run_ga_optimizer(jobs, states, weather_forecast, gnn_model, graph, weather_aware=True):
    """
    Search for a low-cost job ordering with a single-objective genetic algorithm.

    Individuals are permutations of the job ids, scored by
    ``evaluate_solution_risk_aware``. The GA uses ordered crossover, index-shuffle
    mutation and tournament selection with a population of 80 over 60 generations.

    Returns:
        ``(plan, best_solution_ids)``: ``plan`` holds one list of job ids per vehicle
        (the best ordering dealt out round-robin) and ``best_solution_ids`` is that
        best ordering as a flat list.
    """
    print(f"--- Running Optimizer: Single-Objective GA (GNN: {gnn_model is not None}, Weather-Aware: {weather_aware}) ---")

    all_job_ids = [job['id'] for job in jobs]

    toolbox = base.Toolbox()
    # Genome construction: a random permutation of every job id.
    toolbox.register("indices", random.sample, all_job_ids, len(all_job_ids))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.indices)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    # Fitness function with the scenario bound in.
    toolbox.register("evaluate", evaluate_solution_risk_aware, jobs=jobs, states=states,
                     weather_forecast=weather_forecast, gnn=gnn_model, graph=graph, weather_aware=weather_aware)
    # Permutation-preserving variation operators and selection.
    toolbox.register("mate", tools.cxOrdered)
    toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)

    population = toolbox.population(n=80)
    hall_of_fame = tools.HallOfFame(1)  # keeps the single best individual ever seen
    algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=60,
                        halloffame=hall_of_fame, verbose=False)

    # Turn the winning permutation into per-vehicle job lists (round-robin).
    best_solution_ids = list(hall_of_fame[0])
    fleet_size = len(states)
    plan = [[] for _ in range(fleet_size)]
    for slot, job_id in enumerate(best_solution_ids):
        plan[slot % fleet_size].append(job_id)

    print("‚úÖ GA optimization complete.")
    return plan, best_solution_ids

# --- 4. Static Baseline Optimizer (Unchanged) ---
def run_static_optimizer(jobs, states, graph):
    """
    Baseline planner: hand the jobs out round-robin in ascending job-ID order.

    No weather, model or graph information is consulted; ``graph`` is accepted
    but not used.

    Returns:
        ``(plan, job_ids)``: ``job_ids`` is the sorted list of job IDs and
        ``plan[k]`` holds the IDs assigned to the k-th entry of ``states``.
    """
    print("--- Running Optimizer: Static Baseline (Round-Robin) ---")

    ordered_ids = sorted(entry['id'] for entry in jobs)
    fleet_size = len(states)
    plan = [[] for _ in range(fleet_size)]
    for position, jid in enumerate(ordered_ids):
        slot = position % fleet_size
        plan[slot].append(jid)

    print("‚úÖ Static optimization complete.")
    return plan, ordered_ids

In [None]:
from deap import base, creator, tools, algorithms
import networkx as nx
from datetime import timedelta
import copy
from math import sqrt

# --- 1. SETUP FOR MULTI-OBJECTIVE OPTIMIZATION (DEAP) ---
# Drop definitions left over from an earlier run of this cell so that the two
# classes below are always created from scratch.
if hasattr(creator, "IndividualMulti"):
    delattr(creator, "IndividualMulti")
if hasattr(creator, "FitnessMulti"):
    delattr(creator, "FitnessMulti")

# Three objectives, in the order (Time, Cost, Risk). Every weight is -1.0 so
# that all three are minimised.
creator.create("FitnessMulti", base.Fitness, weights=(-1.0,) * 3)
# Individuals are plain lists that carry a FitnessMulti in their `fitness` attribute.
creator.create("IndividualMulti", list, fitness=creator.FitnessMulti)

# --- 2. MULTI-OBJECTIVE FITNESS EVALUATION FUNCTIONS ---
# --- CORRECTED MULTI-OBJECTIVE PATH COST FUNCTION ---

def get_path_cost_multi_objective(path, start_time, weather_df, gnn_model, vehicle_type, graph):
    """
    Accumulate predicted time, variance and fuel over every edge of a node path.

    Each consecutive node pair (u, v) of ``path`` is scored at the clock time at
    which the vehicle is expected to reach u; the clock then moves forward by the
    predicted mean travel time of that edge. A path with fewer than two nodes
    has no edges and yields zero totals with the clock unchanged.

    Returns:
        ``(total_time, total_variance, total_fuel, end_time)``.
    """
    clock = start_time
    time_sum, variance_sum, fuel_sum = 0, 0, 0

    for u, v in zip(path, path[1:]):
        # The physics function is given the edge endpoints alongside the edge
        # attributes, so they are added to a copy of the attribute dict.
        edge_attrs = dict(graph.edges[u, v])
        edge_attrs['start_node'] = u
        edge_attrs['end_node'] = v

        conditions = get_weather_at_time(clock, weather_df)
        # Unpaved edges get soil value 2 once rain intensity exceeds 1, else 0.
        if not edge_attrs['paved'] and conditions['rain_intensity'] > 1:
            soil = 2
        else:
            soil = 0

        # Mean travel time and its variance come from the GNN predictor.
        pred_mean, pred_var = predict_travel_time_with_gnn(
            u, v, clock, weather_df, gnn_model, vehicle_type, soil, weather_aware=True)

        # Fuel is the third value returned by the physics function. Its second
        # positional argument is fixed to 1 here.
        _phys_time, _phys_var, fuel = calculate_ground_truth_travel(
            edge_attrs, 1, conditions, vehicle_type, clock, soil)

        time_sum += pred_mean
        variance_sum += pred_var
        fuel_sum += fuel
        clock += timedelta(seconds=pred_mean)  # arrival time at v = departure time of the next leg

    return time_sum, variance_sum, fuel_sum, clock
    
def evaluate_multi_objective(individual, jobs, states, weather, gnn, graph):
    """
    Fitness function for NSGA-II: score one job ordering on three objectives.

    Element ``i`` of ``individual`` (a job ID) is assigned to vehicle
    ``i % len(states)``. Each vehicle then serves its jobs in order, travelling
    along the shortest path (by edge ``'length'``) to the job's source and then
    to its destination.

    Args:
        individual: sequence of job IDs; every ID must be present in ``jobs``.
        jobs: list of job dicts with 'id', 'source' and 'destination'.
        states: list of vehicle dicts with 'location', 'type' and
            'available_time'. Only read, never modified.
        weather, gnn, graph: forwarded to ``get_path_cost_multi_objective``;
            ``graph`` is also used for the shortest-path queries.

    Returns:
        ``(makespan_hours, total_fuel, total_risk)`` where the makespan is the
        latest vehicle finish time measured from ``states[0]['available_time']``,
        and fuel / risk are the sums of the per-leg fuel and variance figures.
    """
    job_map = {job['id']: job for job in jobs}

    # `states` is only read below, so no defensive copy is made: this function
    # is executed once per individual per generation.
    vehicle_routes = [[] for _ in states]
    for i, job_id in enumerate(individual):
        vehicle_routes[i % len(states)].append(job_map[job_id])

    vehicle_finish_times = []
    total_fuel_cost = 0
    total_risk_score = 0  # Sum of variances, used as a proxy for plan uncertainty

    for vehicle, route in zip(states, vehicle_routes):
        current_loc = vehicle['location']
        current_time = vehicle['available_time']

        for job in route:
            # Leg 1: from the vehicle's current location to the job source.
            path_s = nx.shortest_path(graph, source=current_loc, target=job['source'], weight='length')
            _, var_s, fuel_s, time_at_source = get_path_cost_multi_objective(
                path_s, current_time, weather, gnn, vehicle['type'], graph)

            # Leg 2: from the job source to the job destination.
            path_d = nx.shortest_path(graph, source=job['source'], target=job['destination'], weight='length')
            _, var_d, fuel_d, time_at_dest = get_path_cost_multi_objective(
                path_d, time_at_source, weather, gnn, vehicle['type'], graph)

            total_fuel_cost += fuel_s + fuel_d
            total_risk_score += var_s + var_d
            current_loc = job['destination']
            current_time = time_at_dest

        # A vehicle without jobs "finishes" at its own available time.
        vehicle_finish_times.append(current_time)

    # Objective 1: the moment the last vehicle completes its last job.
    final_finish_time = max(vehicle_finish_times)
    makespan_seconds = (final_finish_time - states[0]['available_time']).total_seconds()

    return makespan_seconds / 3600.0, total_fuel_cost, total_risk_score

# --- 3. NSGA-II OPTIMIZER FUNCTION ---
def run_nsga2_optimizer(jobs, states, weather, gnn, graph):
    """
    Run the NSGA-II multi-objective search and return its Pareto front.

    Individuals are evolved as permutations of *positions* (0..n-1) into
    ``jobs``, because DEAP's ``cxOrdered`` uses gene values as list indices and
    therefore needs a permutation of ``range(n)``. A re-planning call passes a
    subset of jobs whose IDs are not such a permutation. Positions are decoded
    to job IDs for every fitness evaluation, and the members of the returned
    front are decoded as well, so callers still receive lists of job IDs.

    Args:
        jobs: list of job dicts; each must provide an ``'id'`` key.
        states, weather, gnn, graph: forwarded to ``evaluate_multi_objective``.

    Returns:
        A ``tools.ParetoFront`` whose members are lists of job IDs with their
        three-objective ``fitness.values`` set. The front is empty when ``jobs``
        is empty and holds the single trivial ordering when there is one job.
    """
    print("--- Running Optimizer: Multi-Objective NSGA-II ---")

    job_ids = [job['id'] for job in jobs]
    n_jobs = len(job_ids)
    hof = tools.ParetoFront()  # Stores the non-dominated solutions (the Pareto front)

    def decode(individual):
        """Translate a permutation of positions into the job IDs it stands for."""
        return [job_ids[pos] for pos in individual]

    if n_jobs < 2:
        # The permutation operators (cxOrdered, mutShuffleIndexes) raise on
        # individuals shorter than two genes, and there is nothing to optimise.
        if n_jobs == 1:
            lone = creator.IndividualMulti(job_ids)
            lone.fitness.values = evaluate_multi_objective(
                lone, jobs=jobs, states=states, weather=weather, gnn=gnn, graph=graph)
            hof.update([lone])
    else:
        def evaluate_positions(individual):
            return evaluate_multi_objective(
                decode(individual), jobs=jobs, states=states, weather=weather, gnn=gnn, graph=graph)

        toolbox = base.Toolbox()
        toolbox.register("indices", random.sample, range(n_jobs), n_jobs)
        toolbox.register("individual", tools.initIterate, creator.IndividualMulti, toolbox.indices)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        toolbox.register("evaluate", evaluate_positions)
        toolbox.register("mate", tools.cxOrdered)
        toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
        toolbox.register("select", tools.selNSGA2)  # NSGA-II selection operator

        pop = toolbox.population(n=100)

        # mu = population size, lambda_ = number of children produced each generation
        algorithms.eaMuPlusLambda(pop, toolbox, mu=100, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=70,
                                  stats=None, halloffame=hof, verbose=False)

        # The front holds its own copies of the individuals; rewrite them in
        # place from positions to job IDs (their fitness values are unaffected).
        for member in hof:
            member[:] = decode(member)

    print(f"‚úÖ NSGA-II optimization complete. Found {len(hof)} optimal trade-off solutions on the Pareto front.")
    return hof  # The entire Pareto front, for analysis and visualization

In [None]:
import copy
from datetime import datetime, timedelta
import networkx as nx

def run_adaptive_simulation(initial_plan_ids, jobs, initial_states, graph, ground_truth_weather,
                            optimizer_func, gnn_model, weather_aware, events):
    """
    Simulate the execution of a logistics plan in fixed 60-second ticks.

    Every tick the loop (1) fires at most one due event and re-optimises the
    still-queued jobs, (2) advances each vehicle's state machine
    (idle -> en_route -> moving_on_edge -> en_route ...), and (3) advances the
    clock. It stops once every job is completed or after 24 simulated hours.

    Args:
        initial_plan_ids: ordered job IDs; element ``i`` is queued on vehicle
            ``i % len(initial_states)``.
        jobs: list of job dicts with 'id', 'source' and 'destination'.
        initial_states: list of vehicle dicts with 'id', 'type', 'location' and
            'available_time'. Deep-copied, so the caller's list is not modified.
            ``initial_states[0]['available_time']`` is the simulation start.
        graph: site graph used for edge attributes and the travel physics.
        ground_truth_weather: weather table that is sliced by datetime and
            passed to ``get_weather_at_time``.
        optimizer_func: callable ``(jobs, states, forecast, gnn_model, graph,
            weather_aware) -> (plan, plan_ids)`` used to re-plan when an event
            fires, or None to execute the initial plan unchanged (events are
            then ignored entirely).
        gnn_model, weather_aware: forwarded to ``optimizer_func``.
        events: list of dicts with 'time' and 'description' and optionally
            'forecast_update' and/or 'graph_update'. Not modified.

    Returns:
        The makespan in hours: the later of the final clock value and any edge
        finish time still pending, measured from the simulation start.
    """
    re_planning_enabled = optimizer_func is not None
    model_name = "Adaptive" if re_planning_enabled else "Static Plan"
    print(f"\nüöÄ LAUNCHING SIMULATION ({model_name} Mode) üöÄ")

    # --- Simulation State Initialization ---
    vehicles = copy.deepcopy(initial_states)
    for v in vehicles:
        v.update({'status': 'idle', 'path': [], 'edge_finish_time': None, 'current_job': None})

    job_map = {j['id']: j for j in jobs}
    job_queues = {v['id']: [] for v in vehicles}
    for i, job_id in enumerate(initial_plan_ids):
        vehicle_id = vehicles[i % len(vehicles)]['id']
        job_queues[vehicle_id].append(job_map[job_id])

    completed_jobs = set()
    current_time = initial_states[0]['available_time']
    time_step = timedelta(seconds=60)
    active_edge_usage = {(u, v): 0 for u, v in graph.edges()}
    system_graph = graph
    system_forecast = ground_truth_weather[current_time : current_time + timedelta(hours=8)]

    # --- Event Tracking ---
    # Work on a sorted copy: the same event list is shared between several
    # simulation runs and must not be reordered behind the caller's back.
    pending_events = sorted(events, key=lambda e: e['time'])
    next_event_idx = 0

    # --- Main Simulation Loop ---
    while len(completed_jobs) < len(jobs):
        # 1. EVENT TRIGGER CHECK (at most one event per tick, adaptive mode only)
        if re_planning_enabled and next_event_idx < len(pending_events) and current_time >= pending_events[next_event_idx]['time']:
            event = pending_events[next_event_idx]
            print(f"\nüîî EVENT @ {current_time.strftime('%H:%M:%S')}: {event['description']} üîî")
            next_event_idx += 1
            if 'forecast_update' in event: system_forecast = event['forecast_update']
            if 'graph_update' in event: system_graph = event['graph_update']

            print("   ‚ñ∂ Pausing simulation for autonomous re-planning...")
            # Jobs already being executed stay with their vehicle; only queued jobs are re-planned.
            in_progress_jobs = {v['current_job']['id'] for v in vehicles if v.get('current_job')}
            queued_jobs = [job for queue in job_queues.values() for job in queue]
            jobs_to_replan = [job for job in queued_jobs if job['id'] not in in_progress_jobs]

            if jobs_to_replan:
                for v in vehicles: v['available_time'] = current_time
                _, new_plan_ids = optimizer_func(jobs_to_replan, vehicles, system_forecast, gnn_model, system_graph, weather_aware)

                # Adopt the new plan only if it schedules every queued job exactly
                # once. Clearing the queues for an empty or partial plan would
                # silently drop jobs and the simulation could never complete.
                expected_ids = {job['id'] for job in jobs_to_replan}
                if len(new_plan_ids) == len(expected_ids) and set(new_plan_ids) == expected_ids:
                    for q in job_queues.values(): q.clear()
                    for i, job_id in enumerate(new_plan_ids):
                        vehicle_id = vehicles[i % len(vehicles)]['id']
                        job_queues[vehicle_id].append(job_map[job_id])
                    print(f"   ‚úÖ New plan with {len(new_plan_ids)} jobs adopted. Resuming simulation.")
                else:
                    print("   ...Re-planner returned an incomplete plan; keeping the current queues. Resuming simulation.")
            else:
                print("   ...No queued jobs to replan. Resuming simulation.")
            print("-" * 30)

        # 2. VEHICLE STATE MACHINE LOGIC
        # 2a. Vehicles whose current edge traversal has finished arrive at the next node.
        for v in vehicles:
            if v['status'] == 'moving_on_edge' and current_time >= v['edge_finish_time']:
                prev_edge = (v['path'][0], v['path'][1])
                active_edge_usage[prev_edge] = max(0, active_edge_usage[prev_edge] - 1)
                v['location'] = v['path'][1]
                v['path'] = v['path'][1:]
                v['status'] = 'en_route'
                v['edge_finish_time'] = None

        # 2b. Vehicles at the end of their path complete jobs, pick up new ones,
        #     or start the source -> destination leg.
        for v in vehicles:
            if v['status'] in ['idle', 'en_route'] and len(v.get('path', [])) <= 1:
                v['path'] = []
                if v.get('current_job') and v['location'] == v['current_job']['destination']:
                    completed_jobs.add(v['current_job']['id'])
                    v['current_job'] = None
                if not v.get('current_job') and job_queues[v['id']]:
                    v['current_job'] = job_queues[v['id']].pop(0)
                    v['path'] = nx.shortest_path(system_graph, source=v['location'], target=v['current_job']['source'], weight='length')
                    v['status'] = 'en_route'
                elif v.get('current_job') and v['location'] == v['current_job']['source']:
                    v['path'] = nx.shortest_path(system_graph, source=v['location'], target=v['current_job']['destination'], weight='length')
                    v['status'] = 'en_route'
                else:
                    v['status'] = 'idle'

        # 2c. Vehicles with a remaining path enter their next edge.
        for v in vehicles:
            if v['status'] == 'en_route' and len(v['path']) > 1:
                u, next_node = v['path'][0], v['path'][1]
                edge = (u, next_node)
                # NOTE(review): edge existence and attributes are read from the
                # original `graph`, not from `system_graph`, so an edge removed by
                # a 'graph_update' event is still traversable for a vehicle that
                # routed over it earlier. Nothing in this function ever leaves the
                # 'blocked' status either -- confirm the intended behaviour.
                if not graph.has_edge(u, next_node):
                    v['status'] = 'blocked'
                    continue

                edge_data = {**graph.edges[edge], 'start_node': u, 'end_node': next_node}
                weather_now = get_weather_at_time(current_time, ground_truth_weather)
                soil = 0
                if not edge_data['paved'] and weather_now['rain_intensity'] > 1: soil=2

                # Only the first of the three returned values (travel time) is needed here.
                mean_time, _, _ = calculate_ground_truth_travel(edge_data, active_edge_usage[edge], weather_now, v['type'], current_time, soil)

                v['status'] = 'moving_on_edge'
                v['edge_finish_time'] = current_time + timedelta(seconds=mean_time)
                active_edge_usage[edge] += 1

        # 3. ADVANCE SIMULATION CLOCK
        current_time += time_step
        if current_time > initial_states[0]['available_time'] + timedelta(hours=24):
            print(f"üõë SIMULATION TIMEOUT after 24 hours. {len(completed_jobs)}/{len(jobs)} jobs completed.")
            break

    # Calculate final makespan
    last_finish_time = max([v['edge_finish_time'] for v in vehicles if v.get('edge_finish_time')] + [current_time])
    makespan = (last_finish_time - initial_states[0]['available_time']).total_seconds() / 3600.0
    print(f"üèÅ Simulation Finished. Makespan: {makespan:.2f} Hours")
    return makespan

In [None]:
import random
import matplotlib.pyplot as plt
import copy

# --- 1. EXPERIMENT SETUP FOR THE COMPLEX SCENARIO ---
# Fixed seed so that everything drawing from `random` is repeatable.
random.seed(42)

# --- Vehicle fleet: two trucks and one forklift, all available at simulation start ---
INITIAL_VEHICLE_STATES = [
    {'id': vehicle_id, 'type': vehicle_type, 'location': start_node,
     'available_time': SIMULATION_START_TIME}
    for vehicle_id, (vehicle_type, start_node) in enumerate([
        ('truck', 'Main_Gate'),
        ('truck', 'Contractor_Gate'),
        ('forklift', 'Laydown_A'),
    ])
]

# --- Jobs: each task is a sequence of (source, destination) legs ---
# The optimizers in this notebook handle single-leg jobs only, so multi-stop
# tasks are expressed as several legs and flattened below.
COMPLEX_JOBS = [
    {'task': task_name, 'sequence': legs}
    for task_name, legs in [
        ('Deliver Rebar', [('Laydown_A', 'Building_1_N')]),
        ('Clear Debris from Pit', [('Foundation_Pit', 'Waste_Disposal')]),
        ('Move Scaffolding', [('Laydown_B', 'Building_2_W')]),
        ('Prefab Wall Delivery', [('Fabrication_Yard', 'Building_1_S')]),
        ('Multi-stop Debris Clearance', [('Building_1_S', 'Waste_Disposal'), ('Building_2_E', 'Waste_Disposal')]),
        ('Small Tool Delivery', [('Laydown_A', 'Foundation_Pit')]),
        ('Deliver piping', [('Laydown_A', 'Building_2_W')]),
        ('Move excess soil', [('Foundation_Pit', 'Waste_Disposal')]),
        ('Transport fabricated component', [('Fabrication_Yard', 'Building_2_E')]),
    ]
]

# One planning job per leg, numbered consecutively from 0 in declaration order.
JOBS_FOR_PLANNING = []
for job in COMPLEX_JOBS:
    for u, v in job['sequence']:
        JOBS_FOR_PLANNING.append({'id': len(JOBS_FOR_PLANNING), 'source': u, 'destination': v})
job_counter = len(JOBS_FOR_PLANNING)

print(f"‚úÖ Complex scenario defined with {len(INITIAL_VEHICLE_STATES)} vehicles and {len(JOBS_FOR_PLANNING)} total tasks.")

# --- 2. DEFINE THE GROUND TRUTH SCENARIO WITH NEW DISRUPTIONS ---
# What the planners see at T+0: the unmodified weather for the first 8 hours.
INITIAL_FORECAST = weather_df[SIMULATION_START_TIME : SIMULATION_START_TIME + timedelta(hours=8)]

# The actual weather is a copy of the original table with a storm written over
# the window T+2h .. T+4h, so it is absent from INITIAL_FORECAST.
ground_truth_weather = weather_df.copy()
storm_start = SIMULATION_START_TIME + timedelta(hours=2)
storm_end = SIMULATION_START_TIME + timedelta(hours=4) # storm lasts two hours
ground_truth_weather.loc[storm_start:storm_end, 'rain'] = 8.0
ground_truth_weather.loc[storm_start:storm_end, 'rain_intensity'] = 3
print(f"Disruption 1: A surprise storm will occur between {storm_start.strftime('%H:%M')} and {storm_end.strftime('%H:%M')}.")

# The updated forecast is the first 8 hours of the ground truth, i.e. it
# contains the storm. It is delivered by the T+45min event defined below.
updated_forecast = ground_truth_weather[SIMULATION_START_TIME : SIMULATION_START_TIME + timedelta(hours=8)]

# Second disruption: a copy of the site graph with one edge removed. The copy
# is delivered by the T+3h event below; `site_graph` itself stays intact.
broken_down_graph = site_graph.copy()
blocked_edge = ('Building_1_S', 'Main_Gate')
# The announcement is printed only if the edge actually exists in the graph.
if broken_down_graph.has_edge(*blocked_edge):
    broken_down_graph.remove_edge(*blocked_edge)
    print(f"Disruption 2: Path '{blocked_edge[0]}'->'{blocked_edge[1]}' will be blocked at { (SIMULATION_START_TIME + timedelta(hours=3)).strftime('%H:%M') }.")

# Package disruptions into a list of events for the adaptive simulation.
# Each event has a trigger 'time' and a 'description', plus the payload key the
# simulation looks for ('forecast_update' or 'graph_update').
simulation_events = [
    {
        'time': SIMULATION_START_TIME + timedelta(minutes=45),
        'description': "Weather Forecast Update: Major Storm Predicted!",
        'forecast_update': updated_forecast
    },
    {
        'time': SIMULATION_START_TIME + timedelta(hours=3),
        'description': "Site Disruption: Main Gate Access Route Blocked!",
        'graph_update': broken_down_graph
    }
]

# --- 3. RUN ALL MODELS AGAINST THE COMPLEX SCENARIO ---
# Maps model name -> simulated makespan in hours. Every model is simulated on
# the same jobs, fleet, site graph and ground-truth weather; only the initial
# plan and the re-planning function differ. The statement order matters: the
# GA-based optimizers draw from the shared `random` state.
results = {}

# --- Model 1: Static Baseline ---
# Round-robin plan; optimizer_func=None means the simulation never re-plans.
static_plan, static_plan_ids = run_static_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph)
results['Static Baseline'] = run_adaptive_simulation(
    static_plan_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
    optimizer_func=None, gnn_model=None, weather_aware=False, events=simulation_events)

# --- Model 2: Weather-Agnostic GNN (Static Plan) ---
# GA plan computed once from the initial forecast with weather_aware=False; no re-planning.
agnostic_plan, agnostic_plan_ids = run_ga_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, INITIAL_FORECAST, gnn_model_agnostic, site_graph, weather_aware=False)
results['GA-GNN (Agnostic, Static)'] = run_adaptive_simulation(
    agnostic_plan_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
    optimizer_func=None, gnn_model=gnn_model_agnostic, weather_aware=False, events=simulation_events)

# --- Model 3: Weather-Aware GNN (Static Plan) ---
# Same as Model 2 but with the weather-aware model and weather_aware=True.
aware_plan_static, aware_plan_static_ids = run_ga_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, INITIAL_FORECAST, gnn_model_aware, site_graph, weather_aware=True)
results['GA-GNN (Aware, Static)'] = run_adaptive_simulation(
    aware_plan_static_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
    optimizer_func=None, gnn_model=gnn_model_aware, weather_aware=True, events=simulation_events)

# --- Model 4: Single-Objective Adaptive Digital Twin ---
# Starts from Model 3's plan (aware_plan_static_ids) and re-plans with the GA
# whenever an event fires.
results['GA-GNN (Aware, Adaptive)'] = run_adaptive_simulation(
    aware_plan_static_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
    optimizer_func=run_ga_optimizer, gnn_model=gnn_model_aware, weather_aware=True, events=simulation_events)

# --- NEW MODEL 5: MULTI-OBJECTIVE ADAPTIVE DIGITAL TWIN (THE CHAMPION) ---
print("\n--- Generating Initial Plan from Pareto Front ---")
# Use the "Fastest" plan of the Pareto front as the initial plan.
# A `pareto_front` variable left over from an earlier cell is reused as-is;
# the optimizer is run here only if no such variable exists yet.
if 'pareto_front' not in locals():
    pareto_front = run_nsga2_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, INITIAL_FORECAST, gnn_model_aware, site_graph)

if pareto_front:
    # One row per Pareto solution; column 0 is the makespan objective.
    solutions = np.array([list(ind.fitness.values) for ind in pareto_front])
    fastest_idx = np.argmin(solutions[:, 0])
    # This is the list of job IDs for the fastest plan.
    fastest_plan_ids = list(pareto_front[fastest_idx])
    print(f"Selected 'Fastest' plan from Pareto front as the initial plan.")

    # Define a wrapper function for re-planning that matches the expected signature
    def replan_with_nsga2_fastest(jobs, states, weather, gnn, graph, weather_aware):
        """Run NSGA-II and return the lowest-makespan solution as (plan, plan_ids).

        `weather_aware` is accepted only to match the signature the simulation
        calls; it is not used. Returns ([], []) when the Pareto front is empty.
        """
        # The adaptive simulation harness needs a function that returns a single best plan.
        # This wrapper runs NSGA-II and selects the "fastest" plan from the new Pareto front.
        hof = run_nsga2_optimizer(jobs, states, weather, gnn, graph)
        if not hof: return [], [] # Return empty if no solution found
        new_solutions = np.array([list(ind.fitness.values) for ind in hof])
        new_fastest_idx = np.argmin(new_solutions[:, 0])
        best_plan_ids = list(hof[new_fastest_idx])
        # The optimizer function is expected to return (plan, plan_ids);
        # the per-vehicle plan is the round-robin split of the ordering.
        dummy_plan = [[] for _ in states]
        for i, job_id in enumerate(best_plan_ids):
            dummy_plan[i % len(states)].append(job_id)
        return dummy_plan, best_plan_ids

    results['Multi-Obj (Fastest, Adaptive)'] = run_adaptive_simulation(
        fastest_plan_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
        optimizer_func=replan_with_nsga2_fastest, gnn_model=gnn_model_aware, weather_aware=True, events=simulation_events)
else:
    # No result is recorded for this model; the improvement metric computed
    # later falls back to its "could not calculate" message.
    print("WARNING: Pareto front is empty. Skipping Multi-Objective Champion model.")


# --- 4. ANALYZE AND PLOT FINAL RESULTS ---
# Rank the models from shortest to longest makespan.
sorted_results = sorted(results.items(), key=lambda entry: entry[1])

# Text table of the ranking.
print("\n\n" + "="*60)
print("--- üìä FINAL PERFORMANCE COMPARISON üìä ---")
print("="*60)
for name, makespan in sorted_results:
    print(f"{name:<35}: {makespan:.2f} Hours")
print("="*60)

# Bar colours are assigned by rank (position in the sorted list), not by model.
labels = [model for model, _ in sorted_results]
scores = [hours for _, hours in sorted_results]
colors = ['grey', 'lightcoral', 'skyblue', 'darkorange', 'forestgreen']

# Horizontal bar chart; the y-axis is inverted so the best model is on top.
plt.figure(figsize=(14, 8))
bars = plt.barh(labels, scores, color=colors[:len(labels)])
plt.xlabel("Total Project Makespan (Hours)")
plt.ylabel("Planning & Control Model")
plt.title("Figure: Final Model Performance in Disrupted Scenario", fontsize=16)
plt.gca().invert_yaxis()
plt.tight_layout()

# Write each bar's value just to the right of the bar.
for i, bar in enumerate(bars):
    width = bar.get_width()
    plt.text(
        width + 0.05, bar.get_y() + bar.get_height()/2, f'{width:.2f} hrs',
        ha='left', va='center', fontsize=11, weight='bold')

# Relative improvement of the adaptive multi-objective model over the
# weather-aware static GA plan. Skipped if either result is missing or the
# baseline makespan is zero.
try:
    baseline_score = results['GA-GNN (Aware, Static)']
    champion_score = results['Multi-Obj (Fastest, Adaptive)']
    improvement = ((baseline_score - champion_score) / baseline_score) * 100
    print(f"\nüèÜ The adaptive multi-objective model showed a {improvement:.2f}% improvement over the best static plan.")
except (KeyError, ZeroDivisionError):
    print("\nCould not calculate final improvement metric.")

# Leave 20% head-room on the right for the value labels.
plt.xlim(right=max(scores) * 1.20)
plt.show()

In [None]:
import random
import matplotlib.pyplot as plt
import copy
import torch
import numpy as np

# --- 0. PRE-REQUISITE: ENSURE MODELS AND TENSORS ARE ON GPU ---
# This part is crucial. It should be run after GNN training (Chunk 5) and before this chunk.
# It prepares all the necessary components for fast GPU-based optimization.

# Use CUDA when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"--- Preparing for CUDA-based Experiment on device: {device} ---")

# Move GNN models to the specified device
gnn_model_aware.to(device)
gnn_model_agnostic.to(device)

# Copies of the static graph tensors on the selected device.
# NOTE(review): nothing else in this cell reads these three variables;
# presumably they are consumed by a GPU optimizer defined elsewhere -- confirm.
static_feats_gpu = static_feats.to(device)
edge_idx_gpu = edge_idx.to(device)
# Identity matrix with one row/column per entry of `node_map`.
node_identity_gpu = torch.eye(len(node_map)).to(device)


# --- 1. EXPERIMENT SETUP (COMPLEX SCENARIO, UNCHANGED) ---
# Fixed seed so that everything drawing from `random` is repeatable.
random.seed(42)

# Vehicle fleet: two trucks and one forklift, all available at simulation start.
INITIAL_VEHICLE_STATES = [
    {'id': vehicle_id, 'type': vehicle_type, 'location': start_node,
     'available_time': SIMULATION_START_TIME}
    for vehicle_id, (vehicle_type, start_node) in enumerate([
        ('truck', 'Main_Gate'),
        ('truck', 'Contractor_Gate'),
        ('forklift', 'Laydown_A'),
    ])
]

# Each task is a sequence of (source, destination) legs.
COMPLEX_JOBS = [
    {'task': task_name, 'sequence': legs}
    for task_name, legs in [
        ('Deliver Rebar', [('Laydown_A', 'Building_1_N')]),
        ('Clear Debris from Pit', [('Foundation_Pit', 'Waste_Disposal')]),
        ('Move Scaffolding', [('Laydown_B', 'Building_2_W')]),
        ('Prefab Wall Delivery', [('Fabrication_Yard', 'Building_1_S')]),
        ('Multi-stop Debris Clearance', [('Building_1_S', 'Waste_Disposal'), ('Building_2_E', 'Waste_Disposal')]),
        ('Small Tool Delivery', [('Laydown_A', 'Foundation_Pit')]),
        ('Deliver piping', [('Laydown_A', 'Building_2_W')]),
        ('Move excess soil', [('Foundation_Pit', 'Waste_Disposal')]),
        ('Transport fabricated component', [('Fabrication_Yard', 'Building_2_E')]),
    ]
]

# One planning job per leg, numbered consecutively from 0 in declaration order.
JOBS_FOR_PLANNING = []
for job in COMPLEX_JOBS:
    for u, v in job['sequence']:
        JOBS_FOR_PLANNING.append({'id': len(JOBS_FOR_PLANNING), 'source': u, 'destination': v})
job_counter = len(JOBS_FOR_PLANNING)

print(f"‚úÖ Complex scenario defined with {len(INITIAL_VEHICLE_STATES)} vehicles and {len(JOBS_FOR_PLANNING)} total tasks.")


# --- 2. DISRUPTION SCENARIO DEFINITION (UNCHANGED) ---
# What the planners see at T+0: the unmodified weather for the first 8 hours.
INITIAL_FORECAST = weather_df[SIMULATION_START_TIME : SIMULATION_START_TIME + timedelta(hours=8)]
# Ground truth: a copy of the weather table with a storm written over T+2h .. T+4h.
ground_truth_weather = weather_df.copy()
storm_start = SIMULATION_START_TIME + timedelta(hours=2)
storm_end = SIMULATION_START_TIME + timedelta(hours=4)
ground_truth_weather.loc[storm_start:storm_end, 'rain'] = 8.0
ground_truth_weather.loc[storm_start:storm_end, 'rain_intensity'] = 3
print(f"Disruption 1: Surprise storm will occur between {storm_start.strftime('%H:%M')} and {storm_end.strftime('%H:%M')}.")

# Updated forecast = first 8 hours of the ground truth, so it includes the storm.
updated_forecast = ground_truth_weather[SIMULATION_START_TIME : SIMULATION_START_TIME + timedelta(hours=8)]

# Copy of the site graph with one edge removed; `site_graph` itself stays intact.
broken_down_graph = site_graph.copy()
blocked_edge = ('Building_1_S', 'Main_Gate')
# The announcement is printed only if the edge actually exists in the graph.
if broken_down_graph.has_edge(*blocked_edge):
    broken_down_graph.remove_edge(*blocked_edge)
    print(f"Disruption 2: Path '{blocked_edge[0]}'->'{blocked_edge[1]}' will be blocked at { (SIMULATION_START_TIME + timedelta(hours=3)).strftime('%H:%M') }.")

# Events for the adaptive simulation: the forecast update fires at T+45min and
# the graph update at T+3h.
simulation_events = [
    {'time': SIMULATION_START_TIME + timedelta(minutes=45), 'description': "Weather Forecast Update!", 'forecast_update': updated_forecast},
    {'time': SIMULATION_START_TIME + timedelta(hours=3), 'description': "Path Blockage!", 'graph_update': broken_down_graph}
]


# --- 3. RUN ALL MODELS AGAINST THE COMPLEX SCENARIO (USING CUDA OPTIMIZERS) ---
# Maps model name -> simulated makespan in hours.
results = {}

# NOTE: The adaptive simulation harness (Chunk 7) does not need to change.
# It simply calls whatever optimizer function we pass to it.
# The `run_static_optimizer` is CPU-only and needs no changes.

# --- Model 1: Static Baseline (CPU) ---
# Round-robin plan; optimizer_func=None means the simulation never re-plans.
static_plan, static_plan_ids = run_static_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph)
results['Static Baseline'] = run_adaptive_simulation(
    static_plan_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
    optimizer_func=None, gnn_model=None, weather_aware=False, events=simulation_events)

# --- Model 2 & 3: Single-Objective Static Plans (Can use CPU or a future GPU version of GA) ---
# We'll use the original CPU-based `run_ga_optimizer` for these static, one-off plans, as the overhead
# of setting up GPU batching for a single run is not always worth it.
# Only the weather-aware plan is computed in this cell; the model is moved to
# the CPU for the GA run and back to `device` right after.
aware_plan_static, aware_plan_static_ids = run_ga_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, INITIAL_FORECAST, gnn_model_aware.to('cpu'), site_graph, weather_aware=True)
gnn_model_aware.to(device) # Move model back to GPU for adaptive runs
results['GA-GNN (Aware, Static)'] = run_adaptive_simulation(
    aware_plan_static_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
    optimizer_func=None, gnn_model=gnn_model_aware, weather_aware=True, events=simulation_events)


# --- NEW MODEL 4: MULTI-OBJECTIVE ADAPTIVE DIGITAL TWIN (THE CHAMPION, ON CUDA) ---
print("\n--- Generating Initial Plan from Pareto Front using GPU Optimizer ---")

# Prefer a GPU-batched NSGA-II optimizer named `run_nsga2_optimizer_gpu` if one
# has been defined in the notebook; otherwise use the CPU `run_nsga2_optimizer`.
# NOTE(review): in the fallback case the CPU optimizer is called while
# gnn_model_aware sits on `device` -- confirm that the prediction helper it
# uses handles a model on a CUDA device.
optimizer_to_use = run_nsga2_optimizer # Fallback to CPU version if GPU one not implemented
if 'run_nsga2_optimizer_gpu' in locals():
    optimizer_to_use = run_nsga2_optimizer_gpu

pareto_front = optimizer_to_use(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, INITIAL_FORECAST, gnn_model_aware, site_graph)

if pareto_front:
    # One row per Pareto solution; column 0 is the makespan objective.
    solutions = np.array([list(ind.fitness.values) for ind in pareto_front])
    fastest_idx = np.argmin(solutions[:, 0])
    fastest_plan_ids = list(pareto_front[fastest_idx])
    print(f"Selected 'Fastest' plan from Pareto front as the initial plan.")

    # Define a wrapper for re-planning that uses the same optimizer as above
    def replan_with_nsga2_fastest_gpu(jobs, states, weather, gnn, graph, weather_aware):
        """Run `optimizer_to_use` and return its lowest-makespan solution as (plan, plan_ids).

        `weather_aware` is accepted only to match the signature the simulation
        calls; it is not used. Returns ([], []) when the Pareto front is empty.
        """
        hof = optimizer_to_use(jobs, states, weather, gnn, graph)
        if not hof: return [], []
        new_solutions = np.array([list(ind.fitness.values) for ind in hof])
        new_fastest_idx = np.argmin(new_solutions[:, 0])
        best_plan_ids = list(hof[new_fastest_idx])
        # Per-vehicle plan = round-robin split of the ordering.
        dummy_plan = [[] for _ in states]
        for i, job_id in enumerate(best_plan_ids):
            dummy_plan[i % len(states)].append(job_id)
        return dummy_plan, best_plan_ids

    results['Multi-Obj (Fastest, Adaptive, CUDA)'] = run_adaptive_simulation(
        fastest_plan_ids, JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, site_graph, ground_truth_weather,
        optimizer_func=replan_with_nsga2_fastest_gpu, gnn_model=gnn_model_aware, weather_aware=True, events=simulation_events)
else:
    # No result is recorded for this model; the improvement metric computed
    # later falls back to its "could not calculate" message.
    print("WARNING: Pareto front is empty. Skipping Multi-Objective Champion model.")


# --- 4. ANALYZE AND PLOT FINAL RESULTS ---
# Rank the models from shortest to longest makespan.
sorted_results = sorted(results.items(), key=lambda entry: entry[1])

# Text table of the ranking.
print("\n\n" + "="*60)
print("--- üìä FINAL PERFORMANCE COMPARISON üìä ---")
print("="*60)
for name, makespan in sorted_results:
    print(f"{name:<40}: {makespan:.2f} Hours")
print("="*60)

# Bar colours are assigned by rank (position in the sorted list), not by model.
labels = [model for model, _ in sorted_results]
scores = [hours for _, hours in sorted_results]
colors = ['grey', 'skyblue', 'forestgreen']

# Horizontal bar chart; the y-axis is inverted so the best model is on top.
plt.figure(figsize=(14, 8))
bars = plt.barh(labels, scores, color=colors[:len(labels)])
plt.xlabel("Total Project Makespan (Hours)")
plt.ylabel("Planning & Control Model")
plt.title("Figure: Final Model Performance in Disrupted Scenario (CUDA Accelerated)", fontsize=16)
plt.gca().invert_yaxis()
plt.tight_layout()

# Write each bar's value just to the right of the bar.
for i, bar in enumerate(bars):
    width = bar.get_width()
    plt.text(
        width + 0.05, bar.get_y() + bar.get_height()/2, f'{width:.2f} hrs',
        ha='left', va='center', fontsize=11, weight='bold')

# Relative improvement of the adaptive multi-objective model over the
# weather-aware static GA plan. Skipped if either result is missing or the
# baseline makespan is zero.
try:
    baseline_score = results['GA-GNN (Aware, Static)']
    champion_score = results['Multi-Obj (Fastest, Adaptive, CUDA)']
    improvement = ((baseline_score - champion_score) / baseline_score) * 100
    print(f"\nüèÜ The CUDA-accelerated adaptive model showed a {improvement:.2f}% improvement over the best static plan.")
except (KeyError, ZeroDivisionError):
    print("\nCould not calculate final improvement metric.")

# Leave 20% head-room on the right for the value labels (1 if there are no scores).
plt.xlim(right=max(scores) * 1.20 if scores else 1)
plt.show()

# --- 5. CLEANUP: Move models back to CPU if needed for other tasks ---
# Both models are moved unconditionally, whatever `device` was selected above.
gnn_model_aware.to('cpu')
gnn_model_agnostic.to('cpu')
print("\nModels moved back to CPU.")

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt

# --- 1. RUN THE MULTI-OBJECTIVE OPTIMIZER TO GET THE PARETO FRONT ---
# `run_nsga2_optimizer` is defined in an earlier cell (Chunk 6B). It is run here on the
# initial planning scenario to obtain the set of trade-off solutions analysed below.
pareto_front = run_nsga2_optimizer(JOBS_FOR_PLANNING, INITIAL_VEHICLE_STATES, INITIAL_FORECAST, gnn_model_aware, site_graph)

# --- 2. EXTRACT AND ANALYZE THE PARETO SOLUTIONS ---
if not pareto_front:
    print("Execution Warning: No solutions were found by the NSGA-II optimizer. The Pareto front is empty.")
    print("This can happen with very few generations or a very difficult problem. Consider increasing `ngen` in Chunk 6B.")
else:
    # One row per non-dominated individual; columns are its fitness values (Time, Cost, Risk)
    solutions = np.array([list(ind.fitness.values) for ind in pareto_front])

    # Column views, one per objective
    times, costs, risks = (
        solutions[:, 0],  # Makespan in Hours
        solutions[:, 1],  # Total Fuel in Liters
        solutions[:, 2],  # Sum of Variances (Risk Score)
    )

    # The per-objective minima mark the extremes of the trade-off space
    fastest_idx = np.argmin(times)
    cheapest_idx = np.argmin(costs)
    safest_idx = np.argmin(risks)

    best_time_solution = solutions[fastest_idx]
    best_cost_solution = solutions[cheapest_idx]
    best_risk_solution = solutions[safest_idx]

    rule = "="*50
    print("\n" + rule)
    print("--- Analysis of Representative Optimal Solutions ---")
    print(rule)
    summary_rows = (
        ("Fastest Plan (Greedy):", best_time_solution),
        ("Cheapest Plan (Economical):", best_cost_solution),
        ("Safest Plan (Reliable/Low-Risk):", best_risk_solution),
    )
    for row_number, (heading, plan) in enumerate(summary_rows):
        if row_number:
            # Blank spacer between consecutive plans (none before the first one)
            print("\n")
        print(heading)
        print(f"  Makespan: {plan[0]:.2f} hrs | Fuel Cost: {plan[1]:.2f} L | Risk Score: {plan[2]:.2f}")
    print(rule)


    # --- 3. 3D VISUALIZATION OF THE PARETO FRONT ---
    fig = plt.figure(figsize=(14, 12))
    ax = fig.add_subplot(111, projection='3d')

    # Every solution on the front; point colour repeats the makespan as an extra visual cue
    scatter = ax.scatter(times, costs, risks, c=times, cmap='viridis', s=60, alpha=0.8, edgecolors='k', linewidth=0.5)

    # Overlay the three representative plans with large markers and a text tag
    # nudged 1% away from the point on each axis
    highlighted_plans = (
        (best_time_solution, 'red', '*', 'Fastest'),
        (best_cost_solution, 'blue', 'P', 'Cheapest'),
        (best_risk_solution, 'green', 'D', 'Safest'),
    )
    for plan, colour, marker_shape, tag in highlighted_plans:
        ax.scatter(plan[0], plan[1], plan[2], c=colour, s=200, marker=marker_shape, label=tag)
        ax.text(plan[0]*1.01, plan[1]*1.01, plan[2]*1.01, tag, color=colour, fontsize=12)

    # Titles and axis labels
    ax.set_title('Pareto Front: Optimal Trade-offs (Time vs. Cost vs. Risk)', fontsize=18, pad=20)
    ax.set_xlabel('\nMakespan (Hours) - Lower is Better', fontsize=12)
    ax.set_ylabel('\nTotal Fuel (Liters) - Lower is Better', fontsize=12)
    ax.set_zlabel('\nLateness Risk Score - Lower is Better', fontsize=12)

    # Flip all three axes so the "best" corner (towards 0,0,0) is the visually prominent one
    ax.invert_xaxis()
    ax.invert_yaxis()
    ax.invert_zaxis()

    # Colour bar explaining the makespan colour mapping
    cbar = fig.colorbar(scatter, shrink=0.6, aspect=20, pad=0.01)
    cbar.set_label('Makespan (Hours)', fontsize=12)

    ax.legend(fontsize=12)
    plt.show()

    # NOTE: To complete the paper's final experiment, pick one of these plans
    # (e.g., the 'Fastest Plan') and pass its job sequence to the adaptive simulation
    # harness in Chunk 8. That yields a bar on the final chart labeled
    # "Adaptive Multi-Objective (Fastest)" to compare against the other models.

In [None]:
# Install graphviz if you don't have it
!pip install graphviz -q

import graphviz

# Build the system-architecture flowchart as a Graphviz directed graph,
# laid out top-to-bottom with the title placed above the drawing.
dot = graphviz.Digraph('system_workflow', comment='System Architecture')
dot.attr(rankdir='TB', label='System Architecture for Adaptive Construction Logistics', labelloc='t', fontsize='20')

# Node styles, keyed by the role a node plays in the workflow
styles = {
    'data': {'shape': 'parallelogram', 'style': 'filled', 'fillcolor': 'lightblue'},
    'process': {'shape': 'ellipse', 'style': 'filled', 'fillcolor': 'lightgrey'},
    'model': {'shape': 'box', 'style': 'filled', 'fillcolor': 'lightyellow'},
    'decision': {'shape': 'diamond', 'style': 'filled', 'fillcolor': 'lightpink'},
    'output': {'shape': 'folder', 'style': 'filled', 'fillcolor': 'lightgreen'}
}

# Offline pipeline nodes: inputs -> synthetic data -> GNN training -> trained predictor
dot.node('A', 'Site Layout & 4D Schedule', **styles['data'])
dot.node('B', 'Historical Weather Data (ERA5)', **styles['data'])
dot.node('C', 'Synthetic Data Generator (Physics Engine)', **styles['process'])
dot.node('D', 'Training Dataset (Travel Times & Conditions)', **styles['data'])
dot.node('E', 'GNN Model Training', **styles['model'])
dot.node('F', 'Trained Probabilistic GNN (Travel Time Predictor)', **styles['model'])

# Online operation nodes: live inputs, optimizer, re-planning decision, execution, output
dot.node('G', 'Real-Time Forecast & Site State', **styles['data'])
dot.node('H', 'Job List & Vehicle Status', **styles['data'])
dot.node('I', 'Genetic Algorithm Optimizer', **styles['process'])
dot.node('J', 'Is Re-planning Triggered?', **styles['decision'])
dot.node('K', 'Execute Current Plan', **styles['process'])
dot.node('L', 'Optimized & Adapted Route Plan', **styles['output'])

# Edges of the offline training pipeline
dot.edge('A', 'C')
dot.edge('B', 'C')
dot.edge('C', 'D')
dot.edge('D', 'E')
dot.edge('E', 'F')

# The core optimization loop. The subgraph name starts with "cluster", which is
# what makes Graphviz draw it as a shaded, labelled box around the loop.
subgraph = graphviz.Digraph('cluster_0')
subgraph.attr(style='filled', color='whitesmoke', label='Online Digital Twin Operation')
subgraph.edge('G', 'J', label='Event (e.g., Forecast Update)')
subgraph.edge('H', 'I')
subgraph.edge('F', 'I', label='GNN provides\ncost estimates')
subgraph.edge('I', 'L')
subgraph.edge('L', 'K')
subgraph.edge('K', 'H', label='Update vehicle status')
subgraph.edge('J', 'I', label='Yes')
subgraph.edge('J', 'K', label='No')
dot.subgraph(subgraph)


# The render below uses format='png', so the image written is 'system_workflow.png'.
print("Generating flowchart... (this will create a file named 'system_workflow.png')")
# Render the flowchart
dot.render('system_workflow', format='png', view=False)

# Display the image in the notebook. An explicit display() is required because this
# is not the last expression of the cell; a bare Image(...) here would be discarded.
from IPython.display import Image, display
display(Image('system_workflow.png'))

import pandas as pd

# One dictionary per GNN input feature; each becomes a row of "Table 1".
# All values (including the examples) are kept as strings so they print verbatim.
feature_data = [
    {'Feature Name': 'Edge Length', 'Type': 'Static', 'Description': 'Physical length of the path in meters.', 'Example': '40.0'},
    {'Feature Name': 'Edge Slope', 'Type': 'Static', 'Description': 'Gradient of the path (%).', 'Example': '2.0'},
    {'Feature Name': 'Paved', 'Type': 'Static', 'Description': 'Binary flag (1 if paved, 0 if not).', 'Example': '1'},
    {'Feature Name': 'Time of Day', 'Type': 'Dynamic', 'Description': 'Hour of the day (0-23).', 'Example': '14.5'},
    {'Feature Name': 'Congestion', 'Type': 'Dynamic', 'Description': 'Number of vehicles on the edge.', 'Example': '2'},
    {'Feature Name': 'Rain Intensity', 'Type': 'Dynamic (Weather)', 'Description': 'Categorical (0:None, 1:Light, 2:Mod, 3:Heavy).', 'Example': '2'},
    {'Feature Name': 'Heat Stress', 'Type': 'Dynamic (Weather)', 'Description': 'Categorical (0:Norm, 1:High, 2:V.High).', 'Example': '1'},
    {'Feature Name': 'Wind Hazard', 'Type': 'Dynamic (Weather)', 'Description': 'Binary flag (1 if hazardous winds).', 'Example': '0'},
    {'Feature Name': 'Soil Condition', 'Type': 'Dynamic (Site)', 'Description': 'Categorical (0:Dry, 1:Damp, 2:Muddy).', 'Example': '2'},
    {'Feature Name': 'Vehicle Type', 'Type': 'Dynamic (Task)', 'Description': 'Binary flag (1 if truck, 0 if forklift).', 'Example': '1'},
]

# Column order follows the key order of the dictionaries above
features_df = pd.DataFrame(feature_data)

print("--- Table 1: GNN Input Features ---")
try:
    # to_markdown() gives a clean, paper-ready table, but it relies on the optional
    # `tabulate` package, which this notebook's install cell does not install.
    print(features_df.to_markdown(index=False))
except ImportError:
    # Plain-text fallback so the table is still printed when `tabulate` is missing.
    print(features_df.to_string(index=False))

In [None]:
import torch
from sklearn.metrics import mean_absolute_error, r2_score

# Create a test set from the generated data: the trailing 20% of `dataset_df`.
test_size = int(0.2 * len(dataset_df))
# Slice from an explicit start index. `iloc[-test_size:]` would silently return the
# WHOLE frame when test_size == 0 (because -0 == 0), i.e. for fewer than 5 rows.
test_df = dataset_df.iloc[len(dataset_df) - test_size:]

actual_means = []
predicted_means = []

# Inference only: put the model in eval mode and disable gradient tracking.
gnn_model_aware.eval()
with torch.no_grad():
    for _, record in test_df.iterrows():
        # Get actual value
        actual_means.append(record['mean_travel_time'])

        # Get GNN prediction for the same edge, time offset, vehicle and soil state;
        # the second return value is not needed here.
        pred_mean, _ = predict_travel_time_with_gnn(
            u_node=record['edge_start_node'], v_node=record['edge_end_node'],
            arrival_time=SIMULATION_START_TIME + timedelta(hours=record['time_of_day']),
            weather_df=weather_df, gnn_model=gnn_model_aware,
            vehicle_type='truck' if record['vehicle_type_truck'] else 'forklift',
            soil_condition=record['soil_condition'], weather_aware=True
        )
        predicted_means.append(pred_mean)

if not actual_means:
    # The metrics and min()/max() below would raise on empty input.
    print("Test set is empty (dataset too small for a 20% hold-out); skipping accuracy metrics and plot.")
else:
    # Calculate performance metrics
    mae = mean_absolute_error(actual_means, predicted_means)
    r2 = r2_score(actual_means, predicted_means)

    # Create the scatter plot
    plt.figure(figsize=(8, 8))
    plt.scatter(actual_means, predicted_means, alpha=0.3, label='Predictions')
    # Add the 'perfect prediction' line (y = x across the range of actual values)
    plt.plot([min(actual_means), max(actual_means)], [min(actual_means), max(actual_means)], 'r--', lw=2, label='Perfect Prediction')

    plt.title('GNN Prediction Accuracy on Test Set', fontsize=16)
    plt.xlabel('Actual Travel Time (seconds)', fontsize=12)
    plt.ylabel('Predicted Travel Time (seconds)', fontsize=12)
    plt.legend()
    plt.grid(True)
    # Metrics box in the top-left corner, positioned in axes-relative coordinates
    plt.text(0.05, 0.95, f'MAE: {mae:.2f} sec\nR²: {r2:.3f}', transform=plt.gca().transAxes,
             fontsize=12, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    plt.axis('equal') # Same scale on both axes so the perfect-prediction line sits at 45 degrees
    plt.show()

In [None]:
import networkx as nx

# --- Define the two scenarios ---
# NOTE(review): the node names used in this cell ('Zone_1', 'Zone_2', 'Storage_A') look
# like they belong to an earlier, smaller site graph; confirm they exist in the current
# `site_graph`, otherwise nx.shortest_path will fail on the missing source/target.

# Scenario 1: Initial plan before disruptions
initial_conditions = {
    'weather': INITIAL_FORECAST,
    'graph': site_graph,
    'soil': 0
}

# Scenario 2: After the storm forecast and path blockage
disrupted_conditions = {
    'weather': updated_forecast, # The new forecast with the storm
    'graph': broken_down_graph, # The graph with the missing edge
    'soil': 2 # It's now muddy
}

# Define a cost function for nx.shortest_path that uses our GNN
def gnn_path_cost(u, v, d, gnn_model, conditions):
    """Return the GNN-predicted travel time for a truck on edge ``u -> v``.

    ``d`` is the edge-attribute dict that NetworkX passes to weight callables; it is
    not used. ``conditions`` supplies the ``'weather'`` and ``'soil'`` inputs of the
    scenario. The arrival time is fixed at two hours after SIMULATION_START_TIME.
    Only the first value returned by the predictor is used as the edge cost.
    """
    cost, _ = predict_travel_time_with_gnn(
        u, v, SIMULATION_START_TIME + timedelta(hours=2), conditions['weather'],
        gnn_model, 'truck', conditions['soil'], weather_aware=True
    )
    return cost

# Select a job that is affected by the blockage
job_to_visualize = {'source': 'Zone_1', 'destination': 'Storage_A'}

# Calculate the optimal paths for this job in both scenarios
path_initial = nx.shortest_path(initial_conditions['graph'],
                                source=job_to_visualize['source'], target=job_to_visualize['destination'],
                                weight=lambda u,v,d: gnn_path_cost(u,v,d, gnn_model_aware, initial_conditions))

path_adapted = nx.shortest_path(disrupted_conditions['graph'],
                                source=job_to_visualize['source'], target=job_to_visualize['destination'],
                                weight=lambda u,v,d: gnn_path_cost(u,v,d, gnn_model_aware, disrupted_conditions))

# --- Create the side-by-side plot ---
fig, axes = plt.subplots(1, 2, figsize=(20, 9))
pos = nx.get_node_attributes(site_graph, 'pos')

# NetworkX uses `node_size` to decide where edge arrows stop, and draws arrows beneath
# the nodes. The highlighted edges must therefore be given the same node size as the
# drawn nodes; with the default (300) the arrowheads end up hidden under the markers.
PLAN_NODE_SIZE = 2500

# Plot 1: Initial Plan
ax1 = axes[0]
ax1.set_title('Initial Plan (Before Disruptions)', fontsize=16)
nx.draw(site_graph, pos, with_labels=True, node_size=PLAN_NODE_SIZE, node_color='skyblue', ax=ax1, arrows=False)
# Highlight the initial path (consecutive node pairs along the route)
path_edges = list(zip(path_initial, path_initial[1:]))
nx.draw_networkx_edges(site_graph, pos, edgelist=path_edges, width=4, edge_color='green', ax=ax1, arrowsize=25,
                       node_size=PLAN_NODE_SIZE)

# Plot 2: Adapted Plan
ax2 = axes[1]
ax2.set_title('Adapted Plan (After Storm & Blockage)', fontsize=16)
nx.draw(site_graph, pos, with_labels=True, node_size=PLAN_NODE_SIZE, node_color='skyblue', ax=ax2, arrows=False)
# Mark the blocked path (drawn on the intact graph so the removed edge is still visible)
nx.draw_networkx_edges(site_graph, pos, edgelist=[('Zone_2', 'Storage_A')], width=3, style='dashed', edge_color='red', ax=ax2,
                       node_size=PLAN_NODE_SIZE)
ax2.text(pos['Zone_2'][0]-15, pos['Zone_2'][1], 'BLOCKED', color='red', fontsize=14, weight='bold')
# Highlight the new, adapted path
path_edges_adapted = list(zip(path_adapted, path_adapted[1:]))
nx.draw_networkx_edges(site_graph, pos, edgelist=path_edges_adapted, width=4, edge_color='blue', ax=ax2, arrowsize=25,
                       node_size=PLAN_NODE_SIZE)


fig.suptitle('Visualizing Adaptive Re-planning for a Single Job', fontsize=20)
plt.show()

In [None]:
# --- CORRECTED VISUALIZATION 1: IMPACT OF RAIN ON A CRITICAL PATH ---

# Edge under study, chosen from the large-scale graph as a representative unpaved, sloped path
edge_u, edge_v = 'Foundation_Pit', 'Building_2_W'
# Copy of the edge's attributes, extended with its endpoints for the physics function
edge_data = dict(site_graph.edges[edge_u, edge_v])
edge_data.update(start_node=edge_u, end_node=edge_v)
print(f"Analyzing edge: {edge_u} -> {edge_v} (Length: {edge_data['length']}m, Slope: {edge_data['slope']}%, Paved: {edge_data['paved']})")

rain_intensities = [0, 1, 2, 3]
labels = ['No Rain', 'Light Rain', 'Moderate Rain', 'Heavy Rain']
truck_times = []
forklift_times = []
mock_time = SIMULATION_START_TIME

for intensity in rain_intensities:
    # Only rain varies between runs; heat and wind are held at their neutral values
    mock_weather = pd.Series({'rain_intensity': intensity, 'heat_stress': 0, 'wind_hazard': 0})
    # Soil state follows the rain: 0 for no rain, 1 for light rain, 2 for moderate or heavier
    soil = 2 if intensity >= 2 else (1 if intensity == 1 else 0)

    # Truck first, then forklift, for each intensity. The physics function returns
    # three values, of which only the first (the mean) is plotted.
    for vehicle_kind, collected in (('truck', truck_times), ('forklift', forklift_times)):
        mean_time, _, _ = calculate_ground_truth_travel(edge_data, 1, mock_weather, vehicle_kind, mock_time, soil)
        collected.append(mean_time)

plt.figure(figsize=(10, 6))
plt.plot(labels, truck_times, 'o-', label='Truck', color='blue', markersize=8)
plt.plot(labels, forklift_times, 's--', label='Forklift (more sensitive)', color='orange', markersize=8)
plt.title('Figure: Impact of Rain on Travel Time for an Unpaved Path', fontsize=16)
plt.ylabel('Simulated Travel Time (seconds)', fontsize=12)
plt.xlabel('Rain Intensity', fontsize=12)
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()


# --- CORRECTED VISUALIZATION 2: ADAPTIVE RE-PLANNING ---
# Select a job that will be directly affected by the planned blockage
job_to_visualize = {'source': 'Building_1_S', 'destination': 'Laydown_B'}
# The edge that will be blocked in the simulation
blocked_edge_viz = ('Building_1_S', 'Main_Gate')

# Define a cost function for nx.shortest_path that uses our GNN
def gnn_path_cost(u, v, d, gnn_model, conditions):
    """Return the GNN-predicted travel time for a truck on edge ``u -> v``.

    ``d`` is the edge-attribute dict that NetworkX passes to weight callables; it is
    not used. ``conditions`` supplies the ``'weather'`` and ``'soil'`` inputs. The
    prediction is made for SIMULATION_START_TIME, and only the first value returned
    by the predictor is used as the edge cost.
    """
    cost, _ = predict_travel_time_with_gnn(
        u, v, SIMULATION_START_TIME, conditions['weather'], gnn_model, 'truck', conditions['soil'], weather_aware=True
    )
    return cost

# Calculate optimal paths before and after disruption:
# intact graph + initial forecast + soil 0, versus blocked graph + updated forecast + soil 2
path_initial = nx.shortest_path(site_graph, source=job_to_visualize['source'], target=job_to_visualize['destination'],
                                weight=lambda u,v,d: gnn_path_cost(u,v,d, gnn_model_aware, {'weather': INITIAL_FORECAST, 'soil': 0}))
path_adapted = nx.shortest_path(broken_down_graph, source=job_to_visualize['source'], target=job_to_visualize['destination'],
                                weight=lambda u,v,d: gnn_path_cost(u,v,d, gnn_model_aware, {'weather': updated_forecast, 'soil': 2}))

# Create the side-by-side plot
fig, axes = plt.subplots(1, 2, figsize=(22, 10))
pos = nx.get_node_attributes(site_graph, 'pos')

# NetworkX uses `node_size` to decide where edge arrows stop, and draws arrows beneath
# the nodes. The highlighted edges must therefore be given the same node size as the
# drawn nodes; with the default (300) the arrowheads end up hidden under the markers.
REPLAN_NODE_SIZE = 3000

# Plot 1: Initial Plan
axes[0].set_title('Initial Plan (Before Disruptions)', fontsize=16)
nx.draw(site_graph, pos, with_labels=True, node_size=REPLAN_NODE_SIZE, node_color='skyblue', ax=axes[0], arrows=False)
nx.draw_networkx_edges(site_graph, pos, edgelist=list(zip(path_initial, path_initial[1:])), width=4, edge_color='green',
                       ax=axes[0], arrowsize=25, label="Initial Path", node_size=REPLAN_NODE_SIZE)

# Plot 2: Adapted Plan (both panels draw the intact graph so the blocked edge stays visible)
axes[1].set_title('Adapted Plan (After Route Blockage)', fontsize=16)
nx.draw(site_graph, pos, with_labels=True, node_size=REPLAN_NODE_SIZE, node_color='skyblue', ax=axes[1], arrows=False)
nx.draw_networkx_edges(site_graph, pos, edgelist=[blocked_edge_viz], width=3, style='dashed', edge_color='red', ax=axes[1],
                       node_size=REPLAN_NODE_SIZE)
# Place the 'BLOCKED' tag just below the start node of the blocked edge
node_pos = pos[blocked_edge_viz[0]]
axes[1].text(node_pos[0], node_pos[1] - 10, 'BLOCKED', color='red', fontsize=14, weight='bold', ha='center')
nx.draw_networkx_edges(site_graph, pos, edgelist=list(zip(path_adapted, path_adapted[1:])), width=4, edge_color='blue',
                       ax=axes[1], arrowsize=25, label="Adapted Path", node_size=REPLAN_NODE_SIZE)

fig.suptitle('Figure: Visualization of Adaptive Re-planning', fontsize=20); plt.show()

In [None]:
# --- 4. GENERATE THE "GREEN vs. LEAN" TRADE-OFF TABLE ---
# This block provides the quantitative data for the paper's sustainability discussion.
# It relies on `pareto_front`, `best_time_solution` and `best_cost_solution` from the
# Pareto-analysis cell; each solution is indexed as [0] = makespan, [1] = fuel.

if pareto_front:
    # Use the representative solutions identified earlier
    lean_plan = best_time_solution   # The "Fastest" plan
    green_plan = best_cost_solution  # The "Cheapest/Greenest" plan

    # Define the CO2 conversion factor (kg of CO2 per liter of diesel)
    CO2_FACTOR = 2.68

    # Calculate metrics for the table
    lean_makespan = lean_plan[0]
    lean_fuel = lean_plan[1]
    lean_co2 = lean_fuel * CO2_FACTOR

    green_makespan = green_plan[0]
    green_fuel = green_plan[1]
    green_co2 = green_fuel * CO2_FACTOR

    # Calculate the percentage trade-offs
    # The "Lean" plan is the baseline for comparison
    makespan_increase_pct = ((green_makespan - lean_makespan) / lean_makespan) * 100
    co2_reduction_pct = ((lean_co2 - green_co2) / lean_co2) * 100

    # Create a Pandas DataFrame for clean formatting (values pre-formatted as strings)
    tradeoff_data = {
        'Metric': ['Project Makespan (Hours)', 'Total Fuel Consumed (Liters)', 'Total CO₂ Emissions (kg)'],
        'Lean Plan (Fastest)': [f"{lean_makespan:.2f}", f"{lean_fuel:.2f}", f"{lean_co2:.2f}"],
        'Green Plan (Most Fuel-Efficient)': [f"{green_makespan:.2f}", f"{green_fuel:.2f}", f"{green_co2:.2f}"],
        'Trade-Off': [f"+{makespan_increase_pct:.1f}% Time", "-", f"-{co2_reduction_pct:.1f}% Emissions"]
    }
    tradeoff_df = pd.DataFrame(tradeoff_data)

    print("\n\n" + "="*70)
    print("--- 🌍 Table: The Green vs. Lean Trade-Off in Construction Logistics 🌍 ---")
    print("="*70)
    # Print the table in a format ready to be copied into a paper
    try:
        # to_markdown() relies on the optional `tabulate` package, which this
        # notebook's install cell does not install.
        print(tradeoff_df.to_markdown(index=False))
    except ImportError:
        # Plain-text fallback so the table is still printed when `tabulate` is missing.
        print(tradeoff_df.to_string(index=False))
    print("="*70)
    print(f"\nKey Finding: Opting for the 'Green' plan reduces CO₂ emissions by {co2_reduction_pct:.1f}% at the cost of a {makespan_increase_pct:.1f}% increase in project makespan.")

else:
    print("Cannot generate trade-off table because the Pareto front is empty.")