In [None]:
# Install PyTorch Geometric
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m83.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.7.0


In [None]:
import os
import shutil

# Name of the pre-processed dataset file the user is expected to upload
DATA_FILE = 'airport_complex_metrics.pt'

if not os.path.exists(DATA_FILE):
    # Nothing was uploaded to the working directory: either an earlier run
    # already moved the file into place, or it is missing altogether.
    if os.path.exists(f'dataset/processed/{DATA_FILE}'):
        print("Data is already in the correct folder.")
    else:
        print(f" ERROR: Could not find '{DATA_FILE}'.")
        print("Please drag and drop the .pt file into the Files sidebar on the left!")
else:
    print(f" Found '{DATA_FILE}'. Setting up environment...")

    # Start from a clean slate: drop any previous 'dataset' tree entirely
    if os.path.exists('dataset'):
        shutil.rmtree('dataset')

    # Recreate the processed/ folder the dataset class reads from
    os.makedirs('dataset/processed', exist_ok=True)

    # Relocate the uploaded file into that folder
    shutil.move(DATA_FILE, f'dataset/processed/{DATA_FILE}')
    print(f" File successfully moved to 'dataset/processed/{DATA_FILE}'")

 Found 'airport_complex_metrics.pt'. Setting up environment...
 File successfully moved to 'dataset/processed/airport_complex_metrics.pt'


In [None]:
import torch
import torch.nn.functional as F
import numpy as np
from torch_geometric.data import InMemoryDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GATConv, global_mean_pool
import torch.serialization
from torch_geometric.data.data import DataEdgeAttr

# Register PyG's DataEdgeAttr with torch's allow-list of globals that may be
# unpickled by `torch.load(..., weights_only=True)`.
# NOTE(review): the dataset class in this notebook calls torch.load with
# weights_only=False, which presumably bypasses this allow-list — confirm
# whether this registration is still required.
torch.serialization.add_safe_globals([DataEdgeAttr])

class AirportRegressionDataset(InMemoryDataset):
    """In-memory dataset backed by a single pre-processed ``.pt`` file.

    The file named in ``processed_file_names`` is expected to already exist
    under ``<root>/processed/`` and to contain a ``(data, slices)`` pair.
    Nothing is downloaded or generated by this class.

    Args:
        root: Dataset root directory (the file is read from ``root/processed``).
        transform: Optional per-access transform, forwarded to the base class.
        pre_transform: Optional pre-processing transform, forwarded to the base class.

    Raises:
        FileNotFoundError: If the pre-processed file is not present.
    """

    def __init__(self, root, transform=None, pre_transform=None):
        super().__init__(root, transform, pre_transform)
        # SECURITY: weights_only=False unpickles arbitrary Python objects, which
        # can execute code. Only load files produced by a trusted pipeline.
        self.data, self.slices = torch.load(self.processed_paths[0], weights_only=False)

    @property
    def raw_file_names(self):
        """No raw files: the dataset ships already processed."""
        return []

    @property
    def processed_file_names(self):
        """Name of the single pre-processed file looked up under ``processed/``."""
        return ['airport_complex_metrics.pt']

    def download(self):
        """Intentionally a no-op: there is nothing to download."""

    def process(self):
        """Fail loudly instead of pretending to build the processed file.

        The base class invokes this hook when the processed file is absent.
        This class cannot regenerate it, so raise a descriptive error here
        rather than letting the subsequent ``torch.load`` fail opaquely.
        """
        raise FileNotFoundError(
            f"Pre-processed dataset file not found: {self.processed_paths[0]!r}. "
            "Place 'airport_complex_metrics.pt' in the dataset's 'processed' "
            "folder before constructing AirportRegressionDataset."
        )

In [None]:
# Configuration
BATCH_SIZE = 16
HIDDEN_CHANNELS = 128
LEARNING_RATE = 0.002
EPOCHS = 100
SEED = 42

# Seed torch's global RNG before anything random happens, so the shuffle
# below (and therefore the train/test split), the training loader's batch
# order and the model's weight initialisation are repeatable across runs.
torch.manual_seed(SEED)

print("Loading dataset...")
dataset = AirportRegressionDataset(root='dataset')

# Shuffle to ensure random distribution
dataset = dataset.shuffle()

# Split 80/20: the first 80% of the shuffled graphs train, the rest test
train_idx = int(len(dataset) * 0.8)
train_dataset = dataset[:train_idx]
test_dataset = dataset[train_idx:]

# Create DataLoaders (only the training loader reshuffles every epoch)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Total Graphs: {len(dataset)}")
print(f"Training Samples: {len(train_dataset)}")
print(f"Testing Samples: {len(test_dataset)}")

Loading dataset...
Total Graphs: 4400
Training Samples: 3520
Testing Samples: 880


In [None]:
class DigitalTwinGNN(torch.nn.Module):
    """Graph-level regressor: four GATConv layers, mean pooling, two-layer head.

    Args:
        hidden_channels: Width of every GATConv layer; the head's hidden layer
            uses ``hidden_channels // 2``.
        in_channels: Number of input node features. When ``None`` (default) it
            is read from the notebook-level ``dataset.num_node_features``,
            which preserves the original behaviour.
        edge_dim: Number of edge features passed to every GATConv layer.
            Defaults to 3 ([utilization, latency, error_rate] per the
            original notebook comment).
        out_channels: Number of regression targets predicted per graph.
            Defaults to 6.
    """

    def __init__(self, hidden_channels, in_channels=None, edge_dim=3, out_channels=6):
        super().__init__()

        if in_channels is None:
            # Backward-compatible fallback: relies on a global `dataset`
            # having been created by an earlier cell.
            in_channels = dataset.num_node_features

        # 4 Graph Attention Layers to capture complex dependencies
        self.conv1 = GATConv(in_channels, hidden_channels, edge_dim=edge_dim)
        self.conv2 = GATConv(hidden_channels, hidden_channels, edge_dim=edge_dim)
        self.conv3 = GATConv(hidden_channels, hidden_channels, edge_dim=edge_dim)
        self.conv4 = GATConv(hidden_channels, hidden_channels, edge_dim=edge_dim)

        # Regression Head
        self.lin1 = torch.nn.Linear(hidden_channels, hidden_channels // 2)

        # OUTPUT LAYER: one value per predicted metric
        self.lin2 = torch.nn.Linear(hidden_channels // 2, out_channels)

    def forward(self, x, edge_index, edge_attr, batch):
        """Predict the target metrics for every graph in a batch.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity, as consumed by GATConv.
            edge_attr: Per-edge features (``edge_dim`` columns).
            batch: Vector assigning each node to its graph, used for pooling.

        Returns:
            Tensor with one row per graph and ``out_channels`` columns.
        """
        # Message Passing (no activation after the last layer, before pooling)
        x = self.conv1(x, edge_index, edge_attr=edge_attr).relu()
        x = self.conv2(x, edge_index, edge_attr=edge_attr).relu()
        x = self.conv3(x, edge_index, edge_attr=edge_attr).relu()
        x = self.conv4(x, edge_index, edge_attr=edge_attr)

        # Aggregation (Graph-level embedding)
        x = global_mean_pool(x, batch)

        # Prediction
        x = self.lin1(x).relu()
        x = self.lin2(x)  # Linear output (no activation) for regression
        return x

# Pick the GPU when torch can see one, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Build the network and place its parameters on the chosen device
model = DigitalTwinGNN(HIDDEN_CHANNELS)
model = model.to(device)

# Mean Squared Error objective, optimised with Adam
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

Using device: cuda


In [None]:
print("Starting Digital Twin Training...")

for epoch in range(1, EPOCHS + 1):
    # Re-enter training mode every epoch: the periodic evaluation below
    # switches the model to eval mode.
    model.train()
    train_loss_sum = 0.0
    train_graphs = 0

    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        # Forward pass
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)

        # Calculate Error
        loss = criterion(out, data.y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its graph count so a smaller
        # final batch does not skew the epoch average.
        train_loss_sum += loss.item() * data.num_graphs
        train_graphs += data.num_graphs

    # Evaluate every 10 epochs
    if epoch % 10 == 0:
        model.eval()
        test_loss_sum = 0.0
        test_graphs = 0
        with torch.no_grad():
            for data in test_loader:
                data = data.to(device)
                out = model(data.x, data.edge_index, data.edge_attr, data.batch)
                test_loss_sum += criterion(out, data.y).item() * data.num_graphs
                test_graphs += data.num_graphs

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        avg_train_loss = train_loss_sum / max(train_graphs, 1)
        avg_test_loss = test_loss_sum / max(test_graphs, 1)
        print(f"Epoch {epoch:03d} | Train MSE: {avg_train_loss:.4f} | Test MSE: {avg_test_loss:.4f}")

Starting Digital Twin Training...
Epoch 010 | Train MSE: 0.1169 | Test MSE: 0.1135
Epoch 020 | Train MSE: 0.0966 | Test MSE: 0.0891
Epoch 030 | Train MSE: 0.0633 | Test MSE: 0.0585
Epoch 040 | Train MSE: 0.0256 | Test MSE: 0.0291
Epoch 050 | Train MSE: 0.0069 | Test MSE: 0.0058
Epoch 060 | Train MSE: 0.0074 | Test MSE: 0.0067
Epoch 070 | Train MSE: 0.0030 | Test MSE: 0.0047
Epoch 080 | Train MSE: 0.0018 | Test MSE: 0.0012
Epoch 090 | Train MSE: 0.0076 | Test MSE: 0.0062
Epoch 100 | Train MSE: 0.0013 | Test MSE: 0.0007


In [None]:
print("\n=== FINAL EVALUATION: ERROR PER METRIC ===")
model.eval()

# Metric table: (display name, scaling factor, unit).
# The scaling factors MUST match process_regression_data.py.
#   - QoS Violations ("User Satisfaction Score"): the % of video streams or
#     calls that are lagging or failing.
#   - Resource Pressure ("Hardware Health Score"): the average stress
#     (CPU + RAM usage) on all airport devices combined.
metrics_config = [
    ("Avg Latency",       100.0, "ms"),
    ("Avg Throughput",    20.0,  "Mbps"),
    ("Packet Loss Rate",  5.0,   "%"),
    ("Avg Inf Delay",     100.0, "ms"),
    ("QoS Violations",    100.0, "%"),
    ("Resource Pressure", 100.0, "%")
]

# One running absolute-error total per metric, kept on the model's device
total_mae = torch.zeros(len(metrics_config), device=device)
num_samples = 0

# Walk the whole test set without tracking gradients
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        pred = model(data.x, data.edge_index, data.edge_attr, data.batch)

        # |Predicted - Real|, summed over the graphs of this batch (dim=0),
        # which leaves one total per metric column
        total_mae += (pred - data.y).abs().sum(dim=0)
        num_samples += data.num_graphs

# Mean Absolute Error per metric, still in scaled units
avg_mae = (total_mae / num_samples).cpu().numpy()

# Report
print(f"{'METRIC':<20} | {'AVG ERROR (MAE)':<15} | {'UNIT'}")
print("-" * 45)

for (name, scale, unit), scaled_mae in zip(metrics_config, avg_mae):
    # Multiply by the scaling factor to express the error in real-world units
    real_error = scaled_mae * scale

    print(f"{name:<20} | {real_error:10.4f}      | {unit}")

print("-" * 45)
print(f"Evaluated on {num_samples} test scenarios.")


=== FINAL EVALUATION: ERROR PER METRIC ===
METRIC               | AVG ERROR (MAE) | UNIT
---------------------------------------------
Avg Latency          |     1.5829      | ms
Avg Throughput       |     0.2771      | Mbps
Packet Loss Rate     |     0.1864      | %
Avg Inf Delay        |     1.5904      | ms
QoS Violations       |     1.6319      | %
Resource Pressure    |     1.1737      | %
---------------------------------------------
Evaluated on 880 test scenarios.
