## 1. Environment Setup

In [None]:
import sys
import os
from pathlib import Path

# Treat the parent of the current working directory as the project root and
# put it first on sys.path so imports are resolved against it before anything else.
project_root = Path.cwd().parent
sys.path.insert(0, os.fspath(project_root))

# Core imports
import torch
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from torch.utils.data import DataLoader, random_split

# ACIE Framework imports
from acie import (
    ACIE_Core,
    ACIETrainer,
    ACIERecorder,
    ACIEConfig,
    CyberLogDataset,
    SyntheticDataset
)
from acie.utils import set_seed, get_device

# Fix the random seed through the project helper, then choose the compute
# device (a GPU is requested when one is available).
set_seed(42)
device = get_device(prefer_gpu=True)

# Report the runtime environment in one call; the output is two lines.
print(
    f"PyTorch Version: {torch.__version__}",
    f"Device: {device}",
    sep="\n",
)

## 2. Configuration (Governance Layer)

The `ACIEConfig` dataclass provides centralized, typed configuration management. This ensures all hyperparameters are validated and documented.

In [None]:
# Create configuration using the governance layer
config = ACIEConfig(
    experiment_name="research_demo",
    seed=42,
    device="auto"
)

# Configure model architecture (read by the dataset and model cells)
config.model.input_dim = 100
config.model.causal_nodes = 10
config.model.action_space = 5

# Configure training hyperparameters
config.training.batch_size = 32
config.training.epochs = 10  # Reduced for demonstration
config.training.learning_rate = 1e-3
config.training.lambda_dag = 0.1
config.training.lambda_robust = 0.5

# Configure data
config.data.dataset_type = "cyber"
config.data.num_samples = 500
config.data.train_split = 0.7
config.data.val_split = 0.15
config.data.test_split = 0.15

# These fields are assigned after the config object was constructed, so check
# the split fractions explicitly here. The split cell computes the test size as
# "whatever is left" and never reads test_split, so an inconsistent value would
# otherwise go unnoticed.
split_total = (
    config.data.train_split + config.data.val_split + config.data.test_split
)
assert abs(split_total - 1.0) < 1e-9, (
    f"train/val/test splits must sum to 1.0, got {split_total}"
)

# Display configuration summary
print(config.summary())

## 3. Dataset Loading (Interface Compliance)

The `CyberLogDataset` implements the `BaseACIEDataset` interface, ensuring strict interoperability with the training pipeline. This dataset simulates the DARPA "Five Directions" schema with realistic attack patterns.

In [None]:
# Build the cyber-log dataset; every constructor argument comes from `config`
# so the data always matches the model dimensions configured above.
dataset = CyberLogDataset(
    seed=config.seed,
    num_classes=config.model.action_space,
    input_dim=config.model.input_dim,
    num_samples=config.data.num_samples,
)

print(
    f"Dataset cardinality: {len(dataset)}",
    f"Input dimensionality: {dataset.input_dim}",
    sep="\n",
)

# One line per entry of the dataset's label -> pattern mapping
print("\nAttack Patterns:")
for pattern_label, pattern in dataset.attack_patterns.items():
    print(f"  {pattern_label}: {pattern}")

# Verify interface compliance: indexing must yield a (sample, label) pair
sample, label = dataset[0]
print(f"\nSample shape: {sample.shape}")
print(f"Label type: {type(label)}")

In [None]:
# Split dataset into train/val/test
num_total = len(dataset)
train_size = int(config.data.train_split * num_total)
val_size = int(config.data.val_split * num_total)
# The test split takes the remainder, so truncation in the two int() calls
# above never drops a sample.
test_size = num_total - train_size - val_size

# Use a dedicated, explicitly seeded generator. Without it random_split draws
# from the global RNG, so the partition would depend on how much randomness
# earlier cells consumed and would change whenever this cell is re-run.
split_generator = torch.Generator().manual_seed(config.seed)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator
)

# Sanity check: the three subsets partition the full dataset
assert len(train_dataset) + len(val_dataset) + len(test_dataset) == num_total

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# Create DataLoaders: only the training loader reshuffles every epoch;
# validation order is kept fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=config.training.batch_size,
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=config.training.batch_size,
    shuffle=False
)

## 4. Model Architecture

The `ACIE_Core` model implements a three-layer architecture:

1. **Information Filter**: Compressive sensing with entropy gating
2. **Causal Discovery**: Differentiable DAG learning (NOTEARS-style)
3. **Robust Policy**: Game-theoretic Nash equilibrium selection

In [None]:
# Build the model from the architecture section of the configuration
model = ACIE_Core(
    action_space=config.model.action_space,
    causal_nodes=config.model.causal_nodes,
    input_dim=config.model.input_dim,
)

# Tally parameter counts from a single pass over model.parameters():
# each entry is (number of elements, whether the tensor is trainable).
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)

print("ACIE_Core Architecture", "=" * 50, sep="\n")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Shapes / repr of the three components exposed as model attributes
print("\nModel components:")
print(f"  - Sensing matrix: {model.sensing_matrix.shape}")
print(f"  - Adjacency matrix: {model.adjacency.shape}")
print(f"  - Policy network: {model.policy_net}")

## 5. Training (Executive Branch)

The `ACIETrainer` handles the optimization loop while delegating logging and persistence to the `ACIERecorder` (Judicial Branch).

In [None]:
# Recorder (Judicial Branch): given the experiment name plus the directories
# for saved models and logs, both located under the project root.
models_dir = project_root / "models"
logs_dir = project_root / "logs"
recorder = ACIERecorder(
    experiment_name=config.experiment_name,
    save_dir=str(models_dir),
    log_dir=str(logs_dir),
)

# Persist the exact configuration used for this run
recorder.save_config(config.to_dict())

# Trainer (Executive Branch): receives the model, the optimisation
# hyperparameters from the config, the device and the recorder.
trainer = ACIETrainer(
    model=model,
    recorder=recorder,
    device=device,
    learning_rate=config.training.learning_rate,
    lambda_dag=config.training.lambda_dag,
    lambda_robust=config.training.lambda_robust,
)

print(
    "Trainer initialized successfully",
    f"Device: {trainer.device}",
    sep="\n",
)

In [None]:
# Execute training loop
# The recorder was constructed with save_dir as a plain string. Wrapping the
# attribute in Path() makes the join below work whether the recorder keeps that
# string as-is or converts it to a Path internally (a bare `str / str` would
# raise TypeError).
# NOTE(review): how ACIERecorder stores save_dir is not visible here - confirm.
best_model_path = Path(recorder.save_dir) / "best_model.pth"

history = trainer.fit(
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=config.training.epochs,
    save_path=str(best_model_path)
)

# Save final model and finalize session
recorder.save_final_model(model)
summary = recorder.finalize()

## 6. Training Visualization

In [None]:
# Two side-by-side panels: losses on the left, validation accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
loss_ax, acc_ax = axes

# Left panel - the training curve is always drawn; the validation curve only
# when the history holds a non-empty "val_loss" series.
loss_ax.plot(history["train_loss"], label="Train Loss", color="blue")
val_curve = history["val_loss"]
if val_curve:
    loss_ax.plot(val_curve, label="Val Loss", color="orange")
loss_ax.set(xlabel="Epoch", ylabel="Loss", title="Training and Validation Loss")
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Right panel - left untouched (empty axes) when no accuracy was recorded
acc_curve = history["policy_accuracy"]
if acc_curve:
    acc_ax.plot(acc_curve, label="Val Accuracy", color="green")
    acc_ax.set(xlabel="Epoch", ylabel="Accuracy", title="Validation Accuracy")
    acc_ax.legend()
    acc_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Adversarial Attack Simulation

We simulate adversarial perturbations to evaluate the robustness of the trained model. This demonstrates the game-theoretic defense layer.

In [None]:
def fgsm_attack(model, data, target, epsilon=0.1):
    """
    Fast Gradient Sign Method (FGSM) attack.

    Returns ``data + epsilon * sign(dLoss/dData)``, where the loss is the
    cross-entropy between the first element of the model output and ``target``.

    The function has no side effects on its arguments: the gradient is taken
    with respect to a private copy of ``data`` (the caller's tensor keeps its
    ``requires_grad`` flag), and the ``.grad`` buffers of the model parameters
    are neither cleared nor written.

    Args:
        model: Target model. Called as ``model(data)``; must return either the
            string ``"NO_THREAT"`` or a 2-tuple whose first element holds the
            per-class scores.
        data: Input tensor (floating point, so that it can carry gradients)
        target: True labels
        epsilon: Perturbation magnitude

    Returns:
        Perturbed data tensor, detached from the autograd graph. When the
        model answers ``"NO_THREAT"`` there is nothing to differentiate and
        the input is returned unperturbed (also detached).
    """
    # Differentiate with respect to a fresh leaf tensor instead of flipping
    # requires_grad on the tensor owned by the caller.
    adv_input = data.detach().clone().requires_grad_(True)

    output = model(adv_input)
    if output == "NO_THREAT":
        return data.detach()

    action_probs, _ = output
    # NOTE(review): cross_entropy applies log-softmax itself and therefore
    # expects raw logits. If the model already emits softmax probabilities
    # (as the name `action_probs` suggests), the loss - and with it the attack
    # gradient - is computed on doubly-normalised scores. Confirm against the
    # model's forward().
    loss = torch.nn.functional.cross_entropy(action_probs, target)

    # Only the input gradient is needed; autograd.grad returns it directly
    # without accumulating anything into the model parameters.
    (input_grad,) = torch.autograd.grad(loss, adv_input)

    # FGSM perturbation: one step of size epsilon along the gradient sign
    perturbed_data = adv_input + epsilon * input_grad.sign()

    return perturbed_data.detach()


# Evaluate on clean vs. adversarial samples
model.eval()
test_loader = DataLoader(
    test_dataset,
    batch_size=config.training.batch_size,  # same batch size as train/val loaders
    shuffle=False
)

clean_correct = 0
adv_correct = 0
total = 0

epsilon = 0.1  # Perturbation magnitude

for data, target in test_loader:
    data, target = data.to(device), target.to(device)

    # Clean prediction
    with torch.no_grad():
        clean_output = model(data)
        if clean_output != "NO_THREAT":
            clean_probs, _ = clean_output
            clean_pred = clean_probs.argmax(dim=1)
            clean_correct += (clean_pred == target).sum().item()

    # Adversarial examples are crafted with the model still in eval mode.
    # Gradients are tracked in either mode (only torch.no_grad() turns them
    # off); train mode would merely activate layers such as dropout and
    # batch-norm statistic updates, so the attack would target a different
    # network than the one scored below and could alter its buffers.
    # NOTE(review): if ACIE_Core deliberately behaves differently in train
    # mode (e.g. bypasses the "NO_THREAT" gate), confirm this is still the
    # intended attack setting.
    adv_data = fgsm_attack(model, data.clone(), target, epsilon)

    # Adversarial prediction
    with torch.no_grad():
        adv_output = model(adv_data)
        if adv_output != "NO_THREAT":
            adv_probs, _ = adv_output
            adv_pred = adv_probs.argmax(dim=1)
            adv_correct += (adv_pred == target).sum().item()

    # Every batch counts toward the denominator, including batches for which
    # the model answered "NO_THREAT" (those add no correct predictions).
    total += target.size(0)

clean_acc = clean_correct / total if total > 0 else 0
adv_acc = adv_correct / total if total > 0 else 0

print("Adversarial Robustness Evaluation")
print("=" * 50)
print(f"Epsilon: {epsilon}")
print(f"Clean Accuracy: {clean_acc:.4f}")
print(f"Adversarial Accuracy: {adv_acc:.4f}")
print(f"Robustness Gap: {clean_acc - adv_acc:.4f}")

## 8. Causal Graph Visualization

The learned adjacency matrix represents discovered causal relationships between nodes. We visualize this as a directed graph.

In [None]:
# Pull the learned adjacency weights off the model as a NumPy array
adjacency = model.adjacency.detach().cpu().numpy()

# Keep only entries whose magnitude exceeds the threshold; the rest become 0
threshold = 0.1
strong_mask = np.abs(adjacency) > threshold
adjacency_thresholded = np.where(strong_mask, adjacency, 0)

# Build the directed graph row by row: row index = source, column = target.
# Each node is added right before its outgoing edges.
G = nx.DiGraph()
for i, row in enumerate(adjacency_thresholded):
    G.add_node(i, label=f"N{i}")
    for j in np.flatnonzero(row):
        G.add_edge(i, int(j), weight=row[j])

# Figure with the raw matrix on the left and the thresholded graph on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
heatmap_ax, graph_ax = axes

# Left: heatmap of the unthresholded matrix on a fixed [-1, 1] colour scale
im = heatmap_ax.imshow(adjacency, cmap="RdBu", vmin=-1, vmax=1)
heatmap_ax.set(
    title="Learned Adjacency Matrix",
    xlabel="Target Node",
    ylabel="Source Node",
)
plt.colorbar(im, ax=heatmap_ax, label="Edge Weight")

# Right: graph drawing; colour encodes the sign of a weight, line width its
# magnitude. The layout seed is fixed so the picture is reproducible.
pos = nx.spring_layout(G, seed=42)
edge_weights = [attrs["weight"] for _, _, attrs in G.edges(data=True)]
edge_colors = ["red" if w < 0 else "blue" for w in edge_weights]
edge_widths = [2 * abs(w) for w in edge_weights]

nx.draw(
    G,
    pos,
    ax=graph_ax,
    with_labels=True,
    node_color="lightblue",
    node_size=500,
    edge_color=edge_colors,
    width=edge_widths,
    arrows=True,
    arrowsize=15,
)
graph_ax.set_title(f"Causal Graph (threshold={threshold})")

plt.tight_layout()
plt.show()

print("\nGraph Statistics:")
print(f"  Nodes: {G.number_of_nodes()}")
print(f"  Edges: {G.number_of_edges()}")
print(f"  Is DAG: {nx.is_directed_acyclic_graph(G)}")

## 9. Summary

This notebook demonstrated the full ACIE research pipeline:

1. **Configuration**: Centralized hyperparameter management via `ACIEConfig`
2. **Data Loading**: Interface-compliant `CyberLogDataset` implementation
3. **Training**: Separation of execution (Trainer) and monitoring (Recorder)
4. **Adversarial Evaluation**: FGSM attack to assess robustness
5. **Causal Visualization**: Learned DAG structure extraction

For further experimentation, researchers may:
- Modify `config.training` parameters for hyperparameter search
- Implement custom datasets inheriting from `BaseACIEDataset`
- Extend the `ACIERecorder` for custom logging backends

In [None]:
# Final summary: collect the report lines first, then print them in one go
summary_lines = [
    "Experiment Summary",
    "=" * 50,
    f"Experiment: {config.experiment_name}",
    f"Total Epochs: {config.training.epochs}",
    f"Final Train Loss: {history['train_loss'][-1]:.4f}",
]

# Validation metrics are listed only when the history contains values for them
if history['val_loss']:
    summary_lines.append(f"Final Val Loss: {history['val_loss'][-1]:.4f}")
if history['policy_accuracy']:
    summary_lines.append(
        f"Final Val Accuracy: {history['policy_accuracy'][-1]:.4f}"
    )

# Test-set figures computed by the adversarial evaluation cell
summary_lines.append(f"Clean Test Accuracy: {clean_acc:.4f}")
summary_lines.append(f"Adversarial Test Accuracy: {adv_acc:.4f}")

print("\n".join(summary_lines))