In [1]:
import toponetx as tnx
example = tnx.CombinatorialComplex()

example.add_cell([1, 2], rank=1)
print(example)

example.add_cell([1, 3], rank=1)
print(example)

example.add_cell([1, 2, 4, 3], rank=2)
print(example)

example.add_cell([2, 5], rank=1)
print(example)

example.add_cell([2, 6, 4], rank=2)
print(example)

Combinatorial Complex with 2 nodes and cells with ranks [0, 1] and sizes (2, 1) 
Combinatorial Complex with 3 nodes and cells with ranks [0, 1] and sizes (3, 2) 
Combinatorial Complex with 4 nodes and cells with ranks [0, 1, 2] and sizes (4, 2, 1) 
Combinatorial Complex with 5 nodes and cells with ranks [0, 1, 2] and sizes (5, 3, 1) 
Combinatorial Complex with 6 nodes and cells with ranks [0, 1, 2] and sizes (6, 3, 2) 


In [1]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import seaborn as sns
from collections import Counter

# Import functions from your modules
from utils.attack_utils import get_attack_indices, get_attack_sds, is_actuator
from utils.swat_topology import SWATComplex
from swat_anomaly_detection import load_swat_data, SWaTDataset

# Set paths (use the same ones as in your main function)
data_dir = "data/SWAT"
train_path = os.path.join(data_dir, "SWATv0_train.csv")
test_path = os.path.join(data_dir, "SWATv0_test.csv")

# Set sampling rate (can adjust as needed)
sample_rate = 0.05  # Using 5% of the data, same as in your main function

# Load data with sampling
print("Loading data...")
train_data, test_data = load_swat_data(train_path, test_path, sample_rate=sample_rate)

# 1. Basic attack statistics
normal_samples = (test_data['Normal/Attack'] == 0.0).sum()
attack_samples = (test_data['Normal/Attack'] != 1.0).sum()
attack_percentage = 100 * attack_samples / len(test_data)

print(f"Test data contains {len(test_data)} samples:")
print(f"  - Normal samples: {normal_samples} ({100 * normal_samples / len(test_data):.2f}%)")
print(f"  - Attack samples: {attack_samples} ({attack_percentage:.2f}%)")

# 2. Check if this aligns with expected SWAT attack patterns
attacks, attack_labels = get_attack_indices("SWAT")
print(f"\nExpected attacks from attack_utils.py: {len(attacks)} attack scenarios")

# Get all component names (excluding Timestamp and Normal/Attack)
component_names = [col for col in test_data.columns if col not in ['Timestamp', 'Normal/Attack']]
print(f"Total components in the dataset: {len(component_names)}")

# 3. Initialize SWAT complex for testing
swat_complex = SWATComplex()

# 4. Initialize datasets 
train_dataset = SWaTDataset(train_data, swat_complex)
test_dataset = SWaTDataset(test_data, swat_complex)

# 5. Create dataloader with batch size 1 (as in your main function)
batch_size = 1
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 6. Analyze the test dataloader to confirm attacks are included
print("\nAnalyzing test_dataloader...")
attack_count = 0
labels_in_dataloader = []

# Sample a limited number of batches to avoid slow execution
max_samples = min(1000, len(test_dataloader))
for i, (x_0, x_1, x_2, a0, a1, coa2, b1, b2, label) in enumerate(test_dataloader):
    if i >= max_samples:
        break
    labels_in_dataloader.append(label.item())
    if label.item() == 1:  # Attack label
        attack_count += 1

print(f"Sampled {max_samples} batches from test_dataloader:")
print(f"  - Attacks in sampled batches: {attack_count} ({100 * attack_count / max_samples:.2f}%)")

# 7. Create a timeline visualization of attacks
if max_samples > 100:
    plt.figure(figsize=(15, 3))
    plt.plot(labels_in_dataloader, 'r-')
    plt.title('Attack Timeline in Test Data (1 = Attack, 0 = Normal)')
    plt.xlabel('Sample Index')
    plt.ylabel('Attack Label')
    plt.ylim(-0.1, 1.1)
    plt.grid(True)
    plt.show()

# 8. Check which components are targeted by attacks (using attack_utils.py)
print("\nAttack targets according to attack_utils.py:")
target_components = []
for labels in attack_labels:
    target_components.extend(labels)

# Count occurrences
target_counter = Counter(target_components)
for component, count in target_counter.most_common(10):
    print(f"  - {component}: targeted in {count} attacks")

# 9. Verify if these components exist in the dataset
missing_components = [comp for comp in target_counter.keys() if comp not in component_names]
if missing_components:
    print(f"\nWarning: {len(missing_components)} targeted components are not in the dataset: {missing_components}")
else:
    print("\nAll targeted components are present in the dataset.")

Loading data...
Loading SWAT data from data/SWAT/SWATv0_train.csv and data/SWAT/SWATv0_test.csv...
Using sample rate: 0.05
Sampled data: train=24750 rows, test=22495 rows
Test data contains 22495 samples:
  - Normal samples: 0 (0.00%)
  - Attack samples: 22495 (100.00%)

Expected attacks from attack_utils.py: 32 attack scenarios
Total components in the dataset: 51
Building SWAT combinatorial complex...
Adding 51 components as rank 0 cells
Adding 86 specific component relationships as rank 1 cells
  Added 1-cell: [MV101, FIT101] (valve affects flow)
  Added 1-cell: [FIT101, LIT101] (flow affects level)
  Added 1-cell: [LIT101, P101] (tank level controls pump)
  Added 1-cell: [P101, FIT201] (pump to stage 2 flow meter)
  Added 1-cell: [P102, FIT201] (backup pump to stage 2)
  Added 1-cell: [FIT201, AIT202] (flow affects pH reading)
  Added 1-cell: [FIT201, AIT201] (flow affects conductivity reading)
  Added 1-cell: [FIT201, AIT203] (flow affects ORP reading)
  Added 1-cell: [AIT201, P201

  self._set_arrayXarray(i, j, x)


KeyboardInterrupt: 

In [None]:
# Simple script to check for attacks in test_dataloader
import torch

# Assuming test_dataloader is already created with your SWaTDataset and DataLoader

# Count attack samples and show first 10
attack_count = 0
print("First 10 samples in test_dataloader:")
print("Index | Label (1=Attack, 0=Normal)")
print("-" * 30)

for i, sample in enumerate(test_dataloader):
    # The label is the last element in each sample tuple
    label = sample[-1].item()
    
    
    # Count attacks
    if label == 1:
        attack_count += 1
    
    # Print first 10 samples
    if i < 10:
        print(f"{i:5d} | {label}")

# Print attack statistics
total_samples = len(test_dataloader)
attack_percentage = (attack_count / total_samples) * 100

print("\nTest dataset statistics:")
print(f"Total samples: {total_samples}")
print(f"Attack samples: {attack_count} ({attack_percentage:.2f}%)")
print(f"Normal samples: {total_samples - attack_count} ({100 - attack_percentage:.2f}%)")

First 10 samples in test_dataloader:
Index | Label (1=Attack, 0=Normal)
------------------------------


NameError: name 'test_dataloader' is not defined