In [122]:
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Network Traffic Data Processing

In [123]:
def process_network_traffic_data(path):
    # Ingest the XML data
    tree = ET.parse(path)
    root = tree.getroot()

    # Define the namespace
    ns = {'net': 'http://sndlib.zib.de/network'}

    # Extract demand information
    demands = []
    for demand in root.findall('net:demands/net:demand', ns):
        source = demand.find('net:source', ns).text
        target = demand.find('net:target', ns).text
        demand_value = float(demand.find('net:demandValue', ns).text)
        demands.append({'source': source, 'target': target, 'demand_value': demand_value})

    df = pd.DataFrame(demands)
    return df

In [124]:
# Prepares the input data for use in a Generative Adversarial Network (GAN) by encoding categorical variables
# and scaling numerical values
data_path = 'data/demandMatrix-abilene-zhang-5min-20040910-2325.xml'
data = process_network_traffic_data(data_path)

# Encode categorical variables
le_source = LabelEncoder()
le_target = LabelEncoder()
data['source'] = le_source.fit_transform(data['source'])
data['target'] = le_target.fit_transform(data['target'])

# Normalize the demand values
scaler = MinMaxScaler()
data['demand_value'] = scaler.fit_transform(data[['demand_value']])

# GAN Architecture Definition

In [125]:
# Define input and output dimensions
input_dim = 3  # noise dimension
output_dim = 3  # source, target, demand_value

In [126]:
# Define the generator
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim),
            nn.Tanh() 
        )

    def forward(self, x):
        return self.model(x)

In [127]:
# Define the discriminator
class Discriminator(nn.Module):
    def __init__(self, output_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(output_dim, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid()  # Outputs probability of being real
        )

    def forward(self, x):
        return self.model(x)

# Training

In [128]:
# Initialize models
generator = Generator(input_dim, output_dim)
discriminator = Discriminator(output_dim)

In [129]:
# Optimizers
lr = 0.0002
optim_G = torch.optim.Adam(generator.parameters(), lr=lr)
optim_D = torch.optim.Adam(discriminator.parameters(), lr=lr)

# Loss function
criterion = nn.BCELoss()

# Training parameters
num_epochs = 5000
batch_size = 32
real_label = 1
fake_label = 0

In [130]:
for epoch in range(num_epochs):
    for _ in range(len(data) // batch_size):
        # ---------------------
        # Train Discriminator
        # ---------------------
        # Real data
        real_data = torch.tensor(data.sample(batch_size).values, dtype=torch.float32)
        label_real = torch.full((batch_size, 1), real_label, dtype=torch.float32)

        # Generate fake data
        noise = torch.randn(batch_size, input_dim)
        fake_data = generator(noise)
        label_fake = torch.full((batch_size, 1), fake_label, dtype=torch.float32)

        # Train on real data
        optim_D.zero_grad()
        output_real = discriminator(real_data)
        loss_real = criterion(output_real, label_real)
        loss_real.backward()

        # Train on fake data
        output_fake = discriminator(fake_data.detach())
        loss_fake = criterion(output_fake, label_fake)
        loss_fake.backward()
        optim_D.step()

        # ---------------------
        # Train Generator
        # ---------------------
        optim_G.zero_grad()
        output = discriminator(fake_data)
        loss_G = criterion(output, label_real)  # Aim to fool the discriminator
        loss_G.backward()
        optim_G.step()

    # Print losses every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss_D: {loss_real + loss_fake:.4f}, Loss_G: {loss_G:.4f}")

Epoch 0: Loss_D: 0.9886, Loss_G: 0.6879
Epoch 100: Loss_D: 0.2918, Loss_G: 2.0587
Epoch 200: Loss_D: 0.1180, Loss_G: 3.5142
Epoch 300: Loss_D: 0.0221, Loss_G: 3.8590
Epoch 400: Loss_D: 0.0798, Loss_G: 4.3452
Epoch 500: Loss_D: 0.0858, Loss_G: 4.2078
Epoch 600: Loss_D: 0.0530, Loss_G: 4.8280
Epoch 700: Loss_D: 0.0796, Loss_G: 3.4740
Epoch 800: Loss_D: 0.1194, Loss_G: 4.6378
Epoch 900: Loss_D: 0.0070, Loss_G: 5.0430
Epoch 1000: Loss_D: 0.0051, Loss_G: 5.2910
Epoch 1100: Loss_D: 0.0086, Loss_G: 4.8148
Epoch 1200: Loss_D: 0.0116, Loss_G: 4.5921
Epoch 1300: Loss_D: 0.1567, Loss_G: 4.6870
Epoch 1400: Loss_D: 0.0082, Loss_G: 4.8249
Epoch 1500: Loss_D: 0.0095, Loss_G: 4.6630
Epoch 1600: Loss_D: 0.0070, Loss_G: 5.0531
Epoch 1700: Loss_D: 0.0058, Loss_G: 5.2314
Epoch 1800: Loss_D: 0.0071, Loss_G: 4.9628
Epoch 1900: Loss_D: 0.0088, Loss_G: 4.7540
Epoch 2000: Loss_D: 0.0044, Loss_G: 5.5011
Epoch 2100: Loss_D: 0.0077, Loss_G: 4.8899
Epoch 2200: Loss_D: 0.0044, Loss_G: 5.4881
Epoch 2300: Loss_D: 0.0

# Generate Synthetic Data

In [131]:
# Number of synthetic samples to generate
num_samples = len(data)

# Generate synthetic data
synthetic_data = []
for _ in range(num_samples):
    # Generate noise
    noise = torch.randn(1, input_dim)
    generated_sample = generator(noise).detach().numpy()[0]

    # Scale generated values to the range of indices for source and target
    source_idx = int(((generated_sample[0] + 1) / 2) * (len(le_source.classes_) - 1))
    target_idx = int(((generated_sample[1] + 1) / 2) * (len(le_target.classes_) - 1))

    # Map indices back to original categories
    synthetic_source = le_source.inverse_transform([source_idx])[0]
    synthetic_target = le_target.inverse_transform([target_idx])[0]

    # Scale and add noise to demand_value for variety
    synthetic_demand_value = scaler.inverse_transform([[generated_sample[2]]])[0][0]
    synthetic_demand_value += (0.05 * synthetic_demand_value * np.random.randn())

    # Append the row to the synthetic dataset
    synthetic_data.append([synthetic_source, synthetic_target, synthetic_demand_value])


In [132]:
# Convert synthetic data to a DataFrame
synthetic_df = pd.DataFrame(synthetic_data, columns=['source', 'target', 'demand_value'])
output_csv_path = 'data/synthetic_data.csv'  # Make sure the 'data' directory exists
synthetic_df.to_csv(output_csv_path, index=False)