# Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load Data

In [None]:
# Read the raw CSV and discard every row whose 'Org' value is missing.
file_path = r'C:\Users\File.csv'
data = pd.read_csv(file_path).dropna(subset=['Org'])

# Data Mapping

In [None]:
# Give every distinct 'Source' value an integer Server ID (0, 1, 2, ...) in
# the order returned by unique(), then store the ID per row in 'ServerID'.
unique_servers = data['Source'].unique()
server_mapping = dict(zip(unique_servers, range(len(unique_servers))))
data['ServerID'] = data['Source'].map(server_mapping)

# Sort & Sequence

In [None]:
# Convert AdjustedTime to whole seconds.
# NOTE(review): the // 10**9 assumes the parsed datetimes are stored with
# nanosecond resolution — confirm for the pandas version in use.
data['AdjustedTime'] = pd.to_datetime(data['AdjustedTime']).astype('int64') // 10**9

# Order rows chronologically BEFORE windowing. groupby() keeps the incoming
# row order inside each group, so without this sort the rolling windows (and
# the "last ID" target) would follow CSV row order rather than time order.
# mergesort is stable, so rows with equal timestamps keep their file order.
data = data.sort_values('AdjustedTime', kind='mergesort')

# Encode categorical columns as integer codes; the fitted encoders are kept
# so the codes can be mapped back to the original values later.
label_encoders = {}
categorical_cols = ['Protocol', 'Connection', 'User', 'Org']
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Define feature and target columns
features = ['AdjustedTime', 'Protocol', 'Connection', 'User', 'Length', 'ARTT', 'Longitude', 'Latitude', 'Org']
labels = 'ServerID'

# Standardize the feature columns in place.
# NOTE(review): the scaler is fitted on every row before the train/test
# split, so test-set statistics influence the training features — consider
# fitting on the training portion only.
scaler = StandardScaler()
data[features] = scaler.fit_transform(data[features])

# Group by user to create sequences. 'User' is one of the scaled features, so
# the group keys are the scaled codes; each group still holds one user's rows.
user_groups = data.groupby('User')
X_sequences = []
y_sequences = []
sequence_length = 60

for user, group in user_groups:
    user_features = group[features].values
    user_labels = group[labels].values

    # Slide a window of `sequence_length` rows one step at a time; users with
    # fewer rows than the window produce an empty range and are skipped.
    for i in range(len(user_features) - sequence_length + 1):
        X_sequences.append(user_features[i:i + sequence_length])
        y_sequences.append(user_labels[i + sequence_length - 1])  # Target is last ID in the sequence

# Fail here with a clear message instead of deeper in the split/model code.
if not X_sequences:
    raise ValueError(
        f"No user has at least {sequence_length} rows; cannot build any sequences."
    )

# Convert lists to numpy arrays (every window already has sequence_length rows,
# so pad_sequences only stacks them and casts to float32).
X_sequences = pad_sequences(X_sequences, maxlen=sequence_length, dtype='float32')
y_sequences = np.array(y_sequences)

# Train-Test Split

In [None]:
# Hold out 15% of the windows for testing; the fixed seed keeps the shuffle
# reproducible between runs.
# NOTE(review): the windows are built with a stride of one row, so a random
# split places heavily overlapping windows on both sides — confirm this is
# acceptable for the reported test accuracy.
split_parts = train_test_split(
    X_sequences,
    y_sequences,
    test_size=0.15,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Frequency Enhanced Decomposition Transformer Model Definition

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, Conv1D, Add, GlobalAveragePooling1D, Lambda
from tensorflow.keras.models import Model
import tensorflow.signal as tf_signal

# Fourier Transform Layer
class FourierTransformLayer(tf.keras.layers.Layer):
    """Return the real part of the FFT taken along the time axis.

    The time axis is taken to be the second-to-last axis of the input, as in
    the ``(batch, time, features)`` tensors this file feeds to the layer.
    ``tf.signal.fft`` itself always transforms the inner-most axis, so the
    last two axes are swapped before the transform and swapped back after it.
    The output has the same shape as the input.
    """

    def call(self, inputs):
        """Apply the FFT over time to every feature channel.

        Args:
            inputs: Real-valued tensor with at least two axes.

        Returns:
            Tensor of the input's shape holding the real part of the
            spectrum; the imaginary part is discarded.
        """
        as_complex = tf.cast(inputs, dtype=tf.complex64)
        # Move time to the inner-most position so the FFT runs over time
        # steps rather than across feature channels.
        time_last = tf.linalg.matrix_transpose(as_complex)
        spectrum = tf.signal.fft(time_last)
        # Restore the original axis order.
        spectrum = tf.linalg.matrix_transpose(spectrum)
        # Take only real part
        return tf.math.real(spectrum)

# FEDformer Encoder Block
class FEDformerEncoder(tf.keras.layers.Layer):
    """Encoder block combining a Conv1D trend branch with attention on the
    Fourier-transformed remainder.

    The input is split into a "trend" (a linear Conv1D over the sequence) and
    a "seasonal" remainder (``inputs - trend``). The seasonal part goes
    through ``FourierTransformLayer``, self-attention, a residual connection
    with layer normalization, and a two-layer feed-forward network; the
    result is added to the trend and normalized again.

    Args:
        embed_dim: Width of the block. The last dimension of the input is
            expected to equal this value so that ``inputs - trend`` and the
            residual sums are well-defined.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout rate applied after attention and after the
            feed-forward network.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (e.g. ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout

        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.norm1 = LayerNormalization(epsilon=1e-6)
        self.norm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim)
        ])
        self.decomp = Conv1D(filters=embed_dim, kernel_size=3, padding="same", activation="linear")  # Trend decomposition
        self.fourier = FourierTransformLayer()  # Frequency-based decomposition

    def call(self, inputs, training=None):
        """Run the block.

        Args:
            inputs: Tensor whose last dimension equals ``embed_dim``.
            training: Forwarded to the dropout layers. Defaults to ``None``
                so the layer can be called without the argument.

        Returns:
            Tensor produced by the final layer normalization.
        """
        # Time-series decomposition (trend extraction)
        trend = self.decomp(inputs)  # Trend component
        seasonal = inputs - trend  # Seasonal component

        # Frequency decomposition
        freq_output = self.fourier(seasonal)

        # Self-attention on the frequency-transformed seasonal component
        attn_output = self.attention(freq_output, freq_output)
        attn_output = self.dropout1(attn_output, training=training)
        # Residual connection uses the untransformed seasonal component.
        out1 = self.norm1(seasonal + attn_output)

        # Feed-forward network
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)

        # Combine the trend with the processed seasonal branch
        return self.norm2(trend + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "dropout": self.dropout_rate,
        })
        return config

# Model Architecture
# Model dimensions come from the training tensor and the server mapping.
num_classes = len(unique_servers)
sequence_length, feature_dim = X_train.shape[1], X_train.shape[2]

inputs = Input(shape=(sequence_length, feature_dim))

# Three identical FEDformer encoder blocks applied back to back.
x = inputs
for _ in range(3):
    x = FEDformerEncoder(embed_dim=feature_dim, num_heads=4, ff_dim=128)(x)

# Average over the sequence axis, then classify with a small dense head.
x = GlobalAveragePooling1D()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
outputs = Dense(num_classes, activation='softmax')(x)

# Assemble and compile; the sparse loss takes integer class IDs as targets.
model = Model(inputs, outputs)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary.
model.summary()


# Training

In [None]:
# Fit for 10 epochs in batches of 32; validation_split carves the validation
# set out of the training data passed to fit().
fit_options = dict(epochs=10, validation_split=0.176, batch_size=32, verbose=1)
history = model.fit(X_train, y_train, **fit_options)

# Evaluate

In [None]:
# Score the held-out test set silently; the result unpacks into the loss and
# the accuracy metric the model was compiled with.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = test_metrics
print("Model accuracy on test set:", accuracy)