In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# File Paths
GLOBAL_MODEL_PATH = "/Users/akashthanneeru/Desktop/INADS_Data/Models/Global_Layer_MLP.keras"
EDGE_MODEL_PATH = "/Users/akashthanneeru/Desktop/INADS_Data/Models/Edge_Layer_LSTM_MultiClass.keras"
DEVICE_MODEL_PATH = "/Users/akashthanneeru/Desktop/INADS_Data/Models/Device_Layer_AE_GRU.keras"
DATASET_PATH = "/Users/akashthanneeru/Desktop/INADS_Data/Data/Indexed_Dataset_Timestamp_Processed.csv"

# Load Dataset
logging.info("Loading dataset for comparative analysis...")
df = pd.read_csv(DATASET_PATH)

# Selected Features Per Layer
GLOBAL_FEATURES = [
    "Dst Port", "Flow Byts/s", "Flow IAT Mean", "Flow IAT Std", "Flow IAT Max",
    "SYN Flag Cnt", "ACK Flag Cnt", "FIN Flag Cnt", "PSH Flag Cnt",
    "Pkt Len Min", "Pkt Len Max", "Fwd Pkts/s", "Bwd Pkts/s",
    "Fwd Pkt Len Max", "Bwd Pkt Len Min", "TotLen Fwd Pkts",
    "TotLen Bwd Pkts", "Elapsed_Time"
]
EDGE_FEATURES = [
    "Fwd IAT Mean", "Bwd IAT Mean", "Flow Byts/s", "Pkt Len Min", "Pkt Len Max",
    "Fwd Pkt Len Max", "Bwd Pkt Len Min", "Idle Max", "Active Min", "Active Max",
    "Init Fwd Win Byts", "Init Bwd Win Byts", "Fwd Seg Size Avg", "Bwd Seg Size Avg",
    "Down/Up Ratio", "Subflow Fwd Pkts", "Hour", "Weekday", "Elapsed_Time"
]
DEVICE_FEATURES = [
    "Dst Port", "Fwd Pkts/s", "Bwd Pkts/s",
    "Fwd Pkt Len Max", "Bwd Pkt Len Min",
    "Init Fwd Win Byts", "Init Bwd Win Byts",
    "Active Max", "Active Mean", "Active Min", "Idle Max",
    "Hour", "Elapsed_Time"
]

# Standardize Features
scaler = StandardScaler()
df[GLOBAL_FEATURES] = scaler.fit_transform(df[GLOBAL_FEATURES])
df[EDGE_FEATURES] = scaler.fit_transform(df[EDGE_FEATURES])
df[DEVICE_FEATURES] = scaler.fit_transform(df[DEVICE_FEATURES])

# Load Models
logging.info("Loading trained models...")
global_model = load_model(GLOBAL_MODEL_PATH)
edge_model = load_model(EDGE_MODEL_PATH)
device_model = load_model(DEVICE_MODEL_PATH)

# Extract Anomaly Scores
logging.info("Extracting anomaly scores from models...")
global_scores = np.max(global_model.predict(df[GLOBAL_FEATURES]), axis=1)
edge_scores = np.max(edge_model.predict(np.expand_dims(df[EDGE_FEATURES].values, axis=1)), axis=1)
device_scores = np.max(device_model.predict(np.expand_dims(df[DEVICE_FEATURES].values, axis=1)), axis=1)

# Create DataFrame for Comparison
comparison_df = pd.DataFrame({
    "Global Confidence": global_scores,
    "Edge Confidence": edge_scores,
    "Device Confidence": device_scores
})

# Plot Confidence Score Distributions
plt.figure(figsize=(12, 6))
sns.kdeplot(comparison_df["Global Confidence"], label="Global Layer", fill=True)
sns.kdeplot(comparison_df["Edge Confidence"], label="Edge Layer", fill=True)
sns.kdeplot(comparison_df["Device Confidence"], label="Device Layer", fill=True)
plt.legend()
plt.title("Confidence Score Distributions Across Layers")
plt.show()

# Correlation Between Layer Outputs
plt.figure(figsize=(10, 6))
sns.heatmap(comparison_df.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Correlation Between Anomaly Scores of Layers")
plt.show()

logging.info("Comparative analysis complete.")

2025-03-18 01:45:31,127 - INFO - Loading dataset for comparative analysis...
2025-03-18 01:45:40,801 - INFO - Loading trained models...
  saveable.load_own_variables(weights_store.get(inner_path))
2025-03-18 01:45:40,999 - INFO - Extracting anomaly scores from models...


[1m70948/70948[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 187us/step
[1m70948/70948[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 272us/step


ValueError: Exception encountered when calling GRUCell.call().

[1mDimensions must be equal, but are 13 and 32 for '{{node sequential_1/gru_1/gru_cell_1/MatMul}} = MatMul[T=DT_FLOAT, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](sequential_1/gru_1/strided_slice_1, sequential_1/gru_1/gru_cell_1/Cast/ReadVariableOp)' with input shapes: [32,13], [32,96].[0m

Arguments received by GRUCell.call():
  • inputs=tf.Tensor(shape=(32, 13), dtype=float32)
  • states=('tf.Tensor(shape=(32, 32), dtype=float32)',)
  • training=False