In [1]:
# import pandas as pd
# import numpy as np
# import torch
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D  # Import for 3D plotting

# # 1. Load your saved fine-tuned model
# model_path = "/home/chills/Desktop/LITE-RATE_files/savedmodels_and_tokenizers/models/albert"
# model = AutoModelForSequenceClassification.from_pretrained(model_path)
# tokenizer_path = "/home/chills/Desktop/LITE-RATE_files/savedmodels_and_tokenizers/tokenizers/albert"
# tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
# model.eval()

# # 2. Load your CSV with string-formatted features
# df = pd.read_csv("/home/chills/Desktop/LITE-RATE_files/LITE-RATE-DATAfiles/LITE-RATE-attackdata_80smart20benignmatchingattack_str.csv")

# # 3. Recreate your string representation as you did during training
# def create_string_representation(row):
#     return " ".join([f"{col}: {row[col]}" for col in df.columns if col != "Label_code"])

# # Apply the string conversion
# df["text_representation"] = df.apply(create_string_representation, axis=1)
# texts = df["text_representation"].tolist()

# # 4. Set up a prediction function for SHAP
# def predict(texts):
#     inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
#     with torch.no_grad():
#         outputs = model(**inputs)
#     # For binary classification, return probability of class 1 (attack)
#     probs = torch.nn.functional.softmax(outputs.logits, dim=1)
#     return probs[:, 1].numpy()  # Return probability of attack class

# # 5. Create a feature importance analysis using feature ablation
# feature_names = ["fw_fl_byt_s", "bw_fl_byt_s", "fw_fl_pkt_s", "bw_fl_pkt_s", "fw_pkt_s", "bw_pkt_s"]
# baseline_predictions = predict(texts[:100])  # Use a subset for faster processing

# # Create a dictionary to store importance scores
# feature_importance = {}

# # For each feature, measure how predictions change when the feature is removed
# for feature in feature_names:
#     # Create versions of texts with this feature removed
#     modified_texts = []
#     for text in texts[:100]:
#         # Simple approach: replace the feature and its value with empty string
#         modified_text = text.replace(f"{feature}: ", "")
#         for token in text.split():
#             if token.startswith(f"{feature}:"):
#                 modified_text = text.replace(token, "")
#                 break
#         modified_texts.append(modified_text)
    
#     # Get predictions on modified texts
#     modified_predictions = predict(modified_texts)
    
#     # Calculate importance as average absolute difference in predictions
#     importance = np.mean(np.abs(baseline_predictions - modified_predictions))
#     feature_importance[feature] = importance

# # Prepare data for 3D plotting
# features = list(feature_importance.keys())
# importances = list(feature_importance.values())
# indices = np.arange(len(features))  # X-axis: feature indices
# heights = importances  # Y-axis: importance values
# depths = np.ones(len(features))  # Z-axis: constant depth for visualization

# # Create 3D plot
# fig = plt.figure(figsize=(10, 8))
# ax = fig.add_subplot(111, projection="3d")

# # Plot bars in 3D with bright red color
# ax.bar3d(indices, depths, np.zeros(len(features)), 0.5, 0.5, heights, color="red", shade=True)

# # Customize the plot
# ax.set_xlabel(" ", fontsize=23, labelpad=25)
# # ax.set_ylabel("Depth", fontsize=23, labelpad=20)
# ax.set_zlabel("Importance", fontsize=23, labelpad=20)
# #ax.set_title("Feature Importance for Attack Detection", fontsize=23, pad=10)

# # **Adjust view for front-facing perspective**
# ax.view_init(elev=0, azim=90)  # Front view (azim=0 aligns depth with Y-axis)

# # Set feature names as x-axis ticks **perpendicular to x-axis**
# ax.set_xticks(indices)
# ax.set_xticklabels(features, rotation=90, ha="center", fontsize=23)  # Rotation=0 for perpendicular orientation

# # Set tick font size for all axes
# ax.tick_params(axis="x", labelsize=23)
# # ax.tick_params(axis="y", labelsize=23)
# ax.tick_params(axis="z", labelsize=23)

# # Adjust Z-axis tick labels
# z_ticks = np.linspace(0, max(heights), num=5)  # Adjust number of ticks as needed
# ax.set_zticks(z_ticks)
# ax.set_zticklabels([f"{tick:.2f}" for tick in z_ticks], fontsize=23)

# # Adjust layout
# fig.subplots_adjust(left=0.1, right=0.75, bottom=0.1, top=0.9, wspace=0.2, hspace=0.2)

# # Save the plot
# plt.tight_layout()
# plt.savefig("/home/chills/Desktop/LITE-RATE_files/graphs/model_plots_feature_importance/albert/USB_feature_importance_3d_albert.pdf", dpi=500, bbox_inches="tight")
# plt.close()

# print("3D Feature Importance plot saved as 'USB_feature_importance_3d_albert.pdf'")


In [2]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # Import for 3D plotting

# 1. Locations of the saved fine-tuned model and of its tokenizer
model_path = "/home/chills/Desktop/LITE-RATE_files/savedmodels_and_tokenizers/models/tinybert"
tokenizer_path = "/home/chills/Desktop/LITE-RATE_files/savedmodels_and_tokenizers/tokenizers/tinybert"

# Restore both artefacts from disk. eval() puts the module in evaluation mode
# and returns the module itself, so it can be chained onto the load.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path).eval()

# 2. Read the CSV with string-formatted features into a DataFrame
df = pd.read_csv(
    "/home/chills/Desktop/LITE-RATE_files/LITE-RATE-DATAfiles/LITE-RATE-attackdata_80smart20benignmatchingattack_str.csv"
)

# 3. Recreate your string representation as you did during training
def create_string_representation(row):
    """Render one DataFrame row as a single ``"col: value col: value ..."`` string.

    Args:
        row: A pandas Series whose index holds the column names, as passed by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        The ``"<column>: <value>"`` pairs of every entry in ``row`` except
        ``Label_code``, joined by single spaces in the row's own column order.
    """
    # Walk the row's own index instead of the notebook-global ``df.columns`` so
    # the function does not depend on hidden notebook state; for rows produced
    # by ``df.apply(axis=1)`` the two are the same sequence of names.
    return " ".join(
        f"{col}: {value}" for col, value in row.items() if col != "Label_code"
    )

# Convert every row to its text form once, store it on the frame, and also
# keep the same values as a plain Python list for the model calls.
text_series = df.apply(create_string_representation, axis=1)
df["text_representation"] = text_series
texts = text_series.tolist()

# 4. Set up the prediction function used by the ablation analyses below
def predict(texts):
    """Score a batch of texts with the loaded sequence classifier.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        texts: The texts to score, in a form the tokenizer accepts (this
            notebook always passes a list of strings).

    Returns:
        A 1-D NumPy array with, for each text, the softmax probability of
        class index 1, which this notebook treats as the attack class.
    """
    # NOTE(review): the recorded cell output starts an "Asking to truncate to
    # max_le..." warning, which suggests truncation=True may have no effect
    # without an explicit max_length - confirm.
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    # Inference only, so no autograd graph is recorded for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits
    # Normalise across the class dimension, then keep the class-1 column.
    class_probs = torch.softmax(logits, dim=1)
    return class_probs[:, 1].numpy()

# 5. Create a feature importance analysis using feature ablation
feature_names = ["fw_fl_byt_s", "bw_fl_byt_s", "fw_fl_pkt_s", "bw_fl_pkt_s", "fw_pkt_s", "bw_pkt_s"]


def _ablate_feature(text, feature):
    """Return ``text`` with the ``"<feature>: <value>"`` pair removed.

    The previous inline logic only stripped the ``"<feature>:"`` name token and
    left the value in the text, so the model still saw the feature's value.

    Args:
        text: A row rendered as space-separated ``"<column>: <value>"`` pairs.
        feature: The column name whose name and value should be dropped.

    Returns:
        The remaining tokens re-joined with single spaces. ``text`` is
        returned unchanged (token for token) when the feature does not occur.
    """
    marker = f"{feature}:"
    kept = []
    drop_next = False
    # Split on single spaces (not arbitrary whitespace) so that an empty value
    # still occupies its own token and is the one that gets dropped.
    for token in text.split(" "):
        if drop_next:
            # This token is the value that follows the feature name.
            # Assumes each value is one space-free token - TODO confirm against the CSV.
            drop_next = False
            continue
        if token == marker:
            drop_next = True
            continue
        if token.startswith(marker):
            # Name and value fused into one token ("name:value"): drop it whole.
            continue
        kept.append(token)
    return " ".join(kept)


sample_texts = texts[:100]  # Use a subset for faster processing
baseline_predictions = predict(sample_texts)

# Create a dictionary to store importance scores
feature_importance = {}

# For each feature, measure how predictions change when the feature is removed
for feature in feature_names:
    # Versions of the sample texts with this feature (name and value) removed
    modified_texts = [_ablate_feature(text, feature) for text in sample_texts]

    # Get predictions on modified texts
    modified_predictions = predict(modified_texts)

    # Importance = mean absolute shift in the attack probability
    importance = np.mean(np.abs(baseline_predictions - modified_predictions))
    feature_importance[feature] = importance

# Unpack the importance table into the arrays that bar3d expects
features = list(feature_importance)
importances = list(feature_importance.values())
n_bars = len(features)
indices = np.arange(n_bars)  # X position of each bar (one per feature)
heights = importances  # Bar extent along Z (the importance value)
depths = np.ones(n_bars)  # Constant Y position shared by all bars

# One figure holding a single 3D axes
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Blue, shaded bars: anchored at (x, y, z=0), 0.5 wide, 0.5 deep, `heights` tall
ax.bar3d(
    indices,
    depths,
    np.zeros(n_bars),
    0.5,
    0.5,
    heights,
    color="blue",
    shade=True,
)

# Axis captions: the X caption is a single blank, only Z is named
ax.set_xlabel(" ", fontsize=23, labelpad=25)
ax.set_zlabel("Importance", fontsize=23, labelpad=35)

# Camera at zero elevation and 90 degrees azimuth, intended to give a flat,
# front-facing look
ax.view_init(elev=0, azim=90)

# One X tick per bar, labelled with the feature name and rotated by 90 degrees
ax.set_xticks(indices)
ax.set_xticklabels(features, rotation=90, ha="center", fontsize=23)

# Strip the Y axis: no ticks, no tick labels, and its axis line hidden
ax.set_yticks([])
ax.set_yticklabels([])
ax.yaxis.line.set_visible(False)

# Tick label sizes for the two axes that remain; extra padding on Z
ax.tick_params(axis="x", labelsize=23)
ax.tick_params(axis="z", labelsize=23, pad=15)

# Five evenly spaced Z ticks from 0 to the largest importance, shown with two
# decimals and bottom-aligned
z_ticks = np.linspace(0, max(heights), num=5)
ax.set_zticks(z_ticks)
ax.set_zticklabels(
    ["{:.2f}".format(tick) for tick in z_ticks],
    fontsize=23,
    verticalalignment="bottom",
)

# Fixed subplot margins
fig.subplots_adjust(left=0.1, right=0.75, bottom=0.1, top=0.9, wspace=0.2, hspace=0.2)

# Write the figure to disk as a PDF, then close it
plt.savefig(
    "/home/chills/Desktop/LITE-RATE_files/graphs/model_plots_feature_importance/tinybert/USB_feature_importance_3d_tinybert.pdf",
    dpi=500,
)
plt.close()

print("3D Feature Importance plot saved as 'USB_feature_importance_3d_tinybert.pdf")


2025-03-20 15:45:19.760297: I tensorflow/core/platform/cpu_feature_guard.cc:181] Beginning TensorFlow 2.15, this package will be updated to install stock TensorFlow 2.15 alongside Intel's TensorFlow CPU extension plugin, which provides all the optimizations available in the package and more. If a compatible version of stock TensorFlow is present, only the extension will get installed. No changes to code or installation setup is needed as a result of this change.
More information on Intel's optimizations for TensorFlow, delivered as TensorFlow extension plugin can be viewed at https://github.com/intel/intel-extension-for-tensorflow.
2025-03-20 15:45:19.760328: I tensorflow/core/platform/cpu_feature_guard.cc:192] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Asking to truncate to max_le

3D Feature Importance plot saved as 'USB_feature_importance_3d_tinybert.pdf


In [3]:
# # Additional visualization: Calculate importance based on token presence
# token_importance = {}

# # Process a subset of examples
# for text, prediction in zip(texts[:100], baseline_predictions):
#     tokens = text.split()
    
#     # For each token, check if removing it affects the prediction
#     for token in tokens:
#         # Skip tokens that aren't related to features
#         if not any(feature in token for feature in feature_names):
#             continue
        
#         # Create a version of text with this token removed
#         modified_text = text.replace(token, "")
#         modified_prediction = predict([modified_text])[0]
        
#         # Calculate importance as absolute difference in prediction
#         importance = abs(prediction - modified_prediction)
        
#         if token in token_importance:
#             token_importance[token].append(importance)
#         else:
#             token_importance[token] = [importance]

# # Calculate average importance for each token
# avg_token_importance = {token: np.mean(importances) for token, importances in token_importance.items()}

# # Get top tokens by importance
# top_tokens = dict(sorted(avg_token_importance.items(), key=lambda x: x[1], reverse=True)[:20])

# # Prepare data for 3D plotting
# tokens = list(top_tokens.keys())
# importances = list(top_tokens.values())
# indices = np.arange(len(tokens))  # X-axis: token indices
# heights = importances  # Y-axis: importance values
# depths = np.ones(len(tokens))  # Z-axis: constant depth for visualization

# # Create 3D plot
# fig = plt.figure(figsize=(10, 8))
# ax = fig.add_subplot(111, projection='3d')

# # Plot bars in 3D with bright red color
# ax.bar3d(indices, depths - 3, np.zeros(len(tokens)), 0.4, 0.4, heights, color='red', shade=True)

# # Customize the plot
# ax.set_xlabel(' ', fontsize=23, labelpad=20)  # Increase label padding and font size
# # ax.set_ylabel('Depth', fontsize=23, labelpad=20)       # Increase label padding and font size
# ax.set_zlabel('Importance', fontsize=23, labelpad=25)  # Increase label padding and font size
# #ax.set_title('Top Tokens by Importance', fontsize=23, pad=10)  # Add padding to title

# # Adjust elevation and azimuth angles for better visibility
# ax.view_init(elev=0, azim=90)

# # Set tick font size for all axes
# ax.tick_params(axis='x', labelsize=23)  # Increase X-axis tick font size
# # ax.tick_params(axis='y', labelsize=23)  # Increase Y-axis tick font size
# ax.tick_params(axis='z', labelsize=23)  # Increase Z-axis tick font size

# # Set token names as x-axis ticks with larger font size
# ax.set_xticks(indices)
# ax.set_xticklabels(tokens, rotation=90, ha='right', fontsize=26)

# # Set Z-axis tick labels with increased font size
# z_ticks = np.linspace(0, max(heights), num=5)  # Adjust number of ticks as needed
# ax.set_zticks(z_ticks)
# ax.set_zticklabels([f"{tick:.2f}" for tick in z_ticks], fontsize=23)

# # Adjust layout
# fig.subplots_adjust(left=0.1, right=0.75, bottom=0.1, top=0.9, wspace=0.2, hspace=0.2)

# # Save the plot
# plt.savefig("/home/chills/Desktop/LITE-RATE_files/graphs/model_plots_feature_importance/albert/top_tokens_importance_3d_albert.pdf", dpi=300, bbox_inches="tight")
# plt.close()

# print("3D Top Tokens Importance plot saved as 'top_tokens_importance_3d_albert.pdf'") 


In [4]:
# Additional visualization: per-token ablation. For each sampled text, every
# whitespace-separated token that contains a feature name is removed in turn
# and the resulting shift in the predicted probability is recorded.
token_importance = {}

# Pair each sampled text with its baseline prediction
for text, prediction in zip(texts[:100], baseline_predictions):
    tokens = text.split()

    # Keep only the tokens that mention one of the features; all others are skipped
    feature_tokens = [
        candidate
        for candidate in tokens
        if any(name in candidate for name in feature_names)
    ]

    for token in feature_tokens:
        # Text with every occurrence of this token removed, scored on its own
        modified_text = text.replace(token, "")
        modified_prediction = predict([modified_text])[0]

        # Importance is the absolute change against the baseline prediction
        importance = abs(prediction - modified_prediction)

        # Collect all observations for the same token string
        token_importance.setdefault(token, []).append(importance)

# Mean importance per token over all of its observations
avg_token_importance = {
    name: np.mean(scores) for name, scores in token_importance.items()
}

# The 20 tokens with the highest mean importance, largest first
ranked_tokens = sorted(
    avg_token_importance.items(), key=lambda item: item[1], reverse=True
)
top_tokens = dict(ranked_tokens[:20])

# Unpack the ranked tokens into the arrays that bar3d expects
tokens = list(top_tokens)
importances = list(top_tokens.values())
n_bars = len(tokens)
indices = np.arange(n_bars)  # X position of each bar (one per token)
heights = importances  # Bar extent along Z (the importance value)
depths = np.ones(n_bars)  # Constant Y value, shifted by -3 when plotted

# One figure holding a single 3D axes
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Blue, shaded bars: anchored at (x, y - 3, z=0), 0.4 wide, 0.4 deep, `heights` tall
ax.bar3d(
    indices,
    depths - 3,
    np.zeros(n_bars),
    0.4,
    0.4,
    heights,
    color="blue",
    shade=True,
)

# Axis captions: the X caption is a single blank, only Z is named
ax.set_xlabel(" ", fontsize=23, labelpad=20)
ax.set_zlabel("Importance", fontsize=23, labelpad=35)

# Camera at zero elevation and 90 degrees azimuth
ax.view_init(elev=0, azim=90)

# No ticks on the Y axis
ax.set_yticks([])

# Tick label sizes for X and Z; extra padding on Z
ax.tick_params(axis="x", labelsize=23)
ax.tick_params(axis="z", labelsize=23, pad=15)

# One X tick per bar, labelled with the token text and rotated by 90 degrees
ax.set_xticks(indices)
ax.set_xticklabels(tokens, rotation=90, ha="right", fontsize=26)

# Five evenly spaced Z ticks from 0 to the largest importance, two decimals each
z_ticks = np.linspace(0, max(heights), num=5)
ax.set_zticks(z_ticks)
ax.set_zticklabels(["{:.2f}".format(tick) for tick in z_ticks], fontsize=23)

# Bottom-align every Z tick label
for tick_label in ax.get_zticklabels():
    tick_label.set_verticalalignment("bottom")

# Write the figure to disk as a PDF, then close it
plt.savefig(
    "/home/chills/Desktop/LITE-RATE_files/graphs/model_plots_feature_importance/tinybert/top_tokens_importance_3d_tinybert.pdf",
    dpi=300,
)
plt.close()

print("3D Top Tokens Importance plot saved as 'top_tokens_importance_3d_tinybert.pdf'")


3D Top Tokens Importance plot saved as 'top_tokens_importance_3d_tinybert.pdf'
