In [None]:

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical

# Load Data
# Column names are assigned manually below, so the files are read with
# header=None. With the default header inference pandas would use the first
# record of each file as column names, and that record would be lost as soon
# as the names are overwritten.
train_data = pd.read_csv("./KDDTrain+.txt", header=None)
test_data = pd.read_csv("./KDDTest+.txt", header=None)

# Define columns manually (assuming predefined feature names)
columns = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment',
           'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted',
           'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login',
           'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
           'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count',
           'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
           'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
           'dst_host_srv_rerror_rate', 'attack', 'level']

# Assigning (rather than passing names= to read_csv) raises immediately if a
# file does not have exactly len(columns) fields.
train_data.columns = columns
test_data.columns = columns

# Encode categorical feature columns
# StandardScaler only accepts numeric input, so every non-numeric feature
# column is replaced by an integer code. The category list is taken from the
# training set only; a value that appears only in the test set gets code -1.
categorical_features = train_data.drop(columns=['attack']).select_dtypes(exclude='number').columns
for feature in categorical_features:
    train_categories = pd.Categorical(train_data[feature]).categories
    train_data[feature] = pd.Categorical(train_data[feature], categories=train_categories).codes
    test_data[feature] = pd.Categorical(test_data[feature], categories=train_categories).codes

# Encode the attack label
label_encoder = LabelEncoder()
train_data['attack'] = label_encoder.fit_transform(train_data['attack'])

# Ensure test labels are mapped properly: attack names never seen in training
# are collapsed into a single 'unknown' class before encoding.
seen_in_training = test_data['attack'].isin(label_encoder.classes_)
test_data['attack'] = test_data['attack'].where(seen_in_training, 'unknown')
# 'unknown' is appended after fitting, so it receives the last id and the ids
# of the training classes are unchanged.
label_encoder.classes_ = np.append(label_encoder.classes_, 'unknown')  # Add unknown class
test_data['attack'] = label_encoder.transform(test_data['attack'])


# Select features and labels
# NOTE(review): 'level' stays in the feature matrix, as in the original code;
# confirm it is a genuine input feature and not dataset metadata.
X = train_data.drop(columns=['attack'])
y = train_data['attack']

# Scale features (statistics are fitted on the training set only)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# One-hot width is max(y) + 1, i.e. the training classes only; the 'unknown'
# id used for the test labels lies outside this width.
y_categorical = to_categorical(y)





ValueError: y contains previously unseen labels: 'saint'

In [None]:

# Define LSTM model
def create_model(input_shape, num_classes):
    """Build and compile a stacked two-layer LSTM classifier.

    Args:
        input_shape: Shape of a single sample, forwarded to the first LSTM
            layer as its ``input_shape``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and an accuracy metric.
    """
    network = Sequential()

    # First recurrent layer returns the full sequence so that the second
    # LSTM layer receives one vector per step.
    network.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(0.2))

    # Second recurrent layer returns only its final output.
    network.add(LSTM(32, return_sequences=False))
    network.add(Dropout(0.2))

    # Classification head: one probability per class.
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [None]:

# K-Fold Cross-Validation
N_SPLITS = 5
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
accuracy_per_fold = []

# The model is built with input shape (n_features, 1), so add the trailing
# axis explicitly instead of relying on Keras to adjust the input rank.
X_sequences = X_scaled[..., np.newaxis]

for fold_no, (train_index, val_index) in enumerate(kf.split(X_scaled), start=1):
    print(f'Training Fold {fold_no}...')
    X_train, X_val = X_sequences[train_index], X_sequences[val_index]
    y_train, y_val = y_categorical[train_index], y_categorical[val_index]

    # A fresh model per fold so no weights carry over between folds.
    model = create_model(X_train.shape[1:], y_categorical.shape[1])
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32, verbose=1)

    # Record the validation accuracy reached after the last epoch.
    accuracy_per_fold.append(history.history['val_accuracy'][-1])

# Plot Accuracy Graph
# The x-axis is derived from the recorded results so it stays correct when
# N_SPLITS changes.
fold_numbers = range(1, len(accuracy_per_fold) + 1)
plt.plot(fold_numbers, accuracy_per_fold, marker='o', linestyle='-')
plt.xlabel('Fold Number')
plt.ylabel('Validation Accuracy')
plt.title('K-Fold Cross Validation Accuracy')
plt.show()

print("Average Validation Accuracy: ", np.mean(accuracy_per_fold))


In [None]:

# Transfer Learning - Load Pretrained Model
# The input shape is derived from X_scaled directly; reading it from a
# variable left over from the cross-validation loop would make this cell fail
# whenever that loop has not been run in the current kernel.
X_sequences = X_scaled[..., np.newaxis]  # (samples, n_features, 1), matching the model input
pretrained_model = create_model(X_sequences.shape[1:], y_categorical.shape[1])
pretrained_model.load_weights('pretrained_lstm_weights.h5')

# Fine-tune on new dataset
pretrained_model.fit(X_sequences, y_categorical, epochs=5, batch_size=32)


In [None]:

# Adversarially Robust Training (FGSM Attack)
def fgsm_attack(model, images, labels, epsilon=0.1, clip_min=0.0, clip_max=1.0, batch_size=1024):
    """Create adversarial examples with the Fast Gradient Sign Method.

    Each sample is moved by ``epsilon`` in the direction of the sign of the
    gradient of the categorical cross-entropy loss with respect to the input.

    Args:
        model: Callable Keras model mapping a batch of inputs to class
            probabilities.
        images: Array-like batch of input samples, in the shape the model
            accepts.
        labels: One-hot labels aligned with ``images`` along the first axis.
        epsilon: Step size of the perturbation.
        clip_min: Lower bound applied to the perturbed samples, or ``None``
            for no lower bound. The default keeps the original [0, 1]
            clipping.
        clip_max: Upper bound applied to the perturbed samples, or ``None``
            for no upper bound.
        batch_size: Number of samples per forward/backward pass. ``None`` or
            a non-positive value processes everything in a single pass.

    Returns:
        A NumPy array of perturbed samples with the same shape as ``images``.
    """
    images = np.asarray(images)
    labels = np.asarray(labels)

    if batch_size is None or batch_size <= 0:
        batch_size = max(len(images), 1)

    perturbed_batches = []
    # Process in chunks so the tape never has to hold the activations of the
    # whole dataset at once.
    for start in range(0, len(images), batch_size):
        batch_images = tf.convert_to_tensor(images[start:start + batch_size])
        batch_labels = tf.convert_to_tensor(labels[start:start + batch_size])
        with tf.GradientTape() as tape:
            # The inputs are plain tensors, not variables, so they must be
            # watched explicitly to obtain a gradient with respect to them.
            tape.watch(batch_images)
            predictions = model(batch_images)
            loss = tf.keras.losses.categorical_crossentropy(batch_labels, predictions)
        gradient = tape.gradient(loss, batch_images)
        perturbed_batches.append((batch_images + epsilon * tf.sign(gradient)).numpy())

    if not perturbed_batches:
        # Empty input: nothing to perturb.
        return images.copy()

    perturbed_images = np.concatenate(perturbed_batches, axis=0)
    if clip_min is not None or clip_max is not None:
        perturbed_images = np.clip(perturbed_images, clip_min, clip_max)
    return perturbed_images

# Generate Adversarial Examples and Retrain
# X_scaled is StandardScaler output, so its values are not confined to
# [0, 1]; clipping is disabled here because the default bounds would discard
# every negative value and everything above 1.
X_sequences = X_scaled[..., np.newaxis]  # (samples, n_features, 1), matching the model input
X_adversarial = fgsm_attack(pretrained_model, X_sequences, y_categorical, clip_min=None, clip_max=None)
pretrained_model.fit(X_adversarial, y_categorical, epochs=5, batch_size=32)
