In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import pickle
import os
import joblib
from tensorflow.keras.models import load_model
import tensorflow as tf
import matplotlib.pyplot as plt
tf.random.set_seed(42)

In [None]:
def individual_to_params(individual):
    """Translate a 10-element individual into decision-tree keyword arguments.

    Parameters
    ----------
    individual : sequence of length 10
        Values in the order: criterion, splitter, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_features,
        max_leaf_nodes, min_impurity_decrease, ccp_alpha.

    Returns
    -------
    dict
        Mapping from parameter name to the matching value, in the same order.

    Raises
    ------
    ValueError
        If ``individual`` does not unpack into exactly ten values.
    """
    (
        criterion,
        splitter,
        max_depth,
        min_samples_split,
        min_samples_leaf,
        min_weight_fraction_leaf,
        max_features,
        max_leaf_nodes,
        min_impurity_decrease,
        ccp_alpha,
    ) = individual

    return dict(
        criterion=criterion,
        splitter=splitter,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        ccp_alpha=ccp_alpha,
    )

In [None]:
def createModel(individual, X_train, y_train):
    """Build and fit a decision tree from an individual's hyper-parameters.

    The individual is converted with ``individual_to_params`` and the resulting
    keyword arguments are passed to ``DecisionTreeClassifier`` together with a
    fixed ``random_state=42``. The classifier is fitted on the given training
    data and returned.
    """
    hyperparams = individual_to_params(individual)
    tree = DecisionTreeClassifier(**hyperparams, random_state=42)
    tree.fit(X_train, y_train)
    return tree

In [None]:
def load_and_preprocess(filepath):
    """Read a CSV and keep only the nine model features plus the label.

    The first CSV column is used as the index. The shape of the reduced frame
    and a short status message are printed.

    Returns
    -------
    tuple
        ``(X, y, df)`` where ``df`` is the reduced frame, ``X`` is every column
        of ``df`` except the last, and ``y`` is the last column (``Label``).
    """
    selected_columns = [
        'SrcWin', 'sHops', 'sTtl', 'dTtl', 'SrcBytes',
        'DstBytes', 'Dur', 'TotBytes', 'Rate', 'Label',
    ]
    df = pd.read_csv(filepath, index_col=[0])[selected_columns]
    print(df.shape)
    print("loading data")
    # The label is the last selected column; everything before it is a feature.
    features, labels = df.iloc[:, :-1], df.iloc[:, -1]
    return features, labels, df


In [None]:

# Locate the training and testing CSVs relative to the notebook
data_path='../data/'
train_file, test_file = (
    os.path.join(data_path, csv_name)
    for csv_name in ('ISCX_training.csv', 'ISCX_Testing.csv')
)

# Each call yields (features, labels, reduced frame)
X_train, y_train, train_df = load_and_preprocess(train_file)
X_test, y_test, test_df = load_and_preprocess(test_file)

# Scale the features: the scaler is fitted on the training split only and
# then applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
    


In [None]:
# #Decision tree default
# dt_default_clf = DecisionTreeClassifier(random_state=42)
# dt_default_clf.fit(X_train, y_train)

In [None]:
# predictions =dt_default_clf.predict(X_test)
# accuracy = accuracy_score(y_test, predictions)
# print("Accuracy: ", accuracy_score(y_test, predictions))
# print("Precision: ", precision_score(y_test, predictions))
# print("Recall: ", recall_score(y_test, predictions))
# print("F1 score: ", f1_score(y_test, predictions))
# print("Confusion Matrix: \n", confusion_matrix(y_test, predictions))

In [None]:
# Directory prefix for the saved model files loaded in the cells below
model_path='../optimization/information_feature_selection/'

In [None]:
# Load the saved decision tree.
# NOTE: joblib.load unpickles arbitrary Python objects - only load model files
# that come from a trusted source.
dt_clf = joblib.load(model_path + 'best_decision_tree_multiiscx.pkl')

# Predict once on the scaled test set and reuse the result for every metric
predictions = dt_clf.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)

# Print out its metrics
print("Accuracy: ", accuracy)
print("Precision: ", precision_score(y_test, predictions))
print("Recall: ", recall_score(y_test, predictions))
print("F1 score: ", f1_score(y_test, predictions))
print("Confusion Matrix: \n", confusion_matrix(y_test, predictions))

In [None]:
# Finding the max and min values for each column
def _raw_bounds(column):
    """Return ``(max, min)`` of one column of the unscaled training frame."""
    values = train_df[column]
    return values.max(), values.min()


# Finding the max and min values for each column
SrcWin_max, SrcWin_min = _raw_bounds('SrcWin')
sHops_max, sHops_min = _raw_bounds('sHops')
sTtl_max, sTtl_min = _raw_bounds('sTtl')
dTtl_max, dTtl_min = _raw_bounds('dTtl')
SrcBytes_max, SrcBytes_min = _raw_bounds('SrcBytes')
DstBytes_max, DstBytes_min = _raw_bounds('DstBytes')
Dur_max, Dur_min = _raw_bounds('Dur')
TotBytes_max, TotBytes_min = _raw_bounds('TotBytes')
Rate_max, Rate_min = _raw_bounds('Rate')

In [None]:
# Column-wise extremes of the scaled training matrix
min_values, max_values = X_train_scaled.min(axis=0), X_train_scaled.max(axis=0)

# Unpack them into one variable per feature, in training-column order
(
    SrcWin_scaled_min,
    sHops_scaled_min,
    sTtl_scaled_min,
    dTtl_scaled_min,
    SrcBytes_scaled_min,
    DstBytes_scaled_min,
    Dur_scaled_min,
    TotBytes_scaled_min,
    Rate_scaled_min,
) = min_values

(
    SrcWin_scaled_max,
    sHops_scaled_max,
    sTtl_scaled_max,
    dTtl_scaled_max,
    SrcBytes_scaled_max,
    DstBytes_scaled_max,
    Dur_scaled_max,
    TotBytes_scaled_max,
    Rate_scaled_max,
) = max_values

In [None]:
# Create a dictionary to store min and max values for each feature
# Per-feature lookup of raw and scaled min/max values, keyed by feature name.
# Each tuple is (name, raw min, raw max, scaled min, scaled max).
feature_bounds = {
    name: {'min': raw_lo, 'max': raw_hi, 'scaled_min': scaled_lo, 'scaled_max': scaled_hi}
    for name, raw_lo, raw_hi, scaled_lo, scaled_hi in (
        ('SrcWin', SrcWin_min, SrcWin_max, SrcWin_scaled_min, SrcWin_scaled_max),
        ('sHops', sHops_min, sHops_max, sHops_scaled_min, sHops_scaled_max),
        ('sTtl', sTtl_min, sTtl_max, sTtl_scaled_min, sTtl_scaled_max),
        ('dTtl', dTtl_min, dTtl_max, dTtl_scaled_min, dTtl_scaled_max),
        ('SrcBytes', SrcBytes_min, SrcBytes_max, SrcBytes_scaled_min, SrcBytes_scaled_max),
        ('DstBytes', DstBytes_min, DstBytes_max, DstBytes_scaled_min, DstBytes_scaled_max),
        ('Dur', Dur_min, Dur_max, Dur_scaled_min, Dur_scaled_max),
        ('TotBytes', TotBytes_min, TotBytes_max, TotBytes_scaled_min, TotBytes_scaled_max),
        ('Rate', Rate_min, Rate_max, Rate_scaled_min, Rate_scaled_max),
    )
}


In [None]:
# Load the saved Keras network; the attack cells below use it as the surrogate model
neural_net=load_model(model_path+'optimized_nn_full_training_500iscx.h5')

In [None]:
# Raw network outputs for the scaled test set; the next cell turns them into 0/1 labels
predictions = neural_net.predict(X_test_scaled)

In [None]:
# predictions = neural_net.predict(X_test)
# Turn the network outputs into hard 0/1 labels using the first output column.
# np.rint rounds half-to-even, the same rule the built-in round() applies.
# Handling an already 1-D array keeps this cell re-runnable: running it a
# second time on the labels it produced leaves them unchanged instead of
# failing on x[0].
_raw_predictions = np.asarray(predictions)
_first_column = _raw_predictions if _raw_predictions.ndim == 1 else _raw_predictions[:, 0]
predictions = np.rint(_first_column).astype(int)

# Print out its metrics
print("Accuracy: ", accuracy_score(y_test, predictions))
print("Precision: ", precision_score(y_test, predictions))
print("Recall: ", recall_score(y_test, predictions))
print("F1 score: ", f1_score(y_test, predictions))
print("Confusion Matrix: \n", confusion_matrix(y_test, predictions))

In [None]:
# Baseline miss rate (%) of the neural network before any attack.
# NOTE(review): the two counts look like they were copied by hand from the
# confusion matrix printed above (missed vs. detected malware) - confirm.
nn_missed, nn_detected = 15430, 147848
or_miss_rate_nn = np.round((nn_missed / (nn_missed + nn_detected)) * 100, 2)
print(or_miss_rate_nn)

In [None]:
# Baseline miss rate (%) of the decision tree before any attack.
# Stored under its own name: the previous version assigned it to
# `or_miss_rate_nn`, silently overwriting the neural-network baseline.
# NOTE(review): the two counts look like they were copied by hand from the
# decision tree's confusion matrix (missed vs. detected malware) - confirm.
dt_missed, dt_detected = 5586, 157692
or_miss_rate_dt = dt_miss_rate = np.round(dt_missed / (dt_missed + dt_detected) * 100, 2)
print(or_miss_rate_dt)

In [None]:
# Keep only the test rows that are labelled 1 AND currently predicted 1;
# these are the samples the attack will try to flip.
predicted_labels = np.array(predictions)
true_labels = np.array(y_test)
correctly_flagged = (predicted_labels == 1) & (true_labels == 1)
malware_pred_index = np.where(correctly_flagged)[0]
X_test_malware = X_test_scaled[malware_pred_index]

In [None]:
def f(x_prime, model, target_class, kappa=0):
    """Margin term of the attack objective for a single-output model.

    The first output column of ``model(x_prime)`` is treated as the score of
    class 1 and its complement as the score of class 0. The result is
    ``max(score_other - score_target, -kappa)``, evaluated element-wise over
    the batch.

    Parameters
    ----------
    x_prime : candidate adversarial inputs passed straight to ``model``.
    model : callable returning a 2-D array/tensor; only column 0 is read.
    target_class : 0 targets the complement score; any other value targets
        the column-0 score.
    kappa : lower bound (negated) applied to the margin.
    """
    score_class_one = model(x_prime)[:, 0]
    score_class_zero = 1. - score_class_one
    if target_class == 0:
        Z_target, Z_other = score_class_zero, score_class_one
    else:
        Z_target, Z_other = score_class_one, score_class_zero
    return tf.maximum(Z_other - Z_target, -kappa)

# def generate_significant_noise(shape, feature_mask, min_val=-0.07741, max_val=23):
#     noise = tf.random.uniform(shape, 0, 1)  # noise values between 0 and 1
#     scaled_noise = noise * (max_val - min_val) + min_val  # scale and shift to desired range
#     return scaled_noise * feature_mask

def cw_loss(x, x_prime, model, target_class, c):
    """Per-sample attack loss: squared L2 distance plus ``c`` times the margin.

    Parameters
    ----------
    x : original inputs, one sample per row.
    x_prime : perturbed inputs with the same shape as ``x``.
    model, target_class : forwarded to ``f``.
    c : weight of the margin term relative to the distance term.

    Returns
    -------
    One loss value per row of ``x``.

    Notes
    -----
    The distance is reduced over the feature axis only. Previously it was
    summed over the whole batch into a single scalar and then broadcast
    against the per-sample margin vector from ``f``. Because
    ``GradientTape.gradient`` differentiates the sum of a non-scalar target,
    that multiplied the distance gradient by the batch size, so the effective
    weight of ``c`` depended on how many samples were in the batch.
    """
    l2_dist = tf.reduce_sum(tf.square(x - x_prime), axis=-1)
    return l2_dist + c * f(x_prime, model, target_class)

def generate_relative_noise(x, feature_mask, magnitude=0.1):
    """Random perturbation proportional to each input value.

    Uniform noise in [-1, 1) with the shape of ``x`` is multiplied by ``x``
    and by ``magnitude``, then by ``feature_mask`` so that entries where the
    mask is zero receive no noise.
    """
    unit_noise = tf.random.uniform(x.shape, -1, 1)
    return unit_noise * x * magnitude * feature_mask

def generate_cw_adversary(model, x, target_class, feature_mask,c, epsilon=0.001, iterations=5, clip_min=-2, clip_max=2,):
    """Optimise a perturbation of ``x`` that only moves the masked feature.

    The starting point is ``x`` plus relative random noise on the masked
    feature. Each iteration takes one Adam step on ``cw_loss`` with the
    gradient multiplied by ``feature_mask``, then clips the attacked column to
    ``[clip_min, clip_max]``.

    Parameters
    ----------
    model : model passed through to ``cw_loss``.
    x : batch of inputs (indexed as rows x features).
    target_class : class the attack tries to reach, forwarded to ``cw_loss``.
    feature_mask : tensor with a non-zero entry at the feature to attack. Only
        the first non-zero position is clipped.
    c : weight of the margin term in ``cw_loss``.
    epsilon : Adam learning rate.
    iterations : number of optimisation steps; 0 returns the noisy start point.
    clip_min, clip_max : bounds applied to the attacked column after each step.

    Returns
    -------
    numpy.ndarray
        The perturbed batch.

    Raises
    ------
    ValueError
        If ``feature_mask`` has no non-zero entry.
    """
    # The attacked column does not change between iterations, so resolve it
    # once instead of converting the mask to NumPy twice per step.
    masked_columns = feature_mask.numpy().nonzero()[0]
    if masked_columns.size == 0:
        raise ValueError("feature_mask must select at least one feature")
    attacked_col = masked_columns[0]

    noise = generate_relative_noise(x, feature_mask, magnitude=0.1)
    x_prime = tf.Variable(x + noise, dtype=tf.float32, trainable=True)
    optimizer = tf.optimizers.Adam(learning_rate=epsilon)

    for _ in range(iterations):
        with tf.GradientTape() as tape:
            loss = cw_loss(x, x_prime, model, target_class, c)
        grads = tape.gradient(loss, x_prime)

        # Mask the gradients to only update the desired feature
        masked_grads = grads * feature_mask
        optimizer.apply_gradients([(masked_grads, x_prime)])

        # Clip the attacked column so it stays within the allowed bounds
        clipped_values = tf.clip_by_value(x_prime[:, attacked_col], clip_min, clip_max)
        x_prime_array = x_prime.numpy()
        x_prime_array[:, attacked_col] = clipped_values
        x_prime.assign(x_prime_array)

    return x_prime.numpy()



# Batch function to apply C&W method
def cw_batch(model, scaler, input_samples, target_class=0, feature_name="Dur", feature_min=0.0, feature_max=126.0,it_value=0,c=0.001):
    """Attack one feature of a batch and return the result in scaled space.

    Steps:
      1. Run ``generate_cw_adversary`` on the scaled batch with a mask that
         selects ``feature_name``, clipping to that feature's scaled bounds
         from ``feature_bounds``.
      2. Map the result back to the original feature space and clip the
         attacked column to ``[feature_min, feature_max]``.
      3. Re-derive the features tied to the attacked one
         (see ``_propagate_feature_dependencies``).
      4. Re-apply ``scaler.transform`` and return the scaled batch.

    Parameters
    ----------
    model : model under attack.
    scaler : fitted scaler exposing ``transform`` / ``inverse_transform``.
    input_samples : scaled batch; must expose ``.numpy()``.
    target_class : class the attack aims for.
    feature_name : one of SrcWin, sHops, sTtl, dTtl, SrcBytes, DstBytes, Dur,
        TotBytes, Rate.
    feature_min, feature_max : clip range for the attacked feature in the
        original space.
    it_value : number of optimisation iterations.
    c : weight of the margin term in the attack loss.

    Raises
    ------
    ValueError
        If ``feature_name`` is not one of the nine known features.
    """
    feature_names = ['SrcWin', 'sHops', 'sTtl', 'dTtl', 'SrcBytes', 'DstBytes', 'Dur', 'TotBytes', 'Rate']
    feature_index = feature_names.index(feature_name)

    with tf.device('/GPU:0'):
        # Un-perturbed values in the original space; needed to compute how
        # much the attacked feature actually changed.
        original = scaler.inverse_transform(input_samples.numpy())

        # Create the mask for the specific feature
        feature_mask = tf.constant(
            [1 if i == feature_index else 0 for i in range(len(feature_names))],
            dtype=tf.float32,
        )

        perturbed_samples = generate_cw_adversary(
            model, input_samples, target_class, feature_mask, c,
            epsilon=0.001,
            iterations=it_value,
            clip_min=feature_bounds[feature_name]['scaled_min'],
            clip_max=feature_bounds[feature_name]['scaled_max'],
        )

        perturbed_samples_original = scaler.inverse_transform(perturbed_samples)

        # Clip in original space
        perturbed_samples_original[:, feature_index] = np.clip(
            perturbed_samples_original[:, feature_index], feature_min, feature_max
        )

        # Adjust dependencies for the modified feature
        _propagate_feature_dependencies(original, perturbed_samples_original, feature_name)

        # Rescale to standardized space
        return scaler.transform(perturbed_samples_original)


def _propagate_feature_dependencies(original, perturbed, feature_name):
    """Update, in place, the columns of ``perturbed`` tied to the attacked feature.

    Both arrays are in the original (unscaled) feature space with columns in
    the order SrcWin, sHops, sTtl, dTtl, SrcBytes, DstBytes, Dur, TotBytes, Rate.
    """
    INITIAL_TTL = 255
    SHOPS, STTL, SRC, DST, DUR, TOT, RATE = 1, 2, 4, 5, 6, 7, 8
    EPS = 1e-10  # guards every division against a zero denominator

    if feature_name == "Dur":
        # Scale Rate by old_duration / new_duration. The previous version read
        # the "original" duration from the already-perturbed array, so the
        # factor was always ~1 and Rate never changed.
        perturbed[:, RATE] *= original[:, DUR] / (perturbed[:, DUR] + EPS)
    elif feature_name in ("SrcBytes", "DstBytes"):
        # TotBytes = SrcBytes + DstBytes
        perturbed[:, TOT] = perturbed[:, SRC] + perturbed[:, DST]
        # Adjusting Duration to keep Rate constant; EPS added so a zero Rate
        # no longer yields inf/nan, matching the TotBytes and Rate branches.
        perturbed[:, DUR] = perturbed[:, TOT] / (perturbed[:, RATE] + EPS)
    elif feature_name == "TotBytes":
        # Absorb the change into SrcBytes: SrcBytes = TotBytes - DstBytes.
        # DstBytes stays as is; the former follow-up line
        # "DstBytes = TotBytes - SrcBytes" only recomputed its existing value.
        perturbed[:, SRC] = perturbed[:, TOT] - perturbed[:, DST]
        # Adjust Duration to keep Rate constant
        perturbed[:, DUR] = perturbed[:, TOT] / (perturbed[:, RATE] + EPS)
    elif feature_name == "sHops":
        perturbed[:, STTL] = INITIAL_TTL - perturbed[:, SHOPS]  # sTtl based on sHops
    elif feature_name == "sTtl":
        perturbed[:, SHOPS] = INITIAL_TTL - perturbed[:, STTL]  # sHops based on sTtl
    elif feature_name == "Rate":
        # Adjust Duration based on Rate and TotBytes
        perturbed[:, DUR] = perturbed[:, TOT] / (perturbed[:, RATE] + EPS)
    # "dTtl" and "SrcWin" have no dependent column in this feature set. The
    # old code rewrote sHops from the untouched sTtl during a dTtl attack,
    # changing a feature that was not under attack.


In [None]:
# Adversarial sample generation with batching
BATCH_SIZE = 10000  # Modify based on your hardware

# Attack input: the selected malware rows as a float32 tensor
X_test_malware_tensor = tf.convert_to_tensor(X_test_malware, dtype=tf.float32)

# Number of batches needed to cover every row (the last one may be partial)
n_malware_rows = X_test_malware_tensor.shape[0]
total_batches = int(np.ceil(n_malware_rows / BATCH_SIZE))

def generate_adversarial_for_c(c_value,iter,f_name):
    """Attack every malware batch on one feature and summarise the outcome.

    Reads the notebook globals ``X_test_malware_tensor``, ``BATCH_SIZE``,
    ``total_batches``, ``neural_net``, ``dt_clf``, ``scaler`` and
    ``feature_bounds``.

    Parameters
    ----------
    c_value : weight of the margin term, forwarded to ``cw_batch`` as ``c``.
    iter : number of optimisation iterations, forwarded as ``it_value``.
        (The name shadows the builtin but is kept for existing callers.)
    f_name : name of the feature to attack.

    Returns
    -------
    tuple
        ``(nn_miss_rate, dt_miss_rate, unsuccessful_samples_indices,
        adversarial_samples)``:
        - ``nn_miss_rate``: percentage computed from the number of samples
          the network now scores below 0.5 plus its baseline misses;
        - ``dt_miss_rate``: the same for the decision tree, counting the
          successful samples it predicts as 0;
        - ``unsuccessful_samples_indices``: row indices (into the malware
          tensor) that the network still scores >= 0.5;
        - ``adversarial_samples``: list of scaled rows that fooled the network.
    """
    # NOTE(review): these baseline counts are hard-coded; they appear to be
    # the missed/detected malware counts of each model on the clean test set.
    # Confirm they still match the confusion matrices printed earlier.
    nn_missed, nn_detected = 15430, 147848
    dt_missed, dt_detected = 5586, 157692

    adversarial_samples = []
    unsuccessful_samples_indices = []

    for batch in range(total_batches):
        start_idx = batch * BATCH_SIZE
        end_idx = min((batch+1) * BATCH_SIZE, X_test_malware_tensor.shape[0])

        # Use C&W method to generate adversarial samples. We target the benign class (class 0)
        batch_samples = cw_batch(
            neural_net, scaler, X_test_malware_tensor[start_idx:end_idx],
            target_class=0,
            feature_name=f_name,
            feature_min=feature_bounds[f_name]['min'],
            feature_max=feature_bounds[f_name]['max'],
            it_value=iter,
            c=c_value,
        )

        with tf.device('/GPU:0'):
            surrogate_preds = neural_net.predict(batch_samples)

        # We're looking for malware samples that are now predicted as benign
        successful_idx = np.where(surrogate_preds < 0.5)[0]
        adversarial_samples.extend(batch_samples[successful_idx])

        # Only the indices of the failures are reported; the failed rows
        # themselves were previously collected into a list nobody read.
        remaining_indices = np.setdiff1d(np.arange(batch_samples.shape[0]), successful_idx)
        unsuccessful_samples_indices.extend([start_idx + idx for idx in remaining_indices])

        print(f"Processed batch {batch+1}/{total_batches}")
    print(f"Number of unsuccessful samples: {len(unsuccessful_samples_indices)}")

    dt_malware_count = 0
    if len(adversarial_samples) > 0:
        predictions = dt_clf.predict(np.array(adversarial_samples))
        print(predictions)
        # Count the samples the decision tree labels 0, i.e. also fails to flag
        dt_malware_count = np.count_nonzero(predictions == 0)

    dt_miss_rate = np.round(((dt_malware_count + dt_missed) / (dt_missed + dt_detected) * 100), 2)
    nn_miss_rate = np.round(((len(adversarial_samples) + nn_missed) / (nn_missed + nn_detected) * 100), 2)

    return nn_miss_rate, dt_miss_rate, unsuccessful_samples_indices, adversarial_samples


In [None]:
# Sweep configuration: features to attack and optimiser iteration budgets
features_to_attack = ['SrcWin', 'sHops', 'sTtl', 'dTtl', 'SrcBytes', 'DstBytes', 'Dur', 'TotBytes', 'Rate']
iteration_values = [5,100,750,1000,2000]
ATTACK_C = 0.01  # weight of the margin term; held constant across the sweep

# All artefacts of the sweep go to one directory. The second figure used to
# be written to "../output_iscx/" while everything else went here.
OUTPUT_DIR = "../output_iscx_new/"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # fail early, not after hours of compute
dpi_value = 300

for feature in features_to_attack:
    print(f"\nGenerating adversarial samples for {feature}...\n")
    # Per-iteration-budget results for this feature
    misclassification_rates = []
    unsuccessful_samples_indices_list=[]
    adversarial_samples_list=[]
    dt_miss_rate_list=[]

    # Generate adversarial samples for each iteration budget and store the rates.
    # The loop variable is no longer called `iter`, which shadowed the builtin
    # for the rest of the notebook session.
    for n_iterations in iteration_values:
        print("Current iter:", n_iterations)
        rate,dt_miss_rate,unsuccessful_samples_indices,adversarial_samples = generate_adversarial_for_c(ATTACK_C, n_iterations, f_name=feature)
        misclassification_rates.append(rate)
        dt_miss_rate_list.append(dt_miss_rate)
        unsuccessful_samples_indices_list.append(unsuccessful_samples_indices)
        adversarial_samples_list.append(adversarial_samples)
        # Report the c value and iteration count that were actually used
        # (the old message printed a hard-coded 0.0001).
        print(f"Processed for c value {ATTACK_C}, iterations {n_iterations}. Misclassification rate: {rate}")

    # Persist the raw results for this feature
    with open(os.path.join(OUTPUT_DIR, str(feature) + "_data.pkl"), "wb") as file:
        data = {
            'adversarial_samples_list': adversarial_samples_list,
            'misclassification_rates': misclassification_rates,
            'dt_miss_rate_list': dt_miss_rate_list,
            'unsuccessful_samples_indices_list': unsuccessful_samples_indices_list
        }
        pickle.dump(data, file)

    # Figure 1: miss rate of both models against the iteration budget
    fig, ax = plt.subplots(figsize=(10, 7))
    ax.plot(iteration_values, misclassification_rates, '-o', label='surrogate Miss Rate')
    ax.plot(iteration_values, dt_miss_rate_list, '-s', color='red', label='DT Miss Rate')  # different marker for clarity
    ax.set_xlabel('iteration')
    ax.set_ylabel('Rate')
    ax.set_title('NN-MR(%) & DT-MR(%) vs. iteration')
    ax.legend()  # To distinguish between the two lines on the plot
    ax.grid(True, which="both", ls="--", c='0.7')
    fig.savefig(os.path.join(OUTPUT_DIR, str(feature) + "_NN_DT_MR_vs_iteration.png"), dpi=dpi_value)
    plt.show()
    plt.close(fig)  # nine features x two figures would otherwise stay in memory

    # Mean L2 distance (original feature space) between each successful
    # adversarial sample and the malware row it was derived from
    l2_distances = []

    for idx in range(len(iteration_values)):
        unsuccessful_samples_indices = unsuccessful_samples_indices_list[idx]
        # Dropping the failed rows leaves the originals aligned with the successes
        malware_scale = np.delete(X_test_malware, unsuccessful_samples_indices, axis=0)
        adversarial_samples_scale = adversarial_samples_list[idx]
        # Check if the adversarial_samples_scale is empty
        if len(adversarial_samples_scale) == 0:
            print(f"Skipping iteration {idx} due to empty adversarial samples.")
            l2_distances.append(float('nan'))  # placeholder for the missing data point
            continue
        adversarial_samples_scale_arr = np.array(adversarial_samples_scale)
        malware_scale=malware_scale.astype(np.float32)
        adversarial_samples_ori_space = scaler.inverse_transform(adversarial_samples_scale_arr)
        malware_samples_ori = scaler.inverse_transform(malware_scale)

        l2_dist = np.linalg.norm(malware_samples_ori - adversarial_samples_ori_space, axis=1).mean()
        l2_distances.append(l2_dist)

    # Figure 2: perturbation size vs. surrogate miss rate on twin y-axes
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Primary y-axis settings (L2 Distance)
    color = 'tab:red'
    ax1.set_xlabel('Iterations')
    ax1.set_ylabel('L2 Distance', color=color)
    ax1.plot(iteration_values, l2_distances, color=color, linewidth=2, linestyle='--', marker='o')
    ax1.tick_params(axis='y', labelcolor=color)
    ax1.grid(True, alpha=0.2)

    # Secondary y-axis settings (Misclassification Rate)
    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('Misclassification Rate', color=color)
    ax2.plot(iteration_values, misclassification_rates, color=color, linewidth=2, linestyle='-.', marker='x')
    ax2.tick_params(axis='y', labelcolor=color)

    # Adjust title and display
    plt.title('Trade-off: Perturbation Magnitude vs Misclassification Rate', fontsize=16)
    plt.tight_layout()  # Ensure no overlap of labels and title
    fig.savefig(os.path.join(OUTPUT_DIR, str(feature) + "_L2_vs_Misclassification_Rate.png"), dpi=dpi_value)
    plt.show()
    plt.close(fig)


In [None]:
# Pickle the current contents of adversarial_samples_list to the working directory.
# NOTE(review): the sweep cell above rebuilds adversarial_samples_list for every
# feature, so after a full run it holds the samples of the LAST feature in
# features_to_attack ('Rate'), not SrcBytes as the file name suggests - confirm
# which feature this file is meant to contain.
with open('srcBytes_adv.pkl','wb') as file:
    pickle.dump(adversarial_samples_list,file)