In [2]:
import numpy as np
import pandas as pd
import xgboost as xgb
import joblib

In [3]:
# Read the raw CSV and, in the same expression, encode the string labels of
# the 'Class' column as integers (Benign -> 0, Malware -> 1). Values that are
# not keys of the mapping are left as they are by `replace`.
df = pd.read_csv(
    '/home/masharifin/Documents/ComprehensiveExam/Datasets/ObfuscatedMalMem2022.csv'
).assign(
    Class=lambda frame: frame['Class'].replace({'Benign': 0, 'Malware': 1})
)

In [4]:
# Show the loaded frame: as the last expression of the cell it is rendered
# by the notebook (the table below is that rendering).
df

Unnamed: 0,pslist.nproc,pslist.nppid,pslist.avg_threads,pslist.nprocs64bit,pslist.avg_handlers,dlllist.ndlls,dlllist.avg_dlls_per_proc,handles.nhandles,handles.avg_handles_per_proc,handles.nport,...,svcscan.kernel_drivers,svcscan.fs_drivers,svcscan.process_services,svcscan.shared_process_services,svcscan.interactive_process_services,svcscan.nactive,callbacks.ncallbacks,callbacks.nanonymous,callbacks.ngeneric,Class
0,45,17,10.555556,0,202.844444,1694,38.500000,9129,212.302326,0,...,221,26,24,116,0,121,87,0,8,0
1,47,19,11.531915,0,242.234043,2074,44.127660,11385,242.234043,0,...,222,26,24,118,0,122,87,0,8,0
2,40,14,14.725000,0,288.225000,1932,48.300000,11529,288.225000,0,...,222,26,27,118,0,120,88,0,8,0
3,32,13,13.500000,0,264.281250,1445,45.156250,8457,264.281250,0,...,222,26,27,118,0,120,88,0,8,0
4,42,16,11.452381,0,281.333333,2067,49.214286,11816,281.333333,0,...,222,26,24,118,0,124,87,0,8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58591,37,15,10.108108,0,215.486487,1453,39.270270,7973,215.486487,0,...,221,26,24,116,0,120,86,0,8,1
58592,37,14,9.945946,0,190.216216,1347,36.405405,7038,190.216216,0,...,221,26,24,116,0,116,88,0,8,1
58593,38,15,9.842105,0,210.026316,1448,38.105263,7982,215.729730,0,...,221,26,24,116,0,120,88,0,8,1
58594,37,15,10.243243,0,215.513513,1452,39.243243,7974,215.513513,0,...,221,26,24,116,0,120,87,0,8,1


In [None]:
# Load the pre-trained XGBoost model
# NOTE: the path is relative, so it is resolved against the kernel's current
# working directory.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model_path = 'Binary_security_xgb.sav'
xgboost_model = joblib.load(model_path)

# Function to perform JSMA attack
def jsma_attack(model, sample, target_class, max_iters=1000):
    """Greedy, JSMA-style evasion attack on a single feature vector.

    A feature is *salient* when forcing it to 1 (all other features kept at
    their current values) makes ``model.predict`` return a label different
    from ``target_class``. While the model still predicts ``target_class``
    for the sample, the first salient feature is set to 1. The search stops
    as soon as the prediction differs from ``target_class``, when no single
    feature is salient, or after ``max_iters`` perturbations.

    NOTE: despite its name, ``target_class`` is the label the attack moves
    *away from* (a feature counts as salient when the prediction becomes
    ``!= target_class``). The parameter name is kept for compatibility.

    Because the saliency map is binary, applying a salient feature changes
    the prediction immediately (for a deterministic model), so in practice
    at most one feature is modified per call.

    Parameters
    ----------
    model : object
        Must expose ``predict(X)`` that accepts a 2-D array and returns one
        label per row.
    sample : 1-D array-like
        Feature values of the sample to attack. It is copied; the caller's
        data is never modified.
    target_class : int-like
        Label to evade; converted with ``int()``.
    max_iters : int, optional
        Upper bound on the number of perturbation steps (default 1000).

    Returns
    -------
    numpy.ndarray
        Copy of ``sample`` with the selected feature(s) set to 1, or an
        unmodified copy when the model already predicts another label or no
        single-feature change alters the prediction.
    """
    target_class = int(target_class)

    # np.array() copies, so probing and perturbing never touch the input.
    perturbed_sample = np.array(sample)
    num_features = len(perturbed_sample)
    if num_features == 0:
        # Nothing to perturb.
        return perturbed_sample

    def predicted_labels(rows):
        # One predicted label per row of a 2-D feature matrix.
        return np.ravel(model.predict(rows))

    iter_count = 0
    while iter_count < max_iters:
        # Stop on the model's actual prediction, not on a feature value.
        if predicted_labels(perturbed_sample.reshape(1, -1))[0] != target_class:
            break

        # Saliency map: row i is the current sample with only feature i
        # forced to 1. Probing copies keeps the working sample intact
        # (the previous implementation zeroed every probed feature).
        probes = np.tile(perturbed_sample, (num_features, 1))
        np.fill_diagonal(probes, 1)
        saliency = (predicted_labels(probes) != target_class).astype(float)

        if not saliency.any():
            # No single-feature change moves the prediction: give up instead
            # of letting argmax pick feature 0 by default.
            break

        salient_feature = int(np.argmax(saliency))
        perturbed_sample[salient_feature] = 1
        iter_count += 1

    return perturbed_sample


# Create empty lists to store adversarial samples and evasion rates
adversarial_samples = []
evasion_rates = []

# The class passed to jsma_attack is the same for every sample, so it is
# defined once instead of being reassigned on every row.
target_class = 1  # Specify the target class for the attack

# Drop the label column once for the whole frame rather than once per row.
feature_frame = df.drop(columns='Class')

# Loop through all samples in the dataset for the JSMA attack
for i, sample in feature_frame.iterrows():
    sample_data = sample.values

    # Perform the JSMA attack
    adversarial_sample = jsma_attack(xgboost_model, sample_data, target_class)

    # Evaluate the predictions
    original_prediction = xgboost_model.predict(np.array(sample_data).reshape(1, -1))
    adversarial_prediction = xgboost_model.predict(np.array(adversarial_sample).reshape(1, -1))

    print("Sample:", i)
    print("Original Prediction:", original_prediction)
    print("Adversarial Prediction:", adversarial_prediction)

    # Print the adversarial sample and evasion rate
    print("Adversarial Sample:")
    print(adversarial_sample)
    # Share of predictions that changed. Each predict call above scores a
    # single row, so this is 0.0 or 100.0 per sample. (It used to be divided
    # by the number of features, which capped it at 100 / num_features.)
    evasion_rate = 100.0 * np.mean(original_prediction != adversarial_prediction)
    print("Evasion Rate: {:.2f}%".format(evasion_rate))
    print("-" * 30)

    # Append the adversarial sample and evasion rate to the lists
    adversarial_samples.append(adversarial_sample)
    evasion_rates.append(evasion_rate)

# Convert the lists to NumPy arrays
adversarial_samples = np.array(adversarial_samples)
evasion_rates = np.array(evasion_rates)

# Aggregate result: percentage of samples whose prediction was changed.
if evasion_rates.size:
    print("Overall Evasion Rate: {:.2f}%".format(evasion_rates.mean()))

# Save the adversarial samples and evasion rates to a new CSV file
df_adversarial = pd.DataFrame(adversarial_samples, columns=feature_frame.columns)
# NOTE(review): the loop above attacks every row of df, so rows that came from
# class-0 samples are also written with label 1 here - confirm this is intended.
df_adversarial['Class'] = 1  # Set the class label to 1 for all adversarial samples
df_adversarial.to_csv('/home/masharifin/Documents/ComprehensiveExam/Datasets/AdversarialObfuscatedMalMem2022.csv', index=False)

# Save the evasion rates to a text file (one value per sample)
np.savetxt('/home/masharifin/Documents/ComprehensiveExam/Datasets/EvasionRates.txt', evasion_rates)


In [2]:
# Report how many rows (samples) the loaded frame contains.
print("Number of samples in the dataset:", df.shape[0])

Number of samples in the dataset: 58596
