In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, RepeatVector, Embedding, TimeDistributed, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
from sklearn.metrics import classification_report, confusion_matrix
import ast

In [2]:
df = pd.read_csv('Bruteforce_CWE-307.csv')
b_syscalls = []
print(len(df['syscalls']))

1092


In [3]:
attack = []
normal = []
for i in range(len(df)):
    calls = ast.literal_eval(df.iloc[i]['syscalls'])
    check = df.iloc[i]['is_exploit']
    
    temp_list = []
    for j in range(len(calls)):
        temp_list.append(calls[j]['name'])
    if check:
        attack.append(temp_list)
    else:
        normal.append(temp_list)

In [4]:
both_lists = attack + normal
tokenizer = Tokenizer()
tokenizer.fit_on_texts(both_lists)
word_index = tokenizer.word_index


In [5]:
X_train = normal[:500]
X_val = normal[500:750]
X_test = normal[750:] + attack
tokened_Xtrain = tokenizer.texts_to_sequences(X_train)
tokened_Xtest = tokenizer.texts_to_sequences(X_test)
tokened_Xval = tokenizer.texts_to_sequences(X_val)
max_length = 5000
X_train_padded = pad_sequences(tokened_Xtrain, maxlen=max_length, padding='post')
X_test_padded = pad_sequences(tokened_Xtest, maxlen=max_length, padding='post')
X_val_padded = pad_sequences(tokened_Xval, maxlen=max_length, padding='post')


In [6]:
K = len(word_index) + 1

In [7]:
model = Sequential()
model.add(Embedding(input_dim=K, output_dim=400, input_length=None))
model.add(LSTM(400, return_sequences=True))
model.add(Dropout(0.5))
model.add(Dense(K, activation='softmax'))  

early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=3,          # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the model weights from the epoch with the best value of the monitored metric
)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_crossentropy'])

In [8]:
def add_train_labels(x):
        return x[:-1], x[1:]
    
train = [add_train_labels(seq) for seq in X_train_padded]
val = [add_train_labels(seq) for seq in X_val_padded]

train_inputs, train_targets = zip(*train)
val_inputs, val_targets = zip(*val)

# Convert to numpy arrays
train_inputs = np.array(train_inputs)
train_targets = np.array(train_targets)
val_inputs = np.array(val_inputs)
val_targets = np.array(val_targets)



In [9]:
model.fit(
            train_inputs,
            train_targets,
            validation_data=(val_inputs, val_targets),
            epochs=150,
            verbose=2,
            shuffle=True,
            callbacks=[early_stopping],
        )


Epoch 1/150
16/16 - 175s - loss: 3.0972 - sparse_categorical_crossentropy: 3.0972 - val_loss: 5.9580 - val_sparse_categorical_crossentropy: 5.9580 - 175s/epoch - 11s/step
Epoch 2/150
16/16 - 179s - loss: 2.7108 - sparse_categorical_crossentropy: 2.7108 - val_loss: 2.2219 - val_sparse_categorical_crossentropy: 2.2219 - 179s/epoch - 11s/step
Epoch 3/150
16/16 - 187s - loss: 2.0575 - sparse_categorical_crossentropy: 2.0575 - val_loss: 1.7677 - val_sparse_categorical_crossentropy: 1.7677 - 187s/epoch - 12s/step
Epoch 4/150
16/16 - 181s - loss: 1.6507 - sparse_categorical_crossentropy: 1.6507 - val_loss: 1.3904 - val_sparse_categorical_crossentropy: 1.3904 - 181s/epoch - 11s/step
Epoch 5/150
16/16 - 180s - loss: 1.3351 - sparse_categorical_crossentropy: 1.3351 - val_loss: 1.1083 - val_sparse_categorical_crossentropy: 1.1083 - 180s/epoch - 11s/step
Epoch 6/150
16/16 - 188s - loss: 1.0988 - sparse_categorical_crossentropy: 1.0988 - val_loss: 0.9045 - val_sparse_categorical_crossentropy: 0.904

<keras.src.callbacks.History at 0x2da7abe20>

In [10]:
test = X_test_padded
print(len(test))
y_pred = model.predict(test)
print(y_pred.shape)

342
(342, 5000, 46)


In [29]:
probs = np.array([pred.max(axis=-1).prod(axis=-1) for pred in y_pred])
print(probs[0])
final_prob = np.clip(-np.log2(probs), a_min=0, a_max=1e100)
print(final_prob[0])

0.0
1e+100


  final_prob = np.clip(-np.log2(probs), a_min=0, a_max=1e100)


In [26]:
print(y_pred[0])

[[6.2909007e-02 3.0511871e-02 3.1420857e-02 ... 1.0055612e-02
  4.1131340e-03 7.1503883e-03]
 [2.4179790e-02 5.8542420e-03 6.5182569e-03 ... 4.4699226e-04
  1.0418598e-04 3.8687792e-04]
 [2.8951035e-03 1.1088334e-03 1.7825197e-03 ... 2.9526534e-04
  2.6369446e-05 3.1348824e-05]
 ...
 [7.8854818e-08 2.4499556e-07 1.5443118e-05 ... 9.8525907e-09
  1.0491609e-08 1.0637860e-08]
 [7.4043932e-08 1.4120008e-07 7.1686291e-06 ... 1.1938152e-09
  3.1653067e-09 4.5945625e-09]
 [1.3857191e-06 7.8768545e-07 3.0243014e-05 ... 2.1421108e-08
  4.8178840e-08 2.7494286e-08]]


In [33]:
maxes = np.array([pred.max(axis=-1) for pred in y_pred])
print(maxes[0)

[0.28104645 0.8564893  0.8017733  ... 0.9995943  0.9992286  0.60562146]


In [11]:
sums = np.array([-np.log(pred.max(axis=-1)).sum(axis=-1) for pred in y_pred])


In [12]:
val = X_val_padded
y_val = model.predict(val)
sums_val = np.array([-np.log(pred.max(axis=-1)).sum(axis=-1) for pred in y_val])




In [13]:
threshold = np.mean(sums_val) + 2*np.std(sums_val)
print(threshold)

582.3398284912109


In [19]:
successes = 0
total = len(test)
pred = []
norms_len = total - len(attack)
testY = []
for i in range(total):
    if sums[i] <= threshold:
        pred.append(0)
    else:
        pred.append(1)
    if i < norms_len:
        testY.append(0)
        if sums[i] <= threshold:
            successes += 1
    else:
        testY.append(1)
        if sums[i] > threshold:
            successes += 1
print(successes)
print(total)
print(successes / total)

323
342
0.9444444444444444


In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [21]:
precision = precision_score(testY, pred, pos_label=0)
recall = recall_score(testY, pred, pos_label=0)
f1 = f1_score(testY, pred, pos_label=0)
print("Precision: ",precision)
print("Recall: ",recall)
print("f1: ",f1)

Precision:  0.9591836734693877
Recall:  0.9631147540983607
f1:  0.9611451942740287


In [18]:
print(test)
print(pred)

[[15 15  8 ...  3  3  4]
 [ 5  5 11 ...  1  1  4]
 [20 19 19 ...  0  0  0]
 ...
 [ 2  4  4 ... 17 17  4]
 [15  5  5 ...  3 33 34]
 [ 8 15 15 ... 19 19 20]]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 

In [22]:
df2 = pd.read_csv('CVE-2012-2122.csv')
attack = []
normal = []
for i in range(len(df2)):
    calls = ast.literal_eval(df2.iloc[i]['syscalls'])
    check = df2.iloc[i]['is_exploit']
    
    temp_list = []
    for j in range(len(calls)):
        temp_list.append(calls[j]['name'])
    if check:
        attack.append(temp_list)
    else:
        normal.append(temp_list)

both_lists = attack + normal
tokenizer = Tokenizer()
tokenizer.fit_on_texts(both_lists)
word_index = tokenizer.word_index

X_train = normal[:850]
X_val = normal[850:1050]
X_test = normal[1050:] + attack
tokened_Xtrain = tokenizer.texts_to_sequences(X_train)
tokened_Xtest = tokenizer.texts_to_sequences(X_test)
tokened_Xval = tokenizer.texts_to_sequences(X_val)
max_length = 7500
X_train_padded = pad_sequences(tokened_Xtrain, maxlen=max_length, padding='post')
X_test_padded = pad_sequences(tokened_Xtest, maxlen=max_length, padding='post')
X_val_padded = pad_sequences(tokened_Xval, maxlen=max_length, padding='post')

K = len(word_index) + 1

In [23]:
train = [add_train_labels(seq) for seq in X_train_padded]
val = [add_train_labels(seq) for seq in X_val_padded]

train_inputs, train_targets = zip(*train)
val_inputs, val_targets = zip(*val)

# Convert to numpy arrays
train_inputs = np.array(train_inputs)
train_targets = np.array(train_targets)
val_inputs = np.array(val_inputs)
val_targets = np.array(val_targets)

In [24]:
model.fit(
            train_inputs,
            train_targets,
            validation_data=(val_inputs, val_targets),
            epochs=50,
            verbose=2,
            shuffle=True,
            callbacks=[early_stopping],
        )


Epoch 1/150
27/27 - 404s - loss: 0.9476 - sparse_categorical_crossentropy: 0.9476 - val_loss: 0.4495 - val_sparse_categorical_crossentropy: 0.4495 - 404s/epoch - 15s/step
Epoch 2/150
27/27 - 406s - loss: 0.4021 - sparse_categorical_crossentropy: 0.4021 - val_loss: 0.3539 - val_sparse_categorical_crossentropy: 0.3539 - 406s/epoch - 15s/step
Epoch 3/150
27/27 - 374s - loss: 0.3403 - sparse_categorical_crossentropy: 0.3403 - val_loss: 0.3121 - val_sparse_categorical_crossentropy: 0.3121 - 374s/epoch - 14s/step
Epoch 4/150
27/27 - 380s - loss: 0.3062 - sparse_categorical_crossentropy: 0.3062 - val_loss: 0.2828 - val_sparse_categorical_crossentropy: 0.2828 - 380s/epoch - 14s/step
Epoch 5/150
27/27 - 371s - loss: 0.2826 - sparse_categorical_crossentropy: 0.2826 - val_loss: 0.2624 - val_sparse_categorical_crossentropy: 0.2624 - 371s/epoch - 14s/step
Epoch 6/150
27/27 - 373s - loss: 0.2658 - sparse_categorical_crossentropy: 0.2658 - val_loss: 0.2484 - val_sparse_categorical_crossentropy: 0.248

<keras.src.callbacks.History at 0x2d8873400>

In [25]:
val = X_val_padded
y_val = model.predict(val)
sums_val = np.array([-np.log(pred.max(axis=-1)).sum(axis=-1) for pred in y_val])
threshold = np.mean(sums_val) + *np.std(sums_val)
print(threshold)

969.7884521484375


In [27]:
test = X_test_padded
y_pred = model.predict(test)
sums = np.array([-np.log(pred.max(axis=-1)).sum(axis=-1) for pred in y_pred])



In [29]:
threshold = np.mean(sums_val) + np.std(sums_val)
successes = 0
total = len(test)
pred = []
norms_len = total - len(attack)
testY = []
for i in range(total):
    if sums[i] <= threshold:
        pred.append(0)
    else:
        pred.append(1)
    if i < norms_len:
        testY.append(0)
        if sums[i] <= threshold:
            successes += 1
    else:
        testY.append(1)
        if sums[i] > threshold:
            successes += 1
print(successes)
print(total)
print(successes / total)

260
345
0.7536231884057971


In [60]:
print(len(normal))

1240


In [30]:
precision = precision_score(testY, pred, pos_label=0)
recall = recall_score(testY, pred, pos_label=0)
f1 = f1_score(testY, pred, pos_label=0)
print("Precision: ",precision)
print("Recall: ",recall)
print("f1: ",f1)

Precision:  0.7333333333333333
Recall:  0.868421052631579
f1:  0.7951807228915663
