In [None]:
# Imports

import base64
from utils import get_rules_list, create_train_test_split, payload_to_vec, predict_vec
from modsec import init_modsec

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from wafamole.models import Model
from wafamole.evasion import EvasionEngine

Process ForkPoolWorker-8:
Process ForkPoolWorker-6:
Process ForkPoolWorker-12:
Process ForkPoolWorker-10:
Process ForkPoolWorker-3:
Process ForkPoolWorker-7:
Process ForkPoolWorker-13:
Process ForkPoolWorker-5:
Process ForkPoolWorker-11:
Process ForkPoolWorker-9:
Process ForkPoolWorker-4:
Process ForkPoolWorker-2:
Process ForkPoolWorker-15:
Process ForkPoolWorker-14:
Process ForkPoolWorker-1:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-16:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    s

In [2]:
# Notebook-wide configuration

# Rule identifiers and the modsec object; both are reused by later cells
# (dataset creation, feature extraction and prediction).
rule_ids = get_rules_list()
modsec = init_modsec()

# Input datasets, given as paths relative to the working directory.
# TODO: handle large files
sane_data_path = "data/sanes_20k.sql"
attack_data_path = "data/attacks_20k.sql"

In [3]:
# Build the train and test datasets.
# NOTE(review): this cell always calls create_train_test_split; no
# load-from-disk path is implemented here.

train, test = create_train_test_split(
    attack_file=attack_data_path,
    sane_file=sane_data_path,
    train_size=5000,
    test_size=1000,
    modsec=modsec,
    rule_ids=rule_ids,
)

# Separate features ("vector" column) and targets ("label" column) per split

X_train, y_train = list(train["vector"]), train["label"]
X_test, y_test = list(test["vector"]), test["label"]

# Fit the Random Forest classifier
# number of trees is set to 160 for PLs other than PL1 as per the paper
model = RandomForestClassifier(n_estimators=160, random_state=666)
model.fit(X_train, y_train)
print("Model trained successfully!")

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Release everything except the fitted `model`, which later cells still need
del train, test, X_train, y_train, X_test, y_test, y_pred

Reading and parsing data...
Full data shape: (13504, 2)
Splitting into train and test...
Creating vectors...


Processing payloads: 100%|██████████| 5000/5000 [01:31<00:00, 54.63it/s]
Processing payloads: 100%|██████████| 1000/1000 [00:39<00:00, 25.11it/s]


Done!
Train shape: (5000, 3) | Test shape: (1000, 3)
Model trained successfully!
              precision    recall  f1-score   support

      attack       0.96      0.95      0.95       516
        sane       0.95      0.96      0.95       484

    accuracy                           0.95      1000
   macro avg       0.95      0.95      0.95      1000
weighted avg       0.95      0.95      0.95      1000



In [4]:
# Create WAFamole model
class WAFamoleModel(Model):
    """Adapter that exposes the notebook's classifier through wafamole's `Model`.

    Both methods read the notebook-level globals `model`, `rule_ids` and
    `modsec`, so those must be defined before an instance is used.
    """

    # TODO: rework predict payload to take vec ?
    def extract_features(self, value: str):
        """Turn a raw payload string into a feature vector.

        The payload is UTF-8 encoded, base64 encoded, and handed to
        `payload_to_vec`; whatever that helper returns is returned unchanged.
        """
        encoded = base64.b64encode(value.encode("utf-8"))
        return payload_to_vec(
            payload_base64=encoded.decode("utf-8"),
            rule_ids=rule_ids,
            modsec=modsec,
        )

    def classify(self, value: str):
        """Score a raw payload string.

        Returns the result of `predict_vec` for the payload's feature vector.
        NOTE(review): elsewhere in this notebook that result is printed as a
        confidence value — confirm the exact semantics in `utils.predict_vec`.
        """
        features = self.extract_features(value)
        return predict_vec(vec=features, model=model, rule_ids=rule_ids, modsec=modsec)

In [5]:
# Create WAFamole evasion engine
# Instantiate the WAFamoleModel adapter defined in this notebook. Its methods read
# the notebook-level `model`, `rule_ids` and `modsec`, so those must already exist
# by the time the engine calls into it.
wafamole_model = WAFamoleModel()
# Wrap the adapter in wafamole's EvasionEngine; `engine.evaluate(...)` is called
# later in the notebook to search for a lower-scoring variant of a payload.
engine = EvasionEngine(wafamole_model)

In [6]:
# Candidate payloads: exactly one assignment should be active.
# payload = "UPDATE `tab` SET `col1` = 1 WHERE `col3` >= 1110573056 LIMIT 516358144;" # sane
# payload = "SELECT `col1` FROM `tab` WHERE `col1` LIKE '%'s'%';" # attack
payload = 'SELECT SLEEP(5)#";'  # attack


# Baseline: score the unmodified payload before any evasion is attempted
payload_base64 = base64.b64encode(payload.encode("utf-8")).decode("utf-8")
vec = payload_to_vec(payload_base64=payload_base64, rule_ids=rule_ids, modsec=modsec)
# NOTE: despite its name, `is_attack` holds the score returned by predict_vec
# (reported below as a confidence), not a boolean. The name is kept because a
# later cell reads it.
is_attack = predict_vec(vec=vec, model=model, rule_ids=rule_ids, modsec=modsec)
print(
    f"Payload: {payload}",
    f"Vec: {vec}",
    f"Confidence: {round(is_attack, 5)}",
    sep="\n",
)

Payload: SELECT SLEEP(5)#";
Vec: [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0]
Confidence: 0.96094


In [8]:
# Run the WAFamole evasion search against the baseline payload

# NOTE(review): parameter meanings below are taken from their names (search
# rounds, candidates per round, time limit, stopping score) — confirm against
# the wafamole EvasionEngine documentation.
min_confidence, min_payload = engine.evaluate(
    payload=payload, max_rounds=200, round_size=10, timeout=60, threshold=0.5
)

# Report the best candidate; the payload is shown as bytes so that whitespace
# and control characters inserted by the mutations are visible as escapes.
print(
    "",
    f"Min payload: {min_payload.encode('utf-8')}",
    f"Min confidence: {round(min_confidence, 5)}",
    "",
    sep="\n",
)
# Compare against the baseline score computed in the previous cell (`is_attack`)
print(
    f"Reduced confidence from {round(is_attack, 5)} to {round(min_confidence, 5)} (reduction of {round(is_attack - min_confidence, 5)})"
)

[!] Execution timed out
Reached confidence 0.5224327165757217
with payload
SeLECt sLEEP((sELECt
0x5))  ANd  0X1#";7k0	z 

Min payload: b'SeLECt sLEEP((sELECt\n0x5))  ANd  0X1#";7k0\tz '
Min confidence: 0.52243

Reduced confidence from 0.96094 to 0.52243 (reduction of 0.4385)
