In [None]:
# Imports

import base64
from utils import (
    get_rules_list,
    create_train_test_split,
    create_model,
    payload_to_vec,
    predict_vec,
)
from modsec import init_modsec

from sklearn.ensemble import RandomForestClassifier  # type: ignore
from wafamole.evasion import EvasionEngine  # type: ignore

In [None]:
# Shared configuration for the rest of the notebook

# Labelled payload files that are handed to the train/test split further down.
# TODO: handle large files
sane_data_path = "data/sanes_20k.sql"
attack_data_path = "data/attacks_20k.sql"

# Rule list and the object returned by init_modsec(); both are built once here
# and passed to the dataset, model, and vectorization calls in later cells.
rule_ids = get_rules_list()
modsec = init_modsec()

In [None]:
# Build the train and test datasets (or load them from disk)
# NOTE(review): the "load from disk" path is not visible in this call; presumably
# it is handled inside create_train_test_split -- confirm in utils.

train, test = create_train_test_split(
    # labelled payload sources
    sane_file=sane_data_path,
    attack_file=attack_data_path,
    # split sizes
    test_size=1000,
    train_size=5000,
    # rule list and modsec object from the setup cell
    rule_ids=rule_ids,
    modsec=modsec,
)

In [None]:
# Create, train, and evaluate a random-forest model in a single call.
# The call returns the model together with a threshold; both are reused by the
# evasion cells below.

wafamole_model, threshold = create_model(
    # classifier configuration; random_state is pinned so reruns use the same seed
    model=RandomForestClassifier(random_state=666, n_estimators=160),
    # presumably the target false-positive rate used to pick the threshold -- TODO confirm
    desired_fpr=0.01,
    # datasets produced by the split cell
    test=test,
    train=train,
    # rule list and modsec object from the setup cell
    rule_ids=rule_ids,
    modsec=modsec,
)

In [None]:
# Score one payload with the trained model before any evasion is attempted

# Alternative payloads (uncomment one to try it):
# payload = "UPDATE `tab` SET `col1` = 1 WHERE `col3` >= 1110573056 LIMIT 516358144;" # sane
# payload = "SELECT `col1` FROM `tab` WHERE `col1` LIKE '%'s'%';" # attack
payload = 'SELECT SLEEP(5)#";'  # attack

# Evasion engine wrapping the model; it is only exercised in the evasion cell.
engine = EvasionEngine(wafamole_model)

# payload_to_vec is given the base64 text of the payload, not the raw string.
payload_base64 = base64.b64encode(payload.encode("utf-8")).decode("utf-8")
vec = payload_to_vec(payload_base64, rule_ids, modsec)

# classify() receives the raw payload; its result is reported below as a
# confidence value, despite the boolean-sounding variable name.
is_attack = wafamole_model.classify(payload)

print(f"Payload: {payload}")
print(f"Vec: {vec}")
print(f"Confidence: {round(is_attack, 5)}")

In [None]:
# Run WAF-A-MoLE against the model to search for an evading variant of the payload

# TODO: decide whether to stop as soon as the threshold is reached (plain evasion)
# or to keep minimizing confidence until convergence. Probably the latter, but that
# needs a stopping criterion (most likely a timeout).
min_confidence, min_payload = engine.evaluate(
    payload=payload,
    threshold=threshold,
    # search budget
    round_size=10,
    max_rounds=200,
    timeout=120,
)

print()
# The payload is printed as bytes, so its repr shows non-printable characters as
# escapes (presumably the intent -- mutated payloads may contain such characters).
print(f"Min payload: {min_payload.encode('utf-8')}")
print(f"Min confidence: {round(min_confidence, 5)}")
print()
# is_attack is the baseline confidence computed in the previous cell.
print(
    f"Reduced confidence from {round(is_attack, 5)} to {round(min_confidence, 5)} "
    f"(reduction of {round(is_attack - min_confidence, 5)})"
)

# The attempt counts as a success only when the best score is strictly below the threshold.
if min_confidence < threshold:
    print("Evasion successful")
else:
    print("Evasion failed")