# Zero-Day URL Detection Model


In [1]:
# 1. Imports & Configuration
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_fscore_support
import json
import os
import warnings
import random
import time
from collections import Counter
from urllib.parse import urlparse



# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation notices from torch / pandas.
warnings.filterwarnings('ignore')

# Assemble the path from its components so it resolves on POSIX as well as on
# Windows; a literal backslash path is a single (non-existent) filename on POSIX.
DATASET_FILE = os.path.join('..', 'datasets', 'combined_dataset.csv')


# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {DEVICE}")


def set_seed(seed=42):
    """Seed the torch, NumPy and stdlib `random` generators with `seed`.

    When CUDA is available, every CUDA device generator is seeded as well.
    """
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

set_seed(42)
print("Random seeds set.")

# Passed as `num_workers` to every DataLoader in this notebook.
NUM_WORKERS = 0


# Directory and file names for everything the autoencoder pipeline writes.
AE_OUTPUT_DIR = 'ae_artifacts'
AE_MODEL_FILE = os.path.join(AE_OUTPUT_DIR, 'char_ae_best.pt')      # best-validation state_dict
AE_THRESH_FILE = os.path.join(AE_OUTPUT_DIR, 'ae_threshold.json')   # anomaly threshold
CHAR_VOCAB_FILE = os.path.join(AE_OUTPUT_DIR, 'char_vocab.json')    # character vocabulary
AE_BUNDLE_FILE = os.path.join(AE_OUTPUT_DIR, 'bundle.json')         # hyperparameters + artifact paths


HYB_MAX_LEN = 256           # every text is truncated / padded to this many characters
VOCAB_MAX = 128             # vocabulary size cap, including the <PAD> and <UNK> entries
VOCAB_BUILD_SAMPLE = 300000 # at most this many training texts are sampled to count characters

AE_EMB = 64            # character embedding width
AE_LATENT = 128        # size of the latent vector z
AE_EPOCHS = 30         # upper bound on training epochs (early stopping may end sooner)
AE_BATCH_SIZE = 1024   # batch size of every DataLoader
AE_LR = 3e-4           # AdamW learning rate


# Early stopping on the validation loss: stop after PATIENCE consecutive epochs
# that fail to beat the best loss by more than MIN_DELTA.
AE_EARLY_STOPPING_PATIENCE = 5
AE_EARLY_STOPPING_MIN_DELTA = 0.001


# Percentile of the benign validation reconstruction scores used as the anomaly threshold.
AE_THRESHOLD_PCTL = 99.5


# Mixed precision is requested here but only takes effect when DEVICE is CUDA.
USE_AMP = True

os.makedirs(AE_OUTPUT_DIR, exist_ok=True)
print(f"AE artifacts dir: {AE_OUTPUT_DIR}")

Using device: cuda
Random seeds set.
AE artifacts dir: ae_artifacts


## 2. Data Loading & Analysis


In [2]:

# Fail fast when the dataset is missing. Continuing with an empty placeholder
# frame would let the later cells overwrite the cleaned CSV with zero rows and
# only crash much further down, at the train/test split.
if not os.path.exists(DATASET_FILE):
    raise FileNotFoundError(f" Dataset NOT found at {DATASET_FILE}!")

df_raw = pd.read_csv(DATASET_FILE)
print(f" Loaded {DATASET_FILE}")
print(f"Raw Shape: {df_raw.shape}")


 Loaded ..\datasets\combined_dataset.csv
Raw Shape: (7007263, 16)


In [3]:
# Column dtypes and memory footprint of the raw frame.
df_raw.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7007263 entries, 0 to 7007262
Data columns (total 16 columns):
 #   Column           Dtype  
---  ------           -----  
 0   Method           object 
 1   User-Agent       object 
 2   Pragma           object 
 3   Cache-Control    object 
 4   Accept           object 
 5   Accept-Encoding  float64
 6   Accept-Charset   float64
 7   Accept-Language  float64
 8   Host             float64
 9   Cookie           float64
 10  Connection       float64
 11  content-length   float64
 12  content          object 
 13  URL              object 
 14  type             object 
 15  label            int64  
dtypes: float64(7), int64(1), object(8)
memory usage: 855.4+ MB


In [4]:
# Null count for each column of the raw frame.
print("Missing Values per Column:")
print(df_raw.isnull().sum())


Missing Values per Column:
Method                   0
User-Agent               0
Pragma                   0
Cache-Control            0
Accept                   0
Accept-Encoding    7007263
Accept-Charset     7007263
Accept-Language    7007263
Host               7007263
Cookie             7007263
Connection         7007263
content-length     7007263
content            4255600
URL                      0
type                     0
label                    0
dtype: int64


In [5]:
# Rows that repeat an earlier row in every column (no `subset` is given).
dupes = df_raw.duplicated().sum()
print(f"Duplicate Rows: {dupes}")


Duplicate Rows: 0


In [6]:
# Keep the first occurrence of each fully-duplicated row; rebinding the name
# instead of mutating in place gives the same frame contents and index.
df_raw = df_raw.drop_duplicates()

In [7]:
# Class balance of the raw frame: absolute counts first, then percentages.
if 'label' in df_raw.columns:
    print(df_raw['label'].value_counts())
    print("\nPercentage:")
    print(df_raw['label'].value_counts(normalize=True) * 100)


label
0    4000000
1    3007263
Name: count, dtype: int64

Percentage:
label
0    57.083629
1    42.916371
Name: proportion, dtype: float64


## 3. Cleaning & Separation


In [8]:
required_cols = ['URL', 'Method', 'label']

# Project to the three modelling columns, then drop duplicate rows, then drop
# rows containing nulls -- each step yields a new frame, nothing is mutated.
df_clean = (
    df_raw[required_cols]
    .drop_duplicates()
    .dropna()
)

print(f"Final Clean Shape: {df_clean.shape}")


Final Clean Shape: (7007263, 3)


In [9]:
# Column dtypes and memory footprint after cleaning.
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7007263 entries, 0 to 7007262
Data columns (total 3 columns):
 #   Column  Dtype 
---  ------  ----- 
 0   URL     object
 1   Method  object
 2   label   int64 
dtypes: int64(1), object(2)
memory usage: 160.4+ MB


In [10]:

# Null count per column of the cleaned frame (dropna ran in the cleaning cell).
print("Missing Values per Column:")
print(df_clean.isnull().sum())


Missing Values per Column:
URL       0
Method    0
label     0
dtype: int64


In [11]:

# Class balance after cleaning: absolute counts first, then percentages.
if 'label' in df_clean.columns:
    print(df_clean['label'].value_counts())
    print("\nPercentage:")
    print(df_clean['label'].value_counts(normalize=True) * 100)


label
0    4000000
1    3007263
Name: count, dtype: int64

Percentage:
label
0    57.083629
1    42.916371
Name: proportion, dtype: float64


In [12]:
# Persist the cleaned three-column frame; index=False keeps the row index out of the CSV.
clean_file_path = '../datasets/cleaned_dataset.csv'
df_clean.to_csv(clean_file_path, index=False)
print(f" Cleaned dataset saved to {clean_file_path}")
print(f"   Rows: {len(df_clean)}")


 Cleaned dataset saved to ../datasets/cleaned_dataset.csv
   Rows: 7007263


In [13]:
# Work on a copy so the column added later does not touch df_clean.
df_hybrid = df_clean.copy()
print(" Created 'df_hybrid' copy.")


 Created 'df_hybrid' copy.


In [14]:
# Show the first ten rows with the notebook's rich DataFrame rendering.
print("Hybrid Data Sample:")
display(df_hybrid.head(10))

Hybrid Data Sample:


Unnamed: 0,URL,Method,label
0,https://amazon.com/gp/product?ref=Axr8gz&k=614...,GET,0
1,https://auth.provider.io/admin?sort=%2e%2e%2f%...,GET,1
2,https://bank-secure.com/path/to/resource?attac...,GET,1
3,http://support.helpdesk.net/dashboard?doc=..%5...,PUT,1
4,https://dev-environment.local?cmd=</nowiki>,DELETE,1
5,"http://api.services.io?doc=UNION ALL SELECT 1,...",PUT,1
6,https://reddit.com/r/news?limit=X6hZqgr&req_id...,GET,0
7,https://twitter.com/explore?s=a1RNOj9mwl&sid=f...,POST,0
8,https://twitter.com/home?t=6437145&ref_src=2Qh...,POST,0
9,https://amazon.com/cart?ref=1681387&k=eS1qstrX...,GET,0


## 4. Deep Learning Pipeline (Autoencoder Zero-Day)
- **Unsupervised Char Autoencoder** trained on **benign only** to detect zero-day requests


In [15]:
class EarlyStopper:
    """Signal when a monitored metric has stopped improving.

    Args:
        patience: number of consecutive non-improving `step` calls after which
            `step` returns True.
        min_delta: margin by which the metric must beat the best value seen so
            far to count as an improvement.
        mode: 'max' if larger metrics are better; any other value is treated
            as "smaller is better".

    Attributes:
        best: best metric seen so far (None before the first `step`).
        counter: consecutive non-improving steps since the last improvement.
        best_epoch: epoch at which `best` was recorded. This is the `epoch`
            passed to `step`, or the 0-based index of the `step` call when no
            epoch was passed.
    """

    def __init__(self, patience=3, min_delta=0.0, mode='max'):
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.best = None
        self.counter = 0
        self.best_epoch = 0
        # Number of step() calls made so far; stands in for `epoch` when the
        # caller does not supply one.
        self._num_steps = 0

    def step(self, metric, epoch=None):
        """Record `metric` and return True when training should stop.

        Args:
            metric: latest value of the monitored metric.
            epoch: optional epoch number to store in `best_epoch` when this
                call sets a new best.

        Returns:
            False on the first call and whenever the metric improves;
            otherwise whether `counter` has reached `patience`.
        """
        # Test against None rather than truthiness so an explicit epoch 0 is
        # honoured, and fall back to the call index so `best_epoch` always
        # names the step that actually produced `best`.
        current_epoch = self._num_steps if epoch is None else epoch
        self._num_steps += 1

        if self.best is None:
            self.best = metric
            self.best_epoch = current_epoch
            return False

        if self.mode == 'max':
            improved = metric > self.best + self.min_delta
        else:
            improved = metric < self.best - self.min_delta

        if improved:
            self.best = metric
            self.best_epoch = current_epoch
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience

In [16]:
def build_text(url, method):
    """Build the model input string from a request URL and HTTP method.

    Only the URL's path and query string are kept; scheme, host, `;params`
    and `#fragment` are not part of the returned text.

    Args:
        url: request URL; None / NaN is treated as an empty string.
        method: HTTP method; None / NaN is treated as an empty string.

    Returns:
        A string of the form "METHOD=<method> | PATH=<path>[?<query>]". When
        the URL cannot be parsed, the raw URL is used in place of the path.
    """
    url = "" if pd.isna(url) else str(url)
    method = "" if pd.isna(method) else str(method)

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced "[" in the
        # host); keep the raw string so the row still reaches the model.
        path_query = url
    else:
        path_query = parsed.path + ("?" + parsed.query if parsed.query else "")

    return f"METHOD={method} | PATH={path_query}"

# Derive the model input string for every row (URL and Method are walked in
# lock-step), then pull the labels out as an integer array.
df_hybrid["__text__"] = list(
    map(build_text, df_hybrid["URL"].values, df_hybrid["Method"].values)
)
y_h = df_hybrid["label"].astype(int).values

print("Hybrid df:", df_hybrid.shape, "label counts:", pd.Series(y_h).value_counts().to_dict())


Hybrid df: (7007263, 4) label counts: {0: 4000000, 1: 3007263}


In [17]:

# Split row positions rather than the frame itself: first carve off 10% as the
# test set, then 10% of the remainder as validation, stratifying on the label
# both times.
idx = np.arange(len(df_hybrid))
idx_train, idx_test = train_test_split(
    idx, test_size=0.10, random_state=42, stratify=y_h
)

y_train = y_h[idx_train]
idx_train, idx_val = train_test_split(
    idx_train, test_size=0.10, random_state=42, stratify=y_train
)

# Materialise the texts and labels of each partition from the position arrays.
train_texts, val_texts, test_texts = (
    df_hybrid["__text__"].values[part] for part in (idx_train, idx_val, idx_test)
)
train_y, val_y, test_y = (
    y_h[part] for part in (idx_train, idx_val, idx_test)
)

print("train/val/test:", len(train_texts), len(val_texts), len(test_texts))


train/val/test: 5675882 630654 700727


In [18]:

# Count characters on a fixed-seed random sample of the training texts.
sample_n = min(VOCAB_BUILD_SAMPLE, len(train_texts))
rng = np.random.default_rng(42)
sample_idx = rng.choice(len(train_texts), size=sample_n, replace=False)
sample_texts = list(train_texts[sample_idx])

cnt = Counter()
for sampled in sample_texts:
    cnt.update(sampled)  # a str is consumed one character at a time

# Two reserved entries come first, followed by the most frequent characters,
# so the vocabulary never exceeds VOCAB_MAX entries.
PAD, UNK = "<PAD>", "<UNK>"
most_common = [ch for ch, _ in cnt.most_common(VOCAB_MAX - 2)]

itos = [PAD, UNK, *most_common]
stoi = dict(zip(itos, range(len(itos))))
pad_id, unk_id = stoi[PAD], stoi[UNK]
vocab_size = len(itos)

print("vocab_size:", vocab_size, "pad_id:", pad_id, "unk_id:", unk_id)

# Save vocab (index -> character list; the reverse map is rebuilt from it)
with open(CHAR_VOCAB_FILE, "w", encoding="utf-8") as f:
    json.dump({"itos": itos}, f, ensure_ascii=False, indent=2)
print(" Saved char vocab to", CHAR_VOCAB_FILE)


vocab_size: 123 pad_id: 0 unk_id: 1
 Saved char vocab to ae_artifacts\char_vocab.json


In [19]:

def encode_text_to_ids(text: str, max_len: int):
    """Map `text` to an int64 array of character ids.

    The text is cut to its first `max_len` characters; characters missing from
    the module-level `stoi` map become `unk_id`, and texts shorter than
    `max_len` are right-padded with `pad_id`. None is encoded as an empty text.
    """
    chars = ("" if text is None else str(text))[:max_len]
    ids = [stoi.get(ch, unk_id) for ch in chars]
    # A non-positive repeat count yields an empty list, so full-length inputs
    # receive no padding.
    ids.extend([pad_id] * (max_len - len(ids)))
    return np.asarray(ids, dtype=np.int64)

class HybridTextDataset(Dataset):
    """Dataset that encodes texts to fixed-length character-id tensors on access.

    With `labels=None` each item is the id tensor alone; otherwise each item is
    an `(ids, label)` pair with the label as a float32 scalar tensor.
    """

    def __init__(self, texts, labels=None):
        self.texts, self.labels = texts, labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Encoding is done per item, at HYB_MAX_LEN characters.
        ids = encode_text_to_ids(self.texts[idx], HYB_MAX_LEN)
        x = torch.from_numpy(ids)
        if self.labels is not None:
            return x, torch.tensor(int(self.labels[idx]), dtype=torch.float32)
        return x


# Labelled datasets over the three partitions; each item is an (ids, label) pair.
train_ds = HybridTextDataset(train_texts, train_y)
val_ds   = HybridTextDataset(val_texts, val_y)
test_ds  = HybridTextDataset(test_texts, test_y)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_ds, batch_size=AE_BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=True, drop_last=False)
val_loader   = DataLoader(val_ds, batch_size=AE_BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True, drop_last=False)
test_loader  = DataLoader(test_ds, batch_size=AE_BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True, drop_last=False)

print(" Dataloaders ready.")


 Dataloaders ready.


### 5. Unsupervised Model: Char Autoencoder (Benign-only)

In [20]:
# Keep only the label-0 rows of each partition; the autoencoder is fitted and
# thresholded on these alone.
benign_train_texts = train_texts[train_y == 0]
benign_val_texts   = val_texts[val_y == 0]
benign_test_texts  = test_texts[test_y == 0]

# labels=None makes every batch a bare id tensor (no label element), which is
# the batch shape the autoencoder training and scoring loops iterate over.
ae_train_loader = DataLoader(HybridTextDataset(benign_train_texts, labels=None),
                             batch_size=AE_BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
ae_val_loader   = DataLoader(HybridTextDataset(benign_val_texts, labels=None),
                             batch_size=AE_BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
ae_test_loader  = DataLoader(HybridTextDataset(benign_test_texts, labels=None),
                             batch_size=AE_BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print("Benign rows (train/val/test):", len(benign_train_texts), len(benign_val_texts), len(benign_test_texts))


Benign rows (train/val/test): 3240000 360000 400000


In [21]:
class CharAutoencoder(nn.Module):
    """Character-level convolutional autoencoder.

    Encoder: embedding -> two width-5 convolutions -> max over the sequence
    axis -> linear projection to the latent vector `z`.
    Decoder: linear -> `z` repeated at every position -> width-5 convolution ->
    1x1 convolution producing per-position vocabulary logits.

    NOTE(review): every decoder position receives the same vector, so apart
    from the zero-padding edge effect of `dec_conv1` the logits do not vary
    with position. Confirm this is intended before relying on the
    reconstruction loss as a sequence-level signal.
    """

    def __init__(self, vocab_size: int, emb_dim: int, latent_dim: int, pad_id: int):
        super().__init__()
        hidden = 128  # channel width shared by all conv / linear layers

        # Attribute names double as state_dict keys, so they are part of the
        # checkpoint format.
        self.emb = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_id)

        self.enc_conv1 = nn.Conv1d(emb_dim, hidden, kernel_size=5, padding=2)
        self.enc_conv2 = nn.Conv1d(hidden, hidden, kernel_size=5, padding=2)
        self.enc_fc = nn.Linear(hidden, latent_dim)

        self.dec_fc = nn.Linear(latent_dim, hidden)
        self.dec_conv1 = nn.Conv1d(hidden, hidden, kernel_size=5, padding=2)
        self.dec_out = nn.Conv1d(hidden, vocab_size, kernel_size=1)

    def forward(self, x):
        """Return `(logits, z)` for a batch of id sequences.

        `x` is indexed as (batch, length); `logits` is (batch, vocab, length)
        and `z` is (batch, latent).
        """
        seq_len = x.size(1)

        # --- encoder ---
        feats = self.emb(x).transpose(1, 2)              # (B, E, L)
        for conv in (self.enc_conv1, self.enc_conv2):
            feats = torch.relu(conv(feats))              # (B, 128, L)
        pooled = feats.max(dim=2).values                 # (B, 128)
        z = self.enc_fc(pooled)                          # (B, latent)

        # --- decoder ---
        dec = torch.relu(self.dec_fc(z))                 # (B, 128)
        dec = dec.unsqueeze(2).repeat(1, 1, seq_len)     # (B, 128, L)
        dec = torch.relu(self.dec_conv1(dec))            # (B, 128, L)
        return self.dec_out(dec), z                      # (B, vocab, L), (B, latent)

# Build the model on DEVICE; the bare `ae` expression shows the module summary
# as the cell output.
ae = CharAutoencoder(vocab_size, AE_EMB, AE_LATENT, pad_id).to(DEVICE)
ae

CharAutoencoder(
  (emb): Embedding(123, 64, padding_idx=0)
  (enc_conv1): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (enc_conv2): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (enc_fc): Linear(in_features=128, out_features=128, bias=True)
  (dec_fc): Linear(in_features=128, out_features=128, bias=True)
  (dec_conv1): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (dec_out): Conv1d(128, 123, kernel_size=(1,), stride=(1,))
)

In [22]:
def _ae_mean_loss(model, loader, criterion):
    """Return the batch-size-weighted mean of `criterion` over `loader`, without gradients.

    The model is switched to eval mode and the forward pass runs outside autocast.
    """
    model.eval()
    total, n = 0.0, 0
    with torch.no_grad():
        for xb in loader:
            xb = xb.to(DEVICE, non_blocking=True)
            logits, _ = model(xb)
            bs = xb.size(0)
            total += criterion(logits, xb).item() * bs
            n += bs
    return total / max(1, n)


def train_ae(model, train_loader, val_loader=None):
    """Train the autoencoder to reconstruct its own input ids.

    Each batch is used as both input and target of a cross-entropy loss that
    ignores padding positions. After every epoch the monitored loss (the
    validation loss, or the training loss when `val_loader` is None) is
    compared with the best so far: any strict improvement writes the model's
    state_dict to AE_MODEL_FILE, and an EarlyStopper configured from the
    AE_EARLY_STOPPING_* constants decides whether to stop.

    Args:
        model: module whose forward returns `(logits, z)` with logits laid out
            as (batch, vocab, length).
        train_loader: iterable of id tensors (no labels).
        val_loader: optional iterable of id tensors used for validation.

    Returns:
        dict with `best_val_loss`, `best_epoch` (None when no checkpoint was
        written) and `history` (one dict per completed epoch).
    """
    amp_enabled = USE_AMP and DEVICE.type == "cuda"
    opt = torch.optim.AdamW(model.parameters(), lr=AE_LR)
    scaler = torch.cuda.amp.GradScaler(enabled=amp_enabled)
    ce = nn.CrossEntropyLoss(ignore_index=pad_id, reduction="mean")

    early_stopper = EarlyStopper(
        patience=AE_EARLY_STOPPING_PATIENCE,
        min_delta=AE_EARLY_STOPPING_MIN_DELTA,
        mode='min'
    )

    best_val_loss = float('inf')
    best_epoch = None
    history = []

    print(f"\n{'='*80}")
    print(f"Training Autoencoder for {AE_EPOCHS} epochs on {DEVICE}")
    print(f"Early Stopping: Patience={AE_EARLY_STOPPING_PATIENCE}, Min Delta={AE_EARLY_STOPPING_MIN_DELTA}")
    print(f"{'='*80}\n")

    for epoch in range(1, AE_EPOCHS + 1):
        model.train()
        total, n = 0.0, 0
        start_time = time.time()

        for batch_idx, xb in enumerate(train_loader):
            xb = xb.to(DEVICE, non_blocking=True)
            opt.zero_grad(set_to_none=True)

            with torch.cuda.amp.autocast(enabled=amp_enabled):
                logits, _ = model(xb)
                loss = ce(logits, xb)

            # The scaler is a pass-through when AMP is disabled.
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

            bs = xb.size(0)
            total += loss.item() * bs
            n += bs

            if batch_idx % 50 == 0:
                print(f"  Epoch {epoch} | Batch {batch_idx}/{len(train_loader)} | Loss: {loss.item():.4f}", end='\r')

        train_loss = total / max(1, n)
        # Timing covers the training pass only, not validation.
        epoch_time = time.time() - start_time

        # Validation (falls back to the training loss when no loader is given)
        val_loss = train_loss if val_loader is None else _ae_mean_loss(model, val_loader, ce)

        print(f"\nEpoch {epoch}/{AE_EPOCHS} Done | Train Loss: {train_loss:.4f} | "
              f"Val Loss: {val_loss:.4f} | Time: {epoch_time:.1f}s")
        history.append({
            "epoch": epoch,
            "train_loss": train_loss,
            "val_loss": val_loss,
            "seconds": epoch_time,
        })

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            torch.save(model.state_dict(), AE_MODEL_FILE)
            print(f"  [+] Validation loss improved to {best_val_loss:.4f}. Saved model.")
        else:
            print(f"  [-] Validation loss did not improve. Best: {best_val_loss:.4f}")

        if early_stopper.step(val_loss, epoch):
            # Report the checkpointed best loss: the stopper's own `best` only
            # moves on improvements larger than min_delta and can therefore lag
            # behind the loss of the model that was actually saved.
            print(f"\n Early Stopping at epoch {epoch}! Best Loss: {best_val_loss:.4f}")
            break

    if best_epoch is None:
        # Happens when the loss never compares below infinity (e.g. NaN), so
        # AE_MODEL_FILE, if present, is left over from an earlier run.
        print(f" WARNING: no checkpoint was written by this run; {AE_MODEL_FILE} may be stale or missing.")

    print(f"\n{'='*80}")
    print("Autoencoder Training Complete.")
    print(f"{'='*80}\n")

    return {"best_val_loss": best_val_loss, "best_epoch": best_epoch, "history": history}

# Train
train_ae(ae, ae_train_loader, ae_val_loader)

# Compute threshold
# Reload the checkpoint that train_ae wrote on its last improvement, so scoring
# uses the best-validation weights rather than the last-epoch weights.
ae.load_state_dict(torch.load(AE_MODEL_FILE, map_location=DEVICE))
ae.eval()

def ae_recon_scores(model, loader):
    """Return one reconstruction score per sample as a 1-D NumPy array.

    A sample's score is its cross-entropy between the model's logits and its
    own ids, averaged over non-padding positions (at least 1 in the
    denominator). `loader` must yield id tensors without labels.
    """
    model.eval()
    per_batch = []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE, non_blocking=True)
            logits, _ = model(batch)
            # Per-position loss; ignored (padding) positions come back as 0.
            token_loss = nn.functional.cross_entropy(
                logits, batch, ignore_index=pad_id, reduction="none"
            )
            keep = (batch != pad_id).float()
            n_tokens = keep.sum(dim=1).clamp(min=1.0)
            sample_loss = (token_loss * keep).sum(dim=1) / n_tokens
            per_batch.append(sample_loss.cpu().numpy())
    return np.concatenate(per_batch)

# Threshold = the AE_THRESHOLD_PCTL-th percentile of the benign validation
# scores, i.e. the score that this share of benign validation samples stays under.
benign_val_scores = ae_recon_scores(ae, ae_val_loader)
AE_T2 = float(np.percentile(benign_val_scores, AE_THRESHOLD_PCTL))
print(f" AE_T2 (percentile={AE_THRESHOLD_PCTL}) = {AE_T2:.6f}")

# Persist the threshold together with the percentile it was derived from.
with open(AE_THRESH_FILE, "w", encoding="utf-8") as f:
    json.dump({"AE_T2": AE_T2, "percentile": AE_THRESHOLD_PCTL}, f, indent=2)
print(" Saved AE threshold to", AE_THRESH_FILE)


Training Autoencoder for 30 epochs on cuda
Early Stopping: Patience=5, Min Delta=0.001

  Epoch 1 | Batch 3150/3165 | Loss: 3.5899
Epoch 1/30 Done | Train Loss: 3.6594 | Val Loss: 3.5903 | Time: 100.2s
  [+] Validation loss improved to 3.5903. Saved model.
  Epoch 2 | Batch 3150/3165 | Loss: 3.5594
Epoch 2/30 Done | Train Loss: 3.5677 | Val Loss: 3.5574 | Time: 100.9s
  [+] Validation loss improved to 3.5574. Saved model.
  Epoch 3 | Batch 3150/3165 | Loss: 3.5390
Epoch 3/30 Done | Train Loss: 3.5501 | Val Loss: 3.5507 | Time: 101.0s
  [+] Validation loss improved to 3.5507. Saved model.
  Epoch 4 | Batch 3150/3165 | Loss: 3.5357
Epoch 4/30 Done | Train Loss: 3.5412 | Val Loss: 3.5403 | Time: 100.8s
  [+] Validation loss improved to 3.5403. Saved model.
  Epoch 5 | Batch 3150/3165 | Loss: 3.5276
Epoch 5/30 Done | Train Loss: 3.5348 | Val Loss: 3.5330 | Time: 100.6s
  [+] Validation loss improved to 3.5330. Saved model.
  Epoch 6 | Batch 3150/3165 | Loss: 3.5366
Epoch 6/30 Done | Train

### 5.4 Save Artifact Bundle (AE)

In [23]:
# Manifest for loading the model elsewhere: the hyperparameters needed to
# rebuild it, the anomaly threshold, and the paths of the saved artifacts.
bundle = dict(
    HYB_MAX_LEN=HYB_MAX_LEN,
    VOCAB_MAX=VOCAB_MAX,
    AE_T2=AE_T2,
    AE_THRESHOLD_PCTL=AE_THRESHOLD_PCTL,
    AE_EMB=AE_EMB,
    AE_LATENT=AE_LATENT,
    files=dict(
        CHAR_VOCAB_FILE=CHAR_VOCAB_FILE,
        AE_MODEL_FILE=AE_MODEL_FILE,
        AE_THRESH_FILE=AE_THRESH_FILE,
    ),
)

with open(AE_BUNDLE_FILE, "w", encoding="utf-8") as f:
    f.write(json.dumps(bundle, indent=2))

print(" Saved AE bundle to", AE_BUNDLE_FILE)

 Saved AE bundle to ae_artifacts\bundle.json


## 6. Production Testing (Standalone with Loading)
This cell loads the **saved artifacts** and randomly selects Methods/User-Agents for robustness.


In [24]:
payloads = [
    ["https://example.com/about?date=90shoq4", False],
    ["https://payment-gateway.io/assets/js/%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%36%35%%37%34%%36%33%%32%66%%37%30%%36%31%%37%33%%37%33%%37%37%%36%34", True],
    ["https://social-media.xyz/profile/%2e%68%74%61%63%63%65%73%73", True],
    ["https://admin-console.net/search?date=gtcikalgdjs&id=teq0y&action=me1mtay&date=hy1m9qauifmr", False],
    ["https://wiki-resource.org/settings?date=../../../../../../../../conf/server.xml", True],
    ["https://blog-space.io/downloads?ref=lyhv", False],
    ["https://portal.corp/api/v1/users/\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\etc\\shadow", True],
    ["https://blog-space.io/profile?id=2kphxjr85&session_id=y2z3i&view=r1qmb2", False],
    ["https://blog-space.io/login?q=..%255c..%255c..%255c..%255c..%255c..%255c../winnt/system32/cmd.exe%3F/c%2Bdir%2Bc%3A%5C", True],
    ["https://news-daily.com/home?q=99y107&ref=izvvcpghuac&id=c6x821e", False],
    ["https://portal.corp/login?sort=%252e%252e%252f%252e%252e%252f%252e%2568%2574%2561%2563%2563%2565%2573%2573", True],
    ["https://internal-portal.corp/downloads", False],
    ["https://admin-console.net/products/view?date=ik0gbtys", False],
    ["https://example.com/home?sort=k5ldvwak&date=mv1cgyl&category=b4kxfokfp", False],
    ["https://wiki-resource.org/images?category=r6orzlgj3", False],
    ["https://admin-console.net/home?token=%252e%252e%252f%252e%252e%252f%252e%252e%252f%2565%2574%2563%252f%2570%2561%2573%2573%2577%2564", True],
    ["https://social-media.xyz/api/v1/users", False],
    ["https://example.com/assets/js?view=25zm9mt&q=9aysw3ivd&action=a5j2ov6uz1m", False],
    ["https://social-media.xyz/register/../../../../../../../../../etc/passwd", True],
    ["https://wiki-resource.org/images?action=bhyyqe4os&action=mj277tgekis&sort=mbimfhs7pcr&session_id=pj02d2b", False],
    ["https://portal.corp/search//cgi-bin/.%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/etc/passwd", True],
    ["https://social-media.xyz/images?type=debx1&user=5zkzyspl1h9e&date=q719ufa7&sort=6fth", False],
    ["https://bank-secure.com/register?view=qti2m&ref=c5e84v6g", False],
    ["https://wiki-resource.org/about?view=\\\\&apos;/bin/cat%20/etc/shadow\\\\&apos;", True],
    ["https://payment-gateway.io/images", False],
    ["https://bank-secure.com/contact", False],
    ["https://bank-secure.com/search/../../../../../../../../../../../../etc/shadow%00", True],
    ["https://internal-portal.corp/register?session_id=h659v86js0h&lang=s1bof8lu7oj2&type=9giwfupylsnk", False],
    ["https://admin-console.net/images?search=C%3A%5Cboot.ini", True],
    ["https://social-media.xyz/login?session_id=%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%36%35%%37%34%%36%33%%32%66%%37%30%%36%31%%37%33%%37%33%%37%37%%36%34", True],
    ["https://example.com/images?ref=%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%2577%2569%256e%256e%2574%252f%2573%2579%2573%2574%2565%256d%2533%2532%252f%2563%256d%2564%252e%2565%2578%2565%253f%252f%2563%252b%2564%2569%2572%252b%2563%253a%255c", True],
    ["https://blog-space.io/category/%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%65%74%63%2f%70%61%73%73%77%64", True],
    ["https://target-site.org/settings/%2e%2f%2e%2f%2e%68%74%61%63%63%65%73%73", True],
    ["https://target-site.org/contact/%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%00", True],
    ["https://social-media.xyz/products/view?id=dvcrxisi&action=dlghp&lang=g8hvn5ibe27&date=gzh0", False],
    ["https://bank-secure.com/category//%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/boot.ini", True],
    ["https://admin-console.net/feedback?lang=\\..\\..\\..\\..\\..\\..\\WINDOWS\\win.ini", True],
    ["https://blog-space.io/dashboard?type=%5C..%5C..%5C..%5CWINDOWS%5Cwin.ini", True],
    ["https://internal-portal.corp/faq?type=limk5", False],
    ["https://target-site.org/settings//../../../../../../../../../../etc/passwd^^", True],
    ["https://social-media.xyz/profile", False],
    ["https://example.com/dashboard?lang=%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%2577%2569%256e%256e%2574%252f%2573%2579%2573%2574%2565%256d%2533%2532%252f%2563%256d%2564%252e%2565%2578%2565%253f%252f%2563%252b%2564%2569%2572%252b%2563%253a%255c", True],
    ["https://payment-gateway.io/contact/%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%57%49%4e%44%4f%57%53%5c%77%69%6e%2e%69%6e%69", True],
    ["https://payment-gateway.io/contact?date=dzvvxwd1zf4&search=7bm6lal", False],
    ["https://social-media.xyz/home?user=ofcwgc&lang=udoxmh8p9", False],
    ["https://social-media.xyz/settings?action=/cgi-bin/.%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/etc/passwd", True],
    ["https://social-media.xyz/about/../../../../../../etc/passwd", True],
    ["https://myshop.net/about", False],
    ["https://portal.corp/register?category=/../../../../../../../../bin/id%7C", True],
    ["https://bank-secure.com/settings?view=87qs&page=q89j74vhq", False],
    ["https://admin-console.net/about?q=../../../.htaccess", True],
    ["https://blog-space.io/feedback?date=vlsbrs8x1&page=rz3ga1h8&category=6bbx&token=g9h46o3", False],
    ["https://example.com/api/v1/users?page=zmogt4var1uy&type=lg63&page=59w0&user=7f3so2l", False],
    ["https://portal.corp/feedback/../../.htaccess", True],
    ["https://internal-portal.corp/faq?session_id=v6tf", False],
    ["https://portal.corp/profile", False],
    ["https://admin-console.net/api/v1/users?page=buml9ogg&search=2krdxitmtoup", False],
    ["https://portal.corp/settings?ref=n5l4fv4v0&q=010ji4hoalj&lang=sh836i2igwi&session_id=ptzs8zfgjdti", False],
    ["https://example.com/dashboard?view=brs4yrbbrvsy&view=vudn2&type=5wk3tcrx47na&lang=1j0tewmprv1u", False],
    ["https://news-daily.com/search?search=zcftysb&category=mtne&page=4vzjs6", False],
    ["https://myshop.net/downloads", False],
    ["https://blog-space.io/settings/..%5c..%5c..%5c..%5c..%5c..%5c..%5c../winnt/system32/cmd.exe?/c+dir+c:\\", True],
    ["https://blog-space.io/products/view/..%5c..%5c..%5c../winnt/system32/cmd.exe?/c+dir+c:\\", True],
    ["https://bank-secure.com/profile?id=%25%2535%2563%25%2532%2565%25%2532%2565%25%2535%2563%25%2532%2565%25%2532%2565%25%2535%2563%25%2532%2565%25%2532%2565%25%2535%2563%25%2535%2537%25%2534%2539%25%2534%2565%25%2534%2534%25%2534%2566%25%2535%2537%25%2535%2533%25%2535%2563%25%2537%2537%25%2536%2539%25%2536%2565%25%2532%2565%25%2536%2539%25%2536%2565%25%2536%2539", True],
    ["https://wiki-resource.org/profile/%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%65%74%63%2f%70%61%73%73%77%64", True],
    ["https://bank-secure.com/faq", False],
    ["https://target-site.org/profile?sort=ashpj01s7t&ref=zzbyic9l&q=8dilgcu&user=4bn8", False],
    ["https://example.com/faq?search=5ans8iks2kc6&ref=bumo&action=9750wyd&type=5hik0hsk", False],
    ["https://news-daily.com/assets/js//.\\\\./.\\\\./.\\\\./.\\\\./.\\\\./.\\\\./boot.ini", True],
    ["https://example.com/assets/js?sort=%5C..%5C..%5C..%5C..%5C..%5C..%5C..%5C..%5C..%5C..%5Cetc%5Cshadow%2500", True],
    ["https://payment-gateway.io/settings?lang=hi5f&date=se9g9jp8v&ref=llc48kzg&q=tajpgdsamdk", False],
    ["https://blog-space.io/assets/js?action=4p3t22&action=qy4jc3&q=o7ydw&date=n9ojilmddmhc", False],
    ["https://blog-space.io/about?lang=/cgi-bin/.%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/etc/passwd", True],
    ["https://portal.corp/login?search=hxsz2oa1a0e&q=url23ug&search=lpyn4eiunck2", False],
    ["https://example.com/dashboard?q=7mhdix27ji", False],
    ["https://target-site.org/products/view/%0a/bin/cat%20/etc/passwd", True],
    ["https://payment-gateway.io/api/v1/users/..%5c..%5c..%5c..%5c..%5c..%5c..%5c..%5c../winnt/system32/cmd.exe?/c+dir+c:\\", True],
    ["https://wiki-resource.org/downloads?session_id=v1otd2", False],
    ["https://admin-console.net/home/../../../../../../../../../../../../boot.ini", True],
    ["https://target-site.org/feedback/../../../../../../../../../../../../boot.ini", True],
    ["https://target-site.org/products/view?sort=vq4kgcmuyf&lang=e8zjo5", False],
    ["https://social-media.xyz/dashboard/%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%36%35%%37%34%%36%33%%32%66%%37%30%%36%31%%37%33%%37%33%%37%37%%36%34", True],
    ["https://admin-console.net/settings?date=9o8v7m0th&action=ubskjkjilh", False],
    ["https://internal-portal.corp/register", False],
    ["https://internal-portal.corp/category?token=m0p79qnyqb8&view=zgheqa", False],
    ["https://internal-portal.corp/images?sort=hi2e3rhz85", False],
    ["https://target-site.org/home?date=%2e%2e%2f%2e%2e%2f%77%69%6e%6e%74%2f%73%79%73%74%65%6d%33%32%2f%63%6d%64%2e%65%78%65%3f%2f%63%2b%64%69%72%2b%63%3a%5c", True],
    ["https://example.com/profile?sort=/./././././././././././etc/passwd", True],
    ["https://payment-gateway.io/contact?session_id=kezipsl&session_id=8qplzvqj4l", False],
    ["https://example.com/search?action=eaz2smbrkehb", False],
    ["https://payment-gateway.io/api/v1/users?session_id=46h83", False],
    ["https://admin-console.net/faq?type=kch0sr&category=20echylxkln&token=9s0j0chjr38&view=rzit24055w8", False],
    ["https://wiki-resource.org/settings?q=6jotk62dxn", False],
    ["https://blog-space.io/faq?id=../../../../../../../../conf/server.xml", True],
    ["https://internal-portal.corp/assets/js/%5c%2e%2e%5c%57%49%4e%44%4f%57%53%5c%77%69%6e%2e%69%6e%69", True],
    ["https://blog-space.io/api/v1/users?id=%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%68%74%61%63%63%65%73%73", True],
    ["https://blog-space.io/api/v1/users?q=..%5c..%5c..%5c..%5c..%5c..%5c..%5c..%5c../winnt/system32/cmd.exe?/c+dir+c:\\", True],
    ["https://wiki-resource.org/api/v1/users?id=b4ixb&search=1undhcas78&token=fidcjpx17u&ref=6idcxjoof", False],
    ["https://social-media.xyz/settings//../../../../../../../../../../../boot.ini%00.html", True],
    ["https://bank-secure.com/settings?user=uxth8g1&q=5mc8m95n8k8p&q=s8bdg3g4s&q=bmplpn", False],
    ["https://portal.corp/about?search=0b3xp7np0nun&category=9l9e", False],
    ["https://example.com/register/\\..\\..\\..\\..\\WINDOWS\\win.ini", True],
    ["https://blog-space.io/about//cgi-bin/.%%32%65/.%%32%65/.%%32%65/.%%32%65/.%%32%65/.%%32%65/etc/passwd", True],
    ["https://social-media.xyz/faq//cgi-bin/.%%32%65/.%%32%65/.%%32%65/.%%32%65/.%%32%65/.%%32%65/.%%32%65/etc/passwd", True],
    ["https://news-daily.com/category", False],
    ["https://example.com/contact?session_id=C:/inetpub/wwwroot/global.asa", True],
    ["https://bank-secure.com/contact//..%c0%af../..%c0%af../..%c0%af../..%c0%af../..%c0%af../..%c0%af../etc/passwd", True],
    ["https://target-site.org/faq?q=/../../../../../../../../../../etc/passwd%5E%5E", True],
    ["https://portal.corp/register/../../../../../../../../../../../../etc/shadow", True],
    ["https://myshop.net/feedback/%5c%57%49%4e%44%4f%57%53%5c%77%69%6e%2e%69%6e%69", True],
    ["https://news-daily.com/home?view=kmkab&token=y2413u&sort=l78fa5y2kx", False],
    ["https://news-daily.com/about?date=0igb3bd5tkh&id=eg2ee0&lang=l7vz9v&lang=7mbk", False],
    ["https://payment-gateway.io/downloads/../../../../../../../../../../../../boot.ini%00", True],
    ["https://example.com/dashboard?id=.htaccess", True],
    ["https://payment-gateway.io/login?category=tytmbku86&search=d7cyft&type=kgj7mojj0sf8&category=yl4aq3jh", False],
    ["https://portal.corp/downloads?date=o4m8q0qf6&ref=2vleccf&view=2vtbyw&category=0eqig35g82v", False],
    ["https://example.com/api/v1/users", False],
    ["https://myshop.net/downloads/%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%36%35%%37%34%%36%33%%32%66%%37%30%%36%31%%37%33%%37%33%%37%37%%36%34", True],
    ["https://target-site.org/downloads?token=%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%2565%2574%2563%252f%2570%2561%2573%2573%2577%2564", True],
    ["https://admin-console.net/home?type=g8rrtw&page=ffnsxwr", False],
    ["https://myshop.net/api/v1/users?page=/..%c0%af../..%c0%af../..%c0%af../..%c0%af../..%c0%af../..%c0%af../etc/shadow", True],
    ["https://bank-secure.com/home?user=wtxyk8vj&session_id=cu5758tvz7&page=opppxysp2kqu&action=svkjhub0440", False],
    ["https://payment-gateway.io/products/view?view=9a1w85&date=x08873xfu4d&search=4c8r651vn0&category=6dsny2401rg6", False],
    ["https://target-site.org/home?q=9rjg&token=2meq9v&user=p20k4thsrm8", False],
    ["https://myshop.net/api/v1/users?user=if1huabk", False],
    ["https://social-media.xyz/faq/%%32%65%%32%66%%32%65%%32%66%%32%65%%36%38%%37%34%%36%31%%36%33%%36%33%%36%35%%37%33%%37%33", True],
    ["https://wiki-resource.org/feedback?session_id=np3jqgfxigtg&category=63qn7xmd673", False],
    ["https://bank-secure.com/profile?page=hw1r&lang=d9bm3rxu&date=fztjvs&token=1y0pktx", False],
    ["https://bank-secure.com/home?id=p1z3e&token=kwkupqe6j44", False],
    ["https://payment-gateway.io/category/%2e%2e%2f%77%69%6e%6e%74%2f%73%79%73%74%65%6d%33%32%2f%63%6d%64%2e%65%78%65%3f%2f%63%2b%64%69%72%2b%63%3a%5c", True],
    ["https://payment-gateway.io/products/view?q=%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%", True],
    ["https://portal.corp/home?token=%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%2565%2574%2563%252f%2570%2561%2573%2573%2577%2564", True],
    ["https://myshop.net/api/v1/users?user=/cgi-bin/.%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/etc/passwd", True],
    ["https://social-media.xyz/assets/js?type=%2500/etc/passwd%2500", True],
    ["https://target-site.org/faq/%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%65%74%63%2f%70%61%73%73%77%64", True],
    ["https://payment-gateway.io/assets/js?user=%%32%65%%32%65%%32%66%%32%65%%36%38%%37%34%%36%31%%36%33%%36%33%%36%35%%37%33%%37%33", True],
    ["https://blog-space.io/help?q=ar5xj&ref=s95yzrclxld", False],
    ["https://myshop.net/about?action=/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/%2e%2e/etc/passwd", True],
    ["https://target-site.org/register?q=5rqpnulyre1o&token=pj31mvif&token=s7mbwwgx", False],
    ["https://news-daily.com/profile", False],
    ["https://payment-gateway.io/help?category=djdd&page=3cng&action=or9yzm", False],
    ["https://target-site.org/settings?page=xuau0iiuc5k&date=tlorizb", False],
    ["https://portal.corp/profile?q=no6zcpr717zx&q=kc4s", False],
    ["https://example.com/profile", False],
    ["https://admin-console.net/register/%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%36%35%%37%34%%36%33%%32%66%%37%30%%36%31%%37%33%%37%33%%37%37%%36%34", True],
    ["https://social-media.xyz/feedback?search=uvq44mjrdi&view=vtg1f4bvgd3m&session_id=qwxilyvlmm", False],
    ["https://blog-space.io/settings?type=sjlmerk&session_id=q38a56k2w5", False],
    ["https://example.com/images?date=2kem&token=92i8", False],
    ["https://wiki-resource.org/downloads/%2e%2e%2f%2e%2e%2f%2e%2e%2f%77%69%6e%6e%74%2f%73%79%73%74%65%6d%33%32%2f%63%6d%64%2e%65%78%65%3f%2f%63%2b%64%69%72%2b%63%3a%5c", True],
    ["https://payment-gateway.io/login?user=l6l5vo&lang=kv1d&action=22oodq3o&q=r34tl3", False],
    ["https://internal-portal.corp/api/v1/users/%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%57%49%4e%44%4f%57%53%5c%77%69%6e%2e%69%6e%69", True],
    ["https://example.com/faq?q=../../../../../../../../../../../../etc/hosts%2500", True],
    ["https://news-daily.com/login?action=/%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2500", True],
    ["https://myshop.net/login?sort=4dznw&id=0ezv3an9nml", False],
    ["https://social-media.xyz/dashboard?category=ac5ihp41n&q=lvlbljpo", False],
    ["https://blog-space.io/category?type=%252e%252e%252f%252e%252e%252f%252e%252e%252f%252e%252e%252f%2565%2574%2563%252f%2570%2561%2573%2573%2577%2564", True],
    ["https://example.com/settings/\\..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\etc\\shadow", True],
    ["https://admin-console.net/search?type=8i2ekkxpbeh8&search=csnro4", False],
    ["https://news-daily.com/api/v1/users?id=%255c%252e%252e%255c%252e%252e%255c%252e%252e%255c%252e%252e%255c%2557%2549%254e%2544%254f%2557%2553%255c%2577%2569%256e%252e%2569%256e%2569", True],
    ["https://wiki-resource.org/about?action=%5c%57%49%4e%44%4f%57%53%5c%77%69%6e%2e%69%6e%69", True],
    ["https://bank-secure.com/products/view?lang=%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%68%74%61%63%63%65%73%73", True],
    ["https://wiki-resource.org/home?search=../../../../../../../../../../../../etc/hosts%00", True],
    ["https://blog-space.io/category?view=0mfw&search=gnnn&token=vl65wve", False],
    ["https://myshop.net/contact?type=/cgi-bin/.%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/%252e%252e/etc/passwd", True],
    ["https://payment-gateway.io/register/..%5c..%5c..%5c..%5c..%5c..%5c..%5c..%5c../winnt/system32/cmd.exe?/c+dir+c:\\", True],
    ["https://portal.corp/api/v1/users?q=sq12ph4&search=hf18if24szi", False],
    ["https://admin-console.net/about?search=%%32%65%%32%66%%32%65%%32%66%%32%65%%36%38%%37%34%%36%31%%36%33%%36%33%%36%35%%37%33%%37%33", True],
    ["https://example.com/feedback?id=x3hnoosmn1&category=gs1agh8ed01x&action=efa1ck", False],
    ["https://admin-console.net/category?ref=j54d", False],
    ["https://blog-space.io/feedback/%2e%68%74%61%63%63%65%73%73", True],
    ["https://admin-console.net/contact?page=ombszhldekl&sort=zw1nck&q=l4gcz", False],
    ["https://internal-portal.corp/help/%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%32%65%%32%65%%32%66%%36%35%%37%34%%36%33%%32%66%%37%30%%36%31%%37%33%%37%33%%37%37%%36%34", True],
    ["https://internal-portal.corp/downloads?ref=c9sw09bo&sort=rjwoqke3v1", False],
    ["https://social-media.xyz/help?search=gi5k&page=j410872gi9c&view=6g8l19re", False],
    ["https://admin-console.net/about?search=3sa3pjg4h&search=xlbjm4&token=kd3u2lg", False],
    ["https://payment-gateway.io/settings?category=6k82v&token=g0mjy89pnytm", False],
    ["https://target-site.org/feedback?q=xcy4aix8dq&session_id=auq5&view=qp0u5ga1j", False],
    ["https://social-media.xyz/settings", False],
    ["https://social-media.xyz/assets/js/%00../../../../../../etc/passwd", True],
    ["https://bank-secure.com/feedback?sort=qviu&action=tkeb", False],
    ["https://news-daily.com/api/v1/users?session_id=%2e%2e%2f%2e%2e%2f%77%69%6e%6e%74%2f%73%79%73%74%65%6d%33%32%2f%63%6d%64%2e%65%78%65%3f%2f%63%2b%64%69%72%2b%63%3a%5c", True],
    ["https://blog-space.io/home//../../../../../../../../../../../etc/passwd%00.jpg", True],
    ["https://blog-space.io/settings", False],
    ["https://example.com/home", False],
    ["https://admin-console.net/faq?type=%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%77%69%6e%6e%74%2f%73%79%73%74%65%6d%33%32%2f%63%6d%64%2e%65%78%65%3f%2f%63%2b%64%69%72%2b%63%3a%5c", True],
    ["https://target-site.org/downloads?search=..%c0%af../..%c0%af../..%c0%af../..%c0%af../..%c0%af../..%c0%af../boot.ini", True],
    ["https://payment-gateway.io/category?action=/../../../../../../../../../../../boot.ini%00.html", True],
    ["https://social-media.xyz/products/view?search=../../../../../../../../../../../../etc/hosts", True],
    ["https://target-site.org/about?token=b29pw&category=iifgn&sort=yy3rv68fnvei&page=9y4gkm", False],
    ["https://payment-gateway.io/profile/..\\..\\..\\..\\..\\..\\..\\..\\..\\..\\etc\\passwd%00", True],
    ["https://internal-portal.corp/assets/js/%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%2e%2e%2f%77%69%6e%6e%74%2f%73%79%73%74%65%6d%33%32%2f%63%6d%64%2e%65%78%65%3f%2f%63%2b%64%69%72%2b%63%3a%5c", True],
    ["https://target-site.org/about?page=C%3A/boot.ini", True],
    ["https://portal.corp/profile?user=/..%5C../..%5C../..%5C../..%5C../..%5C../..%5C../etc/passwd", True],
    ["https://social-media.xyz/dashboard", False],
    ["https://wiki-resource.org/dashboard?date=9va3lzbo4xoc&search=ioe81q3iic13&user=bmcau4z3g3ps&view=twthl8", False],
    ["https://payment-gateway.io/login?session_id=eayx", False],
    ["https://portal.corp/dashboard?action=%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%2e%2e%5c%57%49%4e%44%4f%57%53%5c%77%69%6e%2e%69%6e%69", True],
    ["https://admin-console.net/home?session_id=/../../../../../../../../../../../etc/passwd%00.jpg", True],
    ["https://portal.corp/home?session_id=%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2525%255c..%2500", True],
    ["https://social-media.xyz/about?user=/../../../../../../../../../../etc/shadow", True],
]


In [25]:
print("Loading AE artifacts...")

# Load bundle: a JSON manifest holding the hyperparameters, the anomaly
# threshold (AE_T2) and, under "files", the paths of the other artifacts.
with open(AE_BUNDLE_FILE, "r", encoding="utf-8") as f:
    bundle = json.load(f)

# NOTE: these assignments rebind the notebook-level HYB_MAX_LEN / AE_EMB /
# AE_LATENT constants from the configuration cell with the values stored in
# the bundle, so the model rebuilt below uses the saved hyperparameters.
HYB_MAX_LEN = int(bundle["HYB_MAX_LEN"])
AE_T2 = float(bundle["AE_T2"])
AE_EMB = int(bundle["AE_EMB"])
AE_LATENT = int(bundle["AE_LATENT"])

# Load vocab: "itos" is the index -> character list; stoi is its inverse.
with open(bundle["files"]["CHAR_VOCAB_FILE"], "r", encoding="utf-8") as f:
    itos = json.load(f)["itos"]
stoi = {ch:i for i,ch in enumerate(itos)}
# Both special tokens must be present in the vocabulary; a missing one raises KeyError here.
pad_id = stoi["<PAD>"]
unk_id = stoi["<UNK>"]
vocab_size = len(itos)

def encode_text_to_ids_runtime(text: str, max_len: int):
    """Encode ``text`` as a fixed-length tensor of character ids.

    ``None`` is treated as the empty string and any other value is passed
    through ``str()``. The text is truncated to ``max_len`` characters,
    characters missing from ``stoi`` map to ``unk_id``, and short inputs are
    right-padded with ``pad_id``.

    Returns:
        A 1-D ``torch.long`` tensor (length ``max_len`` for non-negative
        ``max_len``).
    """
    chars = str(text)[:max_len] if text is not None else ""
    token_ids = [stoi.get(symbol, unk_id) for symbol in chars]
    # A non-positive repeat count yields an empty list, so nothing is appended
    # when the text already fills the window.
    token_ids.extend([pad_id] * (max_len - len(token_ids)))
    return torch.tensor(token_ids, dtype=torch.long)

# Load AE model: rebuild the architecture with the hyperparameters read from
# the bundle, then restore the saved weights onto DEVICE.
ae = CharAutoencoder(vocab_size, AE_EMB, AE_LATENT, pad_id).to(DEVICE)
# NOTE(review): torch.load deserializes with pickle unless weights_only is in
# effect -- only load checkpoints from a trusted source; consider passing
# weights_only=True where the installed torch version supports it.
ae.load_state_dict(torch.load(bundle["files"]["AE_MODEL_FILE"], map_location=DEVICE))
ae.eval()  # switch to inference mode before scoring

print(" Loaded AE model")
print(f"Threshold AE_T2: {AE_T2:.6f}")

Loading AE artifacts...
 Loaded AE model
Threshold AE_T2: 3.796452


In [26]:
# HTTP verbs; one is drawn at random for every test payload when the scoring
# text is assembled below.
METHODS = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']

# Per-position cross-entropy: reduction="none" keeps one loss value per target
# position, and positions whose target is pad_id are ignored.
ce_tok = nn.CrossEntropyLoss(ignore_index=pad_id, reduction="none")

@torch.no_grad()
def ae_score_single(x_ids: torch.Tensor) -> float:
    """Return the autoencoder reconstruction score for one encoded sequence.

    The sequence gets a leading batch dimension, is moved to ``DEVICE`` and fed
    through ``ae``. The score is the cross-entropy between the returned logits
    and the input ids, averaged over the non-padding positions.

    Args:
        x_ids: id tensor for a single sample (presumably 1-D, as produced by
            ``encode_text_to_ids_runtime`` -- confirm against the caller).

    Returns:
        The mean per-token loss as a Python float.
    """
    batch = x_ids.unsqueeze(0).to(DEVICE)
    logits, _unused = ae(batch)
    token_loss = ce_tok(logits, batch)
    keep = (batch != pad_id).float()
    # Clamp so an all-padding input divides by 1 rather than 0.
    n_tokens = keep.sum(dim=1).clamp(min=1.0)
    mean_loss = (token_loss * keep).sum(dim=1) / n_tokens
    return float(mean_loss.item())

# Collect predictions: score every (url, expected_label) pair with the AE.
print("Evaluating payloads with AE-only...")
results = []
for url, expected in payloads:
    # The verb comes from the global `random` state, so the exact scores depend
    # on the RNG state at the moment this cell is executed.
    method = random.choice(METHODS)

    try:
        parsed = urlparse(url)
        path_query = parsed.path + ("?" + parsed.query if parsed.query else "")
    except (TypeError, ValueError):
        # urlparse raises ValueError on malformed URLs; fall back to the raw
        # value. A bare `except:` would also swallow KeyboardInterrupt and
        # SystemExit, so only the parsing failures are caught.
        path_query = url
    # Only the method and path+query are scored; the host is not part of the text.
    text = f"METHOD={method} | PATH={path_query}"

    x_ids = encode_text_to_ids_runtime(text, HYB_MAX_LEN)
    s_ae = ae_score_single(x_ids)
    results.append({
        'url': url[:55],  # truncated for the report table
        'expected': int(expected),
        'ae_score': s_ae
    })

y_true = np.array([r['expected'] for r in results])
ae_scores = np.array([r['ae_score'] for r in results])

# Threshold search
print(f"\n{'='*80}")
print("THRESHOLD OPTIMIZATION")
print(f"{'='*80}")

# Candidate thresholds scanned by the search: [start, stop) in fixed steps.
THRESH_SEARCH_START = 3.5
THRESH_SEARCH_STOP = 5.5
THRESH_SEARCH_STEP = 0.05


def _binary_metrics(preds, labels):
    """Return (accuracy, precision, recall, f1, tp, tn, fp, fn) for 0/1 arrays.

    Precision and recall use a denominator floor of 1 and F1 a floor of 0.001,
    so degenerate cases (no positive predictions / no positive labels) yield 0
    instead of dividing by zero.
    """
    tp = ((preds == 1) & (labels == 1)).sum()
    tn = ((preds == 0) & (labels == 0)).sum()
    fp = ((preds == 1) & (labels == 0)).sum()
    fn = ((preds == 0) & (labels == 1)).sum()
    precision = tp / max(1, tp + fp)
    recall = tp / max(1, tp + fn)
    f1 = 2 * precision * recall / max(0.001, precision + recall)
    accuracy = (preds == labels).mean()
    return accuracy, precision, recall, f1, tp, tn, fp, fn


# NOTE: the threshold is tuned on the very payloads it is evaluated on, so the
# metrics reported for `best_t` are optimistic; AE_T2 is the value that was
# stored in the bundle.
# Selection rule: highest accuracy, ties broken by higher F1; AE_T2 is kept
# only if no candidate reaches an accuracy above 0.
best_t, best_acc, best_f1 = AE_T2, 0, 0
for t in np.arange(THRESH_SEARCH_START, THRESH_SEARCH_STOP, THRESH_SEARCH_STEP):
    preds = (ae_scores >= t).astype(int)
    acc, prec, rec, f1, *_ = _binary_metrics(preds, y_true)
    if acc > best_acc or (acc == best_acc and f1 > best_f1):
        best_acc = acc
        best_f1 = f1
        best_t = t

print(f"Best Threshold: {best_t:.2f} → Accuracy: {best_acc:.3f}, F1: {best_f1:.3f}")
print(f"Original Threshold: {AE_T2:.4f}")

# Final metrics with best threshold
ae_preds = (ae_scores >= best_t).astype(int)
final_acc, precision, recall, f1, tp, tn, fp, fn = _binary_metrics(ae_preds, y_true)

print(f"\n{'='*80}")
print(f" AE-ONLY RESULTS (threshold={best_t:.2f})")
print(f"{'='*80}")
print(f"   Accuracy:  {final_acc:.3f}")
print(f"   Precision: {precision:.3f}")
print(f"   Recall:    {recall:.3f}")
print(f"   F1 Score:  {f1:.3f}")
print(f"   TP={tp}, TN={tn}, FP={fp}, FN={fn}")

# Detailed predictions: one row per payload, marked with whether the
# thresholded prediction (score >= best_t) matches the expected label.
print(f"\n{'='*80}")
print("DETAILED PREDICTIONS")
print(f"{'='*80}")
print(f"{'URL':<57} | Exp | Pred | AE_Score")
print("-" * 85)
for r in results:
    pred = 1 if r['ae_score'] >= best_t else 0
    # The marks were mojibake ("âœ“"/"âœ—") of the intended check / cross glyphs.
    correct = "✓" if pred == r['expected'] else "✗"
    print(f"{r['url']:<57} | {r['expected']}   | {pred}    | {r['ae_score']:.4f} {correct}")

# Score distribution: min / max / mean of the AE score for each true class.
rule = "=" * 80
print("\n" + rule)
print("SCORE DISTRIBUTION")
print(rule)
benign_scores = ae_scores[y_true == 0]
malicious_scores = ae_scores[y_true == 1]
for class_label, class_scores in (("Benign:", benign_scores), ("Malicious:", malicious_scores)):
    lo, hi, avg = class_scores.min(), class_scores.max(), class_scores.mean()
    # Labels are left-padded to a common width so the two rows line up.
    print(f"{class_label:<10} min={lo:.4f}, max={hi:.4f}, mean={avg:.4f}")
print(f"Best threshold: {best_t:.2f} (between classes)")




# Two-stage cut-offs: scores at or above LOW are "suspicious", scores at or
# above HIGH are "definitely malicious".
LOW_THRESHOLD = 3.85
HIGH_THRESHOLD = 4.90


def _report_stage(heading, stage_preds, labels):
    """Print the metrics for one 0/1 prediction vector and return them.

    Returns (tp, tn, fp, fn, precision, recall, f1). Precision/recall use a
    denominator floor of 1 and F1 a floor of 0.001 to avoid division by zero.
    """
    print(heading)
    tp = ((stage_preds == 1) & (labels == 1)).sum()
    tn = ((stage_preds == 0) & (labels == 0)).sum()
    fp = ((stage_preds == 1) & (labels == 0)).sum()
    fn = ((stage_preds == 0) & (labels == 1)).sum()
    prec = tp / max(1, tp + fp)
    rec = tp / max(1, tp + fn)
    f1 = 2 * prec * rec / max(0.001, prec + rec)
    print(f"   Accuracy:  {(stage_preds == labels).mean():.3f}")
    print(f"   Precision: {prec:.3f}")
    print(f"   Recall:    {rec:.3f}")
    print(f"   F1 Score:  {f1:.3f}")
    print(f"   TP={tp}, TN={tn}, FP={fp}, FN={fn}")
    return tp, tn, fp, fn, prec, rec, f1


print(f"\n{'='*80}")
print("TWO-STAGE THRESHOLD CLASSIFICATION")
print(f"{'='*80}")
print(f"LOW_THRESHOLD (Suspicious): {LOW_THRESHOLD}")
print(f"HIGH_THRESHOLD (Malicious): {HIGH_THRESHOLD}")

# Classify with two stages: 0 = benign, 1 = suspicious, 2 = definitely malicious.
two_stage_preds = np.where(
    ae_scores >= HIGH_THRESHOLD,
    2,
    np.where(ae_scores >= LOW_THRESHOLD, 1, 0),
)

# Calculate for "malicious if suspicious OR definitely malicious"
suspicious_or_malicious = (two_stage_preds >= 1).astype(int)
# Calculate for "malicious only if definitely malicious"
definitely_malicious = (two_stage_preds >= 2).astype(int)

tp1, tn1, fp1, fn1, prec1, rec1, f1_1 = _report_stage(
    f"\n📊 LOW threshold ({LOW_THRESHOLD}) - Catches suspicious + malicious:",
    suspicious_or_malicious,
    y_true,
)

tp2, tn2, fp2, fn2, prec2, rec2, f1_2 = _report_stage(
    f"\n HIGH threshold ({HIGH_THRESHOLD}) - Only definitely malicious:",
    definitely_malicious,
    y_true,
)

print("\n CLASSIFICATION BREAKDOWN:")
print(f"   Benign (score < {LOW_THRESHOLD}): {(two_stage_preds == 0).sum()}")
print(f"   Suspicious ({LOW_THRESHOLD} <= score < {HIGH_THRESHOLD}): {(two_stage_preds == 1).sum()}")
print(f"   Definitely Malicious (score >= {HIGH_THRESHOLD}): {(two_stage_preds == 2).sum()}")

Evaluating payloads with AE-only...

THRESHOLD OPTIMIZATION
Best Threshold: 3.75 → Accuracy: 0.995, F1: 0.995
Original Threshold: 3.7965

 AE-ONLY RESULTS (threshold=3.75)
   Accuracy:  0.995
   Precision: 1.000
   Recall:    0.990
   F1 Score:  0.995
   TP=99, TN=100, FP=0, FN=1

DETAILED PREDICTIONS
URL                                                       | Exp | Pred | AE_Score
-------------------------------------------------------------------------------------
https://example.com/about?date=90shoq4                    | 0   | 0    | 3.1119 ✓
https://payment-gateway.io/assets/js/%%32%65%%32%65%%32   | 1   | 1    | 39.7847 ✓
https://social-media.xyz/profile/%2e%68%74%61%63%63%65%   | 1   | 1    | 20.3943 ✓
https://admin-console.net/search?date=gtcikalgdjs&id=te   | 0   | 0    | 3.4464 ✓
https://wiki-resource.org/settings?date=../../../../../   | 1   | 1    | 10.7803 ✓
https://blog-space.io/downloads?ref=lyhv                  | 0   | 0    | 3.0237 ✓
https://portal.corp/