In [1]:
!pip install scapy tqdm -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m1.5/2.4 MB[0m [31m43.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:

from scapy.all import rdpcap, TCP, Raw
from collections import Counter
from tqdm import tqdm


pcap_path = "/content/veth5bbeaa2-normal-13.pcap"

print(f"Loading PCAP from: {pcap_path}")
packets = rdpcap(pcap_path)
print(f"Total packets in PCAP: {len(packets)}")

def is_modbus_tcp(pkt):

    if not pkt.haslayer(TCP) or not pkt.haslayer(Raw):
        return False

    tcp = pkt[TCP]
    if tcp.sport != 502 and tcp.dport != 502:
        return False

    payload = bytes(pkt[Raw].load)

    if len(payload) < 7:
        return False

    proto_id = int.from_bytes(payload[2:4], "big")
    if proto_id != 0:
        return False

    return True

modbus_count = 0
func_codes = Counter()
unit_ids = Counter()
ports = Counter()
directions = Counter()

for pkt in tqdm(packets, desc="Scanning packets"):
    if not is_modbus_tcp(pkt):
        continue

    modbus_count += 1
    tcp = pkt[TCP]
    ports[(tcp.sport, tcp.dport)] += 1

    payload = bytes(pkt[Raw].load)
    unit_id = payload[6]

    if len(payload) >= 8:
        fc = payload[7]
    else:
        fc = None

    unit_ids[unit_id] += 1
    if fc is not None:
        func_codes[fc] += 1


    if tcp.sport == 502:
        directions["srv->cli"] += 1
    elif tcp.dport == 502:
        directions["cli->srv"] += 1
    else:
        directions["unknown"] += 1

print("\n===== BASIC MODBUS STATS =====")
print(f"Total Modbus/TCP packets: {modbus_count}")
print(f"Directions: {dict(directions)}")
print(f"Top (sport,dport) pairs: {ports.most_common(10)}")
print("\nFunction code counts:")
for fc, cnt in sorted(func_codes.items()):
    print(f"  FC {fc}: {cnt} packets")

print("\nUnit ID counts:")
for uid, cnt in sorted(unit_ids.items()):
    print(f"  Unit ID {uid}: {cnt} packets")


Loading PCAP from: /content/veth5bbeaa2-normal-13.pcap
Total packets in PCAP: 1176355


Scanning packets: 100%|██████████| 1176355/1176355 [00:12<00:00, 97956.34it/s] 


===== BASIC MODBUS STATS =====
Total Modbus/TCP packets: 220646
Directions: {'cli->srv': 110326, 'srv->cli': 110320}
Top (sport,dport) pairs: [((34546, 502), 9), ((502, 34546), 9), ((34548, 502), 9), ((502, 34548), 9), ((34554, 502), 9), ((502, 34554), 9), ((47198, 502), 9), ((502, 47198), 9), ((47200, 502), 9), ((502, 47200), 9)]

Function code counts:
  FC 1: 87970 packets
  FC 2: 43982 packets
  FC 3: 44050 packets
  FC 4: 43990 packets
  FC 5: 222 packets
  FC 6: 432 packets

Unit ID counts:
  Unit ID 1: 220646 packets





In [3]:
!pip install scapy tqdm -q

from scapy.all import rdpcap, wrpcap, IP, TCP, Raw
from collections import defaultdict
import struct
from tqdm import tqdm
import numpy as np

import os, math, json, random
from collections import Counter

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from scapy.all import rdpcap, wrpcap, IP, TCP, Raw
from tqdm import tqdm


In [4]:
import struct

def parse_modbus_fields(pkt):
    """
    Parse MBAP + minimal PDU fields from a Modbus/TCP packet.
    Returns dict or None if not Modbus.
    """
    if not is_modbus_tcp(pkt):
        return None

    tcp = pkt[TCP]
    raw = bytes(pkt[Raw].load)
    if len(raw) < 8:
        return None

    tx_id    = int.from_bytes(raw[0:2], "big")
    proto_id = int.from_bytes(raw[2:4], "big")
    length   = int.from_bytes(raw[4:6], "big")
    unit_id  = raw[6]
    fc       = raw[7]
    pdu      = raw[8:]

    d = {
        "time": float(pkt.time),
        "src_port": tcp.sport,
        "dst_port": tcp.dport,
        "tx_id": tx_id,
        "proto_id": proto_id,
        "length": length,
        "unit_id": unit_id,
        "func_code": fc,
        "is_exception": bool(fc & 0x80),
    }

    if d["is_exception"]:
        d["exception_code"] = pdu[0] if len(pdu) > 0 else None
        return d


    if fc in [1, 2, 3, 4] and len(pdu) >= 4:
        d["address"]  = struct.unpack(">H", pdu[0:2])[0]
        d["quantity"] = struct.unpack(">H", pdu[2:4])[0]


    if fc in [5, 6] and len(pdu) >= 4:
        d["address"]  = struct.unpack(">H", pdu[0:2])[0]
        d["value"]    = struct.unpack(">H", pdu[2:4])[0]


    if fc in [1, 2, 3, 4] and len(pdu) >= 1:
        bytecount = pdu[0]
        if bytecount + 1 <= len(pdu):
            d["resp_bytecount"] = bytecount
            d["resp_data_len"]  = len(pdu[1:1+bytecount])

    d["pdu_len"] = len(pdu)
    d["payload_len"] = len(raw)

    return d


def build_transactions(packets):
    """
    Group Modbus packets into request/response transactions.
    Returns a list of transaction dicts in temporal order.
    """

    pending = {}
    transactions = []

    for pkt in packets:
        if not is_modbus_tcp(pkt):
            continue
        d = parse_modbus_fields(pkt)
        if d is None:
            continue

        tcp = pkt[TCP]
        time = float(pkt.time)

        if tcp.dport == 502:

            key = (tcp.sport, tcp.dport, d["tx_id"])
            pending[key] = {
                "req_pkt": pkt,
                "req_fields": d,
                "req_time": time,
            }
        elif tcp.sport == 502:

            key = (tcp.dport, tcp.sport, d["tx_id"])
            if key not in pending:
                continue
            entry = pending.pop(key)
            entry["resp_pkt"] = pkt
            entry["resp_fields"] = d
            entry["resp_time"] = time
            transactions.append(entry)

    transactions.sort(key=lambda t: t["req_time"])
    return transactions


transactions = build_transactions(packets)
print("Total transactions built:", len(transactions))
print("Example transaction keys:", list(transactions[0].keys()))


Total transactions built: 110318
Example transaction keys: ['req_pkt', 'req_fields', 'req_time', 'resp_pkt', 'resp_fields', 'resp_time']


In [5]:
import numpy as np
from collections import Counter

all_addrs = []
all_qty   = []
all_val   = []
all_dt    = []

for tr in transactions:
    rf = tr["req_fields"]
    addr = rf.get("address", 0)
    qty  = rf.get("quantity", 1)
    val  = rf.get("value", 0)

    dt = max(tr["resp_time"] - tr["req_time"], 0.0)

    all_addrs.append(addr)
    all_qty.append(qty)
    all_val.append(val)
    all_dt.append(dt)

max_addr = max(all_addrs) if all_addrs else 1
max_qty  = max(all_qty)   if all_qty   else 1
max_val  = max(all_val)   if all_val   else 1
max_dt   = max(all_dt)    if all_dt    else 1e-3

print("max_addr:", max_addr, "max_qty:", max_qty, "max_val:", max_val, "max_dt:", max_dt)

MAX_ADDR      = max_addr
MAX_QTY       = max_qty
MAX_VAL       = max_val
MAX_DREQRESP = float(np.percentile(all_dt, 95))
print("dt percentiles:",
      np.percentile(all_dt, [50, 90, 95, 99]))
MAX_RBC       = 252
MAX_RDL       = 252


fc_counter = Counter(tr["req_fields"]["func_code"] for tr in transactions)
FUNC_CODES = sorted(fc_counter.keys())
fc_to_idx  = {fc: i for i, fc in enumerate(FUNC_CODES)}
NUM_FC     = len(FUNC_CODES)

print("FUNC_CODES:", FUNC_CODES)


def transaction_to_feature(tr):

    rf = tr["req_fields"]
    sf = tr["resp_fields"]
    req_time = tr["req_time"]
    resp_time = tr["resp_time"]


    dt_req_resp = max(resp_time - req_time, 0.0)
    dt_req_resp_01 = dt_req_resp / max_dt            # [0,1]
    dt_req_resp_norm = dt_req_resp_01 * 2.0 - 1.0    # [-1,1]


    fc = rf["func_code"]
    fc_idx = fc_to_idx[fc]
    fc_onehot = np.zeros(NUM_FC, dtype=np.float32)
    fc_onehot[fc_idx] = 1.0

    addr = rf.get("address", 0)
    qty  = rf.get("quantity", 1)
    val  = rf.get("value", 0)


    addr_01 = addr / max_addr if max_addr > 0 else 0.0
    qty_01  = qty  / max_qty  if max_qty  > 0 else 0.0
    val_01  = val  / max_val  if max_val  > 0 else 0.0

    addr_norm = addr_01 * 2.0 - 1.0
    qty_norm  = qty_01  * 2.0 - 1.0
    val_norm  = val_01  * 2.0 - 1.0


    req_pl_01  = rf.get("payload_len", 0)  / 260.0
    resp_pl_01 = sf.get("payload_len", 0) / 260.0
    req_payload_len_norm  = req_pl_01  * 2.0 - 1.0
    resp_payload_len_norm = resp_pl_01 * 2.0 - 1.0

    is_exc_norm = 1.0 if sf.get("is_exception", False) else -1.0

    bc_01 = sf.get("resp_bytecount", 0) / 252.0
    dl_01 = sf.get("resp_data_len", 0) / 252.0
    bytecount_norm = bc_01 * 2.0 - 1.0
    data_len_norm  = dl_01 * 2.0 - 1.0

    dt_next_req_norm = 0.0

    base = np.array([
        dt_req_resp_norm,
        addr_norm,
        qty_norm,
        val_norm,
        req_payload_len_norm,
        resp_payload_len_norm,
        is_exc_norm,
        bytecount_norm,
        data_len_norm,
        dt_next_req_norm,
    ], dtype=np.float32)

    feat = np.concatenate([base, fc_onehot], axis=0)
    return feat


feature_list = [transaction_to_feature(tr) for tr in transactions]
features = np.stack(feature_list, axis=0)

BASE_FEATS = 10
FEATURE_DIM = features.shape[1]

for i in range(len(transactions) - 1):
    curr_resp = transactions[i]["resp_time"]
    next_req  = transactions[i + 1]["req_time"]
    dt_next   = max(next_req - curr_resp, 0.0)
    dt_next_01  = dt_next / max_dt
    dt_next_norm = dt_next_01 * 2.0 - 1.0
    features[i, 9] = dt_next_norm

features[-1, 9] = -1.0

print("Feature matrix shape:", features.shape)
print("Example row:", features[0])


max_addr: 35 max_qty: 1 max_val: 65280 max_dt: 28.91619300842285
dt percentiles: [0.00111794 0.00194287 0.00245323 0.00593763]
FUNC_CODES: [1, 2, 3, 4, 5, 6]
Feature matrix shape: (110318, 16)
Example row: [-0.99996716  1.          1.         -1.         -0.9076923  -0.9153846
 -1.         -0.984127   -0.984127   -0.99857694  0.          0.
  1.          0.          0.          0.        ]


In [6]:
features.shape
features[0]

array([-0.99996716,  1.        ,  1.        , -1.        , -0.9076923 ,
       -0.9153846 , -1.        , -0.984127  , -0.984127  , -0.99857694,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ], dtype=float32)

In [7]:

print("Using PCAP:", pcap_path)
print("Total packets:", len(packets))


Using PCAP: /content/veth5bbeaa2-normal-13.pcap
Total packets: 1176355


In [8]:
class TransactionDataset(Dataset):
    def __init__(self, features):
        self.x = torch.from_numpy(features).float()

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        return self.x[idx]

dataset = TransactionDataset(features)
dataloader = DataLoader(dataset, batch_size=512, shuffle=True, drop_last=True)

print("Num batches:", len(dataloader))


Num batches: 215


In [9]:
def sinusoidal_embedding(timesteps, dim):
    device = timesteps.device
    half = dim // 2
    freqs = torch.exp(
        -math.log(10000) * torch.arange(0, half, device=device).float() / half
    )
    args = timesteps.float().unsqueeze(1) * freqs.unsqueeze(0)
    emb = torch.cat([torch.sin(args), torch.cos(args)], dim=1)
    if dim % 2 == 1:
        emb = torch.cat([emb, torch.zeros_like(emb[:, :1])], dim=1)
    return emb


class DiffusionMLP(nn.Module):
    def __init__(self, feature_dim, time_emb_dim=64, hidden_dim=256):
        super().__init__()
        self.time_mlp = nn.Sequential(
            nn.Linear(time_emb_dim, hidden_dim),
            nn.SiLU(),
        )
        self.net = nn.Sequential(
            nn.Linear(feature_dim + hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, feature_dim),
            nn.Tanh()
        )
        self.time_emb_dim = time_emb_dim

    def forward(self, x, t):
        t_emb = sinusoidal_embedding(t, self.time_emb_dim)
        t_h = self.time_mlp(t_emb)
        h = torch.cat([x, t_h], dim=1)
        return self.net(h)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DiffusionMLP(FEATURE_DIM).to(device)

checkpoint_path = "/content/diffusion_mlp.pth"

if os.path.exists(checkpoint_path):
    print(f"Loading trained model from {checkpoint_path}")
    state = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(state)
    model.eval()
else:
    print("No saved model found – will need to train the model.")


No saved model found – will need to train the model.


In [10]:
T = 1000

def cosine_beta_schedule(timesteps, s=0.008):
    steps = timesteps + 1
    x = torch.linspace(0, timesteps, steps)
    alphas_cumprod = torch.cos(((x / timesteps) + s) / (1 + s) * math.pi * 0.5) ** 2
    alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
    betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
    return torch.clamp(betas, 1e-5, 0.999)

betas = torch.linspace(1e-4, 0.02, T).to(device)
alphas = 1.0 - betas
alphas_cumprod = torch.cumprod(alphas, dim=0)
alphas_cumprod_prev = torch.cat([torch.tensor([1.0], device=device), alphas_cumprod[:-1]], dim=0)

sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod)
sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - alphas_cumprod)


def q_sample(x_start, t, noise=None):

    if noise is None:
        noise = torch.randn_like(x_start)
    sqrt_ac = sqrt_alphas_cumprod[t].unsqueeze(-1)
    sqrt_om = sqrt_one_minus_alphas_cumprod[t].unsqueeze(-1)
    return sqrt_ac * x_start + sqrt_om * noise


optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

EPOCHS = 50
for epoch in range(1, EPOCHS+1):
    model.train()
    total_loss = 0.0
    for x0 in dataloader:
        x0 = x0.to(device)
        bsz = x0.shape[0]
        t = torch.randint(0, T, (bsz,), device=device, dtype=torch.long)
        noise = torch.randn_like(x0)
        x_t = q_sample(x0, t, noise)

        noise_pred = model(x_t, t)
        loss = F.mse_loss(noise_pred, noise)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * bsz

    epoch_loss = total_loss / len(dataset)
    print(f"[Epoch {epoch}] loss={epoch_loss:.6f}")

checkpoint_path = "/content/diffusion_mlp.pth"
torch.save(model.state_dict(), checkpoint_path)
print(f"Saved trained model to {checkpoint_path}")


[Epoch 1] loss=0.309903
[Epoch 2] loss=0.222926
[Epoch 3] loss=0.217718
[Epoch 4] loss=0.211697
[Epoch 5] loss=0.206828
[Epoch 6] loss=0.204603
[Epoch 7] loss=0.201817
[Epoch 8] loss=0.201056
[Epoch 9] loss=0.198766
[Epoch 10] loss=0.198875
[Epoch 11] loss=0.197284
[Epoch 12] loss=0.196851
[Epoch 13] loss=0.195977
[Epoch 14] loss=0.195527
[Epoch 15] loss=0.195903
[Epoch 16] loss=0.195178
[Epoch 17] loss=0.194519
[Epoch 18] loss=0.194636
[Epoch 19] loss=0.195005
[Epoch 20] loss=0.194476
[Epoch 21] loss=0.193677
[Epoch 22] loss=0.193062
[Epoch 23] loss=0.193275
[Epoch 24] loss=0.193471
[Epoch 25] loss=0.192751
[Epoch 26] loss=0.192034
[Epoch 27] loss=0.192267
[Epoch 28] loss=0.191260
[Epoch 29] loss=0.191095
[Epoch 30] loss=0.190489
[Epoch 31] loss=0.190881
[Epoch 32] loss=0.189223
[Epoch 33] loss=0.190321
[Epoch 34] loss=0.188594
[Epoch 35] loss=0.189191
[Epoch 36] loss=0.187880
[Epoch 37] loss=0.187258
[Epoch 38] loss=0.187032
[Epoch 39] loss=0.186019
[Epoch 40] loss=0.185381
[Epoch 41

In [12]:
@torch.no_grad()
def p_sample(model, x_t, t):
    """
    Single reverse step p(x_{t-1} | x_t)
    """
    bet = betas[t]
    sqrt_one_minus_ac = sqrt_one_minus_alphas_cumprod[t]
    sqrt_recip_alpha = 1.0 / torch.sqrt(alphas[t])

    eps_theta = model(x_t, torch.full((x_t.shape[0],), t, device=device, dtype=torch.long))
    mean = sqrt_recip_alpha * (x_t - bet / sqrt_one_minus_ac * eps_theta)
    if t == 0:
        return mean
    noise = torch.randn_like(x_t)
    sigma = torch.sqrt(bet)
    return mean + sigma * noise


@torch.no_grad()
def sample_synthetic(model, num_samples):
    x = ddim_sample(model, T, (num_samples, FEATURE_DIM), eta=0.0)
    x = x.clamp(-1.0, 1.0)
    return x.cpu().numpy()

@torch.no_grad()
def ddim_sample(model, n_steps, shape, eta=0.0):
    model.eval()
    x = torch.randn(shape, device=device)

    for i in reversed(range(n_steps)):
        t = torch.full((shape[0],), i, device=device, dtype=torch.long)

        eps = model(x, t)

        alpha_t = alphas_cumprod[i]
        alpha_prev = alphas_cumprod[i - 1] if i > 0 else torch.tensor(1.0, device=device)

        x0 = (x - torch.sqrt(1 - alpha_t) * eps) / torch.sqrt(alpha_t)
        x0 = x0.clamp(-1.0, 1.0)

        dir_xt = torch.sqrt(1 - alpha_prev) * eps

        if eta == 0.0:
            x = torch.sqrt(alpha_prev) * x0 + dir_xt
        else:
            z = torch.randn_like(x)
            sigma = eta * torch.sqrt((1 - alpha_prev) / (1 - alpha_t)) * torch.sqrt(1 - alpha_t / alpha_prev)
            x = torch.sqrt(alpha_prev) * x0 + dir_xt + sigma * z

    return x



N_SYN = 5000
synthetic_features = sample_synthetic(model, N_SYN)
synthetic_features = np.clip(synthetic_features, -1, 1)



In [13]:
def feature_row_to_transaction_dict(row):

    base = row[:BASE_FEATS]
    fc_onehot = row[BASE_FEATS:]

    def denorm01(x):
        return float(np.clip((float(x) + 1.0) / 2.0, 0.0, 1.0))

    dt_req_resp_norm = denorm01(base[0])
    addr_norm        = denorm01(base[1])
    qty_norm         = denorm01(base[2])

    is_exc_raw       = float(base[6])
    bytecount_norm   = denorm01(base[7])
    datalen_norm     = denorm01(base[8])
    dt_next_req_norm = denorm01(base[9])

    fc_idx = int(np.argmax(fc_onehot))
    req_fc = FUNC_CODES[fc_idx]

    return {
        "req_fc": req_fc,
        "req_addr_norm": addr_norm,
        "req_qty_norm": qty_norm,
        "resp_is_exc": 1.0 if is_exc_raw > 0.5 else 0.0,
        "resp_bytecount_norm": bytecount_norm,
        "resp_datalen_norm": datalen_norm,
        "delta_req_resp_norm": dt_req_resp_norm,
        "delta_next_req_norm": dt_next_req_norm,
    }


synthetic_feature_rows = [feature_row_to_transaction_dict(row) for row in synthetic_features]
print("Example synthetic transaction feature row:", synthetic_feature_rows[0])


Example synthetic transaction feature row: {'req_fc': 2, 'req_addr_norm': 0.3265541344881058, 'req_qty_norm': 0.9980323910713196, 'resp_is_exc': 0.0, 'resp_bytecount_norm': 0.005838602781295776, 'resp_datalen_norm': 0.0014705061912536621, 'delta_req_resp_norm': 0.029452741146087646, 'delta_next_req_norm': 0.08567884564399719}


In [14]:

def is_modbus_tcp(pkt):
    if not pkt.haslayer(TCP) or not pkt.haslayer(Raw):
        return False
    tcp = pkt[TCP]
    if tcp.sport != 502 and tcp.dport != 502:
        return False
    payload = bytes(pkt[Raw].load)
    if len(payload) < 7:
        return False
    proto_id = int.from_bytes(payload[2:4], "big")
    return proto_id == 0

def parse_modbus_packet(pkt):
    if not is_modbus_tcp(pkt):
        return None

    raw = bytes(pkt[Raw].load)
    if len(raw) < 8:
        return None

    fields = {}
    fields["time"] = float(pkt.time)

    fields["tx_id"]    = int.from_bytes(raw[0:2], "big")
    fields["proto_id"] = int.from_bytes(raw[2:4], "big")
    fields["length"]   = int.from_bytes(raw[4:6], "big")
    fields["unit_id"]  = raw[6]

    fc = raw[7]
    pdu = raw[8:]
    fields["func_code"] = fc

    tcp = pkt[TCP]
    if tcp.dport == 502:
        fields["direction"] = "req"
    elif tcp.sport == 502:
        fields["direction"] = "resp"
    else:
        fields["direction"] = "unknown"

    if fc & 0x80:
        fields["is_exception"] = True
        fields["exception_code"] = pdu[0] if len(pdu) > 0 else None
        return fields
    else:
        fields["is_exception"] = False
        fields["exception_code"] = None

    fields["address"] = None
    fields["quantity"] = None
    fields["value"] = None
    fields["resp_bytecount"] = None
    fields["resp_data_len"] = None

    if fields["direction"] == "req":
        if fc in [1, 2, 3, 4] and len(pdu) >= 4:
            fields["address"]  = struct.unpack(">H", pdu[0:2])[0]
            fields["quantity"] = struct.unpack(">H", pdu[2:4])[0]
        elif fc in [5, 6] and len(pdu) >= 4:
            fields["address"]  = struct.unpack(">H", pdu[0:2])[0]
            fields["value"]    = struct.unpack(">H", pdu[2:4])[0]

    if fields["direction"] == "resp":
        if fc in [1, 2, 3, 4] and len(pdu) >= 1:
            bytecount = pdu[0]
            if bytecount + 1 <= len(pdu):
                fields["resp_bytecount"] = bytecount
                fields["resp_data_len"]  = len(pdu[1:1+bytecount])

        if fc in [5, 6] and len(pdu) >= 4:
            fields["address"] = struct.unpack(">H", pdu[0:2])[0]
            fields["value"]   = struct.unpack(">H", pdu[2:4])[0]

    return fields


In [15]:

transactions = []

pending = {}

for pkt in tqdm(packets, desc="Building transactions"):
    if not is_modbus_tcp(pkt):
        continue

    tcp = pkt[TCP]
    ip  = pkt[IP]

    parsed = parse_modbus_packet(pkt)
    if parsed is None:
        continue

    if parsed["direction"] == "req":
        key = (ip.src, tcp.sport, parsed["unit_id"], parsed["tx_id"])
        pending[key] = {
            "req_pkt": pkt,
            "req": parsed,
        }
    elif parsed["direction"] == "resp":
        key = (ip.dst, tcp.dport, parsed["unit_id"], parsed["tx_id"])
        if key in pending:
            t = pending.pop(key)
            t["resp_pkt"] = pkt
            t["resp"] = parsed
            transactions.append(t)

print("Total transactions built:", len(transactions))
print("Unmatched pending requests:", len(pending))


Building transactions: 100%|██████████| 1176355/1176355 [00:19<00:00, 61188.99it/s]

Total transactions built: 110318
Unmatched pending requests: 8





In [16]:
req_addrs = []
req_qties = []
resp_bytecounts = []
resp_datalens = []
delta_req_resp = []

for t in transactions:
    req = t["req"]
    resp = t.get("resp")

    if req.get("address") is not None:
        req_addrs.append(req["address"])
    if req.get("quantity") is not None:
        req_qties.append(req["quantity"])

    if resp:
        if resp.get("resp_bytecount") is not None:
            resp_bytecounts.append(resp["resp_bytecount"])
        if resp.get("resp_data_len") is not None:
            resp_datalens.append(resp["resp_data_len"])
        delta_req_resp.append(resp["time"] - req["time"])

MAX_DREQRESP


0.002453231811523432

In [17]:

feature_rows = []

for i, t in enumerate(transactions):
    req = t["req"]
    resp = t.get("resp")

    req_fc = req["func_code"]
    addr = req.get("address") or 0
    qty  = req.get("quantity") or 0

    req_addr_norm = addr / MAX_ADDR if MAX_ADDR > 0 else 0.0
    req_qty_norm  = qty / MAX_QTY  if MAX_QTY  > 0 else 0.0

    if resp:
        is_exc = 1.0 if resp["is_exception"] else 0.0
        rbc    = resp.get("resp_bytecount") or 0
        rdl    = resp.get("resp_data_len") or 0
        dreqresp = max(0.0, resp["time"] - req["time"])
    else:
        is_exc = 0.0
        rbc = 0
        rdl = 0
        dreqresp = 0.0

    resp_bytecount_norm = rbc / MAX_RBC if MAX_RBC > 0 else 0.0
    resp_datalen_norm   = rdl / MAX_RDL if MAX_RDL > 0 else 0.0
    delta_req_resp_norm = dreqresp / MAX_DREQRESP if MAX_DREQRESP > 0 else 0.0

    if i + 1 < len(transactions):
        next_req_time = transactions[i+1]["req"]["time"]
        delta_next_req = max(0.0, next_req_time - req["time"])
    else:
        delta_next_req = 0.0
    delta_next_req_norm = delta_next_req

    feature_rows.append({
        "req_fc": req_fc,
        "req_addr_norm": req_addr_norm,
        "req_qty_norm": req_qty_norm,
        "resp_is_exc": is_exc,
        "resp_bytecount_norm": resp_bytecount_norm,
        "resp_datalen_norm": resp_datalen_norm,
        "delta_req_resp_norm": delta_req_resp_norm,
        "delta_next_req_norm": delta_next_req_norm,
    })

len(feature_rows)


110318

In [18]:
import numpy as np

real_dt_reqresp = []
real_dt_next    = []

for i, tr in enumerate(transactions):
    req = tr["req"]
    resp = tr.get("resp")

    if resp is not None:
        req_t  = req["time"]
        resp_t = resp["time"]
        dt_rr  = max(resp_t - req_t, 0.0)
        real_dt_reqresp.append(dt_rr)

    if i < len(transactions) - 1:
        next_req = transactions[i+1]["req"]
        next_req_t = next_req["time"]

        if resp is not None:
            prev_end_t = resp["time"]
        else:
            prev_end_t = req["time"]

        dt_n = max(next_req_t - prev_end_t, 0.0)
        real_dt_next.append(dt_n)

MEAN_DREQRESP = float(np.mean(real_dt_reqresp)) if real_dt_reqresp else 0.01
MEDIAN_DREQRESP = float(np.median(real_dt_reqresp)) if real_dt_reqresp else 0.01
MEAN_DNEXT    = float(np.mean(real_dt_next))    if real_dt_next    else 0.1

print("MEAN_DREQRESP:", MEAN_DREQRESP, "seconds")
print("MEDIAN_DREQRESP:", MEDIAN_DREQRESP, "seconds")
print("MEAN_DNEXT   :", MEAN_DNEXT, "seconds")


MEAN_DREQRESP: 0.004735315307885936 seconds
MEDIAN_DREQRESP: 0.0011179447174072266 seconds
MEAN_DNEXT   : 0.34875996791639 seconds


In [19]:

def decode_features_to_transaction_feats(feat):
    req_fc = int(round(feat["req_fc"]))
    req_fc = int(np.clip(req_fc, 1, 6))

    addr = int(round(feat["req_addr_norm"] * MAX_ADDR))
    addr = int(np.clip(addr, 0, MAX_ADDR))

    qty = int(round(feat["req_qty_norm"] * MAX_QTY))
    qty = int(np.clip(qty, 1, MAX_QTY))

    is_exc = bool(feat["resp_is_exc"] > 0.5)

    rbc = int(round(feat["resp_bytecount_norm"] * MAX_RBC))
    rbc = int(np.clip(rbc, 0, MAX_RBC))

    rdl = int(round(feat["resp_datalen_norm"] * MAX_RDL))
    rdl = int(np.clip(rdl, 0, MAX_RDL))

    dt_norm = float(feat["delta_req_resp_norm"])
    dt_01   = (dt_norm + 1.0) / 2.0
    dt_01   = float(np.clip(dt_01, 0.0, 1.0))

    dreqresp = dt_01 * MAX_DREQRESP
    # print(MAX_DREQRESP)

    dreqresp = max(1e-5, float(dreqresp))

    return {
        "req_fc": req_fc,
        "req_addr": addr,
        "req_qty": qty,
        "resp_is_exc": is_exc,
        "resp_bytecount": rbc,
        "resp_datalen": rdl,
        "delta_req_resp": dreqresp,
    }





feature_rows = synthetic_feature_rows
decoded_feats = [decode_features_to_transaction_feats(f) for f in feature_rows]
decoded_feats[0], decoded_feats[1]


0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.00245323181

({'req_fc': 2,
  'req_addr': 11,
  'req_qty': 1,
  'resp_is_exc': False,
  'resp_bytecount': 1,
  'resp_datalen': 0,
  'delta_req_resp': 0.0012627431065197897},
 {'req_fc': 2,
  'req_addr': 9,
  'req_qty': 1,
  'resp_is_exc': False,
  'resp_bytecount': 2,
  'resp_datalen': 4,
  'delta_req_resp': 0.0012390965271890735})

In [20]:
def build_modbus_request_pdu(req_fc, addr, qty, value=None):
    if req_fc in [1, 2, 3, 4]:
        return bytes([
            req_fc,
            (addr >> 8) & 0xFF, addr & 0xFF,
            (qty  >> 8) & 0xFF, qty  & 0xFF,
        ])
    elif req_fc in [5, 6]:
        val = value if value is not None else 1
        return bytes([
            req_fc,
            (addr >> 8) & 0xFF, addr & 0xFF,
            (val  >> 8) & 0xFF, val  & 0xFF,
        ])
    else:
        return bytes([req_fc])

import numpy as np

def build_modbus_response_pdu(req_fc, qty, is_exc):
    if is_exc:
        exc_code = 2
        return bytes([req_fc | 0x80, exc_code])

    if req_fc in [1, 2]:
        bytecount = (qty + 7) // 8
        bits = np.random.randint(0, 2, size=qty, dtype=np.uint8)

        data_bytes = []
        for i in range(0, qty, 8):
            chunk = bits[i:i+8]
            b = 0
            for bit_idx, bit_val in enumerate(chunk):
                if bit_val:
                    b |= (1 << bit_idx)
            data_bytes.append(b)

        data = bytes(data_bytes)
        return bytes([req_fc, bytecount]) + data

    if req_fc in [3, 4]:
        bytecount = 2 * qty
        regs = np.random.randint(0, 65536, size=qty, dtype=np.uint16)
        data = b"".join(int(r).to_bytes(2, "big") for r in regs)
        return bytes([req_fc, bytecount]) + data

    if req_fc in [5, 6]:
        val = 1
        addr = 0
        return bytes([
            req_fc,
            (addr >> 8) & 0xFF, addr & 0xFF,
            (val  >> 8) & 0xFF, val  & 0xFF,
        ])

    return bytes([req_fc])



In [21]:
def build_modbus_tcp_packets_for_transaction(
    trans_feat,
    tx_id,
    base_time,
    client_ip="10.0.0.10",
    server_ip="10.0.0.20",
    client_port=40000,
    server_port=502,
    unit_id=1,
):
    req_fc  = trans_feat["req_fc"]
    addr    = trans_feat["req_addr"]
    qty     = trans_feat["req_qty"]
    is_exc  = trans_feat["resp_is_exc"]
    dreqres = trans_feat["delta_req_resp"]

    req_pdu = build_modbus_request_pdu(req_fc, addr, qty)
    protocol_id = 0
    req_length = len(req_pdu) + 1
    mbap_req = (
        tx_id.to_bytes(2, "big") +
        protocol_id.to_bytes(2, "big") +
        req_length.to_bytes(2, "big") +
        (unit_id & 0xFF).to_bytes(1, "big")
    )
    req_payload = mbap_req + req_pdu

    req_pkt = (
        IP(src=client_ip, dst=server_ip) /
        TCP(sport=client_port, dport=server_port, flags="PA") /
        Raw(load=req_payload)
    )
    req_pkt.time = base_time

    resp_pdu = build_modbus_response_pdu(req_fc, qty, is_exc)
    resp_length = len(resp_pdu) + 1
    mbap_resp = (
        tx_id.to_bytes(2, "big") +
        protocol_id.to_bytes(2, "big") +
        resp_length.to_bytes(2, "big") +
        (unit_id & 0xFF).to_bytes(1, "big")
    )
    resp_payload = mbap_resp + resp_pdu

    resp_pkt = (
        IP(src=server_ip, dst=client_ip) /
        TCP(sport=server_port, dport=client_port, flags="PA") /
        Raw(load=resp_payload)
    )
    resp_pkt.time = base_time + dreqres

    return req_pkt, resp_pkt


In [22]:
max_trans = 2000
packets_out = []
time_cursor = 0.0
tx_id = 0

for i, feat in enumerate(decoded_feats[:max_trans]):
    tx_id = (tx_id + 1) % 65536 or 1
    req_pkt, resp_pkt = build_modbus_tcp_packets_for_transaction(
        feat,
        tx_id=tx_id,
        base_time=time_cursor,
    )
    packets_out.append(req_pkt)
    packets_out.append(resp_pkt)


    if i < len(feature_rows) - 1:
        dt_next_norm = float(feature_rows[i]["delta_next_req_norm"])

        dt_next_01 = (dt_next_norm + 1.0) / 2.0
        dt_next_01 = float(np.clip(dt_next_01, 0.0, 1.0))

        k_next = 3.0
        dt_next = dt_next_01 * (k_next * MEAN_DNEXT)

        dt_next = max(0.001, dt_next)

        time_cursor = resp_pkt.time + dt_next
    else:
        time_cursor = resp_pkt.time


OUT_PCAP = "/content/synthetic_from_real_features.pcap"
wrpcap(OUT_PCAP, packets_out)
print("Wrote synthetic PCAP:", OUT_PCAP)
print("Total packets:", len(packets_out))


Wrote synthetic PCAP: /content/synthetic_from_real_features.pcap
Total packets: 4000


In [23]:
from scapy.all import IP, TCP, Raw, wrpcap

client_ip   = "10.0.0.10"
server_ip   = "10.0.0.20"
client_port = 40000
server_port = 502
unit_id     = 1

packets_out = []

time_cursor = 0.0

client_seq = 1000
server_seq = 5000

syn = (
    IP(src=client_ip, dst=server_ip) /
    TCP(sport=client_port, dport=server_port, flags="S", seq=client_seq)
)
syn.time = time_cursor
packets_out.append(syn)
time_cursor += 0.001

synack = (
    IP(src=server_ip, dst=client_ip) /
    TCP(sport=server_port, dport=client_port, flags="SA",
        seq=server_seq, ack=client_seq + 1)
)
synack.time = time_cursor
packets_out.append(synack)
time_cursor += 0.001

ack = (
    IP(src=client_ip, dst=server_ip) /
    TCP(sport=client_port, dport=server_port, flags="A",
        seq=client_seq + 1, ack=server_seq + 1)
)
ack.time = time_cursor
packets_out.append(ack)
time_cursor += 0.001

client_seq = client_seq + 1
server_seq = server_seq + 1


In [24]:
max_trans = 2000
tx_id = 0

for i, feat in enumerate(decoded_feats[:max_trans]):
    tx_id = (tx_id + 1) % 65536 or 1

    tfeat = decode_features_to_transaction_feats(feature_rows[i])

    req_fc  = tfeat["req_fc"]
    addr    = tfeat["req_addr"]
    qty     = tfeat["req_qty"]
    is_exc  = tfeat["resp_is_exc"]
    rbc     = tfeat["resp_bytecount"]
    rdl     = tfeat["resp_datalen"]
    dreqres = tfeat["delta_req_resp"]

    req_pdu = build_modbus_request_pdu(req_fc, addr, qty)
    protocol_id = 0
    req_length = len(req_pdu) + 1
    mbap_req = (
        tx_id.to_bytes(2, "big") +
        protocol_id.to_bytes(2, "big") +
        req_length.to_bytes(2, "big") +
        (unit_id & 0xFF).to_bytes(1, "big")
    )
    req_payload = mbap_req + req_pdu

    resp_pdu = build_modbus_response_pdu(req_fc, qty, is_exc)
    resp_length = len(resp_pdu) + 1
    mbap_resp = (
        tx_id.to_bytes(2, "big") +
        protocol_id.to_bytes(2, "big") +
        resp_length.to_bytes(2, "big") +
        (unit_id & 0xFF).to_bytes(1, "big")
    )
    resp_payload = mbap_resp + resp_pdu

    req_pkt = (
        IP(src=client_ip, dst=server_ip) /
        TCP(
            sport=client_port,
            dport=server_port,
            flags="PA",
            seq=client_seq,
            ack=server_seq,
        ) /
        Raw(load=req_payload)
    )
    req_pkt.time = time_cursor
    packets_out.append(req_pkt)

    client_seq += len(req_payload)

    resp_pkt = (
        IP(src=server_ip, dst=client_ip) /
        TCP(
            sport=server_port,
            dport=client_port,
            flags="PA",
            seq=server_seq,
            ack=client_seq,
        ) /
        Raw(load=resp_payload)
    )
    resp_pkt.time = time_cursor + dreqres
    packets_out.append(resp_pkt)

    server_seq += len(resp_payload)

    dt_next = feature_rows[i]["delta_next_req_norm"]
    time_cursor = resp_pkt.time + dt_next


0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.002453231811523432
0.00245323181

In [25]:
OUT_PCAP = "/content/synthetic_from_real_features_stream.pcap"
wrpcap(OUT_PCAP, packets_out)
print("Wrote synthetic PCAP:", OUT_PCAP)
print("Total packets:", len(packets_out))


Wrote synthetic PCAP: /content/synthetic_from_real_features_stream.pcap
Total packets: 4003


In [26]:
import numpy as np

BASE_FEATURE_NAMES = [
    "dt_req_resp_norm",
    "addr_norm",
    "qty_norm",
    "val_norm",
    "req_payload_len_norm",
    "resp_payload_len_norm",
    "is_exc",
    "bytecount_norm",
    "data_len_norm",
    "dt_next_req_norm",
]

def summarize_1d(real_col, synth_col):
    return {
        "real_mean":    float(np.mean(real_col)),
        "real_std":     float(np.std(real_col)),
        "real_min":     float(np.min(real_col)),
        "real_max":     float(np.max(real_col)),
        "synth_mean":   float(np.mean(synth_col)),
        "synth_std":    float(np.std(synth_col)),
        "synth_min":    float(np.min(synth_col)),
        "synth_max":    float(np.max(synth_col)),
    }

def compare_distributions(features_real, features_synth):
    assert features_real.shape[1] == features_synth.shape[1], "Dim mismatch"

    print("=== Base feature distributions (real vs synthetic) ===")
    base_dim = BASE_FEATS
    for i in range(base_dim):
        name = BASE_FEATURE_NAMES[i] if i < len(BASE_FEATURE_NAMES) else f"feat_{i}"
        real_col  = features_real[:, i]
        synth_col = features_synth[:, i]
        s = summarize_1d(real_col, synth_col)
        print(f"\n{name}:")
        print(f"  real  : mean={s['real_mean']:.4f}, std={s['real_std']:.4f}, "
              f"min={s['real_min']:.4f}, max={s['real_max']:.4f}")
        print(f"  synth : mean={s['synth_mean']:.4f}, std={s['synth_std']:.4f}, "
              f"min={s['synth_min']:.4f}, max={s['synth_max']:.4f}")

    real_fc_idx  = np.argmax(features_real[:, base_dim:], axis=1)
    synth_fc_idx = np.argmax(features_synth[:, base_dim:], axis=1)

    print("\n=== Function code distributions (real vs synthetic) ===")
    real_fc_counts  = {FUNC_CODES[i]: int((real_fc_idx == i).sum()) for i in range(len(FUNC_CODES))}
    synth_fc_counts = {FUNC_CODES[i]: int((synth_fc_idx == i).sum()) for i in range(len(FUNC_CODES))}

    total_real  = sum(real_fc_counts.values())
    total_synth = sum(synth_fc_counts.values())

    for fc in FUNC_CODES:
        rc = real_fc_counts.get(fc, 0)
        sc = synth_fc_counts.get(fc, 0)
        rp = rc / total_real  if total_real  > 0 else 0.0
        sp = sc / total_synth if total_synth > 0 else 0.0
        print(f"  FC {fc}: real={rc} ({rp:.3f}), synth={sc} ({sp:.3f})")


compare_distributions(features, synthetic_features)


=== Base feature distributions (real vs synthetic) ===

dt_req_resp_norm:
  real  : mean=-0.9997, std=0.0151, min=-1.0000, max=1.0000
  synth : mean=-0.9666, std=0.0318, min=-0.9946, max=-0.5726

addr_norm:
  real  : mean=-0.2856, std=0.4469, min=-0.7143, max=1.0000
  synth : mean=-0.3494, std=0.1345, min=-0.5937, max=0.0773

qty_norm:
  real  : mean=1.0000, std=0.0000, min=1.0000, max=1.0000
  synth : mean=0.9829, std=0.0273, min=0.6225, max=1.0000

val_norm:
  real  : mean=-0.9980, std=0.0634, min=-1.0000, max=1.0000
  synth : mean=-0.9727, std=0.0351, min=-1.0000, max=-0.6731

req_payload_len_norm:
  real  : mean=-0.9077, std=0.0000, min=-0.9077, max=-0.9077
  synth : mean=-0.8851, std=0.0239, min=-0.9210, max=-0.6666

resp_payload_len_norm:
  real  : mean=-0.9200, std=0.0038, min=-0.9231, max=-0.9077
  synth : mean=-0.8977, std=0.0304, min=-0.9321, max=-0.5769

is_exc:
  real  : mean=-1.0000, std=0.0000, min=-1.0000, max=-1.0000
  synth : mean=-0.9836, std=0.0240, min=-1.0000, max=

In [27]:
from scapy.all import rdpcap, TCP, Raw
import struct

def check_modbus_protocol_compliance(pcap_path, max_errors=10):
    pkts = rdpcap(pcap_path)
    total = 0
    modbus_ok = 0
    errors = []

    for idx, pkt in enumerate(pkts):
        total += 1

        # Basic TCP + Raw
        if not pkt.haslayer(TCP) or not pkt.haslayer(Raw):
            errors.append((idx, "No TCP/Raw layer"))
            if len(errors) >= max_errors:
                break
            continue

        tcp = pkt[TCP]
        raw = bytes(pkt[Raw].load)

        if tcp.sport != 502 and tcp.dport != 502:
            errors.append((idx, "No Modbus port 502 in sport/dport"))
            if len(errors) >= max_errors:
                break
            continue

        if len(raw) < 8:
            errors.append((idx, "Payload too short for MBAP+FC"))
            if len(errors) >= max_errors:
                break
            continue

        tx_id    = int.from_bytes(raw[0:2], "big")
        proto_id = int.from_bytes(raw[2:4], "big")
        length   = int.from_bytes(raw[4:6], "big")
        unit_id  = raw[6]
        fc       = raw[7]
        pdu      = raw[8:]

        if proto_id != 0:
            errors.append((idx, f"Bad proto_id={proto_id}"))
            if len(errors) >= max_errors:
                break
            continue

        expected_len = len(raw) - 6
        if length != expected_len:
            errors.append((idx, f"MBAP length mismatch: length={length}, expected={expected_len}"))
            if len(errors) >= max_errors:
                break
            continue

        is_exc = bool(fc & 0x80)
        if is_exc:
            if len(pdu) < 1:
                errors.append((idx, "Exception FC but no exception code byte"))
                if len(errors) >= max_errors:
                    break
                continue
        else:
            if fc in [1, 2, 3, 4]:
                if tcp.dport == 502:
                    if len(pdu) < 4:
                        errors.append((idx, f"FC {fc} request PDU too short: len={len(pdu)}"))
                        if len(errors) >= max_errors:
                            break
                        continue
                elif tcp.sport == 502:
                    if len(pdu) < 1:
                        errors.append((idx, f"FC {fc} response missing bytecount"))
                        if len(errors) >= max_errors:
                            break
                        continue
                    bytecount = pdu[0]
                    if bytecount + 1 > len(pdu):
                        errors.append((idx, f"FC {fc} response bytecount exceeds PDU length"))
                        if len(errors) >= max_errors:
                            break
                        continue

            if fc in [5, 6]:
                if len(pdu) < 4:
                    errors.append((idx, f"FC {fc} PDU too short for addr+value: len={len(pdu)}"))
                    if len(errors) >= max_errors:
                        break
                    continue

        modbus_ok += 1

    print(f"\n=== Protocol compliance for {pcap_path} ===")
    print(f"Total packets in file: {total}")
    print(f"Packets passing basic Modbus/TCP checks: {modbus_ok}")

    if errors:
        print(f"\nFound {len(errors)} error(s) (showing up to {max_errors}):")
        for (idx, msg) in errors:
            print(f"  Packet #{idx}: {msg}")
    else:
        print("No protocol violations detected (within checked rules).")

    return errors

synthetic_errors = check_modbus_protocol_compliance(OUT_PCAP)



=== Protocol compliance for /content/synthetic_from_real_features_stream.pcap ===
Total packets in file: 4003
Packets passing basic Modbus/TCP checks: 4000

Found 3 error(s) (showing up to 10):
  Packet #0: No TCP/Raw layer
  Packet #1: No TCP/Raw layer
  Packet #2: No TCP/Raw layer
