In [1]:
import os, math, random, json, time
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

# ---- reproducibility
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG, and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with the
    auto-tuner disabled.

    Args:
        seed: Integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN speed for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

# Prefer the GPU when one is visible; everything below also runs on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# ---- your dataset layout
# Root folder that holds one sub-folder per sensor. Set the LEAK_DATA_ROOT
# environment variable to run the notebook on another machine without editing
# this cell; when it is unset the original location is used.
DATA_ROOT = Path(os.environ.get(
    "LEAK_DATA_ROOT",
    r"E:\Upwork Project\AI_Leak_Detection_Project\images\cwt_log",
))
# sensor key -> directory containing one sub-folder per class (see CLASSES)
SENSOR_DIRS = {
    "Accelerometer":   str(DATA_ROOT / "Accelerometer" / "Looped"),
    "DynamicPressure": str(DATA_ROOT / "Dynamic Pressure Sensor" / "Looped"),
    "Hydrophone":      str(DATA_ROOT / "Hydrophones" / "Looped"),
}
# Class folder names; the position in this list is the integer label.
CLASSES = [
    "No-leak",
    "Orifice Leak",
    "Gasket Leak",
    "Longitudinal Crack",
    "Circumferential Crack",
]
CLASS2IDX = {c:i for i,c in enumerate(CLASSES)}

# ---- image + sequence sizing
IMG_H, IMG_W = 128, 256     # resize HxW for CWT (freq x time)
T_STEPS       = 16          # split time axis into 16 slices
if IMG_W % T_STEPS != 0:
    # Integer division below would silently drop the trailing columns.
    raise ValueError(f"IMG_W ({IMG_W}) must be divisible by T_STEPS ({T_STEPS})")
SLICE_W       = IMG_W // T_STEPS  # 16px per step

# ---- training hyperparams
BATCH_SIZE    = 16
EPOCHS        = 40
LR            = 3e-4
WEIGHT_DECAY  = 1e-4
DROPOUT       = 0.2
HID_CNN       = 64          # base channels in CNN
LSTM_HID      = 256
LABEL_SMOOTH  = 0.05


Device: cpu


In [2]:
# File suffixes (lower-case, with the dot) that count as images.
IMG_EXTS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff"}

def list_files(d: Path) -> Dict[str, List[Path]]:
    """Collect the image files of every class folder under one sensor directory.

    Args:
        d: Sensor directory expected to contain one sub-folder per entry of
            ``CLASSES``.

    Returns:
        Mapping from class name to the sorted list of image paths found
        recursively under that class folder. Only regular files whose suffix
        (case-insensitive) is in ``IMG_EXTS`` are kept.

    Raises:
        FileNotFoundError: If a class folder is missing.
    """
    root = Path(d)
    out = {}
    for c in CLASSES:
        class_dir = root / c
        if not class_dir.is_dir():
            raise FileNotFoundError(f"Missing class folder: {class_dir}")
        # rglob also yields directories and its order depends on the file
        # system: keep regular files only and sort so every run sees the
        # same list.
        out[c] = sorted(
            p for p in class_dir.rglob("*")
            if p.is_file() and p.suffix.lower() in IMG_EXTS
        )
    return out

# sensor key -> {class name -> image paths}
sensor_files = {s: list_files(Path(p)) for s,p in SENSOR_DIRS.items()}

def stemset(paths: List[Path]) -> Dict[str, Path]:
    """Index paths by their lower-cased stem (file name without the last suffix).

    When several paths share a stem, the one that appears last in ``paths``
    is the one kept.
    """
    by_stem: Dict[str, Path] = {}
    for path in paths:
        by_stem[path.stem.lower()] = path
    return by_stem

def build_triplets() -> List[Tuple[Path,Path,Path,int]]:
    """Pair up the three sensors' images into (acc, dyn, hyd, label) samples.

    For every class the files of the three sensors are matched by lower-cased
    file stem. If fewer than 80% of the smallest sensor's files can be matched
    that way, the files are instead paired positionally after sorting each
    sensor's list, truncated to the smallest list.

    Returns:
        List of ``(accelerometer_path, dynamic_pressure_path, hydrophone_path,
        class_index)`` tuples, grouped by class in ``CLASSES`` order.
    """
    triplets = []
    for c in CLASSES:
        acc = sensor_files["Accelerometer"][c]
        dyn = sensor_files["DynamicPressure"][c]
        hyd = sensor_files["Hydrophone"][c]
        label = CLASS2IDX[c]

        A, D, H = stemset(acc), stemset(dyn), stemset(hyd)
        common = A.keys() & D.keys() & H.keys()
        counts = (len(acc), len(dyn), len(hyd))
        tripN = min(counts)

        # If naming differs, fallback to positional alignment
        if len(common) < 0.8*tripN:
            print(f"[WARN] Poor filename overlap for class '{c}'. Falling back to sorted alignment.")
            if len(set(counts)) > 1:
                # Positional pairing drops the surplus files of the larger
                # sensors; say so instead of discarding them silently.
                print(f"[WARN] Unequal file counts for class '{c}' "
                      f"(acc={counts[0]}, dyn={counts[1]}, hyd={counts[2]}); "
                      f"using the first {tripN} of each after sorting.")
            # zip stops at the shortest list, i.e. after tripN samples.
            triplets.extend(
                (a, d, h, label)
                for a, d, h in zip(sorted(acc), sorted(dyn), sorted(hyd))
            )
        else:
            triplets.extend((A[k], D[k], H[k], label) for k in sorted(common))

    print(f"Total triplets: {len(triplets)} (expected ~ 3750)")
    return triplets

TRIPLETS = build_triplets()

# Stratified train / val / test split on the class index (last tuple field).
y = [label for *_, label in TRIPLETS]
all_ids = np.arange(len(TRIPLETS))
train_ids, test_ids = train_test_split(
    all_ids, test_size=0.15, stratify=y, random_state=42
)
y_train = [y[i] for i in train_ids]
# 0.1765 of the remaining 85% is roughly 15% of the whole set.
train_ids, val_ids = train_test_split(
    train_ids, test_size=0.1765, stratify=y_train, random_state=42
)
print(f"Splits -> train:{len(train_ids)}, val:{len(val_ids)}, test:{len(test_ids)}")


[WARN] Poor filename overlap for class 'No-leak'. Falling back to sorted alignment.
[WARN] Poor filename overlap for class 'Orifice Leak'. Falling back to sorted alignment.
[WARN] Poor filename overlap for class 'Gasket Leak'. Falling back to sorted alignment.
[WARN] Poor filename overlap for class 'Longitudinal Crack'. Falling back to sorted alignment.
[WARN] Poor filename overlap for class 'Circumferential Crack'. Falling back to sorted alignment.
Total triplets: 1250 (expected ~ 3750)
Splits -> train:874, val:188, test:188


In [3]:
class ToNumpyGray:
    """Turn a PIL image into a float32 NumPy array of 8-bit grayscale values."""

    def __call__(self, img: Image.Image):
        # Images already in mode "L" are used as-is; anything else is converted.
        gray = img if img.mode == "L" else img.convert("L")
        return np.array(gray, dtype=np.float32)

class ZScorePerImage:
    """Standardise one array with its own mean and standard deviation."""

    def __call__(self, x: np.ndarray):
        mean = float(x.mean())
        # The small constant keeps the division finite for constant images.
        scale = float(x.std() + 1e-6)
        centered = x - mean
        return centered / scale

class TimeFreqMask:
    """SpecAugment-style augmentation: zero one column band and one row band.

    With probability ``p`` a band of ``int(W * time_mask_frac)`` columns and a
    band of ``int(H * freq_mask_frac)`` rows, each at a random offset, are set
    to 0. The input array is modified in place and returned.
    """

    def __init__(self, time_mask_frac=0.1, freq_mask_frac=0.1, p=0.7):
        self.tfrac, self.ffrac, self.p = time_mask_frac, freq_mask_frac, p

    def __call__(self, x: np.ndarray):
        # Draw first so the RNG is consumed even when no mask is applied.
        if not random.random() < self.p:
            return x

        n_rows, n_cols = x.shape

        # Column (time) band.
        width = int(n_cols * self.tfrac)
        if width > 0:
            start = random.randint(0, max(0, n_cols - width))
            x[:, start:start + width] = 0

        # Row (frequency) band.
        height = int(n_rows * self.ffrac)
        if height > 0:
            start = random.randint(0, max(0, n_rows - height))
            x[start:start + height, :] = 0

        return x

def make_transform(train=True):
    """Build the PIL-image -> (IMG_H, IMG_W) float32 array preprocessing function.

    Pipeline: grayscale -> bilinear resize to (IMG_H, IMG_W) -> random
    time/frequency masking (training only) -> per-image z-score.

    The resize runs first and the z-score last on purpose. Rescaling to 0..255
    after the z-score (to go back through PIL for the resize) maps the data
    to the same values a plain min-max would give, so the normalisation would
    be undone and the model would be fed raw 0..255 intensities.

    Args:
        train: When true, ``TimeFreqMask(0.1, 0.1, p=0.7)`` is included.

    Returns:
        Callable taking a ``PIL.Image.Image`` and returning a float32 array of
        shape ``(IMG_H, IMG_W)``.
    """
    steps = [ToNumpyGray()]
    if train:
        steps.append(TimeFreqMask(0.1, 0.1, p=0.7))
    steps.append(ZScorePerImage())

    def _tf(img: Image.Image):
        # Resize while the data is still an 8-bit image so no lossy
        # float -> uint8 round trip is needed afterwards.
        if img.mode != "L":
            img = img.convert("L")
        x = img.resize((IMG_W, IMG_H), resample=Image.BILINEAR)
        for step in steps:
            x = step(x)
        return np.asarray(x, dtype=np.float32)  # (H, W)
    return _tf

train_tf = make_transform(train=True)
eval_tf  = make_transform(train=False)


In [None]:
class CWTSequenceDataset(Dataset):
    """Dataset of sensor-image triplets cut into a sequence of time slices.

    Each item loads the three sensor images of one triplet, stacks them as
    channels and splits the width (time) axis into ``T_STEPS`` consecutive
    slices of ``SLICE_W`` columns.

    Args:
        triplets: Sequence of ``(acc_path, dyn_path, hyd_path, label)``.
        indices: Positions in ``triplets`` that belong to this split.
        transform: Callable mapping a PIL image to a 2-D ``(H, W)`` array.
    """

    def __init__(self, triplets, indices, transform):
        self.triplets = triplets
        self.indices = list(indices)
        self.tf = transform

    def __len__(self):
        return len(self.indices)

    def _load_gray(self, p: Path):
        """Open one image file, run the transform, and close the file again."""
        # Image.open is lazy and keeps the file handle open; the context
        # manager releases it once the transform has read the pixels.
        with Image.open(p) as img:
            return self.tf(img)  # (H,W) float32

    def __getitem__(self, i):
        """Return ``(sequence, label)`` for the i-th sample of this split.

        ``sequence`` has shape ``(T_STEPS, 3, H, SLICE_W)`` and ``label`` is a
        0-d int64 tensor.
        """
        idx = self.indices[i]
        p_acc, p_dyn, p_hyd, y = self.triplets[idx]

        # stack sensors as channels
        x = np.stack(
            [self._load_gray(p_acc), self._load_gray(p_dyn), self._load_gray(p_hyd)],
            axis=0,
        )  # (3, H, W)

        n_ch, height, width = x.shape
        used = T_STEPS * SLICE_W
        if width < used:
            raise ValueError(
                f"Image width {width} is too small for {T_STEPS} slices of {SLICE_W} columns"
            )

        # split along time into T_STEPS slices: (3, H, T, w) -> (T, 3, H, w)
        seq = x[:, :, :used].reshape(n_ch, height, T_STEPS, SLICE_W).transpose(2, 0, 1, 3)
        # from_numpy shares memory, so hand it a contiguous buffer.
        seq = np.ascontiguousarray(seq)

        return torch.from_numpy(seq), torch.tensor(y, dtype=torch.long)

train_ds = CWTSequenceDataset(TRIPLETS, train_ids, train_tf)
val_ds   = CWTSequenceDataset(TRIPLETS, val_ids,   eval_tf)
test_ds  = CWTSequenceDataset(TRIPLETS, test_ids,  eval_tf)

# On Windows, DataLoader workers are separate spawned processes that must
# re-import the dataset class. A class defined in a notebook cell lives in
# __main__ and cannot be re-imported there, so loading with workers fails or
# hangs. Load in the main process on Windows; keep 4 workers elsewhere.
NUM_WORKERS = 0 if os.name == "nt" else 4
# Pinned host memory only helps host->GPU copies; skip it on CPU-only runs.
PIN_MEMORY = device.type == "cuda"

loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
train_loader = DataLoader(train_ds, shuffle=True,  **loader_kwargs)
val_loader   = DataLoader(val_ds,   shuffle=False, **loader_kwargs)
test_loader  = DataLoader(test_ds,  shuffle=False, **loader_kwargs)

# Sanity check: pull one batch and show its shape.
xb, yb = next(iter(train_loader))
print("Batch shapes:", xb.shape, yb.shape)  # (B, T, C, H, W) ; (B,)




In [None]:
class ConvBlock(nn.Module):
    """Two Conv-BN-ReLU layers, a squeeze-excite gate, optional 2x2 max-pool, dropout.

    Args:
        c_in: Input channels.
        c_out: Output channels of both convolutions.
        k: Kernel size of both convolutions.
        s: Stride of the first convolution (the second always uses 1).
        p: Padding of both convolutions.
        pool: Apply ``MaxPool2d(2)`` after the gate when true.
        dropout: Probability for the final ``Dropout2d``.
    """

    def __init__(self, c_in, c_out, k=3, s=1, p=1, pool=True, dropout=0.0):
        super().__init__()
        layers = []
        for in_ch, stride in ((c_in, s), (c_out, 1)):
            layers.extend([
                nn.Conv2d(in_ch, c_out, k, stride, p, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*layers)

        if pool:
            self.pool = nn.MaxPool2d(2)
        else:
            self.pool = nn.Identity()
        self.drop = nn.Dropout2d(dropout)

        # Squeeze-excite bottleneck: c_out -> c_out // 4 -> c_out.
        squeezed = c_out // 4
        self.se_fc1 = nn.Linear(c_out, squeezed)
        self.se_fc2 = nn.Linear(squeezed, c_out)

    def forward(self, x):
        feats = self.conv(x)
        batch, channels, _, _ = feats.shape

        # One gate value in (0, 1) per channel from the spatial average.
        gate = feats.mean(dim=(2, 3))                 # (batch, channels)
        gate = F.relu(self.se_fc1(gate))
        gate = torch.sigmoid(self.se_fc2(gate))
        feats = feats * gate.reshape(batch, channels, 1, 1)

        return self.drop(self.pool(feats))

class CNNEncoder(nn.Module):
    """Three pooled ``ConvBlock`` stages followed by global average pooling.

    Channel widths go ``c_in -> base -> 2*base -> 4*base``; the first two
    stages use half the given dropout rate, the last one the full rate.
    ``out_channels`` exposes the length of the returned feature vector.
    """

    def __init__(self, c_in=3, base=HID_CNN, dropout=DROPOUT):
        super().__init__()
        widths = (c_in, base, base*2, base*4)
        rates = (dropout/2, dropout/2, dropout)
        stages = [
            ConvBlock(w_in, w_out, pool=True, dropout=rate)
            for w_in, w_out, rate in zip(widths[:-1], widths[1:], rates)
        ]
        self.seq = nn.Sequential(*stages)
        self.out_channels = widths[-1]

    def forward(self, x):   # x: (B, C, H, W)
        fmap = self.seq(x)               # (B, C', H', W')
        return fmap.mean(dim=(2, 3))     # average over H', W' -> (B, C')

class CNN_LSTM(nn.Module):
    """Per-slice CNN encoder followed by a 2-layer bidirectional LSTM classifier.

    Args:
        n_classes: Number of output logits.
        c_in: Channels of every time slice.
        base: Base channel width of the CNN encoder.
        lstm_h: Hidden size of each LSTM direction.
        dropout: Dropout rate used by the encoder, between the LSTM layers
            and in the classification head.
    """

    def __init__(self, n_classes=len(CLASSES), c_in=3, base=HID_CNN, lstm_h=LSTM_HID, dropout=DROPOUT):
        super().__init__()
        self.encoder = CNNEncoder(c_in=c_in, base=base, dropout=dropout)
        self.lstm = nn.LSTM(input_size=self.encoder.out_channels,
                            hidden_size=lstm_h,
                            num_layers=2,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=True)
        self.head = nn.Sequential(
            nn.LayerNorm(lstm_h*2),
            nn.Dropout(dropout),
            nn.Linear(lstm_h*2, n_classes)
        )

    def forward(self, x):  # x: (B, T, C, H, W)
        """Map a batch of slice sequences ``(B, T, C, H, W)`` to logits ``(B, n_classes)``."""
        B, T, C, H, W = x.shape
        # reshape (not view) so non-contiguous inputs are accepted as well.
        frames = x.reshape(B*T, C, H, W)
        feats = self.encoder(frames)        # (B*T, F)
        feats = feats.reshape(B, T, -1)     # (B, T, F)
        out, _ = self.lstm(feats)           # (B, T, 2*hidden): [forward | backward]

        # Each direction has seen the whole sequence only at its own final
        # step: t = T-1 for the forward half, t = 0 for the backward half.
        # Taking out[:, -1] for both would give the backward direction after
        # it had processed a single slice.
        hid = self.lstm.hidden_size
        summary = torch.cat([out[:, -1, :hid], out[:, 0, hid:]], dim=-1)  # (B, 2*hidden)

        logits = self.head(summary)         # (B, n_classes)
        return logits


In [None]:
class LabelSmoothingCE(nn.Module):
    """Cross-entropy against smoothed one-hot targets.

    The true class gets probability ``1 - eps``; the remaining ``eps`` is
    shared equally by the other ``n - 1`` classes. The loss is averaged over
    the batch.
    """

    def __init__(self, eps=0.0):
        super().__init__()
        self.eps = eps

    def forward(self, logits, target):
        num_classes = logits.size(-1)
        log_probs = F.log_softmax(logits, dim=-1)

        # The soft targets are constants: build them outside the graph.
        with torch.no_grad():
            off_value = self.eps / (num_classes - 1)
            soft = torch.full_like(log_probs, off_value)
            soft.scatter_(1, target.unsqueeze(1), 1 - self.eps)

        per_sample = -(soft * log_probs).sum(dim=-1)
        return per_sample.mean()

def accuracy(logits, y):
    """Return, as a Python float, the fraction of rows whose arg-max over dim 1 equals ``y``."""
    predicted = torch.argmax(logits, dim=1)
    hits = predicted.eq(y)
    return hits.float().mean().item()

# Model with its default hyper-parameters, moved to the selected device.
model = CNN_LSTM().to(device)
# Training loss: cross-entropy with label smoothing of LABEL_SMOOTH.
criterion = LabelSmoothingCE(eps=LABEL_SMOOTH)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
# Cosine learning-rate schedule with a period of EPOCHS scheduler steps;
# run_epoch() advances it once per training epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Gradient scaler for mixed precision; only enabled when running on CUDA.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))

def run_epoch(loader, train=True):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the module-level ``model``, ``criterion``, ``optimizer``, ``scaler``
    and ``scheduler``. When ``train`` is true the model is put in training
    mode, the weights are updated after every batch and the scheduler is
    stepped once at the end; otherwise the model is put in eval mode and
    gradients are disabled.

    Both returned values are averaged per sample.
    """
    model.train(train)
    use_amp = device.type == "cuda"

    loss_sum, correct, seen = 0.0, 0.0, 0
    for xb, yb in tqdm(loader, leave=False):
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=use_amp), torch.set_grad_enabled(train):
            logits = model(xb)
            loss = criterion(logits, yb)

        if train:
            optimizer.zero_grad(set_to_none=True)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        batch = xb.size(0)
        loss_sum += loss.item() * batch
        correct += (logits.argmax(1) == yb).float().sum().item()
        seen += batch

    if train:
        scheduler.step()
    return loss_sum / seen, correct / seen


In [None]:
# Train with early stopping on validation accuracy; the best weights so far
# are written to ckpt_path.
best_val_acc = 0.0
patience, bad_epochs = 8, 0
ckpt_path = "cnn_lstm_best.pt"

for epoch in range(1, EPOCHS+1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    vl_loss, vl_acc = run_epoch(val_loader,   train=False)

    improved = vl_acc > best_val_acc
    if improved:
        best_val_acc, bad_epochs = vl_acc, 0
        # Store the sizing constants next to the weights.
        run_cfg = {"IMG_H":IMG_H, "IMG_W":IMG_W, "T_STEPS":T_STEPS, "SLICE_W":SLICE_W,
                   "CLASSES":CLASSES}
        torch.save({"model": model.state_dict(), "cfg": run_cfg}, ckpt_path)
    else:
        bad_epochs += 1

    summary = (f"Epoch {epoch:02d}/{EPOCHS} | "
               f"train loss {tr_loss:.4f} acc {tr_acc*100:.2f}% | "
               f"val loss {vl_loss:.4f} acc {vl_acc*100:.2f}% | "
               f"best val {best_val_acc*100:.2f}%")
    print(summary)

    # Stop after `patience` consecutive epochs without a new best.
    if bad_epochs >= patience:
        print("Early stopping.")
        break

print("Best validation accuracy:", best_val_acc)
