Celda 0 — Config global

In [1]:
# === CELDA 0: CONFIG GLOBAL ===
import os, random, numpy as np, torch

BASE = r"C:\Users\DELL\Desktop\UNI-LEON\DP ULE\comparacion\dataset_gua_crops\cropped_images"
SEED = 42

random.seed(SEED); np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available(): torch.cuda.manual_seed_all(SEED)

print("BASE:", BASE)
print("Device:", "cuda" if torch.cuda.is_available() else "cpu")


BASE: C:\Users\DELL\Desktop\UNI-LEON\DP ULE\comparacion\dataset_gua_crops\cropped_images
Device: cuda


🔹 Celda 1 — Organizar dataset (raíz → defectuosas/normales + labels.csv)

In [2]:
# === CELDA 1: ORGANIZAR DATASET ===
import os, shutil, csv

IMG_EXTS = {".png", ".jpg", ".jpeg", ".bmp"}
DEF_DIR  = os.path.join(BASE, "defectuosas")
NORM_DIR = os.path.join(BASE, "normales")
os.makedirs(DEF_DIR, exist_ok=True); os.makedirs(NORM_DIR, exist_ok=True)

def find_image(base_dir, basename):
    for ext in IMG_EXTS:
        p = os.path.join(base_dir, basename + ext)
        if os.path.exists(p): return p
    return None

# mover defectuosas usando .json
json_files = [f for f in os.listdir(BASE) if f.lower().endswith(".json")]
for jf in json_files:
    base = os.path.splitext(jf)[0]
    js = os.path.join(BASE, jf)
    im = find_image(BASE, base)
    shutil.move(js, os.path.join(DEF_DIR, jf))
    if im and os.path.exists(im):
        shutil.move(im, os.path.join(DEF_DIR, os.path.basename(im)))

# mover lo demás (imágenes) a normales
for f in list(os.listdir(BASE)):
    p = os.path.join(BASE, f)
    if os.path.isfile(p) and os.path.splitext(f)[1].lower() in IMG_EXTS:
        shutil.move(p, os.path.join(NORM_DIR, f))

# labels.csv
with open(os.path.join(BASE, "labels.csv"), "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f); w.writerow(["filename","label"])
    for fimg in sorted(os.listdir(DEF_DIR)):
        if os.path.splitext(fimg)[1].lower() in IMG_EXTS:
            w.writerow([os.path.join("defectuosas", fimg), 1])
    for fimg in sorted(os.listdir(NORM_DIR)):
        if os.path.splitext(fimg)[1].lower() in IMG_EXTS:
            w.writerow([os.path.join("normales", fimg), 0])

print("✅ Dataset organizado en", BASE)
print(" - defectuosas:", len([x for x in os.listdir(DEF_DIR) if os.path.splitext(x)[1].lower() in IMG_EXTS]))
print(" - normales   :", len([x for x in os.listdir(NORM_DIR) if os.path.splitext(x)[1].lower() in IMG_EXTS]))


✅ Dataset organizado en C:\Users\DELL\Desktop\UNI-LEON\DP ULE\comparacion\dataset_gua_crops\cropped_images
 - defectuosas: 45
 - normales   : 156


Celda 2 — Crear splits (train/val/test)

In [3]:
# === CELDA 2: SPLITS ===
import os, shutil, csv, math, random

SPLITS = os.path.join(BASE, "splits")
for d in ["train","val","test"]:
    os.makedirs(os.path.join(SPLITS, d, "images"), exist_ok=True)

def list_imgs(folder, exts={".png",".jpg",".jpeg",".bmp"}):
    return sorted([f for f in os.listdir(folder) if os.path.splitext(f)[1].lower() in exts])

norm = list_imgs(os.path.join(BASE,"normales"))
defc = list_imgs(os.path.join(BASE,"defectuosas"))
random.shuffle(norm); random.shuffle(defc)

# normales 70/15/15
n = len(norm)
n_tr, n_val = math.floor(0.7*n), math.floor(0.15*n)
norm_train = norm[:n_tr]
norm_val   = norm[n_tr:n_tr+n_val]
norm_test  = norm[n_tr+n_val:]

# defectuosas 40% val / 60% test (ninguna en train)
m = len(defc)
m_val = math.floor(0.4*m)
def_val  = defc[:m_val]
def_test = defc[m_val:]

def cp(files, src, split):
    for fn in files:
        shutil.copy2(os.path.join(src, fn), os.path.join(SPLITS, split, "images", fn))

cp(norm_train, os.path.join(BASE,"normales"), "train")
cp(norm_val,   os.path.join(BASE,"normales"), "val")
cp(norm_test,  os.path.join(BASE,"normales"), "test")
cp(def_val,    os.path.join(BASE,"defectuosas"), "val")
cp(def_test,   os.path.join(BASE,"defectuosas"), "test")

def write_labels(split, normals, defects):
    with open(os.path.join(SPLITS, split, "labels.csv"), "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f); w.writerow(["filename","label"])
        for fn in sorted(normals): w.writerow([os.path.join("images", fn), 0])
        for fn in sorted(defects): w.writerow([os.path.join("images", fn), 1])

write_labels("train", norm_train, [])
write_labels("val",   norm_val,   def_val)
write_labels("test",  norm_test,  def_test)

print("✅ Splits listos en", SPLITS)
print("train: normales=",len(norm_train),", defectuosas=0")
print("val  : normales=",len(norm_val),  ", defectuosas=",len(def_val))
print("test : normales=",len(norm_test), ", defectuosas=",len(def_test))


✅ Splits listos en C:\Users\DELL\Desktop\UNI-LEON\DP ULE\comparacion\dataset_gua_crops\cropped_images\splits
train: normales= 109 , defectuosas=0
val  : normales= 23 , defectuosas= 18
test : normales= 24 , defectuosas= 27


Celda 3 — Autoencoder (entrenar + calibrar umbral en val)

In [4]:
# === CELDA 3 (MEJORADA): AE ENTRENAR + CALIBRAR CON labels.csv ===
import os, csv, json, numpy as np, torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from sklearn.metrics import roc_auc_score, precision_recall_curve
from PIL import Image

# ----- Config -----
SPLITS_DIR = os.path.join(BASE,"splits")
AE_DIR = os.path.join(SPLITS_DIR, "autoencoder_compare"); os.makedirs(AE_DIR, exist_ok=True)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 256
BATCH = 32
EPOCHS = 100
LR = 2e-3
PATIENCE = 10
TOPK_RATIO = 0.02  # 2% de píxeles con más error (mejor para defectos locales)
print("Device:", DEVICE)

# ----- Utils -----
try:
    from pytorch_msssim import ms_ssim
    HAS_MSSSIM = True
except Exception:
    HAS_MSSSIM = False

class AddGaussianNoise(object):
    def __init__(self, std=0.01): self.std=std
    def __call__(self, t):
        if self.std<=0: return t
        n = torch.randn_like(t)*self.std
        t = torch.clamp(t + n, 0.0, 1.0)
        return t

class CSVDataset(Dataset):
    def __init__(self, splits_dir, split, transform=None, only_normals=False):
        self.root = os.path.join(splits_dir, split)
        self.items = []
        with open(os.path.join(self.root, "labels.csv"), newline="", encoding="utf-8") as f:
            rdr = csv.DictReader(f)
            for r in rdr:
                rel = r["filename"]; lbl = int(r["label"])
                if only_normals and lbl != 0: 
                    continue
                self.items.append((rel, lbl))
        self.transform = transform
    def __len__(self): return len(self.items)
    def __getitem__(self, idx):
        rel, lbl = self.items[idx]
        p = os.path.join(self.root, rel)
        img = Image.open(p).convert("RGB")  # mantenemos 3 canales
        if self.transform: img = self.transform(img)
        return img, lbl

# ----- Transforms -----
tf_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ColorJitter(0.1,0.1,0.1,0.05),
    transforms.RandomAffine(degrees=3, translate=(0.02,0.02), scale=(0.98,1.02)),
    transforms.ToTensor(),
    AddGaussianNoise(0.01),
])
tf_eval = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])

train_set = CSVDataset(SPLITS_DIR, "train", transform=tf_train, only_normals=True)
val_set   = CSVDataset(SPLITS_DIR, "val",   transform=tf_eval,   only_normals=False)
train_loader = DataLoader(train_set, batch_size=BATCH, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_set,   batch_size=BATCH, shuffle=False, num_workers=0)
print(f"Train imgs (solo normales): {len(train_set)} | Val imgs (mixto): {len(val_set)}")

# ----- Modelo -----
class ConvAE(nn.Module):
    def __init__(self):
        super().__init__()
        self.enc = nn.Sequential(
            nn.Conv2d(3, 32, 3, 2, 1), nn.ReLU(inplace=True),
            nn.Conv2d(32,64, 3, 2, 1), nn.ReLU(inplace=True),
            nn.Conv2d(64,128,3, 2, 1), nn.ReLU(inplace=True),
        )
        self.dec = nn.Sequential(
            nn.ConvTranspose2d(128,64, 3,2,1,1), nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, 3,2,1,1), nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 3,  3,2,1,1), nn.Sigmoid(),
        )
    def forward(self,x): return self.dec(self.enc(x))

def ae_loss(x, xhat):
    mse = nn.functional.mse_loss(xhat, x)
    if HAS_MSSSIM:
        ssim_loss = 1 - ms_ssim(xhat, x, data_range=1.0)
        return 0.7*mse + 0.3*ssim_loss
    return mse

def scores_from_batch(x, xhat, topk_ratio=TOPK_RATIO):
    # error por píxel y Top-K por imagen
    err_map = ((x - xhat)**2).mean(1)      # (B,H,W)
    flat = err_map.flatten(1)               # (B, H*W)
    k = max(1, int(topk_ratio * flat.shape[1]))
    vals, _ = torch.topk(flat, k, dim=1)
    return vals.mean(1)                     # (B,)

model = ConvAE().to(DEVICE)
opt = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS, eta_min=1e-5)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=='cuda'))

best_f1, best_thr = 0.0, 0.0
bad_epochs = 0

for ep in range(1, EPOCHS+1):
    # --- train ---
    model.train(); run_loss=0.0
    for x,_ in train_loader:
        x = x.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
            xhat = model(x)
            loss = ae_loss(x, xhat)
        scaler.scale(loss).backward()
        scaler.unscale_(opt)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(opt); scaler.update()
        run_loss += loss.item() * x.size(0)
    run_loss /= max(1, len(train_loader.dataset))
    sched.step()

    # --- val: scores + umbral óptimo por F1 ---
    model.eval(); scores=[]; ytrue=[]
    with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
        for x,y in val_loader:
            x = x.to(DEVICE)
            xhat = model(x)
            s = scores_from_batch(x, xhat).detach().cpu().numpy()
            scores.extend(s.tolist()); ytrue.extend(list(y.numpy()))
    scores = np.array(scores); ytrue = np.array(ytrue)

    auc = roc_auc_score(ytrue, scores) if len(np.unique(ytrue))>1 else float("nan")
    prec, rec, thr = precision_recall_curve(ytrue, scores)
    f1 = (2*prec*rec)/(prec+rec+1e-8)
    idx = int(np.argmax(f1))
    f1_best = float(f1[idx])
    thr_best = float(thr[idx]) if len(thr)>0 else 0.0

    improved = f1_best > best_f1
    if improved:
        best_f1, best_thr = f1_best, thr_best
        torch.save(model.state_dict(), os.path.join(AE_DIR, "ae_best.pt"))
        with open(os.path.join(AE_DIR, "config.json"), "w") as f:
            json.dump({"threshold": best_thr, "topk_ratio": TOPK_RATIO}, f)
        bad_epochs = 0
    else:
        bad_epochs += 1

    print(f"[{ep:03d}] loss={run_loss:.5f} | val_auc={auc:.4f} | bestF1={f1_best:.4f} | thr*={thr_best:.6f} "
          f"| lr={sched.get_last_lr()[0]:.5f} | {'*' if improved else ''}")

    if bad_epochs >= PATIENCE:
        print("Early stopping.")
        break

print("✅ AE guardado en:", AE_DIR, "| bestF1=", round(best_f1,4), "thr=", round(best_thr,6))



Device: cuda
Train imgs (solo normales): 109 | Val imgs (mixto): 41


  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=='cuda'))
  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[001] loss=0.13966 | val_auc=0.3792 | bestF1=0.6102 | thr*=0.268923 | lr=0.00200 | *


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[002] loss=0.10023 | val_auc=0.6473 | bestF1=0.7059 | thr*=0.277596 | lr=0.00200 | *


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[003] loss=0.06759 | val_auc=0.6232 | bestF1=0.6800 | thr*=0.199250 | lr=0.00200 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[004] loss=0.04880 | val_auc=0.6377 | bestF1=0.7059 | thr*=0.168432 | lr=0.00199 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[005] loss=0.03800 | val_auc=0.6280 | bestF1=0.6923 | thr*=0.150034 | lr=0.00199 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[006] loss=0.02462 | val_auc=0.7512 | bestF1=0.7442 | thr*=0.128503 | lr=0.00198 | *


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[007] loss=0.01748 | val_auc=0.6618 | bestF1=0.6800 | thr*=0.109183 | lr=0.00198 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[008] loss=0.01546 | val_auc=0.6232 | bestF1=0.6667 | thr*=0.108263 | lr=0.00197 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[009] loss=0.01449 | val_auc=0.6449 | bestF1=0.6800 | thr*=0.102216 | lr=0.00196 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[010] loss=0.01362 | val_auc=0.6401 | bestF1=0.6667 | thr*=0.108426 | lr=0.00195 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[011] loss=0.01388 | val_auc=0.6498 | bestF1=0.6923 | thr*=0.103747 | lr=0.00194 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[012] loss=0.01348 | val_auc=0.6401 | bestF1=0.6923 | thr*=0.102019 | lr=0.00193 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[013] loss=0.01247 | val_auc=0.6353 | bestF1=0.6923 | thr*=0.099348 | lr=0.00192 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[014] loss=0.01175 | val_auc=0.6377 | bestF1=0.6923 | thr*=0.097743 | lr=0.00191 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[015] loss=0.01089 | val_auc=0.6667 | bestF1=0.7059 | thr*=0.096219 | lr=0.00189 | 


  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[016] loss=0.01012 | val_auc=0.6812 | bestF1=0.7200 | thr*=0.090922 | lr=0.00188 | 
Early stopping.
✅ AE guardado en: C:\Users\DELL\Desktop\UNI-LEON\DP ULE\comparacion\dataset_gua_crops\cropped_images\splits\autoencoder_compare | bestF1= 0.7442 thr= 0.128503


🔹 Celda 4 — PatchCore (memory bank + umbral)

In [5]:
# === CELDA 4: PATCHCORE - BANK + UMBRAL ===
import os, json, cv2, numpy as np, torch, torchvision.models as models
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm

SPLITS_DIR = os.path.join(BASE,"splits")
PATCH_DIR = os.path.join(BASE,"patchcore"); os.makedirs(PATCH_DIR, exist_ok=True)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 256

# extractor
backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).to(DEVICE); backbone.eval()
class FeatHook:
    def __init__(self, m): self.h=m.register_forward_hook(self.hook); self.feat=None
    def hook(self, m, i, o): self.feat=o.detach()
    def close(self): self.h.remove()
h2 = FeatHook(dict(backbone.named_modules())["layer2"])
h3 = FeatHook(dict(backbone.named_modules())["layer3"])

def load_img(path, size=IMG_SIZE):
    im = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    im = cv2.resize(im,(size,size), interpolation=cv2.INTER_AREA)
    x = (im.astype(np.float32)/255.0)
    x = np.stack([x,x,x],axis=0)
    return torch.from_numpy(x).unsqueeze(0).to(DEVICE)

def extract_concat_features(pth):
    x = load_img(pth)
    with torch.no_grad(): _ = backbone(x)
    f2, f3 = h2.feat, h3.feat
    f3u = torch.nn.functional.interpolate(f3, size=f2.shape[-2:], mode="bilinear", align_corners=False)
    return torch.cat([f2,f3u], dim=1).squeeze(0)  # (C,H,W)

def patchify(F): C,H,W=F.shape; return F.permute(1,2,0).reshape(H*W,C).contiguous()
def read_csv(csvp):
    rows=[]; 
    with open(csvp,"r",encoding="utf-8") as f:
        next(f)
        for line in f:
            rel,lbl=line.strip().split(","); rows.append((rel,int(lbl)))
    return rows

# 4.1 memory bank (train solo normales)
train_csv = os.path.join(SPLITS_DIR,"train","labels.csv")
items = read_csv(train_csv)
train_imgs = [os.path.join(SPLITS_DIR,"train", rel) for rel,lbl in items if lbl==0]

patches_all=[]
print("Extrayendo parches de TRAIN (normales)...")
for p in tqdm(train_imgs):
    F = extract_concat_features(p)
    P = patchify(F)
    P = torch.nn.functional.normalize(P, p=2, dim=1)
    patches_all.append(P.cpu().numpy())
bank_full = np.vstack(patches_all).astype(np.float32)
print("Banco completo:", bank_full.shape)

# coreset
def random_projection(X, out_dim=128, seed=42):
    rng=np.random.default_rng(seed)
    R=rng.standard_normal((X.shape[1], out_dim)).astype(np.float32)
    Z=X@R; Z/= (np.linalg.norm(Z,axis=1,keepdims=True)+1e-8)
    return Z
def kcenter_greedy(Z, m, seed=42):
    rng=np.random.default_rng(seed); N=Z.shape[0]
    start=int(rng.integers(0,N)); centers=[start]
    d=np.linalg.norm(Z-Z[start],axis=1)
    for _ in range(1,m):
        i=int(np.argmax(d)); centers.append(i)
        d=np.minimum(d, np.linalg.norm(Z-Z[i],axis=1))
    return np.array(centers, dtype=np.int64)

CORESET_RATIO = 0.05
m = max(1, int(CORESET_RATIO * bank_full.shape[0]))
Z = random_projection(bank_full, 128)
idx = kcenter_greedy(Z, m)
bank = bank_full[idx]
np.savez(os.path.join(PATCH_DIR,"memory_bank_core.npz"),
         bank=bank, img_size=np.array([IMG_SIZE], dtype=np.int32))
print("✅ Memory bank:", bank.shape)

# 4.2 calibrar umbral en VAL
val_csv = os.path.join(SPLITS_DIR,"val","labels.csv")
val_items = read_csv(val_csv)

knn = NearestNeighbors(n_neighbors=3).fit(bank)

def anomaly_score(pth):
    F = extract_concat_features(pth)
    Hf,Wf = F.shape[-2:]
    P = patchify(F)
    P = torch.nn.functional.normalize(P, p=2, dim=1).cpu().numpy()
    dists,_ = knn.kneighbors(P, return_distance=True)
    patch_scores = dists.mean(axis=1).reshape(Hf,Wf).astype(np.float32)
    return float(patch_scores.max())

y_true, s = [], []
for rel,lbl in tqdm(val_items):
    p = os.path.join(SPLITS_DIR,"val", rel)
    y_true.append(lbl); s.append(anomaly_score(p))
y_true = np.array(y_true); s = np.array(s)

from sklearn.metrics import roc_auc_score, precision_recall_curve
auc = roc_auc_score(y_true, s)
prec,rec,thr = precision_recall_curve(y_true, s)
f1 = (2*prec*rec)/(prec+rec+1e-8)
thr_p95 = float(np.percentile(s[y_true==0], 95))
print(f"VAL -> ROC-AUC={auc:.4f} | thr(p95 normals)={thr_p95:.6f}")

with open(os.path.join(PATCH_DIR,"config.json"),"w") as f:
    json.dump({"threshold": thr_p95}, f)
print("✅ Umbral guardado")


Extrayendo parches de TRAIN (normales)...


100%|██████████| 109/109 [00:01<00:00, 61.10it/s]


Banco completo: (111616, 384)
✅ Memory bank: (5580, 384)


100%|██████████| 41/41 [00:06<00:00,  6.51it/s]

VAL -> ROC-AUC=0.8865 | thr(p95 normals)=0.356087
✅ Umbral guardado





🔹 Celda 5 — Evaluación AE en test

In [6]:
# === CELDA 5 (MEJORADA): EVAL AE EN TEST CON labels.csv + Top-K ===
import os, csv, json, numpy as np, torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, confusion_matrix
from PIL import Image

SPLITS_DIR = os.path.join(BASE,"splits")
AE_DIR = os.path.join(SPLITS_DIR,"autoencoder_compare")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 256

class CSVDataset(Dataset):
    def __init__(self, splits_dir, split, transform=None):
        self.root = os.path.join(splits_dir, split)
        self.items = []
        with open(os.path.join(self.root, "labels.csv"), newline="", encoding="utf-8") as f:
            rdr = csv.DictReader(f)
            for r in rdr:
                self.items.append((r["filename"], int(r["label"])))
        self.transform = transform
    def __len__(self): return len(self.items)
    def __getitem__(self, idx):
        rel, lbl = self.items[idx]
        p = os.path.join(self.root, rel)
        img = Image.open(p).convert("RGB")
        if self.transform: img = self.transform(img)
        return img, lbl

tf = transforms.Compose([transforms.Resize((IMG_SIZE,IMG_SIZE)), transforms.ToTensor()])
test_set  = CSVDataset(SPLITS_DIR, "test", transform=tf)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)

# misma arquitectura
class ConvAE(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.enc = torch.nn.Sequential(
            torch.nn.Conv2d(3, 32, 3, 2, 1), torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(32,64, 3, 2, 1), torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(64,128,3, 2, 1), torch.nn.ReLU(inplace=True),
        )
        self.dec = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(128,64, 3,2,1,1), torch.nn.ReLU(inplace=True),
            torch.nn.ConvTranspose2d(64, 32, 3,2,1,1), torch.nn.ReLU(inplace=True),
            torch.nn.ConvTranspose2d(32, 3,  3,2,1,1), torch.nn.Sigmoid(),
        )
    def forward(self,x): return self.dec(self.enc(x))

def scores_from_batch(x, xhat, topk_ratio):
    err_map = ((x - xhat)**2).mean(1)      # (B,H,W)
    flat = err_map.flatten(1)
    k = max(1, int(topk_ratio * flat.shape[1]))
    vals, _ = torch.topk(flat, k, dim=1)
    return vals.mean(1)

# cargar modelo + umbral + topk_ratio
ae = ConvAE().to(DEVICE)
ae.load_state_dict(torch.load(os.path.join(AE_DIR,"ae_best.pt"), map_location=DEVICE))
ae.eval()
cfg = json.load(open(os.path.join(AE_DIR,"config.json")))
thr_ae = cfg["threshold"]; topk_ratio = cfg.get("topk_ratio", 0.02)

y_true, scores = [], []
with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
    for x,y in test_loader:
        x=x.to(DEVICE)
        xhat=ae(x)
        s = scores_from_batch(x, xhat, topk_ratio).item()
        scores.append(s); y_true.append(int(y.item()))

y_true = np.array(y_true); scores=np.array(scores)
y_pred = (scores>thr_ae).astype(int)

auc = roc_auc_score(y_true, scores) if len(np.unique(y_true))>1 else float("nan")
prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
cm = confusion_matrix(y_true, y_pred)

print("=== AE / TEST (Top-K) ===")
print("ROC-AUC:", round(auc,4))
print("Precision:", round(prec,4), "Recall:", round(rec,4), "F1:", round(f1,4))
print("CM [[TN FP],[FN TP]]:\n", cm)



  ae.load_state_dict(torch.load(os.path.join(AE_DIR,"ae_best.pt"), map_location=DEVICE))
  with torch.no_grad(), torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


=== AE / TEST (Top-K) ===
ROC-AUC: 0.733
Precision: 0.5833 Recall: 0.7778 F1: 0.6667
CM [[TN FP],[FN TP]]:
 [[ 9 15]
 [ 6 21]]


🔹 Celda 6 — Evaluación PatchCore en test + comparación

In [7]:
# === CELDA 6: EVAL PATCHCORE EN TEST + COMPARACIÓN ===
import os, json, cv2, numpy as np, torch, torchvision.models as models
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, confusion_matrix

SPLITS_DIR = os.path.join(BASE,"splits")
PATCH_DIR = os.path.join(BASE,"patchcore")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 256

# cargar bank + umbral
mb = np.load(os.path.join(PATCH_DIR,"memory_bank_core.npz"), allow_pickle=True)
bank = mb["bank"]
thr_pc = json.load(open(os.path.join(PATCH_DIR,"config.json")))["threshold"]
knn = NearestNeighbors(n_neighbors=3).fit(bank)

# extractor
backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).to(DEVICE); backbone.eval()
class FeatHook:
    def __init__(self,m): self.h=m.register_forward_hook(self.hook); self.feat=None
    def hook(self,m,i,o): self.feat=o.detach()
    def close(self): self.h.remove()
h2=FeatHook(dict(backbone.named_modules())["layer2"])
h3=FeatHook(dict(backbone.named_modules())["layer3"])

def load_img(path, size=IMG_SIZE):
    im=cv2.imread(path, cv2.IMREAD_GRAYSCALE); im=cv2.resize(im,(size,size), interpolation=cv2.INTER_AREA)
    x=(im.astype(np.float32)/255.0); x=np.stack([x,x,x],axis=0)
    return torch.from_numpy(x).unsqueeze(0).to(DEVICE)
def extract_concat_features(pth):
    x=load_img(pth)
    with torch.no_grad(): _=backbone(x)
    f2,f3=h2.feat,h3.feat
    f3u=torch.nn.functional.interpolate(f3, size=f2.shape[-2:], mode="bilinear", align_corners=False)
    return torch.cat([f2,f3u], dim=1).squeeze(0)
def patchify(F): C,H,W=F.shape; return F.permute(1,2,0).reshape(H*W,C).contiguous()

# leer test labels
items=[]
with open(os.path.join(SPLITS_DIR,"test","labels.csv"),"r",encoding="utf-8") as f:
    next(f)
    for line in f:
        rel,lbl=line.strip().split(","); items.append((rel,int(lbl)))

y_true_pc, scores_pc = [], []
for rel,lbl in items:
    p = os.path.join(SPLITS_DIR,"test", rel)
    F = extract_concat_features(p)
    Hf,Wf = F.shape[-2:]
    P = patchify(F)
    P = torch.nn.functional.normalize(P, p=2, dim=1).cpu().numpy()
    dists,_ = knn.kneighbors(P, return_distance=True)
    score = float(dists.mean(axis=1).reshape(Hf,Wf).max())
    y_true_pc.append(lbl); scores_pc.append(score)

y_true_pc = np.array(y_true_pc); scores_pc=np.array(scores_pc)
y_pred_pc = (scores_pc>thr_pc).astype(int)

auc_pc = roc_auc_score(y_true_pc, scores_pc)
prec_pc, rec_pc, f1_pc, _ = precision_recall_fscore_support(y_true_pc, y_pred_pc, average="binary", zero_division=0)
cm_pc = confusion_matrix(y_true_pc, y_pred_pc)

print("=== PATCHCORE / TEST ===")
print("ROC-AUC:", round(auc_pc,4))
print("Precision:", round(prec_pc,4), "Recall:", round(rec_pc,4), "F1:", round(f1_pc,4))
print("CM [[TN FP],[FN TP]]:\n", cm_pc)

# comparación rápida
def fmt(x): 
    return "nan" if (isinstance(x,float) and (x!=x)) else f"{x:.4f}"
print("\n=== COMPARACIÓN (TEST) ===")
print(f"{'Modelo':<12}  AUC     Prec    Recall  F1")
print(f"{'Autoencoder':<12}  {fmt(roc_auc_score(y_true, scores) if len(np.unique(y_true))>1 else float('nan'))}  "
      f"{fmt(precision_recall_fscore_support(y_true, (scores>thr_ae).astype(int), average='binary', zero_division=0)[0])}  "
      f"{fmt(precision_recall_fscore_support(y_true, (scores>thr_ae).astype(int), average='binary', zero_division=0)[1])}  "
      f"{fmt(precision_recall_fscore_support(y_true, (scores>thr_ae).astype(int), average='binary', zero_division=0)[2])}")
print(f"{'PatchCore':<12}  {fmt(auc_pc)}  {fmt(prec_pc)}  {fmt(rec_pc)}  {fmt(f1_pc)}")


=== PATCHCORE / TEST ===
ROC-AUC: 0.9506
Precision: 0.96 Recall: 0.8889 F1: 0.9231
CM [[TN FP],[FN TP]]:
 [[23  1]
 [ 3 24]]

=== COMPARACIÓN (TEST) ===
Modelo        AUC     Prec    Recall  F1
Autoencoder   0.7330  0.5833  0.7778  0.6667
PatchCore     0.9506  0.9600  0.8889  0.9231


🔹 Celda 7 — (Opcional) Visuales PatchCore (overlay/heat/mask)

In [8]:
# === CELDA 7: VISUALIZACIÓN PATCHCORE (opcional) ===
import cv2, json, numpy as np, os

viz_dir = os.path.join(PATCH_DIR, "viz"); os.makedirs(viz_dir, exist_ok=True)

def score_map(pth):
    F = extract_concat_features(pth)
    Hf,Wf = F.shape[-2:]
    P = patchify(F)
    P = torch.nn.functional.normalize(P, p=2, dim=1).cpu().numpy()
    dists,_ = knn.kneighbors(P, return_distance=True)
    return dists.mean(axis=1).reshape(Hf,Wf).astype(np.float32)

def heat_overlay(img_path, patch_scores, percent=98):
    raw = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    h = cv2.resize(patch_scores, (raw.shape[1], raw.shape[0]), interpolation=cv2.INTER_CUBIC)
    h_norm = (h - h.min())/(h.max()-h.min()+1e-8)
    h_u8 = (h_norm*255).astype(np.uint8)
    heat = cv2.applyColorMap(h_u8, cv2.COLORMAP_JET)
    overlay = cv2.addWeighted(cv2.cvtColor(raw, cv2.COLOR_GRAY2BGR), 0.6, heat, 0.4, 0)

    t = np.percentile(h_u8, percent)
    _, mask = cv2.threshold(h_u8, t, 255, cv2.THRESH_BINARY)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, np.ones((3,3),np.uint8),1)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((3,3),np.uint8),1)
    cnts,_ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for c in cnts:
        if cv2.contourArea(c) < 25: continue
        cv2.polylines(overlay, [c], True, (0,255,0), 2)
    return overlay, heat, mask

# genera 8 visuales
items = []
with open(os.path.join(SPLITS_DIR,"test","labels.csv"),"r",encoding="utf-8") as f:
    next(f)
    for line in f:
        rel,lbl=line.strip().split(","); items.append((rel,int(lbl)))
for rel,_ in items[:8]:
    p = os.path.join(SPLITS_DIR,"test", rel)
    sm = score_map(p)
    ov, heat, mask = heat_overlay(p, sm, percent=98)
    base = os.path.splitext(os.path.basename(p))[0]
    cv2.imwrite(os.path.join(viz_dir, f"{base}_overlay.png"), ov)
    cv2.imwrite(os.path.join(viz_dir, f"{base}_heat.png"), heat)
    cv2.imwrite(os.path.join(viz_dir, f"{base}_mask.png"), mask)

print("✅ Visuales guardadas en:", viz_dir)


✅ Visuales guardadas en: C:\Users\DELL\Desktop\UNI-LEON\DP ULE\comparacion\dataset_gua_crops\cropped_images\patchcore\viz
