In [1]:
import os, time, math, random
from pathlib import Path
import numpy as np
import pandas as pd
import torch
os.environ["CUDA_VISIBLE_DEVICES"] = "6" 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Subset
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
import timm
from tqdm import tqdm


from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True
Image.MAX_IMAGE_PIXELS = None  


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
SEED = 42
if DEVICE == "cuda":
    print("GPU name:", torch.cuda.get_device_name(0))
    print("Total GPU mem (GB):", torch.cuda.get_device_properties(0).total_memory / (1024**3))

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if DEVICE == "cuda":
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

print("timm version:", timm.__version__)
print("PyTorch:", torch.__version__)

  from .autonotebook import tqdm as notebook_tqdm


Device: cuda
GPU name: Tesla V100-SXM2-32GB
Total GPU mem (GB): 31.7325439453125
timm version: 1.0.21
PyTorch: 2.6.0+cu124


In [2]:
ImageFile.LOAD_TRUNCATED_IMAGES = True
Image.MAX_IMAGE_PIXELS = None

# ---- Config ----
DATA_ROOT = Path("/home/23ucc611/Mini/data/BananaLSD")        # <-- set to your dataset root (unzipped)
MODEL_NAME = "densenet201"
IMG_SIZE = 224

BATCH_TRAIN = 64
BATCH_EVAL  = 128
NUM_WORKERS = 8
PIN_MEMORY  = True
SEED = 42

# Training hyperparams
EPOCHS = 130
BASE_LR = 0.05          # base for batch=256; we scale with batch
BATCH_LR_REF = 256.0
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
WARMUP_EPOCHS = 3
MIN_LR = 1e-6

# Regularization / goodies
USE_MIXUP = True
MIXUP_ALPHA = 0.8
LABEL_SMOOTHING = 0.0    # not used if mixup on
EMA_DECAY = 0.9999
USE_AMP = True
PRINT_FREQ = 50

# Selection fraction for low-confidence images (20–25% as you asked)
SELECT_PCT = 0.225      # 22.5% by default; set to 0.20–0.25

# Paths for checkpoints and outputs
OUT_DIR = Path("./bananalsd_outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)
CKPT_DIR = OUT_DIR / "checkpoints"
CKPT_DIR.mkdir(parents=True, exist_ok=True)
BEST_CKPT = CKPT_DIR / "best_densenet201.pth"
BEST_M2_CKPT = CKPT_DIR / "best_densenet201_retrained.pth"

In [3]:

def find_image_root(base: Path):
    # Prefer "original" if present (we'll augment on-the-fly)
    if (base / "AugmentedSet").exists():
        return base / "AugmentedSet"
    # else detect a folder that holds class subfolders
    for child in base.iterdir():
        if child.is_dir():
            subdirs = [d for d in child.iterdir() if d.is_dir()]
            if len(subdirs) >= 2:
                return child
    return base

image_root = find_image_root(DATA_ROOT)
print("Using image root:", image_root)

class_counts = {}
for cls_dir in sorted([d for d in image_root.iterdir() if d.is_dir()]):
    cnt = len(list(cls_dir.rglob("*.*")))
    class_counts[cls_dir.name] = cnt
    print(f"{cls_dir.name:20s}: {cnt}")
print("Total images:", sum(class_counts.values()))

Using image root: /home/23ucc611/Mini/data/BananaLSD/AugmentedSet
cordana             : 400
healthy             : 400
pestalotiopsis      : 400
sigatoka            : 400
Total images: 1600


In [4]:

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

train_tf = T.Compose([
    T.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    T.RandomHorizontalFlip(p=0.5),
    # T.RandAugment(num_ops=2, magnitude=9),  # optional
    T.ToTensor(),
    T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

val_tf = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE)),
    T.ToTensor(),
    T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Build an index-only dataset to get classes & deterministic indices
index_ds = ImageFolder(root=str(image_root), transform=None)
NUM_CLASSES = len(index_ds.classes)
print("Classes:", index_ds.classes)
print("NUM_CLASSES:", NUM_CLASSES)

# Deterministic split indices (90/10)
val_ratio = 0.20
g = torch.Generator().manual_seed(SEED)
perm = torch.randperm(len(index_ds), generator=g)
val_size = max(1, int(math.ceil(len(index_ds) * val_ratio)))
train_idx = perm[:-val_size].tolist()
val_idx   = perm[-val_size:].tolist()

# Build two base datasets with different transforms
train_base = ImageFolder(root=str(image_root), transform=train_tf)
val_base   = ImageFolder(root=str(image_root), transform=val_tf)

# Subsets using the SAME indices (avoids label mix-ups)
train_ds = Subset(train_base, train_idx)
val_ds   = Subset(val_base,   val_idx)

train_loader = DataLoader(train_ds, batch_size=BATCH_TRAIN, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY, drop_last=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_TRAIN, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

print("Train size:", len(train_ds), " Val size:", len(val_ds))
print("Batches -> Train:", len(train_loader), " Val:", len(val_loader))

Classes: ['cordana', 'healthy', 'pestalotiopsis', 'sigatoka']
NUM_CLASSES: 4
Train size: 1280  Val size: 320
Batches -> Train: 20  Val: 5


In [5]:
# Cell 4 — model (DenseNet-201), sanity forward
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=NUM_CLASSES).to(DEVICE)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{MODEL_NAME}: params={total_params:,}, trainable={trainable_params:,}")

model.eval()
with torch.no_grad():
    dummy = torch.zeros((1,3,IMG_SIZE,IMG_SIZE), device=DEVICE)
    out = model(dummy)
print("Sanity forward OK. Logits shape:", tuple(out.shape))

densenet201: params=18,100,612, trainable=18,100,612
Sanity forward OK. Logits shape: (1, 4)


In [6]:

def one_hot(labels, num_classes, device, smoothing=0.0):
    if smoothing > 0.0:
        off = smoothing / (num_classes - 1)
        on = 1.0 - smoothing
    else:
        off = 0.0; on = 1.0
    y = torch.full((labels.size(0), num_classes), off, device=device)
    y.scatter_(1, labels.unsqueeze(1), on)
    return y

def mixup_data(x, y, alpha=0.8, num_classes=None, device='cuda'):
    if num_classes is None:
        num_classes = NUM_CLASSES
    if alpha <= 0:
        return x, one_hot(y, num_classes, device, smoothing=0.0), 1.0, None
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(x.size(0)).to(x.device)
    x_mix = lam * x + (1 - lam) * x[idx]
    y_a = one_hot(y, num_classes, device, smoothing=0.0)
    y_b = one_hot(y[idx], num_classes, device, smoothing=0.0)
    y_mix = lam * y_a + (1 - lam) * y_b
    return x_mix, y_mix, lam, idx

def soft_cross_entropy(logits, soft_targets):
    logp = F.log_softmax(logits, dim=1)
    return -(soft_targets * logp).sum(dim=1).mean()

class ModelEMA:
    def __init__(self, model, decay=0.9999, device='cpu'):
        self.decay = decay
        self.ema_model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=NUM_CLASSES)
        self.ema_model.load_state_dict(model.state_dict())
        for p in self.ema_model.parameters():
            p.requires_grad_(False)
        self.ema_model.to(device)

    def update(self, model):
        with torch.no_grad():
            msd = model.state_dict()
            esd = self.ema_model.state_dict()
            for k in esd.keys():
                tgt = esd[k]
                src = msd[k].to(tgt.device)
                if tgt.dtype.is_floating_point:
                    src = src.type_as(tgt)
                    tgt.mul_(self.decay).add_(src, alpha=(1.0 - self.decay))
                else:
                    tgt.copy_(src)

# Optimizer & LR schedule
scaled_lr = BASE_LR * (BATCH_TRAIN / BATCH_LR_REF)
print("Scaled base LR:", scaled_lr)
optimizer = torch.optim.SGD(model.parameters(), lr=scaled_lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

from torch import amp
autocast_enabled = (USE_AMP and DEVICE == 'cuda')
scaler = torch.cuda.amp.GradScaler(enabled=autocast_enabled)

def get_lr(epoch, total_epochs=EPOCHS, warmup=WARMUP_EPOCHS, base_lr=scaled_lr, min_lr=MIN_LR):
    if epoch < warmup:
        return base_lr * float(epoch + 1) / float(max(1, warmup))
    t = float(epoch - warmup) / float(max(1, total_epochs - warmup))
    return min_lr + 0.5 * (base_lr - min_lr) * (1.0 + math.cos(math.pi * t))

Scaled base LR: 0.0125


  scaler = torch.cuda.amp.GradScaler(enabled=autocast_enabled)


In [None]:
import warnings
warnings.filterwarnings('ignore')

K_REPORT = 2
ema = ModelEMA(model, decay=EMA_DECAY, device=DEVICE) if EMA_DECAY and EMA_DECAY > 0 else None



best_val_top1 = 0.0
for epoch in range(EPOCHS):
    cur_lr = get_lr(epoch)
    for g in optimizer.param_groups:
        g['lr'] = cur_lr
    print(f"\nEpoch {epoch+1}/{EPOCHS}  lr={cur_lr:.6f}")

    # ---- Train ----
    model.train()
    running_loss, seen = 0.0, 0
    t0 = time.time()
    for i, (imgs, targets) in enumerate(train_loader):
        imgs = imgs.to(DEVICE, non_blocking=True)
        targets = targets.to(DEVICE, non_blocking=True)

        if USE_MIXUP:
            inputs, soft_targets, _, _ = mixup_data(imgs, targets, alpha=MIXUP_ALPHA, num_classes=NUM_CLASSES, device=DEVICE)
            loss_fn = soft_cross_entropy
        else:
            inputs = imgs
            soft_targets = one_hot(targets, NUM_CLASSES, DEVICE, smoothing=LABEL_SMOOTHING)
            loss_fn = soft_cross_entropy

        optimizer.zero_grad(set_to_none=True)
        with amp.autocast('cuda', enabled=autocast_enabled):
            logits = model(inputs)
            loss = loss_fn(logits, soft_targets)

        if autocast_enabled:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward(); optimizer.step()

        if ema is not None:
            ema.update(model)

        bs = imgs.size(0)
        running_loss += float(loss.item()) * bs
        seen += bs

        if (i+1) % PRINT_FREQ == 0 or (i+1) == len(train_loader):
            print(f"Epoch {epoch+1} Batch {i+1}/{len(train_loader)}  AvgLoss: {running_loss/seen:.4f}  Time: {time.time()-t0:.1f}s")

    # ---- Validate (use EMA if available) ----
    eval_model = ema.ema_model if ema is not None else model
    eval_model.eval()

    val_total = 0
    top1_correct = 0
    topk_correct = 0
    val_loss_sum = 0.0

    with torch.no_grad():
        for imgs, targets in val_loader:
            imgs = imgs.to(DEVICE, non_blocking=True)
            targets = targets.to(DEVICE, non_blocking=True)

            logits = eval_model(imgs)
            loss_v = F.cross_entropy(logits, targets)
            val_loss_sum += float(loss_v.item()) * imgs.size(0)

            # Top-1
            _, pred1 = logits.max(dim=1)
            top1_correct += (pred1 == targets).sum().item()

            # Top-K only if K_REPORT >= 2
            if K_REPORT >= 2:
                _, predk = logits.topk(K_REPORT, dim=1, largest=True, sorted=True)
                topk_correct += (predk == targets.view(-1,1)).any(dim=1).sum().item()

            val_total += imgs.size(0)

    val_top1 = 100.0 * top1_correct / val_total
    val_loss = val_loss_sum / val_total
    msg = f"Epoch {epoch+1} VAL -> Loss: {val_loss:.4f}  Top1: {val_top1:.3f}"
    if K_REPORT >= 2:
        val_topk = 100.0 * topk_correct / val_total
        msg += f"  Top{K_REPORT}: {val_topk:.3f}"
    print(msg)

    # save checkpoint and best
    state = {
        "epoch": epoch+1,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "val_top1": val_top1,
        "cfg": {"MODEL_NAME": MODEL_NAME, "IMG_SIZE": IMG_SIZE, "NUM_CLASSES": NUM_CLASSES}
    }
    if val_top1 > best_val_top1:
        best_val_top1 = val_top1
        torch.save(state, BEST_CKPT)
        print(f"Saved BEST to {str(BEST_CKPT)} (ValTop1={val_top1:.3f})")

print("Training complete. Best Val Top1:", best_val_top1)


Epoch 1/130  lr=0.004167


AssertionError: No inf checks were recorded for this optimizer.

In [8]:
# Cell 7 — FULL DATASET EVAL (all images), save per-image CSV
class ImageFolderWithPaths(ImageFolder):
    def __getitem__(self, index):
        img, target = super().__getitem__(index)
        path, _ = self.samples[index]
        return img, target, path

eval_tf = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE)),
    T.ToTensor(),
    T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

full_ds_wp = ImageFolderWithPaths(root=str(image_root), transform=eval_tf)
NUM_CLASSES = len(full_ds_wp.classes)
K_REPORT = 2
full_loader = DataLoader(full_ds_wp, batch_size=BATCH_EVAL, shuffle=False,
                         num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

# load best
ck = torch.load(str(BEST_CKPT), map_location=DEVICE)
state_dict = ck.get("model_state", ck)
state_dict = {k.replace("module.",""): v for k, v in state_dict.items()}

eval_model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=NUM_CLASSES).to(DEVICE)
eval_model.load_state_dict(state_dict)
eval_model.eval()

total = 0
top1_correct = 0
topk_correct = 0
rows = []

with torch.no_grad():
    for imgs, targets, paths in full_loader:
        imgs = imgs.to(DEVICE, non_blocking=True)
        targets = targets.to(DEVICE, non_blocking=True)
        logits = eval_model(imgs)
        probs = F.softmax(logits, dim=1)
        top1_prob, pred1 = probs.max(dim=1)

        # metrics
        top1_correct += (pred1 == targets).sum().item()
        if K_REPORT >= 2:
            _, predk = probs.topk(K_REPORT, dim=1, largest=True, sorted=True)
            topk_correct += (predk == targets.view(-1,1)).any(dim=1).sum().item()
        total += imgs.size(0)

        # rows for CSV
        t_np = targets.cpu().numpy()
        p_np = pred1.cpu().numpy()
        c_np = top1_prob.cpu().numpy()
        for pth, t, p, c in zip(paths, t_np, p_np, c_np):
            rows.append({"path": pth, "true_label": int(t), "pred_label": int(p), "top1_prob": float(c)})

top1 = 100.0 * top1_correct / total
print(f"FULL EVAL -> Top1: {top1:.3f}", end="")
if K_REPORT >= 2:
    topk = 100.0 * topk_correct / total
    print(f"  Top{K_REPORT}: {topk:.3f}")
else:
    print()

pred_csv_path = OUT_DIR / "predictions_bananalsd_full.csv"
pd.DataFrame(rows).to_csv(pred_csv_path, index=False)
print("Saved per-image CSV:", pred_csv_path)

FULL EVAL -> Top1: 99.938  Top2: 100.000
Saved per-image CSV: bananalsd_outputs/predictions_bananalsd_full.csv
