Model: Resnet-18
Dataset: caltech 256

In [20]:
# ------------------------------
# 1️⃣ Imports and device setup
# ------------------------------
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import urllib.request
import tarfile
from PIL import Image
from tqdm import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision.models import ResNet18_Weights

SEED =  7777
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", DEVICE)

Using device: cuda


In [21]:
# ------------------------------
# 2️⃣ Parameters
# ------------------------------
N_EDGE_EPOCHS = 4        # epochs to feed edge maps
N_FULL_SIZE_AFTER = 10    #epochs to train on smaller image
BATCH_SIZE = 32
EPOCHS =15
LEARNING_RATE = 0.001


In [22]:
# ------------------------------
# 3️⃣ Edge transform
# ------------------------------
class EdgeTransform:
    def __call__(self, img):
        img_np = np.array(img)
        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        edges = np.sqrt(grad_x**2 + grad_y**2)
        edges = np.clip(edges / edges.max(), 0, 1)
        edges = np.stack([edges]*3, axis=0)
        return torch.tensor(edges, dtype=torch.float32)

edge_transform = EdgeTransform()

In [23]:
def seed_worker(worker_id):
    worker_seed = SEED + worker_id
    np.random.seed(worker_seed)
    random.seed(worker_seed)

In [24]:
'''import shutil
folder = "/content/caltech256/256_ObjectCategories_preprocessed"
if os.path.exists(folder):
    shutil.rmtree(folder)   # recursively deletes folder and all subfolders/files
    print(f"{folder} deleted successfully!")
else:
    print("Folder does not exist")'''

'import shutil\nfolder = "/content/caltech256/256_ObjectCategories_preprocessed"\nif os.path.exists(folder):\n    shutil.rmtree(folder)   # recursively deletes folder and all subfolders/files\n    print(f"{folder} deleted successfully!")\nelse:\n    print("Folder does not exist")'

In [25]:
# Download Caltech256
import os
import urllib.request
import tarfile
from PIL import Image, UnidentifiedImageError # Import UnidentifiedImageError
from tqdm import tqdm
import torchvision.transforms as transforms


url = 'https://data.caltech.edu/records/nyy15-4j048/files/256_ObjectCategories.tar'
data_dir = '/content/caltech256'


if not os.path.exists(data_dir):
    os.makedirs(data_dir)

tar_path = os.path.join(data_dir, '256_ObjectCategories.tar')
if not os.path.exists(tar_path):
    print("Downloading Caltech256...")
    urllib.request.urlretrieve(url, tar_path)
    print("Download complete!")

# Extract
if not os.path.exists(os.path.join(data_dir, '256_ObjectCategories')):
    print("Extracting...")
    with tarfile.open(tar_path) as tar:
        tar.extractall(path=data_dir)
    print("Extraction complete!")

  # -------------------------------
# Preprocess: resize + center crop + save
# -------------------------------
preprocessed_dir = os.path.join(data_dir, '256_ObjectCategories_preprocessed')
extracted_dir = os.path.join(data_dir, '256_ObjectCategories') # Define extracted_dir
if not os.path.exists(preprocessed_dir):
    print("Preprocessing images (resize + center crop)...")

    preprocess = transforms.Compose([
        transforms.Resize(256),      # resize shorter side
        transforms.CenterCrop(224)   # crop center 224x224
    ])

    # Gather all image paths and corresponding output paths
    all_images = []
    for class_name in os.listdir(extracted_dir):
        class_in = os.path.join(extracted_dir, class_name)
        class_out = os.path.join(preprocessed_dir, class_name)
        os.makedirs(class_out, exist_ok=True)

        for img_name in os.listdir(class_in):
            img_path = os.path.join(class_in, img_name)
            out_path = os.path.join(class_out, img_name)
            if os.path.isfile(img_path): # Check if it's a file
                all_images.append((img_path, out_path))

    # Single progress bar for all images
    for img_path, out_path in tqdm(all_images, desc="Preprocessing images"):
        try:
            img = Image.open(img_path).convert("RGB")
            img = preprocess(img)
            img.save(out_path)
        except UnidentifiedImageError:
            print(f"Skipping {img_path}: Cannot identify image file")


    print("Preprocessing complete!")
else:
    print("Preprocessed images already exist, skipping preprocessing.")

Preprocessed images already exist, skipping preprocessing.


In [26]:
# ------------------------------
# 4️⃣ Data loading with subset
# ------------------------------
from torch.utils.data import Subset, DataLoader

def load_data(batch_size=BATCH_SIZE, subset_percent=None):

    # Define a basic transform to convert PIL Images to Tensors
    basic_transform = transforms.ToTensor()

    full_dataset = datasets.ImageFolder(os.path.join(data_dir,'256_ObjectCategories_preprocessed'), transform=basic_transform)

    # Train/val split: 80/20
    train_size = int(0.8 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    trainset_full, test_set = torch.utils.data.random_split(full_dataset, [train_size, test_size])


    if subset_percent is not None:
        subset_size = int(len(trainset_full) * subset_percent)
        indices = torch.randperm(len(trainset_full))[:subset_size]
        train_set = Subset(trainset_full, indices)
    else:
        train_set = trainset_full

    trainloader = DataLoader(
                            train_set,
                            batch_size=batch_size,
                            shuffle=True,
                            worker_init_fn=seed_worker
                            )
    testloader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    return trainloader, testloader, len(full_dataset.classes)

In [27]:
trainloader, testloader, num_classes = load_data(subset_percent =1.0)

In [28]:
len(trainloader)

766

In [29]:
def resnet18(output_classes = 256, pretrained = False):
    # Load ResNet18 without pretrained weights
    if pretrained:
        model = models.resnet18(weights=ResNet18_Weights.DEFAULT)
    else:
        model = models.resnet18(weights = None)

    model.fc = nn.Linear(model.fc.in_features, output_classes)
    return model

In [30]:
# ------------------------------
# 6️⃣ Training function
# ------------------------------
def train_one_epoch(model, loader, optimizer, criterion, epoch_num, n_edge_epochs=N_EDGE_EPOCHS, blur_factor = 0.0):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0
    for images, labels in loader:

        if epoch_num < n_edge_epochs:
            # Edge transform
            images = torch.stack([edge_transform(img.permute(1,2,0).numpy()*255) for img in images])
        elif epoch_num < N_FULL_SIZE_AFTER and blur_factor !=0:
            images = torch.stack([
                transforms.Compose([
                transforms.GaussianBlur(kernel_size=5, sigma=min(4, blur_factor)),
                ])(img) for img in images])

        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

    return running_loss / total, correct / total

In [31]:
# ------------------------------
# 7️⃣ Validation function (concise Top-1 & Top-5)
# ------------------------------
def validate(model, loader, criterion, topk=(1,5)):
    model.eval()
    val_loss = 0.0
    correct = [0] * len(topk)
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            val_loss += criterion(outputs, labels).item() * labels.size(0)
            total += labels.size(0)

            maxk = max(topk)
            _, pred = outputs.topk(maxk, dim=1, largest=True, sorted=True)
            pred = pred.t()
            matches = pred.eq(labels.view(1, -1).expand_as(pred))

            for i, k in enumerate(topk):
                correct[i] += matches[:k].reshape(-1).float().sum().item()

    val_loss /= total
    accuracies = [c / total for c in correct]
    return val_loss, accuracies

In [32]:
def freeze_high_layers(model):
    # Freeze layer3, layer4, and layer2
    for name, param in model.named_parameters():
        if "layer4" in name or "layer3" in name:
            param.requires_grad = False
        else:
            param.requires_grad = True

def unfreeze_all_layers(model):
    for param in model.parameters():
        param.requires_grad = True

In [33]:
def mean_abs_activation(model, loader, device="cuda"):
    """
    Prints mean absolute activation per layer for a single batch.
    Works safely for ResNet-18, including avgpool and fc.
    """
    model.eval()
    x, _ = next(iter(loader))
    x = x.to(device)
    activations = {}
    round_to = 4
    with torch.no_grad():
        out = model.conv1(x); activations["conv1"] = round(out.abs().mean().item(),round_to)
        out = model.layer1(out); activations["layer1"] = round(out.abs().mean().item(),round_to)
        out = model.layer2(out); activations["layer2"] = round(out.abs().mean().item(),round_to)
        out = model.layer3(out); activations["layer3"] = round(out.abs().mean().item(),round_to)
        out = model.layer4(out); activations["layer4"] = round(out.abs().mean().item(),round_to)
        out = model.avgpool(out); activations["avgpool"] = round(out.abs().mean().item(),round_to)
        out = torch.flatten(out, 1); out = model.fc(out); activations["fc"] = round(out.abs().mean().item(),round_to)
    print(activations)

In [34]:
def save_checkpoint(model, optimizer, epoch,
                    method_name="method1", save_dir="checkpoints"):
    """
    Saves a PyTorch model checkpoint at the end of training.
    """
    os.makedirs(save_dir, exist_ok=True)
    filename = f"{method_name}_epoch_{epoch}.pth"
    path = os.path.join(save_dir, filename)

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'method_name': method_name
    }, path)

    print(f"Saved checkpoint for {method_name} at epoch {epoch}: {path}")
    return path

In [35]:
#Function to reset a layer
def reset_block(block):
    for m in block.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

In [36]:
# ------------------------------
# 8️⃣ Training loop (piecewise)
# ------------------------------
train_losses, val_losses = [[],[]], [[],[]]
train_accs, val_accs = [[],[]], [[],[]]
top5_accs = [[],[]]

print("1.) Baseline")
model = resnet18(num_classes, pretrained= True).to(DEVICE)

optimizer = optim.Adam([
              {'params': model.conv1.parameters(), 'lr': 1e-5,  'weight_decay': 1e-4},
              {'params': model.layer1.parameters(), 'lr': 1e-5,  'weight_decay': 1e-4},
              {'params': model.layer2.parameters(), 'lr': 5e-5,  'weight_decay': 1e-4},
              {'params': model.layer3.parameters(), 'lr': 1e-4,  'weight_decay': 1e-4},
              {'params': model.layer4.parameters(), 'lr': 3e-4,  'weight_decay': 1e-4},  #fine-tune
              {'params': model.fc.parameters(),    'lr': 1e-3,  'weight_decay': 1e-4},  # new classifier
          ])
scheduler = CosineAnnealingLR(optimizer, T_max=(EPOCHS), eta_min=1e-5)
criterion = nn.CrossEntropyLoss()
min_loss, val_acc_at_best_epoch, top5_acc_at_best_epoch = 100,100,100

for epoch in range(EPOCHS):
    train_loss, train_acc = train_one_epoch(model, trainloader, optimizer, criterion, epoch, 0)
    val_loss, [val_acc, top5_acc] = validate(model, testloader, criterion)

    train_losses[0].append(train_loss)
    val_losses[0].append(val_loss)
    train_accs[0].append(train_acc)
    val_accs[0].append(val_acc)
    top5_accs[0].append(top5_acc)


    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, Top_5_acc : {top5_acc:.4f} ")
    mean_abs_activation(model, trainloader)

    scheduler.step()
    
    if(min_loss > val_loss):
        min_loss = val_loss
        val_acc_at_best_epoch = val_acc
        top5_acc_at_best_epoch = top5_acc

print(f"Best run ---> val_acc : {val_acc_at_best_epoch:.4f} |"
      f"Top5_acc : {top5_acc_at_best_epoch:.4f} |"
      f"val_loss : {min_loss:.4f}")

print("2.)Edge + freeze")
model = resnet18(num_classes, pretrained= True).to(DEVICE)
optimizer = optim.Adam([
              {'params': model.conv1.parameters(), 'lr': 1e-4,  'weight_decay': 1e-4},
              {'params': model.layer1.parameters(), 'lr': 5e-5,  'weight_decay': 1e-4},
              {'params': model.layer2.parameters(), 'lr': 5e-5,  'weight_decay': 1e-4},
              {'params': model.layer3.parameters(), 'lr': 1e-4,  'weight_decay': 1e-4},
              {'params': model.layer4.parameters(), 'lr': 3e-4,  'weight_decay': 1e-4},  #fine-tune
              {'params': model.fc.parameters(),    'lr': 1e-3,  'weight_decay': 1e-4},  # new classifier
          ])


criterion = nn.CrossEntropyLoss()
min_loss, val_acc_at_best_epoch, top5_acc_at_best_epoch = 100,100,100

freeze_high_layers(model)
for epoch in range(EPOCHS):
    if(epoch == N_EDGE_EPOCHS):
        unfreeze_all_layers(model)
        optimizer = optim.Adam([
              {'params': model.conv1.parameters(), 'lr': 1e-5,  'weight_decay': 1e-4},
              {'params': model.layer1.parameters(), 'lr': 1e-5,  'weight_decay': 1e-4},
              {'params': model.layer2.parameters(), 'lr': 5e-5,  'weight_decay': 1e-4},
              {'params': model.layer3.parameters(), 'lr': 1e-4,  'weight_decay': 1e-4},
              {'params': model.layer4.parameters(), 'lr': 3e-4,  'weight_decay': 1e-4},  #fine-tune
              {'params': model.fc.parameters(),    'lr': 1e-3,  'weight_decay': 1e-4},  # new classifier
          ])
        scheduler = CosineAnnealingLR(optimizer, T_max=(EPOCHS-N_EDGE_EPOCHS), eta_min=1e-5)
    train_loss, train_acc = train_one_epoch(model, trainloader, optimizer, criterion, epoch, n_edge_epochs = N_EDGE_EPOCHS)
    val_loss, [val_acc, top5_acc] = validate(model, testloader, criterion)

    train_losses[1].append(train_loss)
    val_losses[1].append(val_loss)
    train_accs[1].append(train_acc)
    val_accs[1].append(val_acc)
    top5_accs[1].append(top5_acc)

    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, Top_5_acc : {top5_acc:.4f} ")
    mean_abs_activation(model, trainloader)

    
    if(epoch >= N_EDGE_EPOCHS):
        scheduler.step()
    
    if(min_loss > val_loss):
        min_loss = val_loss
        val_acc_at_best_epoch = val_acc
        top5_acc_at_best_epoch = top5_acc

print(f"Best run ---> val_acc : {val_acc_at_best_epoch:.4f} |"
      f"Top5_acc : {top5_acc_at_best_epoch:.4f} |"
      f"val_loss : {min_loss:.4f}")

1.) Baseline
Epoch 1/15 | Train Loss: 1.8423, Acc: 0.5898 | Val Loss: 1.1604, Acc: 0.7107, Top_5_acc : 0.8973 
{'conv1': 0.4559, 'layer1': 1.2326, 'layer2': 0.3783, 'layer3': 0.3599, 'layer4': 0.5176, 'avgpool': 0.5176, 'fc': 5.2539}
Epoch 2/15 | Train Loss: 0.6472, Acc: 0.8312 | Val Loss: 1.1347, Acc: 0.7208, Top_5_acc : 0.8994 
{'conv1': 0.4923, 'layer1': 1.3109, 'layer2': 0.3969, 'layer3': 0.3668, 'layer4': 0.5779, 'avgpool': 0.5779, 'fc': 6.8193}
Epoch 3/15 | Train Loss: 0.2765, Acc: 0.9266 | Val Loss: 1.1815, Acc: 0.7329, Top_5_acc : 0.8956 
{'conv1': 0.4393, 'layer1': 1.249, 'layer2': 0.3691, 'layer3': 0.3947, 'layer4': 1.0731, 'avgpool': 1.0731, 'fc': 13.7968}
Epoch 4/15 | Train Loss: 0.1163, Acc: 0.9723 | Val Loss: 1.1028, Acc: 0.7435, Top_5_acc : 0.9072 
{'conv1': 0.4501, 'layer1': 1.2982, 'layer2': 0.3916, 'layer3': 0.4046, 'layer4': 0.4817, 'avgpool': 0.4817, 'fc': 6.3462}
Epoch 5/15 | Train Loss: 0.0997, Acc: 0.9747 | Val Loss: 1.1456, Acc: 0.7468, Top_5_acc : 0.8974 
{'con

In [37]:
# ------------------------------
# 9️⃣ Plot function (can run separately)
# ------------------------------
def plot_curves_comparison(train_losses1, val_losses1, train_accs1, val_accs1,
                           train_losses2, val_losses2, train_accs2, val_accs2):
    plt.figure(figsize=(12, 10))

    # Train Loss Comparison
    plt.subplot(2, 2, 1)
    plt.plot(train_losses1, label="Model 1 Train Loss")
    plt.plot(train_losses2, label="Model 2 Train Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training Loss Comparison")
    plt.legend()

    # Validation Loss Comparison
    plt.subplot(2, 2, 2)
    plt.plot(val_losses1, label="Model 1 Val Loss")
    plt.plot(val_losses2, label="Model 2 Val Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Validation Loss Comparison")
    plt.legend()

    # Train Accuracy Comparison
    plt.subplot(2, 2, 3)
    plt.plot(train_accs1, label="Model 1 Train Acc")
    plt.plot(train_accs2, label="Model 2 Train Acc")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title("Training Accuracy Comparison")
    plt.legend()

    # Validation Accuracy Comparison
    plt.subplot(2, 2, 4)
    plt.plot(val_accs1, label="Model 1 Val Acc")
    plt.plot(val_accs2, label="Model 2 Val Acc")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title("Validation Accuracy Comparison")
    plt.legend()

    plt.tight_layout()
    plt.show()

In [38]:
plot_curves_comparison(train_losses, val_losses, train_accs, val_accs,
                           train_losses1, val_losses1, train_accs1, val_accs1)

NameError: name 'train_losses1' is not defined