# GradCAM++ Model Train and Mask Generation

In [1]:
import sys
import json
import pathlib

In [2]:
def get_root_dir(cwd: pathlib.Path = pathlib.Path().resolve(), anchor="README.md") -> pathlib.Path:
    """
    Get the root directory of the project by searching for a specific anchor file. 
    i.e. find the root directory where anchor file README.md/.git is located.
    
    Args:
        cwd (pathlib.Path): Current working directory.
        anchor (str): The name of the anchor file to search for.

    Returns:
        pathlib.Path: The root directory of the project.

    Raises:
        FileNotFoundError: If the anchor file is not found in any parent directories.
    """
    # Check if the anchor file exists in the current working directory
    # If it does, return the current working directory
    # If it doesn't, check the parent directories until the anchor file is found
    if cwd.joinpath(anchor).exists():
        return cwd
    else:
        for parent in cwd.parents:
            if (parent / anchor).exists():
                return parent
    
    # If the anchor file is not found in any parent directories, raise an error
    raise FileNotFoundError(f"Anchor file '{anchor}' not found in any parent directories of {cwd}.")

In [3]:
# Git repository information
REPO_GIT_OWNER = "bennylao"
REPO_NAME = "cv-cam-based-img-segmentation"


### Logics to set up paths based on the environment (Google Colab or local machine) ###
COLAB_ROOT_PATH = pathlib.Path("/content")
IS_COLAB = COLAB_ROOT_PATH.exists()

if IS_COLAB:
    # Working on Google Colab
    from google.colab import drive

    # Mount Google Drive
    DRIVE_PATH = COLAB_ROOT_PATH.joinpath("drive")
    drive.flush_and_unmount()
    drive.mount(str(DRIVE_PATH))

    # Load git credentials from Google Drive
    DRIVE_FOLDER_PATH = DRIVE_PATH.joinpath("MyDrive", "Colab Notebooks")
    if DRIVE_FOLDER_PATH.exists():
        with open(DRIVE_FOLDER_PATH.joinpath("git_credentials.json"), "r") as f:
            git_config = json.load(f)
    else:
        raise FileNotFoundError(f"Config file not found at {DRIVE_FOLDER_PATH}")

    # Set up Git credentials
    GIT_USER_NAME = git_config["GIT_USER_NAME"]
    GIT_TOKEN = git_config["GIT_TOKEN"]
    GIT_USER_EMAIL = git_config["GIT_USER_EMAIL"]

    !git config --global user.email {GIT_USER_EMAIL}
    !git config --global user.name {GIT_USER_NAME}

    # Set up project paths
    CURRENT_PATH = pathlib.Path().resolve()
    ROOT = COLAB_ROOT_PATH.joinpath(REPO_NAME)
    DATA_DIR = ROOT.joinpath("data")
    MODEL_DIR = DRIVE_FOLDER_PATH.joinpath("models")
    OUTPUT_DIR = DRIVE_FOLDER_PATH.joinpath("output")

    # Clone repo
    GIT_PATH = f"https://{GIT_TOKEN}@github.com/{REPO_GIT_OWNER}/{REPO_NAME}.git"

    if not ROOT.exists():
        !git clone --depth 1 "{GIT_PATH}" "{ROOT}"
    else:
        print(f"Git repo already cloned at {ROOT}")
        !git -C "{ROOT}" pull

else:
    # Working on local machine
    CURRENT_PATH = pathlib.Path().resolve()
    ROOT = get_root_dir(CURRENT_PATH, anchor="README.md")
    DATA_DIR = ROOT.joinpath("data")
    MODEL_DIR = ROOT.joinpath("models")
    OUTPUT_DIR = ROOT.joinpath("output")

# Create folder if not exist
if not DATA_DIR.exists():
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Created data directory at {DATA_DIR}")

if not OUTPUT_DIR.exists():
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Created output directory at {OUTPUT_DIR}")

if not MODEL_DIR.exists():
    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Created model directory at {MODEL_DIR}")

# Add root path to sys.path
sys.path.append(str(ROOT))

print("=" * 50)
print(f"Runtime: {'Google Colab' if IS_COLAB else 'Local Machine'}")
print(f"{CURRENT_PATH=}")
print(f"{ROOT=}")
print(f"{DATA_DIR=}")
print(f"{MODEL_DIR=}")
print(f"{OUTPUT_DIR=}")
print("=" * 50)

Runtime: Local Machine
CURRENT_PATH=PosixPath('/home/benny/vscode-projects/cv-cam-based-img-segmentation/notebooks')
ROOT=PosixPath('/home/benny/vscode-projects/cv-cam-based-img-segmentation')
DATA_DIR=PosixPath('/home/benny/vscode-projects/cv-cam-based-img-segmentation/data')
MODEL_DIR=PosixPath('/home/benny/vscode-projects/cv-cam-based-img-segmentation/models')
OUTPUT_DIR=PosixPath('/home/benny/vscode-projects/cv-cam-based-img-segmentation/output')


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

from torchvision.models import resnet18
from torch.utils.data import DataLoader
from PIL import Image
from tqdm import tqdm
from src import utils
from src.cam.gradcam import generate_multiscale_cam, threshold_cam_three

In [5]:
# Hyperparameters
IMAGE_SIZE = 256
NUM_CLASSES = 2

TRAIN_EPOCHS = 10
TRAIN_BATCH_SIZE = 16
TEST_BATCH_SIZE = 16
NUM_WORKERS = 4

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pin_memory = True if torch.cuda.is_available() else False
print(f"Using device: {device}")

# load dataset
train_transforms = utils.Compose([
    utils.PILToTensor(),
    utils.ResizeImgAndMask(size=(IMAGE_SIZE, IMAGE_SIZE)),
    utils.ConvertMaskToBinary(),
    utils.RandomHorizontalFlip(flip_prob=0.5),
    utils.RandomVerticalFlip(flip_prob=0.5),
    utils.RandomRotation(degrees=30),
    utils.ToDtype(dtype=torch.float32, scale=True),
])

test_transforms = utils.Compose([
    utils.PILToTensor(),
    utils.ResizeImgAndMask(size=(IMAGE_SIZE, IMAGE_SIZE)),
    utils.ConvertMaskToBinary(),
    utils.ToDtype(dtype=torch.float32, scale=True),
])

trainset, testset = utils.construct_dataset(
    data_dir=DATA_DIR,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
)

trainloader = DataLoader(
    trainset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=pin_memory,
)

testloader = DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=pin_memory,
)

Using device: cuda


## Resnet18 Label Classifier

In [7]:
num_classes = 37
model = resnet18(weights="DEFAULT")
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-3)

In [8]:
model.train()

for epoch in range(TRAIN_EPOCHS):
    print(f"Epoch {epoch+1}/{TRAIN_EPOCHS}")

    total_loss = 0
    correct = 0

    for images, _, _, labels in tqdm(trainloader):
        images, labels = images.to(device), labels.to(device)

        optimiser.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimiser.step()

        total_loss += loss.item()
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()

    train_acc = correct / len(trainloader.dataset)

    test_acc = 0.0
    model.eval()
    val_correct = 0
    with torch.no_grad():
        for images, _, _, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(dim=1)
            val_correct += (preds == labels).sum().item()
    test_acc = val_correct / len(testloader.dataset)

    print(f"Epoch {epoch+1}/{TRAIN_EPOCHS} | Train Loss: {total_loss:.4f} | Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")


Epoch 1/10


100%|██████████| 322/322 [00:20<00:00, 15.67it/s]


Epoch 1/10 | Train Loss: 537.8190 | Train Acc: 0.5877 | Test Acc: 0.8118
Epoch 2/10


100%|██████████| 322/322 [00:21<00:00, 15.17it/s]


Epoch 2/10 | Train Loss: 336.8641 | Train Acc: 0.6787 | Test Acc: 0.7229
Epoch 3/10


100%|██████████| 322/322 [00:17<00:00, 18.09it/s]


Epoch 3/10 | Train Loss: 229.1401 | Train Acc: 0.7710 | Test Acc: 0.7850
Epoch 4/10


100%|██████████| 322/322 [00:20<00:00, 15.85it/s]


Epoch 4/10 | Train Loss: 188.1818 | Train Acc: 0.8093 | Test Acc: 0.7746
Epoch 5/10


100%|██████████| 322/322 [00:20<00:00, 15.60it/s]


Epoch 5/10 | Train Loss: 173.6308 | Train Acc: 0.8239 | Test Acc: 0.7714
Epoch 6/10


100%|██████████| 322/322 [00:17<00:00, 17.98it/s]


Epoch 6/10 | Train Loss: 153.7452 | Train Acc: 0.8455 | Test Acc: 0.8463
Epoch 7/10


100%|██████████| 322/322 [00:17<00:00, 18.34it/s]


Epoch 7/10 | Train Loss: 125.3838 | Train Acc: 0.8762 | Test Acc: 0.8476
Epoch 8/10


100%|██████████| 322/322 [00:20<00:00, 15.98it/s]


Epoch 8/10 | Train Loss: 144.2324 | Train Acc: 0.8509 | Test Acc: 0.8027
Epoch 9/10


100%|██████████| 322/322 [00:20<00:00, 15.65it/s]


Epoch 9/10 | Train Loss: 115.9217 | Train Acc: 0.8820 | Test Acc: 0.8018
Epoch 10/10


100%|██████████| 322/322 [00:17<00:00, 18.00it/s]


Epoch 10/10 | Train Loss: 107.4724 | Train Acc: 0.8871 | Test Acc: 0.8313


In [9]:
save_path = MODEL_DIR.joinpath("resnet18_classifier.pth")
torch.save(model.state_dict(), save_path)

In [18]:
model = resnet18(weights="DEFAULT")
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.load_state_dict(torch.load(save_path, weights_only=True))
model.to(device)
model.eval()

train_transforms = utils.Compose([
	utils.PILToTensor(),
	utils.ResizeImgAndMask(size=(256, 256)),
	utils.ToDtype(dtype=torch.float32, scale=True),
])

test_transforms = utils.Compose([
	utils.PILToTensor(),
	utils.ResizeImgAndMask(size=(256, 256)),
	utils.ToDtype(dtype=torch.float32, scale=True),
])

with open(DATA_DIR / "train_ids.json") as f:
    train_ids = json.load(f)

with open(DATA_DIR / "test_ids.json") as f:
    test_ids = json.load(f)

cam_output_dir = DATA_DIR.joinpath("cam_dataset")
cam_output_dir.mkdir(parents=True, exist_ok=True)

trainset, testset = utils.construct_dataset(
    data_dir=DATA_DIR,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
)

target_layer = model.layer4[-1]

In [19]:
def generate_cam_images(dataset, ids, save_dir):
    for i in tqdm(range(len(dataset)), desc=f"Generating CAM"):
        sample, _, _, _ = dataset[i]
        input_tensor = sample.unsqueeze(0).to(device)

        cam = generate_multiscale_cam(
            model=model,
            image_tensor=input_tensor,
            target_layer=target_layer,
            target_size= (256, 256),
            scales=[128, 256, 512, 1024]
        )
        # image id
        image_id = ids[i]
        
        # cam 
        cam = np.clip(cam, 0, 1)
        cam_uint8 = (cam * 255).astype(np.uint8)
        Image.fromarray(cam_uint8, mode='L').save(save_dir / f"{image_id}_graycam.png")
        # mask
        mask = threshold_cam_three(cam, high_threshold=0.6, low_threshold=0.4)
        Image.fromarray(mask.astype(np.uint8), mode='L').save(save_dir / f"{image_id}_mask.png")

    print(f"Saved CAM & CAM Mask PNGs to {save_dir}/")

In [22]:
generate_cam_images(trainset, train_ids, cam_output_dir)
generate_cam_images(testset, test_ids, cam_output_dir)

Generating CAM: 100%|██████████| 5144/5144 [15:38<00:00,  5.48it/s]


Saved CAM & CAM Mask PNGs to /home/benny/vscode-projects/cv-cam-based-img-segmentation/data/cam_dataset/


Generating CAM: 100%|██████████| 2205/2205 [06:14<00:00,  5.89it/s]

Saved CAM & CAM Mask PNGs to /home/benny/vscode-projects/cv-cam-based-img-segmentation/data/cam_dataset/



