## Notebook to reproduce all the numeric results in the paper

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import random
from pathlib import Path
from dataloader import get_dataloader, walk_through_dir
from models import SimpleSegmentationModel, SegmentationModel, pretrained_UNet
from sklearn.metrics import precision_score, recall_score, f1_score
import argparse

In [2]:
def evaluate_model(model, test_dataloader, criterion, device):
    """Evaluate a binary segmentation model on a dataloader and print its metrics.

    The model is run over every batch with gradients disabled. The loss is
    averaged over batches; precision, recall, F1, Dice and IoU are computed
    pixel-wise over all batches pooled together.

    Args:
        model: Module invoked as ``model(images)``.
        test_dataloader: Iterable of ``(images, masks)`` batches that supports ``len()``.
        criterion: Loss invoked as ``criterion(outputs, masks)``. When it is an
            ``nn.BCEWithLogitsLoss`` the model outputs are treated as raw logits
            and passed through a sigmoid before being thresholded.
        device: Device the batches are moved to before the forward pass.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If ``test_dataloader`` contains no batches.
    """
    num_batches = len(test_dataloader)
    if num_batches == 0:
        raise ValueError("test_dataloader is empty; nothing to evaluate.")

    # BCEWithLogitsLoss applies the sigmoid internally, so a model scored with it
    # emits logits; thresholding those directly at 0.5 would use the wrong cut-off.
    outputs_are_logits = isinstance(criterion, nn.BCEWithLogitsLoss)

    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    pred_chunks = []   # One flat array of binary predictions per batch
    label_chunks = []  # One flat array of ground-truth values per batch

    with torch.no_grad():  # Disable gradient computation
        for images, masks in test_dataloader:
            images = images.to(device)
            masks = masks.to(device)

            # Forward pass
            outputs = model(images)
            total_loss += criterion(outputs, masks).item()

            # Convert outputs to binary predictions (probability threshold of 0.5)
            probabilities = torch.sigmoid(outputs) if outputs_are_logits else outputs
            binary_outputs = (probabilities > 0.5).float()

            pred_chunks.append(binary_outputs.cpu().numpy().ravel())
            label_chunks.append(masks.cpu().numpy().ravel())

    # Calculate average loss
    avg_loss = total_loss / num_batches

    # Cast to integers. Float values are truncated toward zero, so a mask pixel
    # strictly between 0 and 1 counts as background.
    # NOTE(review): masks are presumably already 0/1 -- confirm against the dataloader.
    all_preds = np.concatenate(pred_chunks).astype(np.int64)
    all_labels = np.concatenate(label_chunks).astype(np.int64)

    # Compute Precision, Recall, and F1 score
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    # Compute Dice Score (epsilon guards against an all-background denominator)
    intersection = np.sum(all_preds * all_labels)
    dice_score = (2.0 * intersection) / (np.sum(all_preds) + np.sum(all_labels) + 1e-8)

    # Compute IoU (Intersection over Union)
    union = np.sum((all_preds + all_labels) > 0)
    iou = intersection / (union + 1e-8)

    # Print results
    print(f"Average Loss on Test Set: {avg_loss:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Dice Score: {dice_score:.4f}")
    print(f"IoU: {iou:.4f}")

In [3]:
# Root of the dataset. Defaults to the original cluster location; set the
# FORCEBIOLOGY_DATA_DIR environment variable to run the notebook elsewhere.
# os.path.join(..., "") guarantees the trailing separator that the string
# concatenations below rely on.
data_path = os.path.join(
    os.environ.get(
        "FORCEBIOLOGY_DATA_DIR",
        "/zhome/70/5/14854/nobackup/deeplearningf24/forcebiology/data/",
    ),
    "",
)

# One brightfield image directory per well.
image_dirs = [
    data_path + 'brightfield/' + well_folder
    for well_folder in (
        'Alexa488_Fibroblasts_well1_50locations',
        'Alexa488_Fibroblasts_well2_200locations',
        'Alexa488_Fibroblasts_well3_200locations',
        'Alexa488_Fibroblasts_well4_225locations',
        'Alexa488_Fibroblasts_well5_225locations',
        'Alexa488_Fibroblasts_well6_135locations',
        'Alexa488_Fibroblasts_well7_135locations',
    )
]
mask_dir = data_path + 'masks'

def set_seed(seed=111):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # Pure-Python and NumPy generators first; they are independent of torch.
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # Covers every visible GPU

# Seed every random number generator with the default seed.
set_seed()

# Images and masks share a single preprocessing pipeline: resize to 128x128,
# then convert to a tensor.
# NOTE(review): Resize interpolates, so resized masks may contain non-binary
# values -- confirm this is intended for the masks.
data_transform = mask_transform = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

In [4]:
def run_model(channels, pth_model, model_name):
    """Build the test dataloader and load a trained checkpoint for evaluation.

    Args:
        channels: Channel indices forwarded to ``get_dataloader`` as
            ``channel_indices``. For ``"UNet"`` the number of indices is also
            passed to the model as its ``channels`` argument.
        pth_model: Checkpoint path, resolved relative to the current working
            directory.
        model_name: One of ``"Simple"``, ``"UNet"`` or ``"Pretrained"``.

    Returns:
        Tuple ``(model, test_dataloader, criterion, device)``; ``criterion`` is
        an ``nn.BCELoss`` and ``device`` is ``"cpu"``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Validate first so a typo fails immediately instead of after the
    # dataloaders have been built (and with an unbound-variable error).
    supported_models = ("Simple", "UNet", "Pretrained")
    if model_name not in supported_models:
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {supported_models}."
        )

    # Only the test split is needed here; the train/val loaders are discarded.
    _train_loader, _val_loader, test_dataloader = get_dataloader(
        image_dirs,
        mask_dir,
        data_transform,
        mask_transform,
        display_sample=False,
        train_percentage=1.0,
        channel_indices=channels
    )

    # Evaluation runs on the CPU; the checkpoint is mapped to the CPU on load.
    device = "cpu"
    criterion = nn.BCELoss()
    model_save_path = os.path.join(os.getcwd(), pth_model)

    # Instantiate the requested architecture
    if model_name == "Simple":
        model = SimpleSegmentationModel().to(device)
    elif model_name == "UNet":
        model = SegmentationModel(channels=len(channels)).to(device)
    else:  # "Pretrained" -- guaranteed by the validation above
        model = pretrained_UNet().to(device)

    model.load_state_dict(torch.load(model_save_path, map_location=torch.device('cpu'), weights_only=True))
    print(f"Model {pth_model} loaded successfully.")

    return model, test_dataloader, criterion, device


# Run evaluation for different models

Initial UNet model

In [6]:
# UNet checkpoint whose filename marks a 100% training fraction, all 11 channels (0-10).
model_name = 'UNet'
channels = list(range(11))
pth_model = 'models/segmentation_model_UNet_train100%_channels0_1_2_3_4_5_6_7_8_9_10.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels0_1_2_3_4_5_6_7_8_9_10.pth loaded successfully.
Average Loss on Test Set: 0.7551
Precision: 0.2806
Recall: 0.8834
F1 Score: 0.4259
Dice Score: 0.4259
IoU: 0.2706


In [None]:
# Pretrained model, all 11 channels (0-10).
model_name = 'Pretrained'
channels = list(range(11))
pth_model = 'models/segmentation_model_Pretrained.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
# This model is scored with BCEWithLogitsLoss rather than the BCELoss that
# run_model returns.
# NOTE(review): presumably pretrained_UNet emits raw logits -- confirm in models.py.
criterion = nn.BCEWithLogitsLoss()
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50


Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /zhome/2b/8/212341/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth


## Based on train percentage

In [8]:
# UNet checkpoint whose filename marks a 20% training fraction, all 11 channels (0-10).
model_name = 'UNet'
channels = list(range(11))
pth_model = 'models/segmentation_model_UNet_train20%_channels0_1_2_3_4_5_6_7_8_9_10.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train20%_channels0_1_2_3_4_5_6_7_8_9_10.pth loaded successfully.
Average Loss on Test Set: 0.4475
Precision: 0.8730
Recall: 0.3180
F1 Score: 0.4662
Dice Score: 0.4662
IoU: 0.3039


In [None]:
# UNet checkpoint whose filename marks a 40% training fraction, all 11 channels (0-10).
model_name = 'UNet'
channels = list(range(11))
pth_model = 'models/segmentation_model_UNet_train40%_channels0_1_2_3_4_5_6_7_8_9_10.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

NameError: name 'run_model' is not defined

In [5]:
# UNet checkpoint whose filename marks a 60% training fraction, all 11 channels (0-10).
model_name = 'UNet'
channels = list(range(11))
pth_model = 'models/segmentation_model_UNet_train60%_channels0_1_2_3_4_5_6_7_8_9_10.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train60%_channels0_1_2_3_4_5_6_7_8_9_10.pth loaded successfully.
Average Loss on Test Set: 0.3568
Precision: 0.9073
Recall: 0.4138
F1 Score: 0.5684
Dice Score: 0.5684
IoU: 0.3970


In [6]:
# UNet checkpoint whose filename marks an 80% training fraction, all 11 channels (0-10).
model_name = 'UNet'
channels = list(range(11))
pth_model = 'models/segmentation_model_UNet_train80%_channels0_1_2_3_4_5_6_7_8_9_10.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train80%_channels0_1_2_3_4_5_6_7_8_9_10.pth loaded successfully.
Average Loss on Test Set: 0.6034
Precision: 0.5244
Recall: 0.7316
F1 Score: 0.6110
Dice Score: 0.6110
IoU: 0.4398


## With augmentation and FFT

In [8]:
# UNet checkpoint whose filename is tagged "augmentation_fft", all 11 channels (0-10).
model_name = 'UNet'
channels = list(range(11))
pth_model = 'models/segmentation_model_UNet_train100%_channels0_1_2_3_4_5_6_7_8_9_10_augmentation_fft.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)


Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels0_1_2_3_4_5_6_7_8_9_10_augmentation_fft.pth loaded successfully.
Average Loss on Test Set: 0.3591
Precision: 0.9843
Recall: 0.0995
F1 Score: 0.1807
Dice Score: 0.1807
IoU: 0.0993


## With different channels
Channels based on GradCAM channel importance.

In [9]:
# GradCAM-based selection: channel 1 only.
model_name = 'UNet'
channels = [1]
pth_model = 'models/segmentation_model_UNet_train100%_channels1.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels1.pth loaded successfully.
Average Loss on Test Set: 0.3376
Precision: 0.7518
Recall: 0.5985
F1 Score: 0.6665
Dice Score: 0.6665
IoU: 0.4998


In [10]:
# GradCAM-based selection: channels 1 and 7.
model_name = 'UNet'
channels = [1, 7]
pth_model = 'models/segmentation_model_UNet_train100%_channels1_7.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels1_7.pth loaded successfully.
Average Loss on Test Set: 0.2780
Precision: 0.8307
Recall: 0.4821
F1 Score: 0.6102
Dice Score: 0.6102
IoU: 0.4390


In [11]:
# GradCAM-based selection: seven channels, kept in the order given.
model_name = 'UNet'
channels = [1, 7, 2, 4, 3, 9, 6]
pth_model = 'models/segmentation_model_UNet_train100%_channels1_7_2_4_3_9_6.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels1_7_2_4_3_9_6.pth loaded successfully.
Average Loss on Test Set: 0.3865
Precision: 0.8687
Recall: 0.3934
F1 Score: 0.5415
Dice Score: 0.5415
IoU: 0.3713


Channels chosen based on focal position.

In [12]:
# Focal-position selection: channel 6 only.
model_name = 'UNet'
channels = [6]
pth_model = 'models/segmentation_model_UNet_train100%_channels6.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels6.pth loaded successfully.
Average Loss on Test Set: 0.3760
Precision: 0.8211
Recall: 0.2561
F1 Score: 0.3905
Dice Score: 0.3905
IoU: 0.2426


In [13]:
# Focal-position selection: channels 5 through 7.
model_name = 'UNet'
channels = list(range(5, 8))
pth_model = 'models/segmentation_model_UNet_train100%_channels5_6_7.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels5_6_7.pth loaded successfully.
Average Loss on Test Set: 0.4450
Precision: 0.8419
Recall: 0.0827
F1 Score: 0.1507
Dice Score: 0.1507
IoU: 0.0815


In [14]:
# Focal-position selection: channels 4 through 8.
model_name = 'UNet'
channels = list(range(4, 9))
pth_model = 'models/segmentation_model_UNet_train100%_channels4_5_6_7_8.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels4_5_6_7_8.pth loaded successfully.
Average Loss on Test Set: 1.8912
Precision: 0.1787
Recall: 0.9442
F1 Score: 0.3005
Dice Score: 0.3005
IoU: 0.1768


In [15]:
# Focal-position selection: channels 3 through 9.
model_name = 'UNet'
channels = list(range(3, 10))
pth_model = 'models/segmentation_model_UNet_train100%_channels3_4_5_6_7_8_9.pth'

model, test_dataloader, criterion, device = run_model(
    channels=channels, pth_model=pth_model, model_name=model_name
)
evaluate_model(model=model, test_dataloader=test_dataloader, criterion=criterion, device=device)

Number of images in the trainset: 889
Number of images in the valset: 223
Number of images in the testset: 50
Model models/segmentation_model_UNet_train100%_channels3_4_5_6_7_8_9.pth loaded successfully.
Average Loss on Test Set: 0.5919
Precision: 0.4435
Recall: 0.7516
F1 Score: 0.5578
Dice Score: 0.5578
IoU: 0.3868
