In [1]:
#Imports

import os
import numpy as np
import cv2
from random import randint
import matplotlib.pyplot as plt

from PIL import Image

import torch

import torchvision.transforms as transforms
import numpy as np

import models
import dataprep
import evalmetrics
import torch.nn as nn
import custom_dataset

from torch.utils.data import Dataset, DataLoader, random_split

# Loading Models

In [3]:
# For inference we can stick to the CPU.
device = torch.device("cpu")


def _load_trained_model(model_cls, checkpoint_path, target_device):
    """Build a model and restore its trained state from a checkpoint file.

    Args:
        model_cls: Model class taken from `models`; it is instantiated with
            img_ch=3 and output_ch=1.
        checkpoint_path: Path of a checkpoint that holds the entries
            'model_state_dict' and 'optimizer_state_dict'.
        target_device: torch.device the checkpoint tensors are mapped to and
            the model is moved to.

    Returns:
        A (model, optimizer) tuple: the model is on `target_device` and in
        eval mode; the optimizer is an Adam instance with its saved state
        restored.
    """
    # Map the stored tensors straight onto the device used for inference.
    checkpoint = torch.load(checkpoint_path, map_location=target_device)

    # Create a fresh instance and place it on the device before loading weights.
    model = model_cls(img_ch=3, output_ch=1)
    model.to(target_device)
    model.load_state_dict(checkpoint['model_state_dict'])

    # The optimizer is not used for inference; it is restored so that the
    # optimizer_* names remain available, e.g. for resuming training.
    optimizer = torch.optim.Adam(model.parameters())
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Switch to evaluation mode once, after all state has been restored.
    model.eval()
    return model, optimizer


################# Load EU-Net
model_e, optimizer_e = _load_trained_model(models.EdgeU1_Net, 'eunet_trained2.pth', device)

################# Load AU-Net
model_a, optimizer_a = _load_trained_model(models.AttU_Net, 'aunet_trained2.pth', device)

################# Load U-Net
model_u, optimizer_u = _load_trained_model(models.U_Net, 'unet_trained2.pth', device)

# Final expression of the cell: displays the U-Net architecture as cell output.
model_u

U_Net(
  (Maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv1): conv_block(
    (conv): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (Conv2): conv_block(
    (conv): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )

# Loading Test dataset and preparing DataLoader

In [13]:
# Read the serialized test split from disk.
# NOTE(review): the file appears to hold a whole pickled dataset object rather
# than plain tensors -- only load it from a trusted source, and confirm whether
# the installed PyTorch version needs an explicit `weights_only` setting here.
test_dataset = torch.load(f='testc_dataset.pth')

# Wrap the dataset in a loader that yields batches of 8 samples.
test_loader = DataLoader(dataset=test_dataset, batch_size=8)

### Running U-Net on test data and evaluating metrics

In [14]:
# Evaluate U-Net on the test set and report the sample-weighted mean of each metric.
model_u.to(device)
model_u.eval()  # idempotent; guarantees inference mode even if this cell is run on its own

# Display label -> metric function. Every function is called as fn(preds, labels).
metric_fns = {
    "Sensitivity": evalmetrics.get_sensitivity,
    "Specificity": evalmetrics.get_specificity,
    "Precision": evalmetrics.get_precision,
    "F1 Score": evalmetrics.get_F1,
    "Jaccard Similarity": evalmetrics.get_JS,
    "Dice Coefficient": evalmetrics.get_DC,
}

# Running sum of (batch metric * batch size) for each metric.
metric_totals = {metric_name: 0 for metric_name in metric_fns}

# No gradients are needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for data, labels in test_loader:
        # Perform inference
        data = data.to(device)
        labels = labels.to(device)
        preds = model_u(data)

        # Weight each batch score by its size so that a smaller final batch
        # contributes proportionally to the overall mean.
        batch_size = len(data)
        for metric_name, metric_fn in metric_fns.items():
            metric_totals[metric_name] += metric_fn(preds, labels) * batch_size

# Compute the average of each metric over all test samples
num_samples = len(test_loader.dataset)
metric_avgs = {
    metric_name: total / num_samples
    for metric_name, total in metric_totals.items()
}

# Keep the per-metric names that the comparison table reads later on.
sensitivity_avg_u = metric_avgs["Sensitivity"]
specificity_avg_u = metric_avgs["Specificity"]
precision_avg_u = metric_avgs["Precision"]
F1_avg_u = metric_avgs["F1 Score"]
JS_avg_u = metric_avgs["Jaccard Similarity"]
DC_avg_u = metric_avgs["Dice Coefficient"]

# Print the average of each metric
for metric_name, metric_avg in metric_avgs.items():
    print("Average {}: {:.4f}".format(metric_name, metric_avg))

Average Sensitivity: 0.8886
Average Specificity: 0.9993
Average Precision: 0.9888
Average F1 Score: 0.9186
Average Jaccard Similarity: 0.8822
Average Dice Coefficient: 0.9186


### Running AU-Net on test data and evaluating metrics

In [15]:
# Evaluate AU-Net on the test set and report the sample-weighted mean of each metric.
model_a.to(device)
model_a.eval()  # idempotent; guarantees inference mode even if this cell is run on its own

# Display label -> metric function. Every function is called as fn(preds, labels).
metric_fns = {
    "Sensitivity": evalmetrics.get_sensitivity,
    "Specificity": evalmetrics.get_specificity,
    "Precision": evalmetrics.get_precision,
    "F1 Score": evalmetrics.get_F1,
    "Jaccard Similarity": evalmetrics.get_JS,
    "Dice Coefficient": evalmetrics.get_DC,
}

# Running sum of (batch metric * batch size) for each metric.
metric_totals = {metric_name: 0 for metric_name in metric_fns}

# No gradients are needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for data, labels in test_loader:
        # Perform inference
        data = data.to(device)
        labels = labels.to(device)
        preds = model_a(data)

        # Weight each batch score by its size so that a smaller final batch
        # contributes proportionally to the overall mean.
        batch_size = len(data)
        for metric_name, metric_fn in metric_fns.items():
            metric_totals[metric_name] += metric_fn(preds, labels) * batch_size

# Compute the average of each metric over all test samples
num_samples = len(test_loader.dataset)
metric_avgs = {
    metric_name: total / num_samples
    for metric_name, total in metric_totals.items()
}

# Keep the per-metric names that the comparison table reads later on.
sensitivity_avg_a = metric_avgs["Sensitivity"]
specificity_avg_a = metric_avgs["Specificity"]
precision_avg_a = metric_avgs["Precision"]
F1_avg_a = metric_avgs["F1 Score"]
JS_avg_a = metric_avgs["Jaccard Similarity"]
DC_avg_a = metric_avgs["Dice Coefficient"]

# Print the average of each metric
for metric_name, metric_avg in metric_avgs.items():
    print("Average {}: {:.4f}".format(metric_name, metric_avg))


Average Sensitivity: 0.9160
Average Specificity: 0.9977
Average Precision: 0.9537
Average F1 Score: 0.9293
Average Jaccard Similarity: 0.8957
Average Dice Coefficient: 0.9293


### Running EU-Net on test data and evaluating metrics

In [16]:
# Evaluate EU-Net on the test set and report the sample-weighted mean of each metric.
model_e.to(device)
model_e.eval()  # idempotent; guarantees inference mode even if this cell is run on its own

# Display label -> metric function. Every function is called as fn(preds, labels).
metric_fns = {
    "Sensitivity": evalmetrics.get_sensitivity,
    "Specificity": evalmetrics.get_specificity,
    "Precision": evalmetrics.get_precision,
    "F1 Score": evalmetrics.get_F1,
    "Jaccard Similarity": evalmetrics.get_JS,
    "Dice Coefficient": evalmetrics.get_DC,
}

# Running sum of (batch metric * batch size) for each metric.
metric_totals = {metric_name: 0 for metric_name in metric_fns}

# No gradients are needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for data, labels in test_loader:
        # Perform inference
        data = data.to(device)
        labels = labels.to(device)
        preds = model_e(data)

        # Weight each batch score by its size so that a smaller final batch
        # contributes proportionally to the overall mean.
        batch_size = len(data)
        for metric_name, metric_fn in metric_fns.items():
            metric_totals[metric_name] += metric_fn(preds, labels) * batch_size

# Compute the average of each metric over all test samples
num_samples = len(test_loader.dataset)
metric_avgs = {
    metric_name: total / num_samples
    for metric_name, total in metric_totals.items()
}

# Keep the per-metric names that the comparison table reads later on.
sensitivity_avg_e = metric_avgs["Sensitivity"]
specificity_avg_e = metric_avgs["Specificity"]
precision_avg_e = metric_avgs["Precision"]
F1_avg_e = metric_avgs["F1 Score"]
JS_avg_e = metric_avgs["Jaccard Similarity"]
DC_avg_e = metric_avgs["Dice Coefficient"]

# Print the average of each metric
for metric_name, metric_avg in metric_avgs.items():
    print("Average {}: {:.4f}".format(metric_name, metric_avg))

Average Sensitivity: 0.9123
Average Specificity: 0.9987
Average Precision: 0.9788
Average F1 Score: 0.9358
Average Jaccard Similarity: 0.9004
Average Dice Coefficient: 0.9358


In [18]:
# Row labels of the comparison table, in the order the averages are stored below.
metric_names = [
    'Sensitivity',
    'Specificity',
    'Precision',
    'F1 Score',
    'Jaccard Similarity',
    'Dice Coefficient',
]

# Column labels of the comparison table.
model_names = ['model_u', 'model_a', 'model_e']

# Averaged test metrics per model; each list follows the order of metric_names.
model_metrics = {
    'model_u': [sensitivity_avg_u, specificity_avg_u, precision_avg_u, F1_avg_u, JS_avg_u, DC_avg_u],
    'model_a': [sensitivity_avg_a, specificity_avg_a, precision_avg_a, F1_avg_a, JS_avg_a, DC_avg_a],
    'model_e': [sensitivity_avg_e, specificity_avg_e, precision_avg_e, F1_avg_e, JS_avg_e, DC_avg_e],
}

# Fixed-width layouts: one label column followed by one column per model.
header_layout = "{:<20}{:<20}{:<20}{:<20}"
row_layout = "{:<20}{:<20.4f}{:<20.4f}{:<20.4f}"

# Header row: empty corner cell, then the model names.
print(header_layout.format('', *model_names))

# One row per metric: zip pairs each metric name with that metric's value from every model.
per_model_columns = [model_metrics[model_name] for model_name in model_names]
for metric_name, *metric_values in zip(metric_names, *per_model_columns):
    print(row_layout.format(metric_name, *metric_values))


                    model_u             model_a             model_e             
Sensitivity         0.8886              0.9160              0.9123              
Specificity         0.9993              0.9977              0.9987              
Precision           0.9888              0.9537              0.9788              
F1 Score            0.9186              0.9293              0.9358              
Jaccard Similarity  0.8822              0.8957              0.9004              
Dice Coefficient    0.9186              0.9293              0.9358              
