In [1]:
# Import libraries and dependencies
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import Image
from torchvision.io import read_image

from script_util import create_model_diffusion
from SDE_datareduction import get_test_data, get_data
from main import IMAGE_DATASET_PATH, STRUCTURE_DATASET_PATH, BASE_OUTPUT

from SDE_utils import *
from SDE_test import mae, count_parameters

Script Dir C:\Users\tabor\Documents\Programming\MachineLearning\MachineLearningModels\SDE_conditioned


Create a model according to the config and print its number of parameters

In [2]:
# Build the model described by config.json on the CPU and report its parameter count
with open('config.json', "r", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Only the first value returned by create_model_diffusion (the model) is needed here
model, *_ = create_model_diffusion('cpu', **config)
parameters = count_parameters(model)
print(f"Parameters: {parameters}")

Parameters: 581958406


Create a comparison plot of MAE vs. train split from the results Excel file

In [None]:
# --- Configuration ---
# IMPORTANT: Replace this with the actual path to your Excel file
file_path = r"C:\Users\tabor\Documents\Studie\Bachelor\Jaar 4\BEP\Results\Results_journal_comparison.xlsx"
sheet_name = "Good Val"
output_filename = 'unet_mae_distribution_plot.png' # Name for the saved plot image

# --- Load Data ---
# On failure a readable message is printed and the exception is re-raised.
# Calling exit() inside a notebook asks the kernel to shut down, which throws
# away all state and hides the traceback; re-raising stops "Run All" at this
# cell while keeping the kernel alive.
try:
    df = pd.read_excel(file_path, sheet_name=sheet_name)
except FileNotFoundError:
    print(f"Error: File not found at '{file_path}'. Please check the path.")
    raise
except Exception as e:
    print(f"An error occurred while reading the Excel file: {e}")
    raise
else:
    print(f"Successfully loaded data from '{file_path}', sheet '{sheet_name}'.")

In [None]:
# --- Data Cleaning and Preparation ---
# Validation failures raise instead of calling exit(): in a notebook exit()
# asks the kernel to shut down, whereas an exception simply stops execution at
# this cell and leaves the loaded data available for inspection.

# Remove rows where 'Job ID' is missing (adjust column name if different)
if 'Job ID' in df.columns:
    df = df.dropna(subset=['Job ID'])
else:
    print("Warning: 'Job ID' column not found. Skipping dropna based on it.")

# Check if required columns exist
required_columns = ['Model', 'Attention Split', 'Train Split', 'MAE']
missing_cols = [col for col in required_columns if col not in df.columns]
if missing_cols:
    raise KeyError(f"Missing required columns: {', '.join(missing_cols)}. Cannot proceed.")

# Filter data for the 'UNet' model
# Using .copy() to avoid potential SettingWithCopyWarning later
unet_data = df[df['Model'] == 'UNet'].copy()

if unet_data.empty:
    raise ValueError("No data found for Model = 'UNet'. Cannot generate plot.")

In [None]:
# --- Plotting ---
# Draws, for every 'Attention Split' value in unet_data, the individual MAE
# measurements (faint crosses) and the mean MAE per 'Train Split' (solid line).
print("Generating plot...")

# Create a figure and a single axes object
fig, ax = plt.subplots(figsize=(12, 7)) # Single plot

# Get unique attention splits and assign colors.
# NaN is dropped first: it cannot be ordered reliably by sorted() and would
# never match the equality filter below anyway.
unique_attention_splits = sorted(unet_data['Attention Split'].dropna().unique())

# The first two colors match the original blue/orange scheme; the palette is
# indexed modulo its length so that any number of attention splits (one, two
# or more) gets a color instead of raising IndexError/KeyError.
palette = ['blue', 'orange', 'green', 'red', 'purple', 'brown']
color_map = {
    attention_split: palette[position % len(palette)]
    for position, attention_split in enumerate(unique_attention_splits)
}

# --- Plot data for each Attention Split group ---
for attention_split in unique_attention_splits:
    group = unet_data[unet_data['Attention Split'] == attention_split]
    label = f"Attention Split = {attention_split}"
    current_color = color_map[attention_split]

    # 1. Plot all individual MAE points as scatter plot
    ax.scatter(
        group['Train Split'],
        group['MAE'],
        marker='x',
        alpha=0.3,  # Transparency to show density
        s=25,       # Marker size
        color=current_color,
        label='_nolegend_' # Hide scatter points from the legend
    )

    # 2. Calculate and plot the average MAE line
    average_mae = group.groupby('Train Split')['MAE'].mean().reset_index()
    # Sort by 'Train Split' in descending order for plotting with inverted axis
    average_mae = average_mae.sort_values('Train Split', ascending=False)

    ax.plot(
        average_mae['Train Split'],
        average_mae['MAE'],
        marker='o',        # Marker style
        linestyle='-',     # Line style
        linewidth=2.5,     # Line thickness
        markersize=6,      # Marker size for average points
        color=current_color,
        label=label        # Label for the legend (only for the average line)
    )

# --- Customize Plot ---
ax.set_title('UNet: MAE vs Train Size (Average and Distribution)', fontsize=16, pad=15)
ax.set_xlabel('Train Size (% of total dataset)', fontsize=12)
ax.set_ylabel('MAE (Mean Absolute Error)', fontsize=12)

# Invert the x-axis (Train Size)
ax.invert_xaxis()

# Add grid lines for better readability
ax.grid(True, which='major', linestyle='--', linewidth=0.5, alpha=0.7)
ax.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)
ax.minorticks_on() # Enable minor ticks

# Add legend for the average lines
ax.legend(title="Average MAE per Attention Split", fontsize=10, title_fontsize=11)

# Adjust layout to prevent labels from overlapping
plt.tight_layout()

In [None]:
# --- Save and Show Plot ---
# Save through the Figure object created in the plotting cell instead of
# plt.savefig(). plt.savefig() writes pyplot's *current* figure, and the
# default inline notebook backend closes all figures at the end of each cell,
# so calling it from a separate cell saves a new, empty figure.
# Saving stays best-effort: a failure is reported but does not stop the notebook.
try:
    fig.savefig(output_filename, dpi=300, bbox_inches='tight')
    print(f"Plot saved successfully as '{output_filename}'.")
except Exception as e:
    print(f"Error saving plot: {e}")

# Display the plot (relevant for non-inline backends; the inline backend has
# already rendered the figure below the plotting cell)
plt.show()

Sample Images using an Existing Model

In [2]:
# Load config and establish paths for testing an existing model.
# Each result folder is read for its config.json, and the checkpoint path is
# built as <folder>/models/best_model.pth.
folder_path0 = r"C:\Users\tabor\Documents\Studie\Bachelor\Jaar 4\BEP\Results\Results_journal\SmallUNet_nblocks_2_noisesteps_250_smartsplit_False_3906"
folder_path1 = r"C:\Users\tabor\Documents\Studie\Bachelor\Jaar 4\BEP\Results\Results_journal\results\UNet_nblocks_2_noisesteps_250_smartsplit_False_2077"
folder_paths = [folder_path0, folder_path1]
model_paths = []
configs = []
for folder_path in folder_paths:
    # Pass the path components separately so os.path.join inserts the right
    # separator for the current OS; a backslash embedded in a single component
    # only works on Windows (where the resulting string is unchanged).
    model_paths.append(os.path.join(folder_path, "models", "best_model.pth"))
    with open(os.path.join(folder_path, "config.json"), "r", encoding="utf-8") as f:
        config = json.load(f)
        configs.append(config)

In [4]:
# Build the dataloaders and datasets once, using the first model's config,
# so that every model is sampled on the same data
config = configs[0]
(
    train_dataloader,
    val_dataloader,
    test_dataloader,
    train_dataset,
    val_dataset,
    test_dataset,
) = get_data(
    image_dataset_path=IMAGE_DATASET_PATH,
    structure_dataset_path=STRUCTURE_DATASET_PATH,
    **config
)

Operating System: Windows. Number of DataLoader workers set to: 0
Loading dataset from: Images='C:\Users\tabor\Documents\Programming\MachineLearning\MachineLearningModels\data\figure_B_maxrange_5000\Output', Structures='C:\Users\tabor\Documents\Programming\MachineLearning\MachineLearningModels\data\figure_B_maxrange_5000\Structure'
Total dataset size: 5000 items.
Validation set size: 500
Using random split for train/test...
Train set size: 3500
Test set size: 1000


In [5]:
# Get a batch and sample images with all models to test speed for each
input_images, label_images, *_ = next(iter(test_dataloader))
sample_count = 1 # Max value equal to value of batchsize

# Use the GPU when one is present and fall back to the CPU otherwise, so the
# cell does not crash on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for model_path, config in zip(model_paths, configs):
    print(f"Sampling for model {model_path}")
    model, diffusion = create_model_diffusion(device, **config)
    # map_location lets a checkpoint that was saved from a GPU be loaded on a
    # CPU-only machine as well.
    model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
    # NOTE: `samples` is overwritten on every iteration, so after the loop it
    # (like `model` and `diffusion`) refers to the last entry of model_paths.
    samples = diffusion.p_sample_loop(model, sample_count, label_images)

Sampling for model C:\Users\tabor\Documents\Studie\Bachelor\Jaar 4\BEP\Results\Results_journal\SmallUNet_nblocks_2_noisesteps_250_smartsplit_False_3906\models\best_model.pth


01:55:09 - INFO: Sampling 1 images
249it [00:06, 35.80it/s]


Sampling for model C:\Users\tabor\Documents\Studie\Bachelor\Jaar 4\BEP\Results\Results_journal\results\UNet_nblocks_2_noisesteps_250_smartsplit_False_2077\models\best_model.pth


01:55:17 - INFO: Sampling 1 images
249it [00:08, 31.04it/s]


In [None]:
# Noise an image for a set amount of steps and save the noisy, target and
# label images as PNG files.
# NOTE: `diffusion` is not created in this cell; it is whatever object the
# sampling loop in an earlier cell assigned last.
output_dir = "results"
# Image.save does not create missing directories, so make sure the output
# folder exists before writing into it.
os.makedirs(output_dir, exist_ok=True)

# Use the GPU when one is present and fall back to the CPU otherwise
device = 'cuda' if torch.cuda.is_available() else 'cpu'

target_image, label_image = test_dataset[11]  # fixed test sample used for the illustration
t = torch.tensor([50])                        # number of noising steps to apply
x_start = target_image.to(device)
print(x_start.shape)
# Only the first value returned by noise_images (the noised image) is used
noisy_image, *_ = diffusion.noise_images(x_start, t)

# tensor_to_PIL is given batched tensors for the target and label (hence the
# unsqueeze) and its result is indexed below — presumably a list of PIL
# images; confirm against SDE_utils.
noisy_image = tensor_to_PIL(noisy_image)
target_image = tensor_to_PIL(target_image.unsqueeze(0))
label_image = tensor_to_PIL(label_image.unsqueeze(0))

print(target_image[0].size)
noisy_image[0].save(os.path.join(output_dir, "noisy_image.png"))
target_image[0].save(os.path.join(output_dir, "target_image.png"))
label_image[0].save(os.path.join(output_dir, "label_image.png"))
