## Hierarchical DivNoising (HDN)

## Bootstrapping Noise Model

In [None]:
import torch
from hdn.lib.gaussianMixtureNoiseModel import GaussianMixtureNoiseModel
from hdn.lib import histNoiseModel
from hdn.lib.utils import plotProbabilityDistribution

import tifffile
import numpy as np
import matplotlib.pyplot as plt
import os
import argparse
import logging as log

import matplotlib.pyplot as plt
import matplotlib.animation as animation
from PIL import Image
from pathlib import Path
import numpy as np


# Parameters

experiment_name = "REFINED_HDN_1"

signal_folder = "/localscratch/calcium_imaging_dataset/calcium_imaging/refined/train"
denoised_folder = "output/REFINED_N2V_1/train"
# FIXME: Experiment is currently running using results folder instead of models
output_folder = f"models/{experiment_name}/"

device = "cuda"
n_coeff = 2
n_gaussian=3
gmm_epochs = 2000
histogram_bins = 256

In [2]:
# Original dataset
signal_tiff = list(Path(signal_folder).rglob("*.tif"))
denoised_tiff = list(Path(denoised_folder).rglob("*.tif"))
input_tiff = list()
# Ensure signal and denoised files are loaded together
for stiff in signal_tiff:
    dtiff = [t for t in list(denoised_tiff) if t.name == stiff.name][0]
    input_tiff.append((stiff, dtiff))

signal = []
denoised = []

for tsig, tden in zip(signal_tiff, denoised_tiff):
    signal.append(tifffile.imread(tsig).flatten())
    denoised.append(tifffile.imread(tden).flatten())
signal = np.concatenate(signal, axis=0)
denoised = np.concatenate(denoised, axis=0)
minval, maxval = signal.min(), signal.max()
signal.shape, denoised.shape

((2116026368,), (2116026368,))

Picking 20% of the pixels to tweak the GMM noise model.

In [3]:
random_perc = 0.20
random_indices = np.random.choice(signal.shape[0], int(signal.shape[0]*random_perc), replace=False)
signal = signal[random_indices]
denoised = denoised[random_indices]

In [4]:
signal.shape, denoised.shape

((423205273,), (423205273,))

Training a GMM model with 3 components on the sampled pixels.

In [6]:
histogram = histNoiseModel.createHistogram(bins=histogram_bins, 
                                           minVal=minval, 
                                           maxVal=maxval, 
                                           observation=denoised, 
                                           signal=signal)
histogramFD = histogram[0]


KeyboardInterrupt: 

In [None]:
# Create output folder and save histogram
Path(output_folder).joinpath("noise_model").mkdir(exist_ok=True, parents=True)
np.save(str(Path(output_folder).joinpath("noise_model").joinpath("histogram.npy")), histogram)

gaussianMixtureNoiseModel = GaussianMixtureNoiseModel(min_signal = minval, max_signal = maxval, path=str(Path(output_folder).joinpath("noise_model"))+'/', weight = None, n_gaussian = n_gaussian, n_coeff = n_coeff, device = device, min_sigma = 50)


In [None]:
# Train GMM
gaussianMixtureNoiseModel.train(signal, denoised, batchSize = 250000, n_epochs = gmm_epochs, learning_rate = 0.1, name = 'GMM', lowerClip = 0.1, upperClip = 99.9)

0 3.641833543777466
100 3.60882830619812

The trained parameters (GMM) is saved at location: models/REFINED_HDN_1//


In [None]:
!pip install -U ipywidgets

In [12]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from ipywidgets import interact, FloatSlider

def plot_gmm_likelihood(gaussianMixtureNoiseModel, min_signal, max_signal, n_bin, device):
    """
    Interactive function to analyze the GMM likelihood for various signal values.
    Uses ipywidgets for dynamic interaction.
    
    Args:
        gaussianMixtureNoiseModel: A GMM with a `likelihood(observations, signal)` method.
        min_signal (float): Minimum signal value.
        max_signal (float): Maximum signal value.
        n_bin (int): Number of bins for discretizing the signal range.
        device (torch.device): Device for tensor computations (CPU/GPU).
    """
    def update(signal_value):
        # Discretize the observation range
        bin_size = (max_signal - min_signal) / n_bin
        observation_values = np.arange(min_signal, max_signal, bin_size) + bin_size / 2
        observations_torch = torch.from_numpy(observation_values).float().to(device)
        
        # Convert the signal value to tensor
        signal_torch = torch.tensor(signal_value, dtype=torch.float32).to(device)
        
        # Compute likelihood from the GMM
        likelihood_torch = gaussianMixtureNoiseModel.likelihood(observations_torch, signal_torch)
        likelihood_numpy = likelihood_torch.cpu().detach().numpy()
        
        # Plot the likelihood
        plt.figure(figsize=(10, 5))
        plt.plot(observation_values, likelihood_numpy, label=f'GMM Likelihood (s = {signal_value:.2f})', color='red', linewidth=2)
        plt.ylim(0, 1)  # Fix y-axis scale
        plt.xlabel('Observation (x)')
        plt.ylabel('Probability Density')
        plt.title(f'Probability Distribution P(x|s) for Signal s = {signal_value:.2f}')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    # Create interactive slider for signal value
    interact(update, signal_value=FloatSlider(value=(min_signal + max_signal) / 2, 
                                              min=min_signal, max=max_signal, step=(max_signal - min_signal) / 100))

plot_gmm_likelihood(gaussianMixtureNoiseModel, minval, maxval, 100, device)

interactive(children=(FloatSlider(value=881.0, description='signal_value', max=1047.0, min=715.0, step=3.32), …

In [7]:
# Load the GMM model
gaussianMixtureNoiseModel = GaussianMixtureNoiseModel(min_signal = minval, 
                                                      max_signal = maxval, 
                                                      path=output_folder+'/', 
                                                      weight = None, 
                                                      n_gaussian = n_gaussian,
                                                      n_coeff = n_coeff,
                                                      device = device, 
                                                      min_sigma = 50, 
                                                      params=np.load(output_folder+'GMM.npz', allow_pickle=True))



In [6]:
gaussianMixtureNoiseModel.likelihood()

NpzFile 'models/REFINED_HDN_1/GMM.npz' with keys: trained_weight, min_signal, max_signal, min_sigma

In [None]:
histogram_bins = 250
histogram = histNoiseModel.createHistogram(bins=histogram_bins, minVal=minval, maxVal=maxval, observation=denoised, signal=signal)