## Data Preparation

We use the data contained in the provided "Test Data" folder, as it has enough dynamic range to extract information.

The microscope ground noise was originally subtracted from the test data. We add it back to obtain the raw data.


In [None]:
import numpy as np
import tifffile
import seaborn as sns
from pathlib import Path
import matplotlib.pyplot as plt


# Root folder of the refined calcium-imaging dataset (machine-specific path).
DATASET_FOLDER = "/localscratch/calcium_imaging_dataset/calcium_imaging/refined"
datast_path = Path(DATASET_FOLDER)
# The average image is looked up one level above the dataset root.
average_image_path = datast_path / ".." / "average_image.tif"

In [None]:
APPLY_AVERAGE = False  # Safelock to avoid altering the dataset twice

if APPLY_AVERAGE:
    # Image that is added back to every stack of the dataset.
    average_img = tifffile.imread(average_image_path)

    # First stack as it was BEFORE the average image is added; kept so the
    # preview below can show the clean data next to the raw data.
    img = None

    # The file list is materialized before looping because the files are
    # rewritten in place; sorting makes the processing order deterministic.
    for im_path in sorted(datast_path.rglob("*.tif")):
        clean_stack = tifffile.imread(im_path)
        if img is None:
            img = clean_stack
        # NOTE(review): the sum follows NumPy type promotion, so the dtype
        # written to disk depends on the dtypes of both inputs - confirm this
        # matches what downstream code expects.
        tifffile.imwrite(im_path, clean_stack + average_img)
        print(f"Added average noise to {im_path}")

    if img is None:
        print(f"No .tif files found under {datast_path}; nothing to preview")
    else:
        frame_to_show = 10
        fig, ax = plt.subplots(1, 3, figsize=(18, 6))
        # "gray" is the canonical colormap name; the "grey" spelling is only
        # accepted by recent matplotlib releases.
        ax[0].imshow(average_img, cmap="gray")
        ax[0].set_title("Microscope Noise")
        ax[1].imshow(img[frame_to_show], cmap="gray")
        ax[1].set_title(f"Clean Test Data (Frame {frame_to_show})")
        ax[2].imshow(img[frame_to_show] + average_img, cmap="gray")
        ax[2].set_title(f"Raw Data (Frame {frame_to_show})")
        for a in ax:
            a.axis("off")

![](docs/imgs/test_data_example.png)

## Autocorrelation

In [None]:
import tifffile
import matplotlib.pyplot as plt
from careamics.utils import autocorrelation

# Plot the autocorrelation of a small central crop, one panel per training stack.
train_dataset_path = datast_path.joinpath("train")
train_imgs_path = sorted(train_dataset_path.rglob("*.tif"))
if not train_imgs_path:
    # plt.subplots cannot build a zero-column grid; fail with a clear message.
    raise FileNotFoundError(f"No .tif files found under {train_dataset_path}")

frame = 10
half_size = 16

# squeeze=False always returns a 2D array of Axes, even for a single image;
# ravel() turns it into the flat sequence the loop below iterates over.
fig, axes = plt.subplots(
    1,
    len(train_imgs_path),
    figsize=(6 * len(train_imgs_path), 6),
    squeeze=False,
)
axes = axes.ravel()

for ip, ax in zip(train_imgs_path, axes):
    img = tifffile.imread(ip)
    # The stack is unpacked as (frames, height, width).
    _, h, w = img.shape

    # Crop window of up to 2 * half_size pixels per side around the image
    # center, clipped to the image bounds.
    center_x = (w - 1) // 2
    center_y = (h - 1) // 2
    left_edge = max(0, center_x - half_size)
    right_edge = min(w, center_x + half_size)
    bottom_edge = max(0, center_y - half_size)
    top_edge = min(h, center_y + half_size)
    cropped_img = img[frame, bottom_edge:top_edge, left_edge:right_edge]

    # Compute autocorrelation at given frame
    ac = autocorrelation(image=cropped_img)

    ax.imshow(ac)
    ax.axis("off")
    ax.set_title(f"Autocorrelation around {half_size} px from the center. \n {ip.name}, frame {frame}")



# TRAINING Noise2Void

To train Noise2Void, we can use the script provided in this repository, which is based on CAREamics. Run the following command after changing the paths to match your dataset folders either in the provided .env_* file, or by specifying them as arguments:

```python n2v_train.py -e .env_hpc --level DEBUG --train_dataset_name=train --validation_dataset_name=val --DATASET_FOLDER=/scratch/edoardo.giacomello/calcium_imaging/refined --experiment_name=REFINED_N2V_1 --batch_size=1 --axes=TYX```

If needed, you can call the Python script with the -h flag to obtain a description of the arguments. This script has been written so that it can also run on headless machines (like an HPC).

After training, the trained model is stored in models/REFINED_N2V_1.

# Noise2Void Predictions

We now use CAREamics with the trained model to predict both the training and validation folds of the dataset.

In [None]:
from envutils import ENV, load_env, get_argparser, log

from pathlib import Path
import os
import tifffile
import matplotlib.pyplot as plt
import numpy as np

from careamics import CAREamist
import careamics.dataset.tiling as tiling
from careamics.prediction_utils import stitch_prediction_single
from typing import List

In [None]:

# Run the trained Noise2Void model over every stack of the train and val folds
# and write the denoised stacks to <output_folder>/<experiment_name>/<fold>/.

# Settings
dataset_folder = "/localscratch/calcium_imaging_dataset/calcium_imaging/refined"
models_folder = "./models"
output_folder = "./output"
experiment_name = "REFINED_N2V_1"
# NOTE(review): patch_size_z, patch_size, batch_size and axes are not read
# anywhere in this cell (prediction below runs frame by frame with axes="YX");
# they are kept in case other cells rely on them.
patch_size_z = None
patch_size = 64
batch_size = 16
axes = "TYX"
model_ckpt = 'last.ckpt'


# Variables and Paths
model_folder = os.path.join(models_folder, experiment_name)
ckpt_path = os.path.join(model_folder, 'checkpoints', model_ckpt)
output_path = os.path.join(output_folder, experiment_name)
os.makedirs(output_path, exist_ok=True)

# instantiate a CAREamist
careamist = CAREamist(
    ckpt_path,
    work_dir=model_folder,
)

for dataset_name in ['train', 'val']:
    # Predicts over files
    for tiff_path_in in sorted(Path(dataset_folder).joinpath(dataset_name).glob('*.tif*')):

        print(f"Predicting file {tiff_path_in}")
        tiff_in = tifffile.imread(tiff_path_in)

        # Predict one frame at a time and collect the results in order.
        tiff_out = []
        for frame in tiff_in:
            tiff_out.extend(careamist.predict(frame, data_type='array', axes="YX"))

        tiff_out = np.concatenate(tiff_out).squeeze()

        tiff_path_out = Path(output_path) / dataset_name / tiff_path_in.name
        print(f"Writing prediction to {tiff_path_out}")
        # Create the per-fold sub-folder; output_path alone is not enough,
        # since the file is written one level below it.
        tiff_path_out.parent.mkdir(parents=True, exist_ok=True)
        tifffile.imwrite(tiff_path_out, tiff_out)

Images are now stored in `output/REFINED_N2V_1/[train|val]/*.tif`. 

If we inspect the validation stack, we can appreciate that the uncorrelated noise has been greatly reduced:

![20ms_20%25_Yoda1_008, Frame 10](docs/imgs/n2v_20ms_20%25_Yoda1_008_10.png)

However, some correlated noise is still present.

In order to also remove correlated noise, we can feed the dataset into a second algorithm. \
Possible options could be:
- [HDN](https://github.com/juglab/HDN), a denoising algorithm that is able to remove correlated noise. However, it requires a Noise Model to be estimated.
- [COSDD](https://github.com/krulllab/COSDD), a denoising algorithm specific for horizontally / vertically correlated noise. One current limitation of this method is that only one direction of correlation can be removed at a time.
