# Libraries

In [1]:
from pathlib import Path
from metadata import ImageDataset, patient
import utils
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import SimpleITK as sitk

In [2]:
# Resolve the project layout from the current working directory.
# NOTE(review): assumes the kernel was started inside the `notebooks` folder, so
# that its parent is the repository root -- confirm before running from elsewhere.
notebooks_path = Path.cwd()
# every data path used further down is built relative to this directory
repo_path = notebooks_path.parent
print(f'The current directory is: {notebooks_path}')

The current directory is: /home/ricardino/Documents/MAIA/tercer_semestre/MISA/final_project/MISA_FINAL_PROJECT/notebooks


# MAIN

We now have the propagated labels, as well as a metric for the similarity between the registered images.<br>
We can now build several versions of the atlas.
We start with the most common, the **probabilistic atlas**.

## Probabilistic mean atlas

- All atlases (labels) are summed up and divided by the number of images. This is basically weighted voting with the same weight for all images (1/n).

In [3]:
#Probabilistic mean atlas: for each validation patient, every propagated training
#label casts one equally weighted vote per voxel; the final label is the argmax of the votes.
im_data_val = ImageDataset(set_name='Validation')
im_data_train = ImageDataset(set_name='Training')
#per-patient metric frames, concatenated once after the loop
#(avoids re-copying the growing frame with pd.concat at every iteration)
metrics_per_patient = []

for id_val in im_data_val.IDs:
    #instantiate patient
    pat_val = patient(id_val, im_data_val)
    #one vote map per label value (0..3); channel 0 is never accumulated and stays at zero
    accumulated_label = np.zeros((4,) + pat_val.im(format='np').shape)
    for id_train in im_data_train.IDs:
        moved_label_path = repo_path / 'data'/'voxelmorph'/f'moved_labels_{id_train}_to_{id_val}.nii.gz'
        moved_label = utils.getArrayfromPath(moved_label_path)
        #accumulate per tissue (labels 1, 2 and 3)
        for tissue in range(1,4):
            accumulated_label[tissue] += (moved_label==tissue)
    #turn the vote counts into fractions
    #(presumably im_data_train.len is the number of training images -- confirm in metadata.py)
    accumulated_label /= im_data_train.len

    #argmax over the label axis gives the final mean atlas label;
    #voxels without any tissue vote resolve to 0 because argmax returns the first maximum
    mean_label = np.argmax(accumulated_label, axis=0)
    #compute the metrics
    df_metrics = utils.compute_metrics(mean_label, pat_val, id_val)
    metrics_per_patient.append(df_metrics)

#single concatenation of all per-patient metrics (raises ValueError if there was no validation patient)
df_mean_atlas = pd.concat(metrics_per_patient, axis=0)
#save csv, creating the results folder on a fresh checkout
results_dir = repo_path / 'data'/'results'
results_dir.mkdir(parents=True, exist_ok=True)
df_mean_atlas.to_csv(results_dir / 'mean_atlas_metrics.csv', index=False)

## Weighted atlas

- Similar to the previous one, but the weights are not the same for all images. The weight of each image is the absolute value of the similarity metric between the registered image and the target (validation) image, normalised by the sum over all images.

In [4]:
#Weighted atlas: each propagated training label votes with a weight equal to its
#absolute similarity metric divided by the sum of all absolute metrics of that validation patient.
im_data_val = ImageDataset(set_name='Validation')
im_data_train = ImageDataset(set_name='Training')
#per-patient metric frames, concatenated once after the loop
metrics_per_patient = []

for id_val in im_data_val.IDs:
    #instantiate patient
    pat_val = patient(id_val, im_data_val)
    #one weighted vote map per label value (0..3); channel 0 is never accumulated and stays at zero
    weighted_atlas = np.zeros((4,) + pat_val.im(format='np').shape)

    #similarity metrics of the training images with respect to this validation patient
    df_mostSimilar = pd.read_csv(repo_path / 'data'/'results'/'most_similar'/ f'most_similar_{id_val}.csv')
    #absolute metric values are used as (unnormalised) weights
    abs_metric = df_mostSimilar['metric'].abs()
    sigma = abs_metric.sum()
    #a zero sum would turn every weight into NaN and argmax would then silently label all voxels as tissue 1
    if not sigma > 0:
        raise ValueError(f'Similarity metrics of validation patient {id_val} sum to {sigma}; the weights cannot be normalised')

    for id_train in im_data_train.IDs:
        moved_label_path = repo_path / 'data'/'voxelmorph'/f'moved_labels_{id_train}_to_{id_val}.nii.gz'
        moved_label = utils.getArrayfromPath(moved_label_path)
        #rows of this training image; the csv is read without a dtype, so ids are compared as integers
        is_current = df_mostSimilar['id_train']==int(id_train)
        if not is_current.any():
            raise KeyError(f'Training id {id_train} not found in most_similar_{id_val}.csv')
        #get similarity metric value (first match, as before)
        simMetric = abs_metric[is_current].iloc[0]
        #normalised weight, computed once per training image instead of once per tissue
        weight = simMetric/sigma
        #accumulate per tissue (labels 1, 2 and 3)
        for tissue in range(1,4):
            weighted_atlas[tissue] += (moved_label==tissue)*weight #weighting by the similarity metric

    #get argmax of the weighted atlas
    weighted_label = np.argmax(weighted_atlas, axis=0)
    #compute the metrics
    df_metrics = utils.compute_metrics(weighted_label, pat_val, id_val)
    metrics_per_patient.append(df_metrics)

#single concatenation of all per-patient metrics (raises ValueError if there was no validation patient)
df_weightedAtlas = pd.concat(metrics_per_patient, axis=0)
#save as csv, creating the results folder on a fresh checkout
results_dir = repo_path / 'data'/'results'
results_dir.mkdir(parents=True, exist_ok=True)
df_weightedAtlas.to_csv(results_dir / 'weighted_labels_metrics.csv', index=False)

## Top atlases
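- Now only the atlases with the highest similarity to the target image are used (the first rows of the most-similar table, assumed to be sorted by similarity). This is basically like the mean atlas, but the last selected atlas is dropped when the similarity metric differs by more than 0.05 between consecutive selected atlases.
    - Additionally, the max number of atlases can be set (3 by default).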

In [6]:
#Top atlases: same equal-weight voting as the mean atlas, but only the first rows of the
#most-similar table of each validation patient are allowed to vote.
MAX_ATLASES = 3        #maximum number of atlases that vote
MAX_METRIC_GAP = 0.05  #largest tolerated jump between consecutive metric values

im_data_val = ImageDataset(set_name='Validation')
#NOTE(review): im_data_train is not used in this cell (training ids come from the csv); kept for parity with the other cells
im_data_train = ImageDataset(set_name='Training')
#per-patient metric frames, concatenated once after the loop
metrics_per_patient = []

#output folders, created up front so a fresh checkout does not fail at the first save
atlas_dir = repo_path / 'data'/'atlas_data'
seg_dir = repo_path / 'data'/'segmentations'
results_dir = repo_path / 'data'/'results'
for out_dir in (atlas_dir, seg_dir, results_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

for id_val in im_data_val.IDs:
    #instantiate patient
    pat_val = patient(id_val, im_data_val)
    #one vote map per label value (0..3); channel 0 is never accumulated and stays at zero
    accumulated_label = np.zeros((4,) + pat_val.im(format='np').shape)
    #ids are read as strings because they are interpolated verbatim into the moved-label file names
    df_mostSimilar = pd.read_csv(repo_path / 'data'/'results'/'most_similar'/ f'most_similar_{id_val}.csv', dtype={'id_train': str})
    #keep the first MAX_ATLASES rows (presumably the csv is sorted by decreasing similarity -- confirm where it is written)
    df_mostSimilar = df_mostSimilar.iloc[:MAX_ATLASES]
    #drop the last kept row when any consecutive metric difference exceeds MAX_METRIC_GAP
    #NOTE(review): only the last row is removed, even if the large gap lies between the first two rows
    if df_mostSimilar['metric'].diff().abs().max() > MAX_METRIC_GAP:
        df_mostSimilar = df_mostSimilar.iloc[:-1]
    #without any atlas the division below would be 0/0 and NaN atlases would be written to disk
    if df_mostSimilar.empty:
        raise ValueError(f'No atlas selected for validation patient {id_val}')

    for id_train in df_mostSimilar['id_train']:
        moved_label_path = repo_path / 'data'/'voxelmorph'/f'moved_labels_{id_train}_to_{id_val}.nii.gz'
        moved_label = utils.getArrayfromPath(moved_label_path)
        #accumulate per tissue (labels 1, 2 and 3)
        for tissue in range(1,4):
            accumulated_label[tissue] += (moved_label==tissue)
    #divide by the number of atlases that voted
    accumulated_label /= len(df_mostSimilar)
    #save the accumulated label (top probabilistic atlas) as nifti;
    #files are numbered 0..2 while the maps live in channels 1..3
    for tissue in range(3):
        filename = str(atlas_dir / f'top_prob_atlas_{id_val}_{tissue}.nii.gz')
        utils.save_as_nifti(accumulated_label[tissue+1], filename, pat_val.labels_path, dtype=np.float32)

    #argmax over the label axis gives the final top-atlases label
    top_atlases_label = np.argmax(accumulated_label, axis=0)
    #compute the metrics
    df_metrics = utils.compute_metrics(top_atlases_label, pat_val, id_val)
    metrics_per_patient.append(df_metrics)

    #save segmentation as nifti
    filename = str(seg_dir / f'top_atlases_{id_val}_seg.nii.gz')
    reference_path = pat_val.labels_path
    utils.save_as_nifti(top_atlases_label, filename, reference_path)

#single concatenation of all per-patient metrics (raises ValueError if there was no validation patient)
df_top_atlases = pd.concat(metrics_per_patient, axis=0)
#save as csv
df_top_atlases.to_csv(results_dir / 'top_atlases_metrics.csv', index=False)

## Bayesian atlas
- Finally, we can combine the probabilistic atlas and the tissue model to obtain a combination of both intensity and spatial information.
    - The probabilistic atlas that we choose is the top probabilistic atlas (using only the information of the top 3 atlases), as we consider it more reliable than the mean atlas.
    - The tissue model is the one that we obtained in another notebook.

In [None]:
#Bayesian atlas: for each validation patient, multiply voxel-wise the tissue-model
#probability map and the top probabilistic atlas map of every tissue, then take the argmax.
#The input maps are read from data/atlas_data; the top_prob_atlas files are the ones
#written by the "Top atlases" cell.
im_data_val = ImageDataset(set_name='Validation')
#NOTE(review): im_data_train is not used in this cell; kept for parity with the other cells
im_data_train = ImageDataset(set_name='Training')
#per-patient metric frames, concatenated once after the loop
metrics_per_patient = []

#folders for inputs/outputs; output folders are created up front so a fresh checkout does not fail when saving
atlas_dir = repo_path / 'data'/'atlas_data'
seg_dir = repo_path / 'data'/'segmentations'
results_dir = repo_path / 'data'/'results'
for out_dir in (seg_dir, results_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

for id_val in im_data_val.IDs:
    #instantiate patient
    pat_val = patient(id_val, im_data_val)
    #one map per label value (0..3); channel 0 is never filled and stays at zero
    bayesian_atlas = np.zeros((4,) + pat_val.labels(format='np').shape)
    #files are numbered 0..2 while the maps are stored in channels 1..3
    for tissue_num in range(3):
        #get paths
        TModel_prob_path = atlas_dir / f'TModel_prob_{id_val}_{tissue_num}.nii.gz'
        prob_atlas_path = atlas_dir / f'top_prob_atlas_{id_val}_{tissue_num}.nii.gz'
        #get arrays
        TModel_prob = utils.getArrayfromPath(TModel_prob_path, dtype=np.float32)
        prob_atlas = utils.getArrayfromPath(prob_atlas_path, dtype=np.float32)
        #combine both sources with a voxel-wise product
        bayesian_atlas[tissue_num+1] = TModel_prob*prob_atlas
    #argmax over the label axis gives the final bayesian label;
    #voxels where every product is zero resolve to 0 because argmax returns the first maximum
    bayesian_label = np.argmax(bayesian_atlas, axis=0)

    #compute the metrics
    df_metrics = utils.compute_metrics(bayesian_label, pat_val, id_val)
    metrics_per_patient.append(df_metrics)

    #save segmentation as nifti
    filename = str(seg_dir / f'bayesian_{id_val}_seg.nii.gz')
    reference_path = pat_val.labels_path
    utils.save_as_nifti(bayesian_label, filename, reference_path)

#single concatenation of all per-patient metrics (raises ValueError if there was no validation patient)
df_bayesian = pd.concat(metrics_per_patient, axis=0)
#save as csv
df_bayesian.to_csv(results_dir / 'bayesian_metrics.csv', index=False)