# CryoVesNet semi-automatic pipeline
Silence TensorFlow's many log messages and select a GPU that is hopefully not in use.


In [None]:
import warnings
import os

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Both variables are set before the cryovesnet import in the next cell:
#   TF_CPP_MIN_LOG_LEVEL   -> "2" (reduces TensorFlow log output, per the note above)
#   CUDA_VISIBLE_DEVICES   -> "3" (hard-coded GPU index; adjust for your machine)
os.environ.update({
    "TF_CPP_MIN_LOG_LEVEL": "2",
    "CUDA_VISIBLE_DEVICES": "3",
})

In [None]:
import cryovesnet
import numpy as np
import napari
import tqdm
import skimage
import matplotlib.pyplot as plt

In [None]:
# Directory of the dataset to process; passed to cryovesnet.Pipeline below.
# (The original repeated the assignment target twice; a single assignment is enough.)
dataset_directory = "/home/bzuber/projects/cryovesnet/102_4e_trimmed/"

In [None]:
# Create the pipeline object for the dataset directory defined above; every
# later cell operates on this `pl` object.
pl = cryovesnet.Pipeline(dataset_directory)
# NOTE(review): presumably creates/prepares the cryovesnet working directory
# inside the dataset directory — confirm against the cryovesnet package.
pl.setup_cryovesnet_dir()


## Run the deep vesicle? 
If you haven't run the next cell for this dataset yet, run it now (you'll need a GPU);

otherwise skip it and continue with the cell after it.

In [None]:
# Deep-network step: per the text above it needs a GPU and only has to be run
# once per dataset.
# NOTE(review): presumably runs the prediction at several rescale factors — confirm.
pl.run_deep_at_multiple_rescale()

In [None]:
# The threshold scan below reads pl.deep_mask.
# NOTE(review): presumably this call loads the stored 'deep_mask' array into
# that attribute — confirm against the cryovesnet package.
pl.set_array('deep_mask')

## label statistics
We can select the best initial threshold for making spheres based on statistics

In [None]:
# Scan candidate thresholds on the deep mask and record, for each threshold,
# a size estimate for every connected component found above that threshold.
#
# The thresholds are built with np.linspace and rounded to two decimals so the
# dictionary keys are exactly 0.90, 0.91, ..., 0.99. np.arange with a float
# step produces drifted keys (e.g. 0.9400000000000001) that end up in the plot
# legends, and its number of elements is sensitive to floating-point rounding.
diameter_dict = {}
for threshold in tqdm.tqdm(np.round(np.linspace(0.90, 0.99, num=10), 2)):
    # Connected-component labelling of the thresholded mask.
    image_label = skimage.morphology.label(pl.deep_mask > threshold)
    # One bounding-box size (in voxels) per labelled component.
    area = skimage.measure.regionprops_table(image_label, properties=('bbox_area',))['bbox_area']
    # Cube root of the bounding-box size as a diameter estimate
    # (assumes a 3-D mask — TODO confirm).
    # NOTE(review): the result is divided by pl.voxel_size; confirm this is the
    # intended unit conversion.
    diameter_dict[threshold] = area**(1/3)/pl.voxel_size

In [None]:
# Histogram of the estimated diameters, one series per scanned threshold.
fig = plt.figure(figsize=(10, 6))
fig.suptitle("diameter distribution")
thresholds = list(diameter_dict)
diameters = [diameter_dict[key] for key in thresholds]
_ = plt.hist(diameters, label=thresholds, bins=5)
_ = plt.legend()

In [None]:
# Number of connected components found at each scanned threshold.
fig = plt.figure(figsize=(10, 6))
fig.suptitle("number of labels")
plt.xlabel("threshold")
plt.ylabel("number of labels")
plt.grid()
plt.scatter(
    x=list(diameter_dict),
    y=[len(diameters) for diameters in diameter_dict.values()],
)

## How to choose the optimal starting threshold
The higher the threshold, the fewer the false positives but the more the false negatives. 
There is no single best threshold. 

On our test dataset, a threshold of 0.96 found most of the vesicles and gave only few false positives.

In [None]:
# Label vesicles using the starting threshold chosen from the statistics above
# (0.96 is the value the notebook text reports as working well on the test dataset).
pl.label_vesicles_simply(threshold=0.96)

In [None]:
# Build the spheres; according to the notebook text further down, computing
# the sphere dataframe is one of the steps of make_spheres().
pl.make_spheres()

### optional visualization of deep_labels and sphere_labels

In [None]:
# Optional: compare the 'deep_labels' and 'sphere_labels' arrays visually.
# NOTE(review): presumably opens an interactive viewer — confirm.
pl.visualization_old_new('deep_labels','sphere_labels')

### Outlier detection
Detect outliers using multivariate statistics on 3 parameters of the detected spheres. We get a list of the most deviating labels (those that have a Mahalanobis distance larger than min_mahalanobis_distance). So far the default min_mahalanobis_distance of 2.0 has worked well. If the red line on the plot does not include enough outliers to your liking, then use a lower value (optional parameter of pl.identify_spheres_outliers).

In [None]:
# Histograms of the sphere dataframe columns
# (assumes pl.sphere_df is a pandas DataFrame — TODO confirm).
pl.sphere_df.hist()

In [None]:
# Flag spheres whose Mahalanobis distance exceeds the given minimum; lower the
# value to flag more spheres (see the text above).
pl.identify_spheres_outliers(min_mahalanobis_distance=2)

In [None]:
# First round of interactive correction of the flagged spheres.
# NOTE(review): called without an argument here; the result appears to be the
# 'mancorr_labels' array used in the following cells — confirm.
pl.fix_spheres_interactively()

In [None]:
# Compare the 'sphere_labels' array with the 'mancorr_labels' array.
pl.visualization_old_new('sphere_labels','mancorr_labels')

It is recommended to compute a new sphere dataframe (which is one of the steps of make_spheres()) in order to identify remaining outliers.

In [None]:
# Recompute the sphere dataframe from 'mancorr_labels' so that the next
# outlier detection works on the corrected labels.
pl.compute_sphere_dataframe('mancorr_labels')

In [None]:
# Histograms of the recomputed sphere dataframe
# (assumes pl.sphere_df is a pandas DataFrame — TODO confirm).
pl.sphere_df.hist()

In [None]:
# Second outlier detection, using the default min_mahalanobis_distance
# (2.0 according to the notebook text).
pl.identify_spheres_outliers()

In [None]:
# Second round of interactive correction, this time passing 'mancorr_labels'.
pl.fix_spheres_interactively('mancorr_labels')

In [None]:
# Recompute the sphere dataframe from 'mancorr_labels' after the second round
# of corrections.
pl.compute_sphere_dataframe('mancorr_labels')

In [None]:
# Final outlier check with the default min_mahalanobis_distance, to see what
# is still flagged after the corrections.
pl.identify_spheres_outliers()

In [None]:
# Compare the 'sphere_labels' array with the final 'mancorr_labels' array.
pl.visualization_old_new('sphere_labels','mancorr_labels')

In [None]:
# Final steps of the pipeline.
# NOTE(review): presumably these write the full model file and the full label
# file, then prepare the data for Pyto — confirm against the cryovesnet package.
pl.make_full_modfile()
pl.make_full_label_file()
pl.initialize_pyto()