In [10]:
import pandas as pd
import matplotlib
import comseg
import numpy as np
import random
import tifffile
import importlib
from comseg import dataset as ds
import scanpy
%matplotlib inline
import importlib
from pathlib import Path

In [11]:
#### HYPERPARAMETER ####
MEAN_CELL_DIAMETER = 15  # in micrometer
MAX_CELL_RADIUS = 50  # in micrometer
#########################

## here we present an extensive example of how to run ComSeg with 


          
config = {
    ### dataset initialisation
    "path_dataset_folder" : "/home/tom/Bureau/test_set_tutorial_comseg/small_df",
    "dict_scale" : {"x": 0.103, 'y': 0.103, "z": 0.3},
    "mean_cell_diameter" : MEAN_CELL_DIAMETER,
    "gene_column" : "gene",
    "path_to_mask_prior" : "/home/tom/Bureau/test_set_tutorial_comseg/mask",
    ### CO-EXPRESSION COMPUTATION
    "n_neighbors" : 40,
    "sampling" : True,
    "sampling_size": 10000,    
    ### KNN GRPAH 
    'k_nearest_neighbors': 10,
    'prior' : 'in_nucleus',
    ### IN SITU CLUSTERING
    'size_commu_min': 3,
    'norm_vector': True,
    'n_pcs': 4,
    'clustering_method': 'leiden',
    'n_neighbors': 20,
    'resolution': 1,
    'n_clusters_kmeans': 5,
    'nb_min_cluster': 1,
    'min_merge_correlation': 0.9,
    # RNA ASSIGMENT
    "max_cell_radius": MAX_CELL_RADIUS,
    ### final result 
    "alpha" :  0.5,
    "min_rna_per_cell" : 5,
    }

        :param k_nearest_neighbors: number of nearest neighbors to consider for the KNN graph creation, reduce K to speed computation
        :type k_nearest_neighbors: int
        :param max_cell_radius: maximum distance between a cell centroid and an RNA to be associated
        :type max_cell_radius: float
        :param size_commu_min: minimum number of RNA in a community to be considered for the clustering (default 3)
        :type size_commu_min: int
        :param norm_vector: if True, the expression vector will be normalized using the scTRANSFORM normalization parameters, the normaliztion requires the following R package : sctransform, feather, arrow
        The normalization is important to do on dataset with a high number of gene.
        :type norm_vector: bool
        :param n_pcs: number of principal component to compute for the clustering of the RNA communities expression vector; Lets 0 if no pca
        :type n_pcs: int
        :param clustering_method: choose in ["leiden", "kmeans", "louvain"]
        :type clustering_method: str
        :param n_neighbors: number of neighbors similarity graph of the RNA communities expression vector clustering
        :type n_neighbors: int
        :param resolution:  resolution paramter  for the in-situ-clustering step if louvain or leiden are used
        :type resolution: float
        :param n_clusters_kmeans: number of cluster for the kmeans clustering for ```clustering_method```= "kmeans"
        :type n_clusters_kmeans: int
        :param nb_min_cluster: minimum number of cluster to keep after the merge of the cluster
        :type nb_min_cluster: int
        :param min_merge_correlation: minimum correlation to merge cluster in the in situ clustering
        :type min_merge_correlation: float
        :param path_dataset_folder_centroid: path to the folder containing the centroid in a csv or dictionary {cell : {z:,y:,x:}} for each image, use the same scale than then input csv
        :type path_dataset_folder_centroid: str
        :param file_extension: file extension of the centroid dictionary (.npy) or csv file (.csv)
        :type file_extension: str
        :return:


In [14]:


dataset = ds.ComSegDataset(
    path_dataset_folder=config["path_dataset_folder"],
    dict_scale=config["dict_scale"],
    mean_cell_diameter=config["mean_cell_diameter"],
    gene_column=config["gene_column"],
    path_to_mask_prior=config["path_to_mask_prior"]
)

## if not already in the csv file 
dataset.add_prior_from_mask(prior_keys_name = 'in_nucleus', 
                            overwrite = True,
                            compute_centroid = True # compute cell centroid 
                           )
dataset.compute_edge_weight()

Comsegdict = dictionary.ComSegDict(
    dataset=dataset,
    mean_cell_diameter=config["mean_cell_diameter"],
    prior_name=SopaKeys.DEFAULT_CELL_KEY,
)

Comsegdict.run_all(config=config)


anndata_comseg, json_dict = Comsegdict.anndata_from_comseg_result(
    return_polygon=True, alpha=config["alpha"], min_rna_per_cell=config["min_rna_per_cell"]
)
anndata_comseg.write_h5ad(path_dataset_folder / "segmentation_counts.h5ad")
with open(path_dataset_folder / "segmentation_polygons.json", "w") as f:
    json.dump(json_dict["transcripts"], f)

add 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_006
add 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_004
add prior to 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_006
prior added to 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_006 and saved in csv file
dict_centroid added for 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_006 
add prior to 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_004
prior added to 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_004 and saved in csv file
dict_centroid added for 07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_004 


  0%|                                                                                                                                                                                                             | 0/2 [00:00<?, ?it/s]

image name :  07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_006


 50%|██████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                  | 1/2 [00:04<00:04,  4.26s/it]

image name :  07_CtrlNI_Pdgfra-Cy3_Serpine1-Cy5_004


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00,  4.47s/it]


count_matrix.shape (10002, 13)
sampling True vectors
count_matrix.shape (10000, 13)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 161.34it/s]


NameError: name 'dictionary' is not defined