diff --git a/dynamo/preprocessing/Preprocessor.py b/dynamo/preprocessing/Preprocessor.py index d060ee3f6..2829486d4 100644 --- a/dynamo/preprocessing/Preprocessor.py +++ b/dynamo/preprocessing/Preprocessor.py @@ -452,6 +452,7 @@ def preprocess_adata_seurat_wo_pca( self.standardize_adata(adata, tkey, experiment_type) self._filter_cells_by_outliers(adata) self._filter_genes_by_outliers(adata) + self._calc_size_factor(adata) self._normalize_by_cells(adata) self._select_genes(adata) self._norm_method(adata) @@ -787,6 +788,7 @@ def preprocess_adata_monocle_pearson_residuals( self._force_gene_list(adata) X_copy = adata.X.copy() + self._calc_size_factor(adata) self._normalize_by_cells(adata) adata.X = X_copy self._normalize_selected_genes(adata) diff --git a/dynamo/preprocessing/normalization.py b/dynamo/preprocessing/normalization.py index 991f599cf..f67d2a962 100644 --- a/dynamo/preprocessing/normalization.py +++ b/dynamo/preprocessing/normalization.py @@ -168,26 +168,29 @@ def get_sz_exprs( corresponding to the size factor. """ - if layer == "raw": - CM = adata.raw.X - szfactors = adata.obs[layer + "Size_Factor"].values[:, None] - elif layer == "X": - CM = adata.X - szfactors = adata.obs["Size_Factor"].values[:, None] - elif layer == "protein": - if "protein" in adata.obsm_keys(): - CM = adata.obsm[layer] - szfactors = adata.obs["protein_Size_Factor"].values[:, None] + try: + if layer == "raw": + CM = adata.raw.X + szfactors = adata.obs[layer + "Size_Factor"].values[:, None] + elif layer == "X": + CM = adata.X + szfactors = adata.obs["Size_Factor"].values[:, None] + elif layer == "protein": + if "protein" in adata.obsm_keys(): + CM = adata.obsm[layer] + szfactors = adata.obs["protein_Size_Factor"].values[:, None] + else: + CM, szfactors = None, None else: - CM, szfactors = None, None - else: - CM = adata.layers[layer] - szfactors = adata.obs[layer + "_Size_Factor"].values[:, None] + CM = adata.layers[layer] + szfactors = adata.obs[layer + "_Size_Factor"].values[:, None] - if total_szfactor is not None and total_szfactor in adata.obs.keys(): - szfactors = adata.obs[total_szfactor][:, None] - elif total_szfactor is not None: - main_warning("`total_szfactor` is not `None` and it is not in adata object.") + if total_szfactor is not None and total_szfactor in adata.obs.keys(): + szfactors = adata.obs[total_szfactor][:, None] + elif total_szfactor is not None: + main_warning("`total_szfactor` is not `None` and it is not in adata object.") + except KeyError: + raise KeyError(f"Size factor for layer {layer} is not in adata object. Please run `dynamo.tl.calc_sz_factor`.") return szfactors, CM @@ -231,22 +234,23 @@ def normalize( An updated anndata object that are updated with normalized expression values for different layers. """ + layers = DKM.get_available_layer_keys(adata, layers) + if recalc_sz: if "use_for_pca" in adata.var.columns and keep_filtered is False: adata = adata[:, adata.var.loc[:, "use_for_pca"]] adata.obs = adata.obs.loc[:, ~adata.obs.columns.str.contains("Size_Factor")] - layers = DKM.get_available_layer_keys(adata, layers) - - calc_sz_factor( - adata, - layers=layers, - locfunc=np.nanmean, - round_exprs=False, - method=sz_method, - scale_to=scale_to, - ) + if np.count_nonzero(adata.obs.columns.str.contains("Size_Factor")) < len(layers): + calc_sz_factor( + adata, + layers=layers, + locfunc=np.nanmean, + round_exprs=False, + method=sz_method, + scale_to=scale_to, + ) excluded_layers = DKM.get_excluded_layers( X_total_layers=X_total_layers, diff --git a/dynamo/tools/clustering.py b/dynamo/tools/clustering.py index 9f274ceb8..70c13be52 100644 --- a/dynamo/tools/clustering.py +++ b/dynamo/tools/clustering.py @@ -12,7 +12,7 @@ from ..configuration import DKM from ..dynamo_logger import main_info -from ..preprocessing.normalization import normalize +from ..preprocessing.normalization import calc_sz_factor, normalize from ..preprocessing.QC import filter_genes_by_outliers as filter_genes from ..preprocessing.pca import pca from ..preprocessing.transform import log1p @@ -651,6 +651,7 @@ def scc( filter_genes(adata, min_cell_s=min_cells) adata.uns["pp"] = {} + calc_sz_factor(adata, layers="X") normalize(adata, layers="X") log1p(adata) pca(adata, n_pca_components=30, pca_key="X_pca") diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index fd02bd438..a9f544939 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -13,7 +13,7 @@ from dynamo.preprocessing import Preprocessor from dynamo.preprocessing.cell_cycle import get_cell_phase from dynamo.preprocessing.deprecated import _calc_mean_var_dispersion_sparse_legacy -from dynamo.preprocessing.normalization import normalize +from dynamo.preprocessing.normalization import calc_sz_factor, normalize from dynamo.preprocessing.transform import log1p, is_log1p_transformed_adata from dynamo.preprocessing.utils import ( convert_layers2csr, @@ -449,6 +449,7 @@ def test_normalize(): adata.uns["pp"] = dict() # Call the function + calc_sz_factor(adata) normalized = normalize( adata=adata, # norm_method=np.log1p,