Skip to content

Commit

Permalink
Merge pull request #596 from Sichao25/debug
Browse files Browse the repository at this point in the history
Remove duplicate size factors calculation
  • Loading branch information
Xiaojieqiu authored Oct 30, 2023
2 parents 4a5ac26 + d875013 commit 4d94c7c
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 30 deletions.
2 changes: 2 additions & 0 deletions dynamo/preprocessing/Preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ def preprocess_adata_seurat_wo_pca(
self.standardize_adata(adata, tkey, experiment_type)
self._filter_cells_by_outliers(adata)
self._filter_genes_by_outliers(adata)
self._calc_size_factor(adata)
self._normalize_by_cells(adata)
self._select_genes(adata)
self._norm_method(adata)
Expand Down Expand Up @@ -787,6 +788,7 @@ def preprocess_adata_monocle_pearson_residuals(
self._force_gene_list(adata)

X_copy = adata.X.copy()
self._calc_size_factor(adata)
self._normalize_by_cells(adata)
adata.X = X_copy
self._normalize_selected_genes(adata)
Expand Down
60 changes: 32 additions & 28 deletions dynamo/preprocessing/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,26 +168,29 @@ def get_sz_exprs(
corresponding to the size factor.
"""

if layer == "raw":
CM = adata.raw.X
szfactors = adata.obs[layer + "Size_Factor"].values[:, None]
elif layer == "X":
CM = adata.X
szfactors = adata.obs["Size_Factor"].values[:, None]
elif layer == "protein":
if "protein" in adata.obsm_keys():
CM = adata.obsm[layer]
szfactors = adata.obs["protein_Size_Factor"].values[:, None]
try:
if layer == "raw":
CM = adata.raw.X
szfactors = adata.obs[layer + "Size_Factor"].values[:, None]
elif layer == "X":
CM = adata.X
szfactors = adata.obs["Size_Factor"].values[:, None]
elif layer == "protein":
if "protein" in adata.obsm_keys():
CM = adata.obsm[layer]
szfactors = adata.obs["protein_Size_Factor"].values[:, None]
else:
CM, szfactors = None, None
else:
CM, szfactors = None, None
else:
CM = adata.layers[layer]
szfactors = adata.obs[layer + "_Size_Factor"].values[:, None]
CM = adata.layers[layer]
szfactors = adata.obs[layer + "_Size_Factor"].values[:, None]

if total_szfactor is not None and total_szfactor in adata.obs.keys():
szfactors = adata.obs[total_szfactor][:, None]
elif total_szfactor is not None:
main_warning("`total_szfactor` is not `None` and it is not in adata object.")
if total_szfactor is not None and total_szfactor in adata.obs.keys():
szfactors = adata.obs[total_szfactor][:, None]
elif total_szfactor is not None:
main_warning("`total_szfactor` is not `None` and it is not in adata object.")
except KeyError:
raise KeyError(f"Size factor for layer {layer} is not in adata object. Please run `dynamo.tl.calc_sz_factor`.")

return szfactors, CM

Expand Down Expand Up @@ -231,22 +234,23 @@ def normalize(
An updated anndata object that are updated with normalized expression values for different layers.
"""

layers = DKM.get_available_layer_keys(adata, layers)

if recalc_sz:
if "use_for_pca" in adata.var.columns and keep_filtered is False:
adata = adata[:, adata.var.loc[:, "use_for_pca"]]

adata.obs = adata.obs.loc[:, ~adata.obs.columns.str.contains("Size_Factor")]

layers = DKM.get_available_layer_keys(adata, layers)

calc_sz_factor(
adata,
layers=layers,
locfunc=np.nanmean,
round_exprs=False,
method=sz_method,
scale_to=scale_to,
)
if np.count_nonzero(adata.obs.columns.str.contains("Size_Factor")) < len(layers):
calc_sz_factor(
adata,
layers=layers,
locfunc=np.nanmean,
round_exprs=False,
method=sz_method,
scale_to=scale_to,
)

excluded_layers = DKM.get_excluded_layers(
X_total_layers=X_total_layers,
Expand Down
3 changes: 2 additions & 1 deletion dynamo/tools/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from ..configuration import DKM
from ..dynamo_logger import main_info
from ..preprocessing.normalization import normalize
from ..preprocessing.normalization import calc_sz_factor, normalize
from ..preprocessing.QC import filter_genes_by_outliers as filter_genes
from ..preprocessing.pca import pca
from ..preprocessing.transform import log1p
Expand Down Expand Up @@ -651,6 +651,7 @@ def scc(

filter_genes(adata, min_cell_s=min_cells)
adata.uns["pp"] = {}
calc_sz_factor(adata, layers="X")
normalize(adata, layers="X")
log1p(adata)
pca(adata, n_pca_components=30, pca_key="X_pca")
Expand Down
3 changes: 2 additions & 1 deletion tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from dynamo.preprocessing import Preprocessor
from dynamo.preprocessing.cell_cycle import get_cell_phase
from dynamo.preprocessing.deprecated import _calc_mean_var_dispersion_sparse_legacy
from dynamo.preprocessing.normalization import normalize
from dynamo.preprocessing.normalization import calc_sz_factor, normalize
from dynamo.preprocessing.transform import log1p, is_log1p_transformed_adata
from dynamo.preprocessing.utils import (
convert_layers2csr,
Expand Down Expand Up @@ -449,6 +449,7 @@ def test_normalize():
adata.uns["pp"] = dict()

# Call the function
calc_sz_factor(adata)
normalized = normalize(
adata=adata,
# norm_method=np.log1p,
Expand Down

0 comments on commit 4d94c7c

Please sign in to comment.