# Feature selection

Based on the feature selection chapter of [Single-cell best practices](https://www.sc-best-practices.org/preprocessing_visualization/feature_selection.html).

In [None]:
import scanpy as sc
import anndata2ri
import logging
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

# Notebook-wide configuration: scanpy output/figure settings, rpy2 logging,
# Python<->R converters, and the rpy2 IPython extension.

# NOTE(review): verbosity 0 is presumably scanpy's quietest level — confirm
# against the scanpy settings documentation.
sc.settings.verbosity = 0
# Figure defaults applied to plots produced later in this notebook.
sc.settings.set_figure_params(
    dpi=80,
    facecolor="white",
    frameon=False,
)

# Only let ERROR-level (and more severe) records through rpy2's callback logger.
rcb.logger.setLevel(logging.ERROR)
# Activate the pandas and AnnData converters; presumably this is what lets
# `adata` be handed to R and R results come back as Python objects — the
# relative order of the two activations is kept as-is on purpose.
ro.pandas2ri.activate() # type: ignore
anndata2ri.activate()

# Load the rpy2 IPython extension, which provides the %%R cell magic used below.
%load_ext rpy2.ipython

In [None]:
%%R
# Attach the scry package in the embedded R session.
library(scry)

In [None]:
# Read the input AnnData file (judging by its name, the output of the
# normalization step). `backup_url` is passed alongside the local path;
# presumably scanpy downloads from it when the file is missing locally —
# confirm in the scanpy `read` documentation.
adata = sc.read(
    filename="data/s4d8_normalization.h5ad",
    backup_url="https://figshare.com/ndownloader/files/40015741",
)

In [None]:
# Bind the Python `adata` object to the name `adata` in R's global environment
# so that the following %%R cell can reference it. The Python -> R conversion
# is presumably handled by the anndata2ri converter activated during setup.
ro.globalenv["adata"] = adata

In [None]:
%%R
# Run devianceFeatureSelection on the `adata` object shared from Python,
# using the assay named "X"; the result is kept in the R variable `sce`.
sce <- devianceFeatureSelection(adata, assay = "X")

In [None]:
# Evaluate an R expression that reads the `binomial_deviance` column from the
# row data of `sce`, and bring the result back into Python.
# NOTE(review): if the converted result is a 1-D array, `.T` is a no-op —
# confirm whether the transpose is actually needed.
binomial_deviance = ro.r("rowData(sce)$binomial_deviance").T

In [None]:
# Number of genes with the largest binomial deviance to flag as highly deviant.
# Change this single value to select a different number of features.
N_TOP_GENES = 4000

# argsort orders gene positions by ascending deviance, so the highest-deviance
# genes sit at the end of the ordering.
deviance_order = binomial_deviance.argsort()

# Clamp the requested count to the number of available entries and slice from
# an explicit start index: a bare `[-n:]` would select *every* gene for n == 0.
n_selected = max(0, min(N_TOP_GENES, len(deviance_order)))
idx = deviance_order[len(deviance_order) - n_selected:]

# Boolean mask over all genes, True only at the selected positions.
mask = np.zeros(adata.var_names.shape, dtype=bool)
mask[idx] = True

# Store both the selection flag and the raw deviance values per gene.
adata.var["highly_deviant"] = mask
adata.var["binomial_deviance"] = binomial_deviance

In [None]:
# Run scanpy's highly-variable-gene computation on the "scran_normalization"
# layer. No selection parameters are passed, so scanpy's defaults apply.
# The plot in the next cell reads the `means` and `dispersions` columns of
# `adata.var`, which are presumably written by this call.
sc.pp.highly_variable_genes(adata, layer="scran_normalization")

In [None]:
# Scatter each gene by its mean and dispersion, coloured by whether it was
# flagged as highly deviant.
ax = sns.scatterplot(
    x="means",
    y="dispersions",
    hue="highly_deviant",
    data=adata.var,
    s=5,
)
# Cap only the upper axis limits; the lower limits are left unchanged (None).
ax.set(xlim=(None, 1.5), ylim=(None, 3))
plt.show()

In [None]:
# Write the AnnData object, now carrying the `highly_deviant` and
# `binomial_deviance` columns added to `adata.var`, to disk for later steps.
adata.write("data/s4d8_feature_selection.h5ad") # type:ignore