# SCENIC+ scRNA-seq data processing

### Loading data

In [None]:
import scanpy as sc

# Load the filtered 10x matrix directory into an AnnData object, labelling
# the variables (genes) by gene symbol rather than by gene ID.
matrix_dir = "../data/scenic/filtered_feature_bc_matrix/"
adata = sc.read_10x_mtx(matrix_dir, var_names="gene_symbols")

# De-duplicate variable names in place so label-based indexing on genes is
# unambiguous.
adata.var_names_make_unique()

# Last expression of the cell: display the object summary.
adata

In [None]:
import pandas as pd

# Per-cell metadata table: tab-separated, with the first column used as the
# row index (the cell barcodes).
cell_data = pd.read_csv("../data/scenic/cell_data.tsv", sep="\t", index_col=0)

# Last expression of the cell: display the table.
cell_data

In [None]:
# Drop the last "-"-separated component of every metadata barcode so the
# index can be matched against adata.obs_names.
# NOTE(review): assumes each barcode in cell_data.tsv carries one extra
# trailing "-<suffix>" relative to the 10x barcodes -- confirm against the file.
# NOTE: not idempotent; re-running this cell strips a further component.
cell_data.index = [cb.rsplit("-", 1)[0] for cb in cell_data.index]

# Keep only the cells that also have a metadata row. A boolean mask preserves
# the original barcode order, whereas intersecting Python sets yields an
# arbitrary order that can differ between interpreter sessions and makes the
# row order of every downstream result non-reproducible.
has_metadata = adata.obs_names.isin(cell_data.index)
adata = adata[has_metadata].copy()

# Replace the observation annotations with the matching metadata rows,
# aligned to the (unchanged) order of adata.obs_names.
adata.obs = cell_data.loc[adata.obs_names]

# Flag genes whose symbol starts with "MT-" so they can be used as a QC
# variable below.
adata.var["mt"] = adata.var_names.str.startswith("MT-")

# Compute per-cell and per-gene QC metrics, including the "mt" gene set;
# inplace=True stores the results on adata instead of returning them.
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
)

### Data normalization

In [None]:
# Snapshot the un-normalised, all-gene data in .raw before X is transformed.
# An explicit copy guarantees the snapshot cannot share its matrix with the
# object that is normalised in place below.
adata.raw = adata.copy()

# Scale every cell to a total of 1e4 counts, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Annotate highly variable genes in adata.var["highly_variable"].
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Subsetting returns a view; materialise it with .copy() before the in-place
# scaling step so we do not rely on an implicit view-to-actual conversion
# (which emits a warning).
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene, clipping values above 10.
sc.pp.scale(adata, max_value=10)

# Last expression of the cell: display the per-cell annotations.
adata.obs

In [None]:
# Save the current AnnData object to disk in H5AD format.
adata.write_h5ad("../data/scenic/filtered_feature_bc_matrix.h5ad")

### Analysis

In [None]:
# Run PCA with scanpy's defaults; the result is stored on adata.
sc.pp.pca(adata)

# Scatter plot of the PCA embedding, coloured by the "Seurat_cell_type"
# column of adata.obs.
sc.pl.pca(adata, color="Seurat_cell_type")

In [None]:
# Build the neighbourhood graph with scanpy's defaults, then compute the
# UMAP embedding from it.
sc.pp.neighbors(adata)
sc.tl.umap(adata)

# Scatter plot of the UMAP embedding, coloured by the "Seurat_cell_type"
# column of adata.obs.
sc.pl.embedding(adata, basis="umap", color="Seurat_cell_type")