In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import scanpy as sc
import anndata
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
import numpy as np
import os
import pandas as pd

In [None]:
import matplotlib.pyplot as plt

In [None]:
import scirpy
import h5py
import anndata

In [None]:
from matplotlib.pyplot import rc_context

In [None]:
# Remove the row cap so DataFrames are displayed in full rather than truncated.
pd.options.display.max_rows = None

In [None]:
# Use scanpy's own figure defaults, with the font size set to 12.
sc.set_figure_params(fontsize=12, scanpy=True)

In [None]:
# Build the AnnData object from the combined count matrix of all datasets.

# Read the sparse counts (Matrix Market format).
X = io.mmread("counts.mtx")

# The matrix is transposed before being stored, so that its rows line up with
# the per-cell metadata and its columns with the gene names assigned below
# (presumably the file on disk is genes x cells -- confirm against the export step).
adata = anndata.AnnData(X.T.tocsr())

# Per-cell metadata table; must contain a 'barcode' column.
cell_meta = pd.read_csv("metadata.csv", low_memory=False)

# Gene names, one per line.
with open("gene_names.csv") as handle:
    gene_names = handle.read().splitlines()

# Attach the metadata as observations, then label observations by barcode
# (the 'barcode' column is kept in .obs as well) and variables by gene name.
adata.obs = cell_meta
adata.obs.index = pd.Index(adata.obs["barcode"])
adata.var.index = gene_names

In [None]:
# Flag ribosomal protein genes (symbols beginning with RPS or RPL) as 'ribo'.
# The pattern is anchored at the start of the symbol: an unanchored substring
# match would also flag unrelated genes that merely contain "RPL"/"RPS"
# (e.g. the mitochondrial MRPL*/MRPS* families) and inflate the ribosomal fraction.
adata.var['ribo'] = adata.var_names.str.contains(r'^RP[SL]', regex=True)

# Compute QC metrics in place; the per-cell ribosomal percentage produced here
# is the 'pct_counts_ribo' column used later as a regression covariate.
sc.pp.calculate_qc_metrics(adata, qc_vars=['ribo'], percent_top=None, log1p=False, inplace=True)

In [None]:
# Keep only the small-intestine (SI) cells, i.e. rows whose 'Region' is 'SmallInt'.
# Slicing an AnnData returns a view of `adata`; .copy() makes `si` an independent
# object so the in-place filtering and normalisation that follow act on real data
# instead of relying on an implicit view-to-copy conversion.
si = adata[adata.obs['Region'].isin(['SmallInt'])].copy()

In [None]:
# Basic quality filtering, applied in place:
#   1. drop cells with fewer than 200 detected genes,
#   2. then drop genes detected in fewer than 3 of the remaining cells.
sc.pp.filter_cells(si, min_genes=200, inplace=True)
sc.pp.filter_genes(si, min_cells=3, inplace=True)

In [None]:
# Library-size normalisation: rescale each cell to a total of 10,000 counts.
sc.pp.normalize_total(si, target_sum=10_000.0)

In [None]:
# Log-transform the normalised counts in place (log(1 + x)).
sc.pp.log1p(si)

In [None]:
# Annotate highly variable genes in si.var['highly_variable'] using
# mean-expression and dispersion cutoffs.
sc.pp.highly_variable_genes(
    si,
    min_disp=0.5,
    min_mean=0.0125,
    max_mean=3,
)

In [None]:
# Snapshot the current state (log-normalised values for every gene that passed
# filtering) in .raw before the matrix is cut down to highly variable genes,
# regressed and scaled.
si.raw = si

In [None]:
# Restrict the working matrix to the highly variable genes.
# The slice is a view of the previous object; .copy() materialises it so that
# the regression and scaling steps that follow write into an independent
# AnnData rather than triggering an implicit copy of a view.
si = si[:, si.var.highly_variable].copy()

In [None]:
# Regress out unwanted sources of variation stored in si.obs: sequencing depth,
# mitochondrial fraction and ribosomal fraction.
# NOTE(review): 'percent.mt' is not computed in this notebook -- presumably it
# comes from metadata.csv; confirm the column exists.
sc.pp.regress_out(
    si,
    [
        'total_counts',
        'percent.mt',
        'pct_counts_ribo',
    ],
)

In [None]:
# Standardise each gene (zero mean, unit variance) and clip values above 10.
sc.pp.scale(si, zero_center=True, max_value=10)

In [None]:
# Principal component analysis on the scaled matrix, using the ARPACK solver.
sc.tl.pca(si, svd_solver="arpack")

In [None]:
import bbknn

In [None]:
# Batch correction: build a batch-balanced neighbour graph across samples,
# taking 3 neighbours from each 'sample_name' batch in the space of the
# first 50 principal components.
bbknn.bbknn(
    si,
    batch_key="sample_name",
    n_pcs=50,
    neighbors_within_batch=3,
    metric="euclidean",
)

In [None]:
# Compute the UMAP embedding from the neighbour graph built above.
# Note: this step only produces the 2-D layout; it does not assign clusters.
sc.tl.umap(si)

In [None]:
# Marker-gene detection groups cells by si.obs['leiden'], but no earlier step in
# this notebook computes those labels. Run Leiden clustering on the existing
# neighbour graph when they are missing, so the notebook works from a fresh
# kernel; labels already present (e.g. loaded with the metadata) are kept as-is.
if 'leiden' not in si.obs:
    sc.tl.leiden(si)

# Explicitly record that the log1p transform used the natural logarithm.
# NOTE(review): presumably a workaround for rank_genes_groups failing when the
# 'base' entry is absent from si.uns['log1p'] -- confirm against the scanpy version in use.
si.uns['log1p']["base"] = None

# Rank genes per cluster with the Wilcoxon rank-sum test, then plot the top 25
# genes of each cluster with independent y-axes.
sc.tl.rank_genes_groups(si, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups(si, n_genes=25, sharey=False)