In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import muon as mu
import os
from muon import atac as ac


# Process RNA-seq

In [None]:
# Set data_dir to the directory where your 10x files are stored
data_dir = "muon_data"
h5_path = os.path.join(data_dir, "filtered_feature_bc_matrix.h5")
mdata = mu.read_10x_h5(h5_path)
# De-duplicate feature names so they can be used as unique identifiers
mdata.var_names_make_unique()
mdata

In [None]:
# Handle on the 'rna' modality of the multimodal object
rna = mdata["rna"]

## Pre-processing

In [None]:
# Flag mitochondrial genes (names starting with 'MT-') in a boolean 'mt' column
is_mito = rna.var_names.str.startswith("MT-")
rna.var["mt"] = is_mito
# Compute QC metrics in place, treating 'mt' as an extra QC variable group
sc.pp.calculate_qc_metrics(
    rna,
    qc_vars=["mt"],
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [None]:
# Keep only genes detected in at least `min_cells` cells
min_cells = 3
mu.pp.filter_var(rna, "n_cells_by_counts", lambda n_cells: n_cells >= min_cells)

In [None]:
# Keep cells with at least 200 and fewer than 5000 detected genes
# (thresholds follow fig. 1 above)
mu.pp.filter_obs(
    rna,
    "n_genes_by_counts",
    lambda n_genes: (n_genes >= 200) & (n_genes < 5000),
)
# Keep cells whose total_counts is below 15000 ...
mu.pp.filter_obs(rna, "total_counts", lambda counts: counts < 15000)
# ... and whose pct_counts_mt is below 20
mu.pp.filter_obs(rna, "pct_counts_mt", lambda pct_mt: pct_mt < 20)

In [None]:
# normalise
sc.pp.normalize_total(rna, target_sum=1e4)
sc.pp.log1p(rna)

In [None]:
# Feature selection: flag highly variable genes using the mean / dispersion
# cut-offs below, then plot the result.
sc.pp.highly_variable_genes(rna, min_mean=0.02, max_mean=4, min_disp=0.5)
sc.pl.highly_variable_genes(rna)
# Print the count explicitly: a bare expression in the middle of a cell is
# evaluated but never displayed, so the number would otherwise be lost.
print("Highly variable genes:", int(np.sum(rna.var.highly_variable)))
# Scaling
# Save the log-normalised counts in the .raw slot before scaling
rna.raw = rna
sc.pp.scale(rna, max_value=10)

## PCA / t-SNE Mapping

### PCA, neighbourhood graph and Leiden clustering

In [None]:
# Principal component analysis with the ARPACK SVD solver
sc.tl.pca(
    rna,
    svd_solver="arpack",
)

In [None]:
# Neighbourhood graph: 10 neighbours per cell, computed on 20 principal components
sc.pp.neighbors(
    rna,
    n_pcs=20,
    n_neighbors=10,
)

In [None]:
# Leiden clustering at resolution 0.5
sc.tl.leiden(
    rna,
    resolution=0.5,
)

### t-SNE

In [None]:
# t-SNE embedding; random_state=0 is scanpy's documented default, spelled out
# here so the seed is visible to the reader.
sc.tl.tsne(rna, random_state=0)

# Save Data

In [None]:
# `rna` was filtered in place, so refresh the MuData container's global
# obs/var from its modalities before saving; otherwise the written object
# still carries the pre-filtering annotations.
mdata.update()
# Write next to the input data instead of to a hard-coded directory
mdata.write(os.path.join(data_dir, "pbmc10k.h5mu"))