Dissecting the human liver cellular landscape by single cell RNA-seq reveals novel intrahepatic monocyte/macrophage populations

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy.io
import scanpy.external as scex
import sklearn.metrics
import matplotlib
import bbknn

from matplotlib import pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.mixture import GaussianMixture as GMM
from scipy.stats import norm

import seaborn as sns

In [None]:
#adata=sc.read(results_file_post)
#adata.uns['log1p'] = {"base":None}

Set up output properties (logging verbosity and figure defaults)

In [None]:
# Configure scanpy's logging level and default figure appearance for this session.
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print scanpy's header (package/version information) so the run is reproducible.
sc.logging.print_header()
# Default figure settings: 80 dpi and a white figure background.
sc.settings.set_figure_params(dpi=80, facecolor='white')

In [None]:
#%matplotlib inline

Set scanpy output files

In [None]:
# Output locations: files saved by this notebook go under `writeDir` and are
# named after the dataset (`fileName`).
writeDir = "write/"
fileName = "humanLiver"

# Path of the AnnData checkpoint written once quality control is finished.
resultsFileQC = f"{writeDir}{fileName}_QC.h5ad"


Set figure parameters

In [None]:
# Figure defaults: 100 dpi on screen, 150 dpi when saved, font size 10,
# PNG as the file format for saved figures.
sc.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=150,
    fontsize=10,
    format="png",
)
# Saved figures go to a dataset-specific sub-directory of "figures/".
sc.settings.figdir = f"figures/{fileName}/"
# Base name used for figure files; identical to the dataset name.
figName = fileName

Read the input files (expression matrix and per-cell metadata)

In [None]:
# Directory holding the GSE115469 input files.
inDir = "data/humanLiver"
# CSV that is loaded into the AnnData object.
inputFile = inDir + "/GSE115469_Data.csv"
# Tab-separated table that is loaded as the per-cell metadata.
inMetaFile = inDir + "/GSE115469_CellClusterType.txt"

In [None]:
# Load the CSV and transpose it, so that the rows of the file become the
# variables (`var`) and its columns become the observations (`obs`).
adata = sc.read_csv(inputFile).transpose()
adata

In [None]:
# De-duplicate variable (gene) names in place.
adata.var_names_make_unique()
# De-duplicate observation (cell) names in place.
adata.obs_names_make_unique()

In [None]:
# Load the tab-separated metadata table, using its first column as the index.
meta = pd.read_csv(inMetaFile, sep="\t", index_col=0)
meta

In [None]:
# Attach the metadata to the cells, matched by cell name rather than by row
# position: a plain `adata.obs = meta` would trust that both files list the
# cells in the same order and would otherwise mislabel cells silently.
# `.loc` raises a KeyError if a cell of `adata` is absent from `meta`.
adata.obs = meta.loc[adata.obs_names]

# Start QC
Investigate the most highly expressed genes

In [None]:
# Plot the 20 genes with the highest expression.
sc.pl.highest_expr_genes(adata, n_top=20)

Remove cells with fewer than 200 detected genes, and remove genes expressed in fewer than 4 cells

In [None]:
# Print every gene whose name contains "MT-" (anywhere in the name, not only
# as a prefix), one per line.
mt_named = (name for name in adata.var_names if "MT-" in name)
for name in mt_named:
    print(name)

In [None]:
# Keep only cells in which at least 200 genes are detected.
sc.pp.filter_cells(adata, min_genes = 200)
# Keep only genes detected in at least 4 cells; this runs after the cell
# filter above, so it counts the remaining cells only.
sc.pp.filter_genes(adata, min_cells = 4)

In [None]:
# Drop the MALAT1 gene from the variables.  Subsetting an AnnData returns a
# view; `.copy()` materialises it so that later in-place edits (new `var`
# columns, QC metrics) act on a real object instead of triggering an implicit
# copy-on-modify with a warning.
adata = adata[:, adata.var_names != "MALAT1"].copy()

## Mito QC
Flag genes whose names start with "MT-" as mitochondrial genes

In [None]:
# Mark mitochondrial genes (names starting with "MT-") in the boolean `MT`
# column of `adata.var`.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["MT"] = is_mito
# Compute the QC metrics in place, including the metrics for the `MT` gene
# group; the log1p-transformed variants are not computed.
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=["MT"],
    log1p=False,
    inplace=True,
)

In [None]:
# Violin plots of the three QC metrics, one panel per metric.
sc.pl.violin(
    adata,
    keys=["n_genes_by_counts", "total_counts", "pct_counts_MT"],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
# Scatter total counts against the mitochondrial percentage, then against the
# number of detected genes.
for y_metric in ("pct_counts_MT", "n_genes_by_counts"):
    sc.pl.scatter(adata, x="total_counts", y=y_metric)

Remove cells with a high mitochondrial fraction (6 % or more) or that could be doublets (3000 or more detected genes, or 4000 or more total counts)

In [None]:
# Keep only cells that pass all three QC thresholds:
#   fewer than 3000 detected genes, fewer than 4000 total counts,
#   and less than 6 % mitochondrial counts.
# One combined mask selects the same cells as three successive subsets, and
# `.copy()` materialises the result instead of leaving `adata` as a view of a
# view of a view.
passes_qc = (
    (adata.obs["n_genes_by_counts"] < 3000)
    & (adata.obs["total_counts"] < 4000)
    & (adata.obs["pct_counts_MT"] < 6)
)
adata = adata[passes_qc.to_numpy(), :].copy()

In [None]:
# Same violin plots as before filtering, to check the effect of the thresholds.
sc.pl.violin(
    adata,
    keys=["n_genes_by_counts", "total_counts", "pct_counts_MT"],
    jitter=0.4,
    multi_panel=True,
)

Save the post-QC AnnData object

In [None]:
# Display a summary of the filtered AnnData object.
adata

In [None]:
# Display the path the post-QC object will be written to.
resultsFileQC

In [None]:
# Write the post-QC object to disk.  The output directory is created first
# so the write cannot fail on a fresh checkout where it does not exist yet.
import os

os.makedirs(os.path.dirname(resultsFileQC) or ".", exist_ok=True)
adata.write(resultsFileQC)

In [None]:
# Display the per-cell annotation table of the filtered object.
adata.obs