In [1]:
# install packages
%%capture
!pip install scanpy leidenalg

In [2]:
# load packages
%%capture
import pandas as pd
import numpy as np
import os,sys
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import to_hex
import seaborn as sns

In [4]:
# Read the cell-by-gene matrix stored in the .h5ad file into `adata`.
h5ad_path = 'cell_by_gene_matrix.h5ad'
adata = sc.read(h5ad_path)


In [None]:
# inspect the data (expression value)
# Only a small top-left corner is densified: calling `.toarray()` on the whole
# matrix allocates a full dense array just to preview it, and the call fails
# outright when `adata.X` is already a dense ndarray (no `.toarray` attribute).
x_corner = adata.X[:5, :10]
x_corner.toarray() if hasattr(x_corner, 'toarray') else np.asarray(x_corner)

In [None]:
# inspect the data (cell metadata): show `adata.obs` as this cell's output
adata.obs

In [None]:
# inspect the data (gene metadata): show `adata.var` as this cell's output
adata.var

In [None]:
# inspect the data (a holistic view)
# Slice the first five cells *before* converting, so only those rows are turned
# into a DataFrame instead of converting the full matrix and then keeping five rows.
adata[:5].to_df()

In [12]:
# Compute QC summaries in place (later cells read adata.var['n_cells_by_counts']
# and adata.obs['n_genes_by_counts'] produced here).
sc.pp.calculate_qc_metrics(
    adata,
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [None]:
# For each gene: in how many cells is it expressed?
# The values are passed as `y=` because a bare positional argument is interpreted
# differently across seaborn releases (as `x` in older versions, as `data` in newer ones).
# Both layers are drawn on one explicit Axes so the strip plot overlays the violin.
fig, ax = plt.subplots()
n_cells_per_gene = adata.var['n_cells_by_counts']
sns.violinplot(y=n_cells_per_gene, ax=ax)
sns.stripplot(y=n_cells_per_gene, color='k', jitter=True, size=1, ax=ax)
ax.set(title='Cells per gene', ylabel='n_cells_by_counts');

In [None]:
# For each cell: how many genes are expressed?
# The values are passed as `y=` because a bare positional argument is interpreted
# differently across seaborn releases (as `x` in older versions, as `data` in newer ones).
# Both layers are drawn on one explicit Axes so the strip plot overlays the violin.
fig, ax = plt.subplots()
n_genes_per_cell = adata.obs['n_genes_by_counts']
sns.violinplot(y=n_genes_per_cell, ax=ax)
sns.stripplot(y=n_genes_per_cell, color='k', jitter=True, size=1, ax=ax)
ax.set(title='Genes per cell', ylabel='n_genes_by_counts');

In [15]:
# QC filtering (in place): cells first with min_counts=3, then genes with min_counts=200.
sc.pp.filter_cells(adata, min_counts=3)
sc.pp.filter_genes(adata, min_counts=200)  # 23249 × 1240

In [16]:
# Normalize and log-transform: run the two preprocessing steps in order on `adata`.
for preprocess_step in (sc.pp.normalize_total, sc.pp.log1p):
    preprocess_step(adata)

In [19]:
# PCA with 30 components (later cells read adata.uns['pca'] and use 'X_pca').
sc.tl.pca(adata, n_comps=30)

In [None]:
# Report the summed variance ratio of the computed PCs, then plot the ratio per PC.
explained_ratio = adata.uns['pca']['variance_ratio'].sum()
print('total variance explained by default PCs: {}'.format(explained_ratio))
sc.pl.pca_variance_ratio(adata, n_pcs=30)

In [None]:
# Show the 'condition' annotation in PC space: PC1 vs PC2 and PC2 vs PC3.
sc.pl.pca(
    adata,
    color='condition',
    components=['1,2', '2,3'],
)

In [None]:
# Build the neighbor graph from the 'X_pca' representation.
sc.pp.neighbors(adata, use_rep='X_pca')

In [24]:
# Leiden clustering (later cells read the labels from adata.obs['leiden']).
sc.tl.leiden(adata)

In [None]:
# 2-component UMAP, coloured by condition and by Leiden cluster.
sc.tl.umap(adata, n_components=2)
sc.pl.umap(
    adata,
    color=['condition', 'leiden'],
)

In [None]:
# visualization 3d
# The 3-component embedding is computed on a copy: running sc.tl.umap on `adata`
# itself would overwrite adata.obsm['X_umap'], and every later sc.pl.umap call
# would then draw two axes of the 3D embedding instead of the 2D layout.
adata_umap3d = sc.tl.umap(adata, n_components=3, copy=True)
umap3d = adata_umap3d.obsm['X_umap']

# One colour per Leiden cluster.
# - plt.get_cmap replaces cm.get_cmap, which was removed in Matplotlib 3.9.
# - The palette is cycled, so more clusters than palette entries no longer
#   produces a length mismatch when building the cluster -> colour mapping.
clusters = adata.obs['leiden'].unique()
palette = plt.get_cmap('tab20').colors
colors = [to_hex(palette[i % len(palette)]) for i in range(len(clusters))]
dic = dict(zip(clusters, colors))
adata.obs['color'] = adata.obs['leiden'].map(dic).values

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
# marker size shrinks as the number of cells grows
ax.scatter(umap3d[:,0],umap3d[:,1],umap3d[:,2],c=adata.obs['color'],s=120000/adata.shape[0]);

In [None]:
# Differential expression: rank genes for each Leiden cluster.
sc.tl.rank_genes_groups(adata, groupby='leiden')

In [None]:
# Visualize the differentially expressed genes as a heatmap (n_genes=4).
sc.pl.rank_genes_groups_heatmap(adata, n_genes=4)

In [None]:
# Visualize the differentially expressed genes as a dot plot.
sc.pl.rank_genes_groups_dotplot(adata)

In [None]:
# Visualize the differentially expressed genes as stacked violins.
sc.pl.rank_genes_groups_stacked_violin(adata)

In [None]:
# Visualize the differentially expressed genes as a tracks plot.
sc.pl.rank_genes_groups_tracksplot(adata)

In [None]:
### Example1: motility
import copy
# plt.get_cmap is used instead of cm.get_cmap, which was removed in Matplotlib 3.9.
# The colormap is copied before it is modified so the shared 'YlOrRd' map is left untouched.
cmap = copy.copy(plt.get_cmap('YlOrRd'))
# values below vmin (1e-5) are drawn in light grey instead of the lowest colormap colour
cmap.set_under('lightgrey')
sc.pl.umap(adata,color=['condition','srfAA','hag'],cmap=cmap,vmin=1e-5)

In [None]:
### Example2: Stress and folding response
# Uses the `cmap` built in the Example1 cell.
sc.pl.umap(
    adata,
    color=['condition', 'clpC', 'groEL'],
    cmap=cmap,
    vmin=1e-5,
)

In [None]:
### Example3: metal intake
# Uses the `cmap` built in the Example1 cell.
sc.pl.umap(
    adata,
    color=['condition', 'dhbA', 'feuA'],
    cmap=cmap,
    vmin=1e-5,
)

In [None]:
### Example4: Antimicrobial agents
# Uses the `cmap` built in the Example1 cell.
sc.pl.umap(
    adata,
    color=['condition', 'albA'],
    cmap=cmap,
    vmin=1e-5,
)

In [None]:
# inspect specific genes involved in early growth stage (glycolysis)
#   ptsG: glucose permease
#   ldh:  lactate dehydrogenase
#   pdhA: pyruvate dehydrogenase
#   ackA: acetate kinase
import copy
# plt.get_cmap is used instead of cm.get_cmap, which was removed in Matplotlib 3.9.
# The colormap is copied before it is modified so the shared 'YlOrRd' map is left untouched.
cmap = copy.copy(plt.get_cmap('YlOrRd'))
# values below vmin (1e-5) are drawn in light grey instead of the lowest colormap colour
cmap.set_under('lightgrey')
sc.pl.umap(adata,color=['condition','ptsG','ldh','ackA','pdhA'],cmap=cmap,vmin=1e-5)

In [None]:
# Late-stage (TCA) genes on the UMAP; uses the `cmap` defined in an earlier cell.
'''
citZ: acetyl-CoA -> citrate
citB: citrate -> isocitrate
icd: isocitrate -> 2-oxoglutarate
'''
sc.pl.umap(
    adata,
    color=['condition', 'citZ', 'citB', 'icd'],
    cmap=cmap,
    vmin=1e-5,
)