In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import bbknn
import os
from scipy import sparse
import matplotlib.pyplot as plt
# from scanpy_base_moudle_update2 import *
# import scrublet as scr
import datetime
import harmonypy as hm

sc.settings.verbosity = 3
#sc.logging.print_versions()
# Figure resolution and default styling for scanpy plots.
sc.settings.set_figure_params(dpi=150, figsize=(4, 3), fontsize=12)

import matplotlib.font_manager
# Print the font family names Matplotlib has registered, to check that 'Arial'
# (selected below) is available. The font manager's own registry is used here
# because get_fontconfig_fonts() is deprecated in newer Matplotlib releases and
# building a FontProperties per font file can fail on unreadable files.
names = sorted({font.name for font in matplotlib.font_manager.fontManager.ttflist})
print(names)

params = {
    #'font.style':'italic',
    'font.weight': 'normal',    # or 'bold'
}
plt.rcParams.update(params)

plt.rcParams['font.family'] = 'Arial'

# 数据整理

In [None]:
# Load the complete human gut dataset.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad/All_579886_umap.h5ad')
adata

In [None]:
# Extract the epithelium; it is split into Postnatal, Prenatal and HIO further down.

# Epithelium: gather the matching cell labels, then take that slice of the dataset.
obs_index = adata.obs.index[adata.obs["Compartment"].isin(['Epithelium']).to_numpy()]
adata_com = adata[obs_index, :]
adata_com

In [None]:
# Export the per-cell metadata (obs table) of the epithelial subset to CSV.
adata_com.obs.to_csv('/mnt/data/Project2021/Gut_immune_surveillance/outputs/human_epi.csv')

In [None]:
# Count cells per developmental state. The same column is used for rows and
# columns, and margins=True appends the totals.
Groups_tab_1 = pd.crosstab(
    index=adata_com.obs['pan_development_state'],
    columns=adata_com.obs['pan_development_state'],
    margins=True,
)
Groups_tab_1

In [None]:
def _subset_by_development_state(parent, state):
    """Return the view of ``parent`` whose ``obs['pan_development_state']`` equals ``state``."""
    # A boolean mask selects the rows directly, without a lookup by cell label.
    in_state = parent.obs["pan_development_state"].isin([state]).to_numpy()
    return parent[in_state, :]

# Postnatal
adata_post = _subset_by_development_state(adata_com, 'After_born')
print(adata_post)

# Prenatal
adata_pre = _subset_by_development_state(adata_com, 'Fetal')
print(adata_pre)

# HIO
adata_HIO = _subset_by_development_state(adata_com, 'tHIO')
print(adata_HIO)

In [None]:
# Save the three subsets.
# NOTE(review): the prenatal and HIO files are also written under the 'Postnatal'
# directory; later cells read them back from these same paths.
adata_post.write('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_postnatal_epi.h5ad')

adata_pre.write('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_prenatal_epi.h5ad')

adata_HIO.write('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_HIO_epi.h5ad')

# Postnatal UMAP

## 数据处理

In [None]:
# Load the annotated postnatal epithelium object.
# NOTE(review): this '_ann' file is only written at the end of this section, so on a
# fresh run it must already exist on disk - confirm where it originally comes from.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_postnatal_epi_ann.h5ad')
adata

In [None]:
# Export the per-cell metadata (obs table) of the postnatal epithelium to CSV.
adata.obs.to_csv('/mnt/data/Project2021/Gut_immune_surveillance/outputs/postnatal_epi.csv')

In [None]:
# Rebuild the AnnData from the matrix and gene table stored in .raw, keeping the
# existing obs; other slots of the loaded object are not carried over.
adata = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)
adata

In [None]:
# Keep the current matrix in .raw before restricting the genes.
adata.raw = adata

# Flag highly variable genes, using 'pan_organ' as the batch key.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5, batch_key='pan_organ')
# Take an explicit copy so the scaling below modifies a standalone object
# instead of writing into a view of the full matrix.
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene without centering.
sc.pp.scale(adata, zero_center=False)

def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='bbknn_batch', neighbors_within_batch=1):
    """Run PCA, BBKNN and UMAP on ``adata``, modifying it in place.

    Despite the name, t-SNE is not computed (that call is commented out below).

    Parameters
    ----------
    adata
        AnnData to process. Its ``obs`` must contain ``dataset`` and ``batch_name``;
        they are combined into ``obs['bbknn_batch']`` on every call.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        ``obs`` column BBKNN uses as the batch label.
    neighbors_within_batch
        Passed through to BBKNN. Previously hard-coded; the default keeps the
        value this cell always used.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # Principal component analysis, followed by two diagnostic plots: the PCA
    # scatter and the variance explained per component.
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # BBKNN handles the batch effect and stands in for sc.pp.neighbors; it
    # expects data that has already been normalised and reduced by PCA.
    adata.obs['bbknn_batch'] = [
        str(dataset) + '-' + str(batch)
        for dataset, batch in zip(adata.obs['dataset'], adata.obs['batch_name'])
    ]
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # Compute the UMAP embedding.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# Display a summary of the processed object.
adata

In [None]:
# UMAP coloured by organ.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['organ'],
    add_outline=True,
    palette="tab20",
    frameon=False,
    title='',
)

In [None]:
from matplotlib import cm, colors
sc.settings.set_figure_params(dpi=400, figsize=(4, 3.5), fontsize=15)

# Custom colormap: a short grey ramp at the low end, followed by the plasma scale.
colors2 = plt.get_cmap('plasma')(np.linspace(0, 1, 128))
colors3 = plt.get_cmap('Greys_r')(np.linspace(0.7, 0.8, 10))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# CDX2 expression on the UMAP, with the colour scale fixed to the range 0-5.
sc.pl.umap(
    adata,
    color=['CDX2'],
    add_outline=False,
    frameon=False,
    size=1.8,
    color_map=mymap,
    vmin=0,
    vmax=5,
)

In [None]:
# UMAP coloured by the 'cluster_figure1A' annotation.
sc.pl.umap(adata, color=['cluster_figure1A'], add_outline=True, palette="tab20", frameon=False, title='')

In [None]:
# Same UMAP, restricted via `groups` to the 'C01-E07-PI3+enterocytes' cluster.
sc.pl.umap(adata, color=['cluster_figure1A'], groups = ['C01-E07-PI3+enterocytes'] ,add_outline=True, palette="tab20_r", frameon=False)

In [None]:
# Batch-effect correction with Harmony.
import harmonypy as hm

harmony_out = hm.run_harmony(
    adata.obsm['X_pca'],
    adata.obs,
    vars_use=['dataset', 'batch_name'],
    max_iter_harmony=30,
)
# Store the transposed corrected matrix next to the original PCA coordinates.
adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T

# t-SNE computed on the Harmony-corrected representation.
sc.tl.tsne(adata, n_jobs=7, use_rep='X_pca_harmony')

In [None]:
# t-SNE coloured by organ.
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=15)
sc.pl.tsne(
    adata,
    color=['organ'],
    add_outline=True,
    palette="tab20",
    frameon=False,
    title='',
)

In [None]:
# t-SNE coloured by the 'cluster_figure1A' annotation.
sc.pl.tsne(adata, color=['cluster_figure1A'], add_outline=True, palette="tab20", frameon=False, title='')

In [None]:
# t-SNE panels for PI3, SLPI, PLA2G2A and DEFA5.
sc.pl.tsne(adata, color=['PI3','SLPI','PLA2G2A','DEFA5'], add_outline=True, frameon=False)

In [None]:
# Save the processed object.
# NOTE(review): this writes to the same '_ann' path that was loaded at the start of
# this section, so the input file is overwritten.
adata.write('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_postnatal_epi_ann.h5ad')
adata

## 绘图

In [None]:
# Reload the saved postnatal object for plotting.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_postnatal_epi_ann.h5ad')
adata 

In [None]:
# UMAP restricted via `groups` to the 'C01-E09-Microfold cells' cluster.
sc.pl.umap(adata, color=['cluster_figure1A'], groups = ['C01-E09-Microfold cells'] ,add_outline=True, palette="tab20_r", frameon=False)

In [None]:
# UMAP of the cluster annotation drawn with the Spectral palette.
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=15)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    add_outline=True,
    palette="Spectral",
    frameon=False,
    title='',
)

In [None]:
# List the categories of the cluster annotation.
adata.obs['cluster_figure1A'].cat.categories

In [None]:
old_colors = np.array(adata.uns['cluster_figure1A_colors'])
new_colors = old_colors  # same array object under a second name
# Reset the first 14 palette entries by position
# (presumably one per category, in category order - confirm).
new_colors[list(range(14))] = [
    '#439bb5', '#4175b4', '#f7fcb2', '#9e0142', '#e06d43', '#c32a4b', '#94d4a4',
    '#fff5ae', '#e6f598', '#fdbf6f', '#5e4fa2', '#fee08b', '#66c2a5', '#bfe5a0',
]

In [None]:
# Figure used in the paper.
adata.uns['cluster_figure1A_colors'] = new_colors

# Figure 1a: UMAP coloured by cell type.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    add_outline=False,
    frameon=False,
    title='',
    size=1.8,
)

In [None]:
# UMAP coloured by 'pan_organ' with the Spectral palette.
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=15)
sc.pl.umap(
    adata,
    color=['pan_organ'],
    palette="Spectral",
    frameon=False,
    title='',
)

In [None]:
# List the categories of 'pan_organ'.
adata.obs['pan_organ'].cat.categories

In [None]:
# Copy the stored 'pan_organ' palette into a new array; both names refer to it.
new_colors = old_colors = np.array(adata.uns['pan_organ_colors'])
new_colors

In [None]:
# Palette positions 0, 1, 2 -> App-Col-Rec, Proximal SI, Ileum.
new_colors[[0, 1, 2]] = ['#60b55c', '#ff0000', '#0077b2']

In [None]:
# Figure used in the paper.
adata.uns['pan_organ_colors'] = new_colors

# Figure 1a: UMAP coloured by 'pan_organ'.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(
    adata,
    color=['pan_organ'],
    add_outline=False,
    frameon=False,
    title='',
    size=1.8,
)

In [None]:
# UMAP panels for SLC46A1, FABP6 and AQP8 with the default colormap.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=15)
sc.pl.umap(
    adata,
    color=['SLC46A1', 'FABP6', 'AQP8'],
    add_outline=False,
    frameon=False,
    size=1.8,
)

In [None]:
# Same three genes, drawn with the 'rainbow' colormap and smaller points.
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=15)
sc.pl.umap(
    adata,
    color=['SLC46A1', 'FABP6', 'AQP8'],
    add_outline=False,
    frameon=False,
    size=1.0,
    color_map='rainbow',
)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=15)

# Custom colormap: a short grey ramp at the low end, followed by reversed Spectral.
# `colors` is the matplotlib.colors module imported in an earlier cell.
colors2 = plt.get_cmap('Spectral_r')(np.linspace(0, 1, 128))
colors3 = plt.get_cmap('Greys_r')(np.linspace(0.7, 0.8, 10))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['SLC46A1', 'FABP6', 'AQP8'],
    add_outline=False,
    frameon=False,
    size=1.8,
    color_map=mymap,
)

In [None]:
# Rebuild an AnnData from .raw so that values are read from the raw matrix.
adata_1 = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs=adata.obs)

# Print the maximum raw value of each gene. obs_vector() extracts one gene at a
# time, instead of converting the whole matrix to a DataFrame once per gene.
for gene in ['SLC46A1', 'FABP6', 'AQP8']:
    print(adata_1.obs_vector(gene).max())

# Prenatal UMAP

In [None]:
# Load the prenatal epithelium subset saved earlier.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_prenatal_epi.h5ad')
adata

In [None]:
# Rebuild the AnnData from the matrix and gene table stored in .raw, keeping the
# existing obs; other slots of the loaded object are not carried over.
adata = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)
adata

In [None]:
# Keep the current matrix in .raw before restricting the genes.
adata.raw = adata

# Flag highly variable genes, using 'pan_organ' as the batch key.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5, batch_key='pan_organ')
# Take an explicit copy so the scaling below modifies a standalone object
# instead of writing into a view of the full matrix.
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene without centering.
sc.pp.scale(adata, zero_center=False)

def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='bbknn_batch', neighbors_within_batch=1):
    """Run PCA, BBKNN and UMAP on ``adata``, modifying it in place.

    Despite the name, t-SNE is not computed (that call is commented out below).

    Parameters
    ----------
    adata
        AnnData to process. Its ``obs`` must contain ``dataset`` and ``batch_name``;
        they are combined into ``obs['bbknn_batch']`` on every call.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        ``obs`` column BBKNN uses as the batch label.
    neighbors_within_batch
        Passed through to BBKNN. Previously hard-coded; the default keeps the
        value this cell always used.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # Principal component analysis, followed by two diagnostic plots: the PCA
    # scatter and the variance explained per component.
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # BBKNN handles the batch effect and stands in for sc.pp.neighbors; it
    # expects data that has already been normalised and reduced by PCA.
    adata.obs['bbknn_batch'] = [
        str(dataset) + '-' + str(batch)
        for dataset, batch in zip(adata.obs['dataset'], adata.obs['batch_name'])
    ]
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # Compute the UMAP embedding.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# UMAPs coloured by organ and by PCW.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['organ'],
    add_outline=True,
    frameon=False,
    title='',
)
sc.pl.umap(
    adata,
    color=['PCW'],
    add_outline=True,
    frameon=False,
    title='',
)

In [None]:
# UMAP coloured by the 'cluster_figure1A' annotation.
sc.pl.umap(adata, color=['cluster_figure1A'], add_outline=True, palette="tab20", frameon=False, title='')

In [None]:
# UMAP panels for the eight listed genes.
sc.pl.umap(adata, color=['DEFA5','DEFA6','REG3A','REG3G','ITLN2','NR1H4','NR1H3','VDR'], add_outline=True, frameon=False)

In [None]:
# Save the processed prenatal object under the '_ann' file name.
adata.write('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_prenatal_epi_ann.h5ad')
adata

## 绘图

In [None]:
# Reload the saved prenatal object for plotting.
adata=sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_prenatal_epi_ann.h5ad')
adata

In [None]:
# Export the per-cell metadata (obs table) of the prenatal epithelium to CSV.
adata.obs.to_csv('/mnt/data/Project2021/Gut_immune_surveillance/outputs/prenatal_epi.csv')

In [None]:
from matplotlib import cm, colors
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=12)

# Epithelium: a short grey ramp at the low end, followed by the Oranges scale.
colors2 = plt.get_cmap('Oranges')(np.linspace(0, 1, 128))
colors3 = plt.get_cmap('Greys_r')(np.linspace(0.7, 0.8, 10))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)
sc.pl.umap(
    adata,
    color=['DEFA5', 'DEFA6', 'REG3A', 'REG3G', 'ITLN2', 'NR1H4', 'NR1H3', 'VDR'],
    frameon=False,
    color_map=mymap,
)

In [None]:
# UMAP of the cluster annotation with the tab20 palette.
sc.settings.set_figure_params(dpi=300, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    palette="tab20",
    frameon=False,
    title='',
    size=3.0,
)

In [None]:
# List the categories of the cluster annotation.
adata.obs['cluster_figure1A'].cat.categories

In [None]:
# Copy the stored cluster palette into a new array; both names refer to it.
new_colors = old_colors = np.array(adata.uns['cluster_figure1A_colors'])
new_colors

In [None]:
# Reset the first 14 palette entries by position
# (presumably one per category, in category order - confirm).
new_colors[list(range(14))] = [
    '#439bb5', '#4175b4', '#f7fcb2', '#9e0142', '#e06d43', '#c32a4b', '#94d4a4',
    '#fff5ae', '#e6f598', '#fdbf6f', '#5e4fa2', '#fee08b', '#66c2a5', '#bfe5a0',
]

In [None]:
# Figure used in the paper.
adata.uns['cluster_figure1A_colors'] = new_colors

# Figure 1a: UMAP coloured by cell type.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    add_outline=False,
    frameon=False,
    title='',
    size=3.0,
)

In [None]:
# UMAP coloured by 'pan_organ' with the Spectral palette.
sc.settings.set_figure_params(dpi=150, figsize=(4, 4), fontsize=15)
sc.pl.umap(
    adata,
    color=['pan_organ'],
    palette="Spectral",
    frameon=False,
    title='',
)

In [None]:
print(adata.obs['pan_organ'].cat.categories)

# Copy the stored 'pan_organ' palette into a new array; both names refer to it.
new_colors = old_colors = np.array(adata.uns['pan_organ_colors'])

# Palette positions 0, 1, 2 -> App-Col-Rec, Proximal SI, Ileum.
new_colors[[0, 1, 2]] = ['#60b55c', '#ff0000', '#0077b2']

# Figure used in the paper.
adata.uns['pan_organ_colors'] = new_colors

# Figure 1a: UMAP coloured by 'pan_organ'.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(
    adata,
    color=['pan_organ'],
    add_outline=False,
    frameon=False,
    title='',
    size=3.0,
)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=15)

# Custom colormap: a short grey ramp at the low end, followed by the plasma scale.
# `colors` is the matplotlib.colors module imported in an earlier cell.
colors2 = plt.get_cmap('plasma')(np.linspace(0, 1, 128))
colors3 = plt.get_cmap('Greys_r')(np.linspace(0.7, 0.8, 10))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['SLC46A1', 'FABP6', 'AQP8'],
    add_outline=False,
    frameon=False,
    size=3.0,
    color_map=mymap,
)

In [None]:
# Rebuild an AnnData from .raw so that values are read from the raw matrix.
adata_1 = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)

In [None]:
# Maximum raw value of SLC46A1; obs_vector() reads just this gene instead of
# converting the whole matrix to a DataFrame.
adata_1.obs_vector('SLC46A1').max()

In [None]:
# Maximum raw value of FABP6; obs_vector() reads just this gene instead of
# converting the whole matrix to a DataFrame.
adata_1.obs_vector('FABP6').max()

In [None]:
# Maximum raw value of AQP8; obs_vector() reads just this gene instead of
# converting the whole matrix to a DataFrame.
adata_1.obs_vector('AQP8').max()

# HIO

In [None]:
# Load the HIO epithelium subset saved earlier, then rebuild the AnnData from the
# matrix and gene table stored in .raw, keeping the existing obs.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_HIO_epi.h5ad')
adata = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs = adata.obs)
adata

In [None]:
# Keep the current matrix in .raw before restricting the genes.
adata.raw = adata

# Flag highly variable genes, using 'pan_organ' as the batch key.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5, batch_key='pan_organ')
# Take an explicit copy so the scaling below modifies a standalone object
# instead of writing into a view of the full matrix.
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene without centering.
sc.pp.scale(adata, zero_center=False)

def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='bbknn_batch', neighbors_within_batch=3):
    """Run PCA, BBKNN and UMAP on ``adata``, modifying it in place.

    Despite the name, t-SNE is not computed (that call is commented out below).

    Parameters
    ----------
    adata
        AnnData to process. Its ``obs`` must contain ``dataset`` and ``batch_name``;
        they are combined into ``obs['bbknn_batch']`` on every call.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        ``obs`` column BBKNN uses as the batch label.
    neighbors_within_batch
        Passed through to BBKNN. Previously hard-coded; the default keeps the
        value this cell always used.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # Principal component analysis, followed by two diagnostic plots: the PCA
    # scatter and the variance explained per component.
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # BBKNN handles the batch effect and stands in for sc.pp.neighbors; it
    # expects data that has already been normalised and reduced by PCA.
    adata.obs['bbknn_batch'] = [
        str(dataset) + '-' + str(batch)
        for dataset, batch in zip(adata.obs['dataset'], adata.obs['batch_name'])
    ]
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # Compute the UMAP embedding.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)

In [None]:
# UMAPs coloured by the cluster annotation and by PCW.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    add_outline=True,
    palette="tab20",
    frameon=False,
    title='',
)
sc.pl.umap(
    adata,
    color=['PCW'],
    add_outline=True,
    frameon=False,
    title='',
)

In [None]:
# UMAP panels for the eight listed genes.
sc.pl.umap(adata, color=['DEFA5','DEFA6','REG3A','REG3G','ITLN2','NR1H4','NR1H3','VDR'], add_outline=True, frameon=False)

In [None]:
# Save the processed HIO object under the '_ann' file name.
adata.write('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_HIO_epi_ann.h5ad')
adata

## 绘图

In [None]:
# Reload the saved HIO object for plotting.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad_Figure/Human/Epithelium/Postnatal/human_HIO_epi_ann.h5ad')
adata

In [None]:
# Export the per-cell metadata (obs table) of the HIO epithelium to CSV.
adata.obs.to_csv('/mnt/data/Project2021/Gut_immune_surveillance/outputs/HIO_epi.csv')

In [None]:
# UMAP of the cluster annotation with the tab20 palette.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    palette="tab20",
    frameon=False,
    title='',
)

In [None]:
# List the categories of the cluster annotation.
adata.obs['cluster_figure1A'].cat.categories

In [None]:
# Copy the stored cluster palette into a new array; both names refer to it.
new_colors = old_colors = np.array(adata.uns['cluster_figure1A_colors'])
new_colors

In [None]:
# Reset the first 9 palette entries by position
# (presumably one per category, in category order - confirm).
new_colors[list(range(9))] = [
    '#439bb5', '#f7fcb2', '#9e0142', '#c32a4b', '#fdbf6f',
    '#fee08b', '#bfe5a0', '#df4e4b', '#fa9857',
]

In [None]:
# Figure used in the paper.
adata.uns['cluster_figure1A_colors'] = new_colors

# Figure 1a: UMAP coloured by cell type.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=7)
sc.pl.umap(
    adata,
    color=['cluster_figure1A'],
    add_outline=False,
    frameon=False,
    title='',
)

In [None]:
from matplotlib import cm, colors

# Custom colormap: a short grey ramp at the low end, followed by the full Greys scale.
colors1 = plt.get_cmap('Greys')(np.linspace(0, 1, 128))
colors2 = plt.get_cmap('Greys_r')(np.linspace(0.7, 0.8, 10))
colorsComb = np.concatenate((colors2, colors1), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# UMAP coloured by PCW; both a palette and a colormap are supplied.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['PCW'],
    palette="tab20",
    color_map=mymap,
    frameon=False,
    title='',
)

In [None]:
from matplotlib import cm, colors

# Custom colormap: five viridis samples (0.2 to 1) placed before the full viridis scale.
colors2 = plt.get_cmap('viridis')(np.linspace(0, 1, 128))
colors3 = plt.get_cmap('viridis')(np.linspace(0.2, 1, 5))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# UMAP coloured by PCW; both a palette and a colormap are supplied.
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['PCW'],
    palette="tab20",
    color_map=mymap,
    frameon=False,
    title='',
)

In [None]:
sc.settings.set_figure_params(dpi=400, figsize=(4, 4), fontsize=15)

# Custom colormap: a short grey ramp at the low end, followed by the plasma scale.
# `colors` is the matplotlib.colors module imported in an earlier cell.
colors2 = plt.get_cmap('plasma')(np.linspace(0, 1, 128))
colors3 = plt.get_cmap('Greys_r')(np.linspace(0.7, 0.8, 10))
colorsComb = np.concatenate((colors3, colors2), axis=0)
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

sc.pl.umap(
    adata,
    color=['SLC46A1', 'FABP6', 'AQP8'],
    add_outline=False,
    frameon=False,
    color_map=mymap,
)

In [None]:
# Rebuild an AnnData from .raw so that values are read from the raw matrix.
adata_1 = sc.AnnData(X=adata.raw.X, var=adata.raw.var, obs=adata.obs)

# Print the maximum raw value of each gene. obs_vector() extracts one gene at a
# time, instead of converting the whole matrix to a DataFrame once per gene.
for gene in ['SLC46A1', 'FABP6', 'AQP8']:
    print(adata_1.obs_vector(gene).max())

# All

## 1. All epithelial cells (Harmony integration)

In [None]:
# Load the complete human gut dataset.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad/All_579886_umap.h5ad')
adata

In [None]:
# Extract the epithelium.

# Epithelium. The subset is copied so the PCA/Harmony steps that follow operate
# on a standalone object rather than on a view of the full dataset.
obs_index = adata.obs.loc[adata.obs["Compartment"].isin(['Epithelium']), :].index
adata = adata[obs_index, :].copy()
adata

In [None]:
import harmonypy as hm

def progress_pca_harmony_umap(adata, vars_use=('dataset', 'batch_name'), n_neighbors=10, n_pcs=20,
                              max_iter_harmony=30):
    """Run PCA, Harmony, a neighbour graph and UMAP on ``adata``, modifying it in place.

    Parameters
    ----------
    adata
        AnnData to process; ``obs`` must contain every column named in ``vars_use``.
    vars_use
        ``obs`` columns handed to Harmony as batch variables.
    n_neighbors, n_pcs
        Passed to ``sc.pp.neighbors``, which is run on ``obsm['X_pca_harmony']``.
    max_iter_harmony
        Iteration limit passed to ``hm.run_harmony``.

    All defaults equal the values that were previously hard-coded.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # PCA plus two diagnostic plots.
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # Batch-effect correction with Harmony on the PCA coordinates; the transposed
    # result is stored alongside the uncorrected embedding.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=list(vars_use),
        max_iter_harmony=max_iter_harmony,
    )
    adata.obsm['X_pca_harmony'] = harmony_out.Z_corr.T
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # Compute the UMAP embedding.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
# One UMAP per annotation column, all drawn with the same styling.
sc.settings.set_figure_params(dpi=200, figsize=(4, 4), fontsize=10)
for _obs_key in ('ann_for_cpdb', 'pan_organ', 'pan_development_state'):
    sc.pl.umap(
        adata,
        color=[_obs_key],
        add_outline=True,
        palette="tab20",
        frameon=False,
        title='',
    )

## 2. SCENIC epithelial subset (BBKNN integration)

In [None]:
# Load the 'adata_epi_scenic_24846' epithelial object.
adata = sc.read('/mnt/data/Project2021/Gut_immune_surveillance/data/h5ad/adata_epi_scenic_24846.h5ad')
adata

In [None]:
# Keep the current matrix in .raw before restricting the genes.
adata.raw = adata

# Flag highly variable genes, using 'pan_organ' as the batch key.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5, batch_key='pan_organ')
# Take an explicit copy so the scaling below modifies a standalone object
# instead of writing into a view of the full matrix.
adata = adata[:, adata.var.highly_variable].copy()

# Scale each gene without centering.
sc.pp.scale(adata, zero_center=False)

def progress_pca_bbknn_umap_tsne(adata, n_pcs=20, batch_key='bbknn_batch', neighbors_within_batch=3):
    """Run PCA, BBKNN and UMAP on ``adata``, modifying it in place.

    Despite the name, t-SNE is not computed (that call is commented out below).

    Parameters
    ----------
    adata
        AnnData to process. Its ``obs`` must contain ``dataset`` and ``batch_name``;
        they are combined into ``obs['bbknn_batch']`` on every call.
    n_pcs
        Number of principal components passed to BBKNN.
    batch_key
        ``obs`` column BBKNN uses as the batch label.
    neighbors_within_batch
        Passed through to BBKNN. Previously hard-coded; the default keeps the
        value this cell always used.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # Principal component analysis, followed by two diagnostic plots: the PCA
    # scatter and the variance explained per component.
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # BBKNN handles the batch effect and stands in for sc.pp.neighbors; it
    # expects data that has already been normalised and reduced by PCA.
    adata.obs['bbknn_batch'] = [
        str(dataset) + '-' + str(batch)
        for dataset, batch in zip(adata.obs['dataset'], adata.obs['batch_name'])
    ]
    sc.external.pp.bbknn(
        adata,
        batch_key=batch_key,
        n_pcs=n_pcs,
        neighbors_within_batch=neighbors_within_batch,
    )

    # Compute the UMAP embedding.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

adata = progress_pca_bbknn_umap_tsne(adata)