In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scanpy as sc
import episcanpy.api as epi
import anndata
import bbknn
import os
from scipy import sparse
import matplotlib.pyplot as plt
from scanpy_base_moudle_update2 import *
import scrublet as scr

sc.settings.verbosity = 3
#sc.logging.print_versions()
# Set the figure resolution and other default plot styling.
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=12)

import matplotlib.font_manager
# Print the font family names Matplotlib has registered, so the 'Arial' setting
# below can be checked against what is available on this machine.
# `fontManager.ttflist` replaces `get_fontconfig_fonts()`, which was deprecated
# in Matplotlib 3.5 and is no longer available in later releases.
names = sorted({font.name for font in matplotlib.font_manager.fontManager.ttflist})
print(names)

params={
        #'font.style':'italic',
        'font.weight':'normal',    # or 'bold'
        }
plt.rcParams.update(params)

plt.rcParams['font.family']='Arial'

In [None]:
# Load the starting AnnData object from disk (judging by the path, the raw
# episcanpy matrix of the Cell 2021 human adult multi-organ scATAC dataset).
adata = sc.read('/mnt/data2/Datasets/ATAC_data/Cell2021_human_adult_multiple_organs_scATAC/dataset_output/adata_epi_raw.h5ad')
adata

In [None]:
# Drop the features (var) whose `transcript_annotation` is 'intergenic'.
var_index = adata.var.index[~adata.var["transcript_annotation"].isin(['intergenic'])]
# `.copy()` turns the subset into an independent object: the following cells
# store it in `.raw` and change its matrix in place, which should not rely on
# an implicit view-to-copy conversion.
adata = adata[:, var_index].copy()
adata

In [None]:
# Keep the current (pre-binarization) object in `.raw`; the later
# `sc.pl.umap(..., use_raw=True)` calls read their values from it.
adata.raw = adata

In [None]:
# Binarize the matrix with episcanpy. The return value is not used, so this
# relies on the call updating `adata` in place -- NOTE(review): confirm.
epi.pp.binarize(adata)
adata

In [None]:
# Normalize with episcanpy's `normalize_total`, using its default settings.
# As above, the result is not reassigned, so an in-place update is assumed.
epi.pp.normalize_total(adata)

In [None]:
min_score_value = 0.515
nb_feature_selected = 100000

# Draw the feature-variability plot twice with the same thresholds:
# once with `log=None`, then with `log='log10'`.
for log_setting in (None, 'log10'):
    epi.pl.variability_features(
        adata,
        log=log_setting,
        min_score=min_score_value,
        nb_features=nb_feature_selected,
    )

In [None]:
# Same thresholds and the same two plots as the cell directly above.
min_score_value = 0.515
nb_feature_selected = 100000

for log_setting in (None, 'log10'):
    epi.pl.variability_features(
        adata,
        log=log_setting,
        min_score=min_score_value,
        nb_features=nb_feature_selected,
    )

In [None]:
# Replace `adata` with a new AnnData restricted to the `nb_feature_selected`
# most variable features (no plot is shown for this step).
adata = epi.pp.select_var_feature(
    adata,
    nb_features=nb_feature_selected,
    show=False,
    copy=True,
)

adata

In [None]:
import harmonypy as hm

def progress_pca_harmony_umap(adata, batch_key='batch_name', n_neighbors=10,
                              n_pcs=20, max_iter_harmony=20):
    """Run PCA, Harmony batch correction, a neighbour graph and UMAP on ``adata``.

    Parameters
    ----------
    adata
        Annotated data object. It is modified in place: the PCA result is read
        back from ``obsm['X_pca']`` and the Harmony-corrected embedding is
        written to ``obsm['X_pca_harmony']``.
    batch_key
        Column of ``adata.obs`` passed to Harmony as the batch variable.
    n_neighbors, n_pcs
        Forwarded to ``sc.pp.neighbors``, which is run on ``X_pca_harmony``.
    max_iter_harmony
        Forwarded to ``hm.run_harmony``.

    Returns
    -------
    The same ``adata`` object that was passed in.

    Raises
    ------
    KeyError
        If ``batch_key`` is not a column of ``adata.obs``.
    """
    # Fail before the expensive PCA when the batch column is missing.
    if batch_key not in adata.obs.columns:
        raise KeyError(f"batch_key {batch_key!r} not found in adata.obs")

    sc.tl.pca(adata, svd_solver='arpack')
    sc.pl.pca(adata)
    sc.pl.pca_variance_ratio(adata, log=True)

    # Correct batch effects with Harmony on the PCA embedding.
    harmony_out = hm.run_harmony(
        adata.obsm['X_pca'],
        adata.obs,
        vars_use=[batch_key],
        max_iter_harmony=max_iter_harmony,
    )
    corrected = np.asarray(harmony_out.Z_corr)
    # The embedding stored in `obsm` needs cells on the first axis. Transpose
    # only when that is not already the case, so both a (PCs x cells) and a
    # (cells x PCs) result are stored correctly.
    if corrected.shape[0] != adata.n_obs:
        corrected = corrected.T
    adata.obsm['X_pca_harmony'] = corrected

    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X_pca_harmony', n_pcs=n_pcs)

    # Compute the UMAP embedding from the neighbour graph.
    sc.tl.umap(adata)
    # sc.tl.tsne(adata, n_jobs=6)

    return adata

adata = progress_pca_harmony_umap(adata)

In [None]:
# Cluster at resolutions 1 and 2. Each run writes `obs['leiden']`, so the
# labels are copied to a resolution-specific column right after each run.
for resolution in (1, 2):
    sc.tl.leiden(adata, resolution=resolution)
    adata.obs[f'leiden-epi-{resolution}'] = adata.obs['leiden']

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 3), fontsize=10)

# Styling shared by the three overview UMAPs below.
outline_style = dict(
    add_outline=True,
    palette=sc.pl.palettes.vega_20_scanpy,
    outline_width=(0.2, 0.05),
    frameon=False,
)
# The cluster plots put the legend on the data; the organ plot keeps the default.
for obs_key, extra_kwargs in (
    ('leiden-epi-1', {'legend_loc': 'on data'}),
    ('organ', {}),
    ('leiden-epi-2', {'legend_loc': 'on data'}),
):
    sc.pl.umap(adata, color=[obs_key], **outline_style, **extra_kwargs)

In [None]:
from matplotlib import cm, colors

# Colormap for the feature plots: one grey entry (Greys_r sampled at 0.7)
# followed by 128 samples of plasma, so the lowest values are drawn in grey.
plasma_ramp = plt.cm.plasma(np.linspace(0, 1, 128))
grey_floor = plt.cm.Greys_r(np.linspace(0.7, 0.8, 1))
mymap = colors.LinearSegmentedColormap.from_list(
    'my_colormap',
    np.vstack([grey_floor, plasma_ramp]),
)

In [None]:
# UMAPs coloured by a hand-picked set of peak windows, read from `.raw`.
sc.pl.umap(
    adata,
    color=[
        'chr8_7055000_7060000',
        'chr8_6920000_6925000',
        'chr8_6925000_6930000',
        'chr5_98770000_98775000',
        'chr10_128125000_128130000',
        'chr5_160185000_160190000',
        'chr16_25215000_25220000',
    ],
    color_map=mymap,
    use_raw=True,
    frameon=False,
)

In [None]:
# Select the cells whose `organ` is 'Ileum'.
is_ileum = adata.obs["organ"].isin(['Ileum'])
obs_index = adata.obs[is_ileum].index
adata_ileum = adata[obs_index, :]
adata_ileum

In [None]:
# Select the cells whose `organ` is 'Colon'.
is_colon = adata.obs["organ"].isin(['Colon'])
obs_index = adata.obs[is_colon].index
adata_colon = adata[obs_index, :]
adata_colon

In [None]:
# Ileum cells only: the same five peak windows that the colon cell below plots.
sc.pl.umap(
    adata_ileum,
    color=[
        'chr8_7055000_7060000',
        'chr8_6920000_6925000',
        'chr8_6925000_6930000',
        'chr12_100490000_100495000',
        'chr12_47940000_47945000',
    ],
    frameon=False,
    size=5.0,
    color_map=mymap,
    use_raw=True,
)

In [None]:
# Ileum cells, single peak window, colour scale fixed to [0, 6].
sc.pl.umap(
    adata_ileum,
    color=['chr8_7055000_7060000'],
    frameon=False,
    size=5.0,
    color_map=mymap,
    use_raw=True,
    vmin=0,
    vmax=6,
)

In [None]:
# Colon cells only: the same five peak windows as plotted for the ileum above.
sc.pl.umap(
    adata_colon,
    color=[
        'chr8_7055000_7060000',
        'chr8_6920000_6925000',
        'chr8_6925000_6930000',
        'chr12_100490000_100495000',
        'chr12_47940000_47945000',
    ],
    frameon=False,
    size=5.0,
    color_map=mymap,
    use_raw=True,
)

In [None]:
# Colon cells, single peak window, colour scale fixed to [0, 6].
sc.pl.umap(
    adata_colon,
    color=['chr8_7055000_7060000'],
    frameon=False,
    size=5.0,
    color_map=mymap,
    use_raw=True,
    vmin=0,
    vmax=6,
)

In [None]:
# Rank features with episcanpy, grouping cells by the 'leiden-epi-1' column
# created by the clustering cell above (presumably one ranking per cluster).
epi.tl.rank_features(adata, 'leiden-epi-1', omic='ATAC')

In [None]:
# Plot the ranked features per group, using the `transcript_annotation`
# column for the feature labels.
epi.pl.rank_feat_groups(adata, feature_symbols='transcript_annotation')

In [None]:
# Checkpoint: save the processed object; the next cell reads this same file back.
adata.write('/mnt/data2/Datasets/ATAC_data/Cell2021_human_adult_multiple_organs_scATAC/dataset_output/adata_epi_umap_20211229.h5ad')

In [None]:
# Reload the checkpoint written by the previous cell. This rebinds `adata`,
# which is why `adata_ileum` / `adata_colon` are rebuilt in the cells below.
adata = sc.read('/mnt/data2/Datasets/ATAC_data/Cell2021_human_adult_multiple_organs_scATAC/dataset_output/adata_epi_umap_20211229.h5ad')
adata

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 4), fontsize=10)

# Cluster and organ overviews without titles; styling shared by both calls.
untitled_outline_style = dict(
    add_outline=True,
    palette=sc.pl.palettes.vega_20_scanpy,
    outline_width=(0.2, 0.05),
    frameon=False,
    title='',
)
sc.pl.umap(adata, color=['leiden-epi-1'], legend_loc='on data', **untitled_outline_style)
sc.pl.umap(adata, color=['organ'], **untitled_outline_style)

In [None]:
# Override the first two entries of the stored `organ` palette, then redraw
# the organ UMAP at high resolution.
new_colors = np.array(adata.uns['organ_colors'])
for position, hex_code in enumerate(('#31bf73', '#fa56a3')):
    new_colors[position] = hex_code
adata.uns['organ_colors'] = new_colors

sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=10)
sc.pl.umap(
    adata,
    color=['organ'],
    frameon=False,
    title='',
    size=5,
)

**SI** — plots on the ileum cells (`adata_ileum`)

In [None]:
# Rebuild the ileum subset from the reloaded `adata`.
is_ileum = adata.obs["organ"].isin(['Ileum'])
obs_index = adata.obs[is_ileum].index
adata_ileum = adata[obs_index, :]
adata_ileum

In [None]:
from matplotlib import cm, colors

# Grey-then-plasma colormap: one Greys_r sample (at 0.7) stacked in front of
# 128 plasma samples, so the lowest values are drawn in grey.
plasma_ramp = plt.cm.plasma(np.linspace(0, 1, 128))
grey_floor = plt.cm.Greys_r(np.linspace(0.7, 0.8, 1))
mymap = colors.LinearSegmentedColormap.from_list(
    'my_colormap',
    np.vstack([grey_floor, plasma_ramp]),
)

In [None]:
# Peak windows for the SI panel, grouped by the gene named in each comment.
SI_var_list = [
    # CCL24
    'chr7_75810000_75815000', 'chr7_75815000_75820000',
    'chr7_75820000_75825000', 'chr7_75825000_75830000',
    # LYZ
    'chr12_69345000_69350000', 'chr12_69350000_69355000',
    # DEFA5
    'chr8_7055000_7060000',
    # DEFA6
    'chr8_6920000_6925000', 'chr8_6925000_6930000',
    # PRSS2
    'chr7_142765000_142770000', 'chr7_142770000_142775000',
    # REG3A
    'chr2_79155000_79160000', 'chr2_79160000_79165000',
    # REG3G
    'chr2_79020000_79025000', 'chr2_79025000_79030000',
    # DMBT1
    'chr10_122555000_122560000', 'chr10_122560000_122565000',
    'chr10_122565000_122570000', 'chr10_122570000_122575000',
    'chr10_122575000_122580000', 'chr10_122585000_122590000',
    'chr10_122590000_122595000', 'chr10_122595000_122600000',
    'chr10_122615000_122620000', 'chr10_122620000_122625000',
    'chr10_122625000_122630000', 'chr10_122630000_122635000',
    'chr10_122635000_122640000', 'chr10_122640000_122645000',
    # GBP1
    'chr1_89050000_89055000', 'chr1_89055000_89060000',
    'chr1_89060000_89065000', 'chr1_89065000_89070000',
    # LEAP2
    'chr5_132870000_132875000',
]

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 4), fontsize=10)
# One UMAP panel per SI peak window, on the ileum cells.
sc.pl.umap(
    adata_ileum,
    color=SI_var_list,
    frameon=False,
    size=15,
    color_map=mymap,
    use_raw=True,
    vmin=0,
)

In [None]:
# LI
# Peak windows for the LI panel, grouped by the gene named in each comment.
LI_var_list = [
    # WFDC2
    'chr20_45465000_45470000', 'chr20_45470000_45475000',
    'chr20_45475000_45480000', 'chr20_45480000_45485000',
    # LYPD8
    'chr1_248735000_248740000', 'chr1_248740000_248745000',
    'chr1_248745000_248750000', 'chr1_248750000_248755000',
    'chr1_248755000_248760000',
    # ADM
    'chr11_10300000_10305000', 'chr11_10305000_10310000',
    # DEFB1
    'chr8_6870000_6875000', 'chr8_6875000_6880000',
    # PI3
    'chr20_45170000_45175000', 'chr20_45175000_45180000',
    # CCL20
    'chr2_227800000_227805000', 'chr2_227805000_227810000',
    'chr2_227810000_227815000', 'chr2_227815000_227820000',
    # CXCL2
    'chr4_74095000_74100000', 'chr4_74100000_74105000',
]

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 4), fontsize=10)
# One UMAP panel per LI peak window, still on the ileum cells.
sc.pl.umap(
    adata_ileum,
    color=LI_var_list,
    frameon=False,
    size=15,
    color_map=mymap,
    use_raw=True,
    vmin=0,
)

**LI** — plots on the colon cells (`adata_colon`)

In [None]:
# Rebuild the colon subset from the reloaded `adata`.
is_colon = adata.obs["organ"].isin(['Colon'])
obs_index = adata.obs[is_colon].index
adata_colon = adata[obs_index, :]
adata_colon

In [None]:
# Peak windows for the SI panel (same entries as the earlier SI list cell),
# grouped by the gene named in each comment.
SI_var_list = [
    # CCL24
    'chr7_75810000_75815000', 'chr7_75815000_75820000',
    'chr7_75820000_75825000', 'chr7_75825000_75830000',
    # LYZ
    'chr12_69345000_69350000', 'chr12_69350000_69355000',
    # DEFA5
    'chr8_7055000_7060000',
    # DEFA6
    'chr8_6920000_6925000', 'chr8_6925000_6930000',
    # PRSS2
    'chr7_142765000_142770000', 'chr7_142770000_142775000',
    # REG3A
    'chr2_79155000_79160000', 'chr2_79160000_79165000',
    # REG3G
    'chr2_79020000_79025000', 'chr2_79025000_79030000',
    # DMBT1
    'chr10_122555000_122560000', 'chr10_122560000_122565000',
    'chr10_122565000_122570000', 'chr10_122570000_122575000',
    'chr10_122575000_122580000', 'chr10_122585000_122590000',
    'chr10_122590000_122595000', 'chr10_122595000_122600000',
    'chr10_122615000_122620000', 'chr10_122620000_122625000',
    'chr10_122625000_122630000', 'chr10_122630000_122635000',
    'chr10_122635000_122640000', 'chr10_122640000_122645000',
    # GBP1
    'chr1_89050000_89055000', 'chr1_89055000_89060000',
    'chr1_89060000_89065000', 'chr1_89065000_89070000',
    # LEAP2
    'chr5_132870000_132875000',
]

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 4), fontsize=10)
# One UMAP panel per SI peak window, on the colon cells.
sc.pl.umap(
    adata_colon,
    color=SI_var_list,
    frameon=False,
    size=10,
    color_map=mymap,
    use_raw=True,
    vmin=0,
)

In [None]:
# LI
# Peak windows for the LI panel (same entries as the earlier LI list cell),
# grouped by the gene named in each comment.
LI_var_list = [
    # WFDC2
    'chr20_45465000_45470000', 'chr20_45470000_45475000',
    'chr20_45475000_45480000', 'chr20_45480000_45485000',
    # LYPD8
    'chr1_248735000_248740000', 'chr1_248740000_248745000',
    'chr1_248745000_248750000', 'chr1_248750000_248755000',
    'chr1_248755000_248760000',
    # ADM
    'chr11_10300000_10305000', 'chr11_10305000_10310000',
    # DEFB1
    'chr8_6870000_6875000', 'chr8_6875000_6880000',
    # PI3
    'chr20_45170000_45175000', 'chr20_45175000_45180000',
    # CCL20
    'chr2_227800000_227805000', 'chr2_227805000_227810000',
    'chr2_227810000_227815000', 'chr2_227815000_227820000',
    # CXCL2
    'chr4_74095000_74100000', 'chr4_74100000_74105000',
]

In [None]:
sc.settings.set_figure_params(dpi=150, figsize = (4, 4), fontsize=10)
# One UMAP panel per LI peak window, on the colon cells.
sc.pl.umap(
    adata_colon,
    color=LI_var_list,
    frameon=False,
    size=10,
    color_map=mymap,
    use_raw=True,
    vmin=0,
)

# Figure4D

**SI**: DEFA5/6: 'chr8_7055000_7060000', # DEFA5 // 'chr8_6920000_6925000','chr8_6925000_6930000', # DEFA6

**LI**: WFDC2/ADM: 'chr20_45465000_45470000','chr20_45470000_45475000','chr20_45475000_45480000','chr20_45480000_45485000',# WFDC2 // 'chr11_10300000_10305000','chr11_10305000_10310000', # ADM

In [None]:
sc.settings.set_figure_params(dpi=400, figsize = (4, 4), fontsize=10)
# DEFA5: the same peak window on the ileum cells, then on the colon cells,
# with an identical [0, 6] colour scale so the two panels are comparable.
for organ_subset in (adata_ileum, adata_colon):
    sc.pl.umap(
        organ_subset,
        color=['chr8_7055000_7060000'],
        frameon=False,
        size=15.0,
        color_map=mymap,
        use_raw=True,
        vmin=0,
        vmax=6,
        title='',
    )

In [None]:
from matplotlib import cm, colors

# Rebuild the grey-then-plasma colormap: a single Greys_r sample (at 0.7)
# placed before 128 plasma samples, so the lowest values are drawn in grey.
plasma_ramp = plt.cm.plasma(np.linspace(0, 1, 128))
grey_floor = plt.cm.Greys_r(np.linspace(0.7, 0.8, 1))
mymap = colors.LinearSegmentedColormap.from_list(
    'my_colormap',
    np.vstack([grey_floor, plasma_ramp]),
)

In [None]:
# DEFA6: both peak windows, ileum first and colon second, untitled panels
# and a shared [0, 6] colour scale.
for organ_subset in (adata_ileum, adata_colon):
    sc.pl.umap(
        organ_subset,
        color=['chr8_6920000_6925000', 'chr8_6925000_6930000'],
        frameon=False,
        size=15.0,
        color_map=mymap,
        use_raw=True,
        vmin=0,
        vmax=6,
        title=['', ''],
    )

In [None]:
# WFDC2: four peak windows, ileum first and colon second, with a shared
# [0, 6] colour scale.
for organ_subset in (adata_ileum, adata_colon):
    sc.pl.umap(
        organ_subset,
        color=[
            'chr20_45465000_45470000',
            'chr20_45470000_45475000',
            'chr20_45475000_45480000',
            'chr20_45480000_45485000',
        ],
        frameon=False,
        size=15.0,
        color_map=mymap,
        use_raw=True,
        vmin=0,
        vmax=6,
    )

In [None]:
# Stem
E01_index = adata.obs.loc[adata.obs["leiden-all-1"].isin(['10']), :].index
# TA
E02_index = adata.obs.loc[adata.obs["leiden-all-1"].isin(['6','12','18']), :].index

In [None]:
# Every cell starts with the stem-cell label; the assignments below overwrite
# it for the cells belonging to the other groups.
adata.obs['cluster_figure1A'] = 'C01-E01-Stem cells'
# `.loc` is used because the row selector is a whole Index of cell labels;
# `.at` is the single-value accessor and is not meant for multi-row assignment.
adata.obs.loc[E02_index,'cluster_figure1A']='C01-E02-TA'
# FIXME: `E03_index` is not defined in any cell visible in this notebook;
# it must be created (like `E02_index`) before this line can run.
adata.obs.loc[E03_index,'cluster_figure1A']='C01-E03-Immature enterocytes'

In [None]:
# Load the 10x PBMC peak-by-barcode matrix. Note that this rebinds `adata`.
# `gex_only=False` is required here: the default keeps only 'Gene Expression'
# features, which would discard the features of a peak matrix.
adata = sc.read_10x_h5(
    '/mnt/data2/Datasets/Test_datasets/Signac/PBMC/atac_v1_pbmc_10k_filtered_peak_bc_matrix.h5',
    gex_only=False,
)
adata

In [None]:
# Path of the `singlecell.csv` file stored in the same directory as the PBMC
# peak matrix. It is bound to a name because a bare path is not valid Python
# and made this cell fail with a SyntaxError.
pbmc_singlecell_csv = '/mnt/data2/Datasets/Test_datasets/Signac/PBMC/atac_v1_pbmc_10k_singlecell.csv'