This script plots the E1 values and A/B compartment regions of representative liver and brain sections at different stages, and compares the results with Hi-C and histone data to validate the accuracy of the compartment calls from SpaceA data.

In [None]:
import cooler
import numpy as np
import pandas as pd
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
mpl.rcParams['pdf.fonttype'] = 42


# Root directory of the per-sample E1 tables; the plotting cell reads
# '<dir_path><sample>/<sample>_fasthigashi_leiden_anno_man_<tissue>.cis_eigs.csv'.
dir_path = '/home/goubo/CRICK/CRICK/spaceA/higashi_v2/higashi/'
# Directory holding the GC table 'mm10_gc_cov_1MB.tsv'.
gc_dir = '/home/xuyuetong/CRICK_Data_v4/Split_Cluster/'
# Directory holding the 'ChIP-seq_H3K27ac_<region>_e<stage>.1mb.bed' tables.
h3k27ac_dir = '/home/goubo/CRICK/CRICK/spaceA/onlineData/epi_nature_BingRen_mouseEmbryo/bed_1mb/'
# Directory the output PDF is written to.
save_dir = '/home/xuyuetong/CRICK_Data_v3/Paper_Fig/Lineplot_E1value/'

# One sample id per stage; characters 1-4 of each id (e.g. '11.5' in
# 'E11.5L1') are used to select the matching H3K27ac files.
sample_list = ['E11.5L1', 'E12.5L6', 'E13.5C1', 'E14.5F5']
# Stage labels, listed in the same order as sample_list.
time_list = ['E11.5', 'E12.5', 'E13.5', 'E14.5']
# Tissues plotted side by side; each gets its own subplot column.
tissue_list = ['Liver', 'Brain']


In [35]:
from scipy.interpolate import make_interp_spline

# Writes one PDF page per chromosome (chr1-chr19). Each page has a GC row on
# top, then for every sample an E1 row followed by an H3K27ac row, with one
# column per tissue.
plot_path = '{0}Lineplot_E1_GC_H3K27ac_1MB.pdf'.format(save_dir)


def _strip_axes(ax):
    """Drop the x ticks and the top/right/bottom spines of a track panel."""
    ax.set_xticks([])
    for side in ('right', 'top', 'bottom'):
        ax.spines[side].set_visible(False)


def _chrom_column(table, chrom_id, column):
    """Return the values of `column` for the rows of `table` on `chrom_id`."""
    return table.loc[table['chrom'] == chrom_id, column].values


def _read_h3k27ac(region, stage):
    """Load the H3K27ac table of one tissue region at one stage."""
    path = '{0}ChIP-seq_H3K27ac_{1}_e{2}.1mb.bed'.format(h3k27ac_dir, region, stage)
    return pd.read_csv(path, header=0, index_col=None, sep='\t')


# None of the input tables depends on the chromosome, so load each file once
# here instead of re-reading it on every page.
gc_path = '{0}mm10_gc_cov_1MB.tsv'.format(gc_dir)
gc_cov = pd.read_csv(gc_path, header=0, index_col=None, sep='\t')

e1_tables = {}
h3k27ac_tables = {}
for sample_id in sample_list:
    # Stage digits embedded in the sample id, e.g. 'E11.5L1' -> '11.5'.
    stage = sample_id[1:5]
    for tissue in tissue_list:
        e1_path = '{0}{1}/{1}_fasthigashi_leiden_anno_man_{2}.cis_eigs.csv'.format(dir_path, sample_id, tissue)
        e1_tables[sample_id, tissue] = pd.read_csv(e1_path, header=0, index_col=0, sep=',')
        if tissue == 'Liver':
            regions = ('liver',)
        else:
            # Brain is compared against fore-, mid- and hindbrain pooled together.
            regions = ('forebrain', 'midbrain', 'hindbrain')
        h3k27ac_tables[sample_id, tissue] = [_read_h3k27ac(region, stage) for region in regions]

plot_col = len(tissue_list)
plot_row = 2 * len(sample_list) + 1

with PdfPages(plot_path) as pdf:

    for c in range(1, 20):

        chrom_id = 'chr{0}'.format(c)
        plt.figure(figsize=(6*plot_col, 2*plot_row))
        print(chrom_id)

        # NOTE(review): the GC track drops 3 bins at each chromosome end while
        # the E1 and H3K27ac tracks below are plotted untrimmed, so the panels
        # may not span the same bins - confirm this offset is intended.
        gc_cov_chr = _chrom_column(gc_cov, chrom_id, 'GC')[3: -3]
        for i in range(plot_col):
            ax_gc = plt.subplot(plot_row, plot_col, i+1)
            ax_gc.plot(gc_cov_chr, label='GC')
            ax_gc.set_ylabel('GC', fontsize=16)
            ax_gc.set_title(chrom_id, fontsize=20)
            _strip_axes(ax_gc)

        for s, sample_id in enumerate(sample_list):
            for t, tissue in enumerate(tissue_list):
                e1_array = _chrom_column(e1_tables[sample_id, tissue], chrom_id, 'E1')
                bins = range(len(e1_array))

                # Skip the GC row, then two rows (E1, H3K27ac) per sample.
                plot_start_idx = plot_col + s * 2 * plot_col + t + 1
                ax1 = plt.subplot(plot_row, plot_col, plot_start_idx)
                # Positive E1 is shaded red, zero or negative E1 blue.
                ax1.fill_between(bins, e1_array, 0, where=(e1_array > 0), facecolor='red', alpha=0.5)
                ax1.fill_between(bins, e1_array, 0, where=(e1_array <= 0), facecolor='blue', alpha=0.5)
                ax1.plot(e1_array, label='E1', color='black', alpha=1, lw=0.2)
                ax1.set_ylabel('E1', fontsize=16)
                ax1.set_title('{0} {1}'.format(sample_id, tissue), fontsize=20)
                _strip_axes(ax1)

                # prepare h3k27ac data
                tables = h3k27ac_tables[sample_id, tissue]
                if tissue == 'Liver':
                    h3k27ac_array = _chrom_column(tables[0], chrom_id, 'mean_signal')
                else:
                    fore_sign, mid_sign, hind_sign = (
                        _chrom_column(table, chrom_id, 'total_signal') for table in tables
                    )
                    fore_peak, mid_peak, hind_peak = (
                        _chrom_column(table, chrom_id, 'peak_count') for table in tables
                    )
                    h3k27ac_brain_sign = fore_sign + mid_sign + hind_sign
                    h3k27ac_brain_peak = fore_peak + mid_peak + hind_peak
                    # Pooled signal per peak; bins with no peak stay at 0
                    # instead of dividing by zero.
                    h3k27ac_array = np.divide(
                        h3k27ac_brain_sign,
                        h3k27ac_brain_peak,
                        out=np.zeros_like(h3k27ac_brain_sign, dtype=float),
                        where=h3k27ac_brain_peak != 0,
                    )

                # The H3K27ac panel sits directly below its E1 panel.
                ax_h3k27ac = plt.subplot(plot_row, plot_col, plot_start_idx+plot_col)
                ax_h3k27ac.plot(h3k27ac_array)
                ax_h3k27ac.set_ylabel('H3K27ac', fontsize=16)
                ax_h3k27ac.set_title('{0} {1}'.format(sample_id, tissue), fontsize=20)
                _strip_axes(ax_h3k27ac)

        plt.subplots_adjust(hspace=0.5, wspace=0.2)
        pdf.savefig()
        plt.close()


chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
