In [1]:
import numpy as np
import pandas as pd

In [2]:
# Fetch the GSE201352 H3K27ac differential-peak table from the GEO FTP mirror
# (-q silences wget), then decompress it next to the notebook. gunzip's -f
# overwrites an existing .tsv, so the cell can be re-run without prompting.
!wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE201nnn/GSE201352/suppl/GSE201352_LIMA_H3K27ac_diffPeaks_overlap.tsv.gz -q
!gunzip GSE201352_LIMA_H3K27ac_diffPeaks_overlap.tsv.gz -f

In [3]:
# Fetch the GSE201352 raw-data archive (-q silences wget), unpack it into the
# working directory (-v lists every extracted file, which is the output shown
# below: per-sample .bw tracks and .narrowPeak.gz peak calls), then delete the
# tarball since only its extracted members are kept.
!wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE201nnn/GSE201352/suppl/GSE201352_RAW.tar -q
!tar -xvf GSE201352_RAW.tar
!rm GSE201352_RAW.tar

GSM6061743_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0000_1.1.1.bw
GSM6061743_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0000_1.1.1_peaks.narrowPeak.gz
GSM6061744_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0000_2.1.1.bw
GSM6061744_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0000_2.1.1_peaks.narrowPeak.gz
GSM6061745_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0030_1.1.1.bw
GSM6061745_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0030_1.1.1_peaks.narrowPeak.gz
GSM6061746_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0030_2.1.1.bw
GSM6061746_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0030_2.1.1_peaks.narrowPeak.gz
GSM6061747_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0060_1.1.1.bw
GSM6061747_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0060_1.1.1_peaks.narrowPeak.gz
GSM6061748_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0060_2.1.1.bw
GSM6061748_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0060_2.1.1_peaks.narrowPeak.gz
GSM6061749_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0090_1.1.1.bw
GSM6061749_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0090_1.1.1_peaks.narrowPeak.gz
GSM6061750_LIMA_ChIP_h3k27ac_THP1_WT_LPIF_S_0090_2.1.1.bw
GSM6061750_

In [4]:
# Load the differential-peak table, discard every row that has a missing value,
# and remove the "chr" text from the chromosome names (e.g. "chr1" -> "1").
h3k27ac = (
    pd.read_csv("GSE201352_LIMA_H3K27ac_diffPeaks_overlap.tsv", sep="\t")
    .dropna()
    .assign(chr=lambda peaks: peaks["chr"].str.replace("chr",""))
)

In [5]:
#Divide 0 hr and 24 hr timepoint H3K27ac values into quartiles
# Quartile label = 1 + number of cut-points (25th/50th/75th percentile) that the
# value strictly exceeds, so a value equal to a cut-point stays in the lower bin.
# Labels are stored as floats (1.0 .. 4.0).
for vst_column, quartile_column in (("m0000_VST", "quartile_0h"),
                                    ("m1440_VST", "quartile_24h")):
    vst_signal = h3k27ac[vst_column]
    quartile_label = pd.Series(1.0, index=h3k27ac.index)
    for fraction in (0.25, 0.5, 0.75):
        cut_point = np.quantile(vst_signal, fraction)
        quartile_label = quartile_label + (vst_signal > cut_point)
    h3k27ac[quartile_column] = quartile_label

In [6]:
#Pick out the most changed regions
# "changed" is False for unselected peaks and "up"/"down" for selected ones.
# Create it as an object column up front: writing strings into a bool column
# depends on silent dtype upcasting, which pandas deprecated in 2.1 and is
# expected to reject in pandas 3.
h3k27ac["changed"] = pd.Series(False, index=h3k27ac.index, dtype=object)
# up: significant, lower half of the signal at 0 h (quartile 1-2), top quartile at 24 h
h3k27ac.loc[(h3k27ac["adjusted_pval"] < 0.05) &
            (h3k27ac["quartile_0h"] < 3) &
            (h3k27ac["quartile_24h"] == 4), 
            "changed"] = "up"
# down: significant, top quartile at 0 h, lower half of the signal at 24 h (quartile 1-2)
h3k27ac.loc[(h3k27ac["adjusted_pval"] < 0.05) &
            (h3k27ac["quartile_0h"] == 4) &
            (h3k27ac["quartile_24h"] < 3), 
            "changed"] = "down"

In [7]:
# Write the coordinates of each set of changed regions to its own BED file:
# three tab-separated columns (chr, start, end), no header row, no index.
bed_columns = ["chr", "start", "end"]
for direction, bed_path in (("up", "THP1_H3K27ac_up.bed"),
                            ("down", "THP1_H3K27ac_down.bed")):
    regions = h3k27ac.loc[h3k27ac["changed"] == direction, bed_columns]
    regions.to_csv(bed_path, sep="\t", header=None, index=False)