# ChIPseeker

In [1]:
## loading packages
suppressPackageStartupMessages({
  library(ChIPseeker)
  library(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
  library(clusterProfiler)
})

In [2]:
# Load contrasted peaks from MACS2
peak <- readPeakFile("../results/diffbind/diffbind_results.tsv")
seqlevelsStyle(peak) <- "UCSC"  # e.g. "chr2R" -> "2R"

In [3]:
# Set database for genome annotation
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene

In [4]:
# Define TSS regions (takes ca 30sec)
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
tagMatrix <- getTagMatrix(peak, windows=promoter)

>> preparing start_site regions by gene... 2024-08-20 14:33:24
>> preparing tag matrix...  2024-08-20 14:33:24 


“Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': chrM, chrX_CP007103v1_random, chrX_CP007104v1_random, chrX_DS483648v1_random, chrX_DS483655v1_random, chrX_DS483660v1_random, chrX_DS483665v1_random, chrX_DS483666v1_random, chrX_DS483669v1_random, chrX_DS483685v1_random, chrX_DS483698v1_random, chrX_DS483745v1_random, chrX_DS483784v1_random, chrX_DS483789v1_random, chrX_DS483795v1_random, chrX_DS483803v1_random, chrX_DS483809v1_random, chrX_DS483818v1_random, chrX_DS483821v1_random, chrX_DS483843v1_random, chrX_DS483851v1_random, chrX_DS483885v1_random, chrX_DS483888v1_random, chrX_DS483892v1_random, chrX_DS483893v1_random, chrX_DS483897v1_random, chrX_DS483903v1_random, chrX_DS483905v1_random, chrX_DS483907v1_random, chrX_DS483909v1_random, chrX_DS483923v1_random, chrX_DS483926v1_random, chrX_DS483928v1_random, chrX_DS483946v1_random, chrX_DS483948v1_random, chrX_DS483950v1_random, chrX_DS483955v1_random, chrX_DS483963v1_random, chrX_DS483969v1_random, 

In [5]:
# Peak annotation
peakAnno <- annotatePeak(peak, 
    tssRegion=c(-3000, 3000),
    TxDb=txdb)

>> preparing features information...		 2024-08-20 14:33:40 
>> identifying nearest features...		 2024-08-20 14:33:40 


“Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': M
  - in 'y': chrM, chrX_CP007103v1_random, chrX_CP007104v1_random, chrX_DS483648v1_random, chrX_DS483655v1_random, chrX_DS483660v1_random, chrX_DS483665v1_random, chrX_DS483666v1_random, chrX_DS483669v1_random, chrX_DS483685v1_random, chrX_DS483698v1_random, chrX_DS483745v1_random, chrX_DS483784v1_random, chrX_DS483789v1_random, chrX_DS483795v1_random, chrX_DS483803v1_random, chrX_DS483809v1_random, chrX_DS483818v1_random, chrX_DS483821v1_random, chrX_DS483843v1_random, chrX_DS483851v1_random, chrX_DS483885v1_random, chrX_DS483888v1_random, chrX_DS483892v1_random, chrX_DS483893v1_random, chrX_DS483897v1_random, chrX_DS483903v1_random, chrX_DS483905v1_random, chrX_DS483907v1_random, chrX_DS483909v1_random, chrX_DS483923v1_random, chrX_DS483926v1_random, chrX_DS483928v1_random, chrX_DS483946v1_random, chrX_DS483948v1_random, chrX_DS483950v1_random, chrX_DS483955v1_random, chrX_DS483963v1_random, chrX_DS483

>> calculating distance from peak to TSS...	 2024-08-20 14:33:40 
>> assigning genomic annotation...		 2024-08-20 14:33:40 


“Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': M
  - in 'y': chrM, chrX_CP007103v1_random, chrX_CP007104v1_random, chrX_DS483648v1_random, chrX_DS483655v1_random, chrX_DS483660v1_random, chrX_DS483665v1_random, chrX_DS483666v1_random, chrX_DS483669v1_random, chrX_DS483685v1_random, chrX_DS483698v1_random, chrX_DS483745v1_random, chrX_DS483784v1_random, chrX_DS483789v1_random, chrX_DS483795v1_random, chrX_DS483803v1_random, chrX_DS483809v1_random, chrX_DS483818v1_random, chrX_DS483821v1_random, chrX_DS483843v1_random, chrX_DS483851v1_random, chrX_DS483885v1_random, chrX_DS483888v1_random, chrX_DS483892v1_random, chrX_DS483893v1_random, chrX_DS483897v1_random, chrX_DS483903v1_random, chrX_DS483905v1_random, chrX_DS483907v1_random, chrX_DS483909v1_random, chrX_DS483923v1_random, chrX_DS483926v1_random, chrX_DS483928v1_random, chrX_DS483946v1_random, chrX_DS483948v1_random, chrX_DS483950v1_random, chrX_DS483955v1_random, chrX_DS483963v1_random, chrX_DS483

>> assigning chromosome lengths			 2024-08-20 14:33:45 
>> done...					 2024-08-20 14:33:45 


In [6]:
# Save the results
peakAnno.df <- as.data.frame(peakAnno)
print(peakAnno.df[1:5,])

  seqnames    start      end width strand width.1 strand.1     Conc
1    chr3R 21662657 21663056   400      *     401        * 10.19282
2    chr2L    47259    47658   400      *     401        * 10.29213
3     chrX  3789785  3790184   400      *     401        * 10.05432
4    chr3L 16806423 16806822   400      *     401        * 10.73806
5    chr3R 23723375 23723774   400      *     401        * 11.62249
  Conc_Cph_NanoDam Conc_control      Fold      p.value          FDR
1         6.397865    11.139892 -4.664747 2.432945e-50 6.258750e-46
2        11.187563     7.453856  3.655514 3.620007e-35 4.656234e-31
3        10.900032     7.752839  3.057863 3.356041e-26 2.877805e-22
4        11.564912     8.593663  2.884171 2.430698e-25 1.563243e-21
5        12.493677     9.073190  3.318609 3.109194e-25 1.599680e-21
                                        annotation geneChr geneStart  geneEnd
1                                 Promoter (<=1kb)       4  21661186 21662972
2 Intron (FBtr0078163/FBgn00

In [7]:
# write to csv
write.csv(peakAnno.df, file = "../results/diffbind/peak_anno.csv")


# DiffBind

In [None]:
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# BiocManager::install("DiffBind")

In [18]:
## loading packages
suppressPackageStartupMessages({
  library(DiffBind)
  # library(tidyverse)
})

In [19]:
samples <- read.csv('../resources/samplesheet_metadata.csv')
dbObj <- dba(sampleSheet=samples)


“incomplete final line found by readTableHeader on '../resources/samplesheet_metadata.csv'”


DS-002A5   control  2 bed

DS-002A6   Cph_NanoDam  2 bed

DS-002A7   control  1 bed

DS-002A8   Cph_NanoDam  1 bed



In [20]:
# The next step is to take the alignment files and compute count information for each of the peaks/regions in the consensus set. 
dbObj <- dba.count(dbObj, bUseSummarizeOverlaps=TRUE)

Computing summits...

Re-centering peaks...



In [35]:
# To see how well the samples cluster with one another, we can draw a PCA plot using all consensus sites.
pdf('../results/diffbind/DiffBind_pca.pdf')
dba.plotPCA(dbObj,  attributes=DBA_FACTOR, label=DBA_ID)
dev.off()

In [36]:
# We can also plot a correlation heatmap, to evaluate the relationship between samples.
pdf('../results/diffbind/DiffBind_corr_heatmap.pdf')
plot(dbObj)
dev.off()

In [28]:
# tell DiffBind which samples we want to compare to one another
print("Contrasting samples...")
dbObj <- dba.contrast(dbObj, contrast=c("Condition", "Cph_NanoDam", "control"), minMembers = 2)

[1] "Contrasting samples..."


Computing results names...



In [29]:
# Performing the differential enrichment analysis
print("Performing differential enrichment analysis...")
dbObj <- dba.analyze(dbObj, method=DBA_DESEQ2, design="~Condition")

[1] "Performing differential enrichment analysis..."


Applying Blacklist/Greylists...

No genome detected.

Normalize DESeq2 with defaults...

Setting design...

Analyzing...

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates



In [37]:
# Show summary
print("Differential enrichment analysis results:")
dba.show(dbObj, bContrasts=T)

[1] "Differential enrichment analysis results:"


Unnamed: 0_level_0,Factor,Group,Samples,Group2,Samples2,DB.DESeq2
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Condition,Cph_NanoDam,2,control,2,1256


In [38]:
# MA plots are a useful way to visualize the effect of normalization on data, as well as seeing which of the data points are being identified as differentially bound.
pdf('../results/diffbind/DiffBind_MAplot_deseq.pdf')
dba.plotMA(dbObj, method=DBA_DESEQ2)
dev.off()

In [39]:
# concentrations of each sample groups plotted against each other.
pdf('../results/diffbind/DiffBind_MAplot_bXY.pdf')
dba.plotMA(dbObj, bXY=TRUE)
dev.off()

In [40]:
# If we want to see how the reads are distributed amongst the different classes of differentially bound sites and sample groups, we can use a boxplot
pdf('../results/diffbind/DiffBind_boxplot.pdf')
pvals <- dba.plotBox(dbObj)
dev.off()

In [34]:
# Extracting results
res <- dba.report(dbObj, method=DBA_DESEQ2, contrast = 1, th=1)
out <- as.data.frame(res)
write.table(out, file="../results/diffbind/diffbind_results.tsv", sep="\t", quote=F, row.names=F)