# Differential Peak Shape

**Created**: 24 April 2022

## Environment

In [1]:
if (!requireNamespace("limma", quietly=TRUE)) {
    BiocManager::install("limma")
}

In [2]:
library(tidyverse)
library(data.table)
library(limma)

setwd("~/eQTL_pQTL_Characterization/")

source("03_Functional_Interpretation/scripts/utils/ggplot_theme.R")

── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.8
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last


The following object is masked from ‘package:purrr’:

    transpose




## Load Data

In [3]:
shape.features <- fread("/nfs/users/nfs_n/nm18/gains_team282/epigenetics/regulation/immune/shape_features/consensus_shape_features.csv")

In [4]:
dim(shape.features)

In [5]:
tss.enrichment.scores <- read.table("/nfs/users/nfs_n/nm18/gains_team282/epigenetics/accessibility/analysis/atac_seq/tss_enrichment_scores.tsv", header=T)

In [6]:
head(tss.enrichment.scores)

Unnamed: 0_level_0,Sample,TSS_Enrichment_Score
Unnamed: 0_level_1,<chr>,<dbl>
1,Calderon-1001-Memory_Teffs-Control,35.71484
2,Calderon-1001-Myeloid_DCs-Control,24.90297
3,Calderon-1003-CD8pos_T-Treatment_1,24.91672
4,Calderon-1001-Regulatory_T-Control,30.79065
5,Calderon-1003-Effector_memory_CD8pos_T-Treatment_1,31.39529
6,Calderon-1001-Memory_Tregs-Control,35.20552


In [7]:
atac.meta <- read.csv("/nfs/users/nfs_n/nm18/eQTL_pQTL_Characterization/03_Functional_Interpretation/metadata/reads_atac_seq.txt")

In [8]:
head(atac.meta)

Unnamed: 0_level_0,Run,Group.ID,Sample.ID,Donor,Lineage,Cell_type,Stimulated,Treatment,Replicate,Assay.Type,⋯,Experiment,Instrument,LibraryLayout,LibrarySelection,LibrarySource,Organism,Platform,ReleaseDate,Sample.Name,SRA.Study
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<int>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,SRR2920475,Corces-4983-Monocytes-Control,Corces-4983-Monocytes-Control-1,Corces-4983,MYELOID,Monocytes,False,Control,1,ATAC-Seq,⋯,SRX1427798,NextSeq 500,PAIRED,other,GENOMIC,Homo sapiens,ILLUMINA,2016-06-01T00:00:00Z,GSM1937385,SRP066100
2,SRR2920488,Corces-6792-Monocytes-Control,Corces-6792-Monocytes-Control-1,Corces-6792,MYELOID,Monocytes,False,Control,1,ATAC-Seq,⋯,SRX1427811,NextSeq 500,PAIRED,other,GENOMIC,Homo sapiens,ILLUMINA,2016-06-01T00:00:00Z,GSM1937398,SRP066100
3,SRR2920476,Corces-4983-Monocytes-Control,Corces-4983-Monocytes-Control-2,Corces-4983,MYELOID,Monocytes,False,Control,2,ATAC-Seq,⋯,SRX1427799,NextSeq 500,PAIRED,other,GENOMIC,Homo sapiens,ILLUMINA,2016-06-01T00:00:00Z,GSM1937386,SRP066100
4,SRR2920487,Corces-6792-Monocytes-Control,Corces-6792-Monocytes-Control-2,Corces-6792,MYELOID,Monocytes,False,Control,2,ATAC-Seq,⋯,SRX1427810,NextSeq 500,PAIRED,other,GENOMIC,Homo sapiens,ILLUMINA,2016-06-01T00:00:00Z,GSM1937397,SRP066100
5,SRR2920542,Corces-7256-Monocytes-Control,Corces-7256-Monocytes-Control-1,Corces-7256,MYELOID,Monocytes,False,Control,1,ATAC-Seq,⋯,SRX1427865,NextSeq 500,PAIRED,other,GENOMIC,Homo sapiens,ILLUMINA,2016-06-01T00:00:00Z,GSM1937452,SRP066100
6,SRR2920543,Corces-7256-Monocytes-Control,Corces-7256-Monocytes-Control-2,Corces-7256,MYELOID,Monocytes,False,Control,2,ATAC-Seq,⋯,SRX1427866,NextSeq 500,PAIRED,other,GENOMIC,Homo sapiens,ILLUMINA,2016-06-01T00:00:00Z,GSM1937453,SRP066100


In [9]:
data.mtx <- atac.meta %>%
    dplyr::select(Group.ID, Donor, Cell_type, Stimulated) %>%
    dplyr::mutate(Stimulated=ifelse(Stimulated, "Stimulated", "Control")) %>%
    dplyr::mutate(Group=interaction(Cell_type, Stimulated)) %>%
    dplyr::select(Group.ID, Donor, Group) %>%
    merge(., tss.enrichment.scores, by.x="Group.ID", by.y="Sample") %>%
    merge(., shape.features, by.x="Group.ID", by.y="Sample") %>%
    dplyr::mutate(Donor=as.character(Donor), Group=as.character(Group))

In [10]:
head(data.mtx[,1:10])

Unnamed: 0_level_0,Group.ID,Donor,Group,TSS_Enrichment_Score,1:804821-805016,1:812642-812879,1:815224-815554,1:817057-817622,1:819712-820327,1:821473-823221
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Calderon-1001-Bulk_B-Control,Calderon-1001,Bulk_B.Control,24.89519,-0.006061507,0.009388944,0.006259146,0.005349278,-0.008032666,0.006808657
2,Calderon-1001-Bulk_B-Treatment_1,Calderon-1001,Bulk_B.Stimulated,26.17991,-0.006267462,0.009388944,0.005507453,0.005072293,-0.008686196,0.004151319
3,Calderon-1001-CD8pos_T-Control,Calderon-1001,CD8pos_T.Control,25.17192,-0.005631485,0.011452513,0.006208227,0.006237442,-0.008749866,0.006613037
4,Calderon-1001-CD8pos_T-Treatment_1,Calderon-1001,CD8pos_T.Stimulated,22.44896,-0.006457613,0.011891087,0.005507453,0.006234437,-0.008696625,0.004895769
5,Calderon-1001-Central_memory_CD8pos_T-Control,Calderon-1001,Central_memory_CD8pos_T.Control,27.64371,-0.006437052,0.009388944,0.005507453,0.005370726,-0.008744195,0.006477123
6,Calderon-1001-Central_memory_CD8pos_T-Treatment_1,Calderon-1001,Central_memory_CD8pos_T.Stimulated,21.34979,-0.005631485,0.014635136,0.005507453,0.006065289,-0.008054893,0.005115541


Not all cell types have both control and stimulated conditions. We need both to fit the linear model. Remove any groups that are missing either control or stimulated samples.

In [11]:
both.treatments <- (paste0(unique(atac.meta$Cell_type), ".Control") %in% data.mtx$Group) & (paste0(unique(atac.meta$Cell_type), ".Stimulated") %in% data.mtx$Group)
cell.types <- unique(atac.meta$Cell_type)[both.treatments]
data.mtx <- data.mtx[gsub("\\..*", "", data.mtx$Group) %in% cell.types,]

In [12]:
data.mtx <- data.mtx %>%
    dplyr::mutate(Donor=factor(Donor), Group=factor(Group))

group <- data.mtx$Group
donor <- data.mtx$Donor
tss.score <- data.mtx$TSS_Enrichment_Score

shape.features <- data.mtx[,-(1:4)]

## Linear Model

In [13]:
design <- model.matrix( ~ 0 + group + donor + tss.score)

In [14]:
fit <- lmFit(t(shape.features), design)

Create contrasts to compare stimulated cell types against control cell types.

In [15]:
group.contrasts <- matrix(0, nrow=ncol(design), ncol=length(cell.types))
rownames(group.contrasts) <- colnames(design)
colnames(group.contrasts) <- cell.types

In [16]:
for (cell.type in cell.types) {
    
    group.contrasts[paste0("group", cell.type, ".Control"), cell.type] = -1
    group.contrasts[paste0("group", cell.type, ".Stimulated"), cell.type] = 1
}

Run a simple linear model across all the peaks.

In [17]:
da.peaks <- rbindlist(lapply(1:ncol(group.contrasts), function(i) {
    de.res = eBayes(contrasts.fit(fit, group.contrasts[,i]))
    de.res.table = topTable(de.res, sort.by="p", n=Inf)
    
    contrast.cell.type = colnames(group.contrasts)[i]

    de.res.table %>%
        dplyr::mutate(Cell.Type=contrast.cell.type) %>%
        dplyr::mutate(Peak.ID=rownames(.)) %>%
        dplyr::mutate(Peak.Chr=sapply(strsplit(Peak.ID, ":"), function(x) { x[1] })) %>%
        dplyr::mutate(Peak.Coords=sapply(strsplit(Peak.ID, ":"), function(x) { x[2] })) %>%
        dplyr::mutate(Peak.Start=as.numeric(sapply(strsplit(Peak.Coords, "-"), function(x) { x[1] }))) %>%
        dplyr::mutate(Peak.End=as.numeric(sapply(strsplit(Peak.Coords, "-"), function(x) { x[2] }))) %>%
        dplyr::select(Cell.Type, Peak.ID, Peak.Chr, Peak.Start, Peak.End, logFC, AveExpr, t, P.Value, adj.P.Val, B)
}))

In [18]:
da.peak.set <- da.peaks %>%
    dplyr::filter(adj.P.Val < 0.01)

In [19]:
length(unique(da.peak.set$Peak.ID))

In [20]:
table(da.peak.set$Cell.Type)


                  Bulk_B                 CD8pos_T  Central_memory_CD8pos_T 
                    7751                     5951                    14640 
       Effector_CD4pos_T Effector_memory_CD8pos_T      Follicular_T_Helper 
                   10955                    12138                    11099 
           Gamma_delta_T                Mature_NK                    Mem_B 
                    8650                     7303                    10402 
            Memory_Teffs             Memory_Tregs                Monocytes 
                   14798                    11302                     4778 
                 Naive_B              Naive_CD8_T              Naive_Teffs 
                    7017                     6257                    13365 
             Naive_Tregs             Regulatory_T           Th1_precursors 
                    1658                    15571                    18368 
         Th17_precursors           Th2_precursors 
                    7362            

In [21]:
head(da.peak.set)

Cell.Type,Peak.ID,Peak.Chr,Peak.Start,Peak.End,logFC,AveExpr,t,P.Value,adj.P.Val,B
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Monocytes,20:1621268-1621665,20,1621268,1621665,-0.46757576,0.0006487178,-12.257109,9.291093000000001e-23,2.7555060000000003e-17,41.13771
Monocytes,12:109845022-109847993,12,109845022,109847993,-0.02110393,-0.0043939766,-10.51547,1.235845e-18,1.832604e-13,31.7063
Monocytes,2:156331680-156333649,2,156331680,156333649,-0.0252834,-0.0117342065,-10.349613,3.060158e-18,3.025221e-13,30.80634
Monocytes,17:28727663-28729453,17,28727663,28729453,-0.01725488,0.0134920019,-9.550207,2.394571e-16,1.775425e-11,26.48137
Monocytes,10:132570571-132572954,10,132570571,132572954,0.01682975,-0.011965604,9.4695,3.712591e-16,2.202123e-11,26.04663
Monocytes,1:151762655-151764357,1,151762655,151764357,0.01106495,-0.0057469328,9.377035,6.13264e-16,3.031313e-11,25.54913


In [26]:
write.csv(da.peak.set, "/nfs/users/nfs_n/nm18/gains_team282/epigenetics/regulation/immune/ds_peaks.csv", quote=F, row.names=F)