### load

In [2]:
## input: 1. summarizedExperiement(SE) obj for chromVAR 2. Jaspar matrix 
## output: 1. motif x cell (z score) 2. plot: ranked           
source("./libs.R")

In [3]:
##------------------------------------------------------------
## inputs
##------------------------------------------------------------

input.umap.res <- fread("../dat/1910_v2/islet.cluster_labels.filt.txt", header = T) %>% 
    separate(cluster_name, into = c("cell_type_overall", "subtype"), remove = F)

“Expected 2 pieces. Missing pieces filled with `NA` in 772 rows [3, 50, 83, 84, 138, 181, 196, 213, 253, 265, 274, 301, 303, 353, 371, 388, 412, 434, 491, 495, ...].”

In [5]:
input.chromVar.jaspar.z <- assays(input.chromVar.res.list$dev)$z

table(input.umap.res %>% filter(barcodes %in% colnames(input.chromVar.jaspar.z)[-1]) %>% 
    pull(cell_type_overall))

table(input.umap.res %>% pull(cell_type_overall))
sum(table(input.umap.res %>% pull(cell_type_overall))%>%as.numeric)


     acinar       alpha        beta       delta      ductal endothelial 
         46        6217        7598         710          80         118 
      gamma      immune    stellate 
        260         140         128 


     acinar       alpha        beta       delta      ductal endothelial 
         46        6218        7598         710          80         118 
      gamma      immune    stellate 
        260         140         128 

###  T test (one vs. other)

In [6]:
input.chromVar.jaspar.z <- data.table(assays(input.chromVar.res.list$dev)$z, keep.rownames = T)
input.chromVar.jaspar.z[is.na(input.chromVar.jaspar.z)] <- 0
# aggregate data --------------------------------------------------------------
# melt
input.chromVar.jaspar.z.agg <- melt(input.chromVar.jaspar.z, id = "rn", variable.name = "barcodes", 
    value.name = "zval")

# add celltype
input.chromVar.jaspar.z.agg <- merge(input.chromVar.jaspar.z.agg, input.umap.res)

table(input.chromVar.jaspar.z.agg %>% pull(cell_type_overall))


     acinar       alpha        beta       delta      ductal endothelial 
      26634     3600222     4399242      411090       46320       68322 
      gamma      immune    stellate 
     150540       81060       74112 

In [7]:
input.chromVar.jaspar.z.agg <- input.chromVar.jaspar.z.agg %>%select(rn,zval,cell_type_overall)
range(input.chromVar.jaspar.z.agg$zval)
sum(!complete.cases(input.chromVar.jaspar.z.agg))
dim(input.chromVar.jaspar.z.agg)
head(input.chromVar.jaspar.z.agg, 1)
table(input.chromVar.jaspar.z.agg %>% pull(cell_type_overall))

rn,zval,cell_type_overall
<chr>,<dbl>,<chr>
MA0004.1_Arnt,-1.566904,alpha



     acinar       alpha        beta       delta      ductal endothelial 
      26634     3600222     4399242      411090       46320       68322 
      gamma      immune    stellate 
     150540       81060       74112 

In [8]:
(celltype.test.all<- input.chromVar.jaspar.z.agg%>%pull(cell_type_overall)%>%unique())
test.motifs <- input.chromVar.jaspar.z.agg%>%pull(rn)%>%unique()
length(test.motifs)


In [9]:
require(parallel)
ttest.res.ct <- do.call(rbind, lapply(celltype.test.all, function(ntest) {
    celltype.test <- c(ntest, "other")
    ttest.res <- do.call(rbind, mclapply(test.motifs, function(motif) {
        pd <- input.chromVar.jaspar.z.agg %>% filter(rn == motif) %>% mutate(cell_type_overall = ifelse(cell_type_overall == 
            ntest, ntest, "other"))
        test.res <- t.test(pd %>% filter(cell_type_overall == celltype.test[1]) %>% 
            select(zval), pd %>% filter(cell_type_overall == celltype.test[2]) %>% 
            select(zval))
        (data.frame(motif = motif, mean_x = test.res$estimate[1], mean_y = test.res$estimate[2], 
            pval = test.res$p.value/2))
    }, mc.cores = 10)) %>% mutate(test = paste0(ntest, ".vs.other"))
})) %>% group_by(test) %>% mutate(FDR = p.adjust(pval, "BH"), padj = p.adjust(pval, 
    "bonferroni"))
head(ttest.res.ct, 1)

motif,mean_x,mean_y,pval,test,FDR,padj
<fct>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
MA0004.1_Arnt,0.1574304,-0.1897245,8.385327e-81,alpha.vs.other,2.4770939999999998e-80,4.855104e-78


In [15]:
head(ttest.res.ct, 1)%>% separate(test,into = c("x","y"),sep = ".vs.",remove = F)
ttest.res.ct<- ttest.res.ct%>% separate(test,into = c("x","y"),sep = ".vs.",remove = F)

motif,mean_x,mean_y,pval,test,x,y,FDR,padj
<fct>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>
MA0004.1_Arnt,0.1574304,-0.1897245,8.385327e-81,alpha.vs.other,alpha,other,2.4770939999999998e-80,4.855104e-78


### Motif db

In [16]:
tfclass.db.dic <- readRDS("~/github/atacMotif/db/dic_jaspar_tfclass.rds")
tfclass.db <- readRDS('~/github/atacMotif/db/tfclass.rds')
#str(tfclass.db)

#### add tf family

In [18]:
ttest.res.ct %>% dim
ttest.res.2 <- ttest.res.ct %>% separate(motif, into = c("jaspar.id", "motif"), sep = "_") %>% 
    left_join(tfclass.db.dic$merged %>% select(family.id, family.name, jaspar.name)%>%unique, 
        by = c(motif = "jaspar.name"))
sum(is.na(ttest.res.2$family.id)) 
tmp <- ttest.res.2[is.na(ttest.res.2$family.id), ]
ttest.res.2[is.na(ttest.res.2$family.id), ] %>% head(2)
ttest.res.2 %>% dim
tmp %>% ungroup %>% select(jaspar.id, motif) %>% unique

## rescue NR2F1
## https://github.com/epigen-UCSD/atacMotif/blob/master/db/rescue_Jaspar.txt
data.frame(motif = c("NR2F1","NR2F2"), subfamily.id = "2.1.3.5", stringsAsFactors = F) %>% left_join(tfclass.db.dic$merged %>% 
    select(subfamily.id, family.id, family.name)%>%unique)
ttest.res.2[ttest.res.2$motif=='NR2F1',c("family.id","family.name")] <- data.frame(motif = "NR2F1", subfamily.id = "2.1.3.5", stringsAsFactors = F) %>% left_join(tfclass.db.dic$merged %>% 
    select(subfamily.id, family.id, family.name)%>%unique)%>%select(family.id,family.name)
ttest.res.2[ttest.res.2$motif=='NR2F2',c("family.id","family.name")] <- data.frame(motif = "NR2F2", subfamily.id = "2.1.3.5", stringsAsFactors = F) %>% left_join(tfclass.db.dic$merged %>% 
    select(subfamily.id, family.id, family.name)%>%unique)%>%select(family.id,family.name)

ttest.res.2[ttest.res.2$motif=='NR2F1',]%>%head(1)

jaspar.id,motif,mean_x,mean_y,pval,test,x,y,FDR,padj,family.id,family.name
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<fct>
MA0089.1,MAFG::NFE2L1,-0.2499156,0.1596027,1.594834e-67,alpha.vs.other,alpha,other,4.294924e-67,9.234087e-65,,
MA0109.1,HLTF,0.3424834,-0.2240647,1.524367e-236,alpha.vs.other,alpha,other,7.5436619999999996e-236,8.826084e-234,,


jaspar.id,motif
<chr>,<chr>
MA0089.1,MAFG::NFE2L1
MA0109.1,HLTF
MA0111.1,Spz1
MA0619.1,LIN54
MA0621.1,mix-a
MA0637.1,CENPB
MA0017.2,NR2F1
MA1111.1,NR2F2


Joining, by = "subfamily.id"


motif,subfamily.id,family.id,family.name
<chr>,<chr>,<chr>,<fct>
NR2F1,2.1.3.5,2.1.3,RXR-related receptors
NR2F2,2.1.3.5,2.1.3,RXR-related receptors


Joining, by = "subfamily.id"
Joining, by = "subfamily.id"


jaspar.id,motif,mean_x,mean_y,pval,test,x,y,FDR,padj,family.id,family.name
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<fct>
MA0017.2,NR2F1,0.08727388,-0.1074267,2.4809029999999998e-26,alpha.vs.other,alpha,other,4.694257e-26,1.4364430000000003e-23,2.1.3,RXR-related receptors


#### add tf class

In [20]:
ttest.res.2 %>% dim
ttest.res.2 %>% filter(is.na(family.id)) %>% ungroup %>% select(motif, family.id) %>% 
    unique
ttest.res.2 <- ttest.res.2 %>% mutate(class.id = sub(".[0-9]+$", "", family.id)) %>% 
    left_join(tfclass.db$class %>% select(-about), by = c(class.id = "id"))%>% rename(class.name = name) 
ttest.res.2 %>% dim
ttest.res.2 %>% head(1)
ttest.res.2 %>% filter(is.na(class.id)) %>% ungroup %>% select(motif, family.id, 
    class.id) %>% unique

motif,family.id
<chr>,<chr>
MAFG::NFE2L1,
HLTF,
Spz1,
LIN54,
mix-a,
CENPB,


jaspar.id,motif,mean_x,mean_y,pval,test,x,y,FDR,padj,family.id,family.name,class.id,class.name
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<fct>,<chr>,<chr>
MA0004.1,Arnt,0.1574304,-0.1897245,8.385327e-81,alpha.vs.other,alpha,other,2.4770939999999998e-80,4.855104e-78,1.2.5,PAS,1.2,Basic helix-loop-helix factors (bHLH)


motif,family.id,class.id
<chr>,<chr>,<chr>
MAFG::NFE2L1,,
HLTF,,
Spz1,,
LIN54,,
mix-a,,
CENPB,,


#### add TF superclass

In [21]:
ttest.res.2 %>% dim
ttest.res.2 <- ttest.res.2 %>% mutate(superclass.id = sub(".[0-9]+$", "", class.id)) %>% 
    left_join(tfclass.db$superclass %>% select(-about), by = c(superclass.id = "id")) %>% 
    rename(superclass.name = "name")
ttest.res.2 %>% dim
ttest.res.2 %>% head(1)
ttest.res.2 %>% filter(is.na(superclass.id)) %>% ungroup %>% select(motif, family.id, 
    class.id,superclass.id) %>% unique

jaspar.id,motif,mean_x,mean_y,pval,test,x,y,FDR,padj,family.id,family.name,class.id,class.name,superclass.id,superclass.name
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>
MA0004.1,Arnt,0.1574304,-0.1897245,8.385327e-81,alpha.vs.other,alpha,other,2.4770939999999998e-80,4.855104e-78,1.2.5,PAS,1.2,Basic helix-loop-helix factors (bHLH),1,Basic domains


motif,family.id,class.id,superclass.id
<chr>,<chr>,<chr>,<chr>
MAFG::NFE2L1,,,
HLTF,,,
Spz1,,,
LIN54,,,
mix-a,,,
CENPB,,,


#### save data 

In [22]:
(ttest.res.2 %>% head(1) %>% mutate(enrichedIn = ifelse(mean_x > mean_y, x, y)))[, 
    c(1:8, 17, 9:16)]
ttest.res.2 <- (ttest.res.2 %>% mutate(enrichedIn = ifelse(mean_x > mean_y, x, y)))[, 
    c(1:8, 17, 9:16)]

jaspar.id,motif,mean_x,mean_y,pval,test,x,y,enrichedIn,FDR,padj,family.id,family.name,class.id,class.name,superclass.id,superclass.name
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>
MA0004.1,Arnt,0.1574304,-0.1897245,8.385327e-81,alpha.vs.other,alpha,other,alpha,2.4770939999999998e-80,4.855104e-78,1.2.5,PAS,1.2,Basic helix-loop-helix factors (bHLH),1,Basic domains


In [23]:
ttest.res.list <- lapply(paste0(celltype.test.all, ".vs.other"), function(ntest) {
    ttest.res.2 %>% filter(test == ntest) %>% group_by(test) %>% arrange(desc(mean_x))
    
})
names(ttest.res.list) <- paste0(celltype.test.all, ".vs.other")
require(writexl)
write_xlsx(ttest.res.list, "../figures/Fig1/subfigs/fig1E_one_vs_other.xlsx")
system("open ../figures/Fig1/subfigs/fig1E_one_vs_other.xlsx")
fwrite(ttest.res.2 %>% group_by(test) %>% arrange(desc(mean_x)),  "../figures/Fig1/subfigs/fig1E_one_vs_other.csv")

Loading required package: writexl
