### load

In [2]:
## input: 1. summarizedExperiement(SE) obj for chromVAR 2. Jaspar matrix 
## output: 1. motif x cell (z score) 2. plot: ranked           
source("./libs.R")

In [3]:
##------------------------------------------------------------
## inputs
##------------------------------------------------------------

input.chromVar.res.list <- readRDS(file = "../dat/output.jaspar.dev.res.Rdata")
input.chromVar.jaspar.z <- assays(input.chromVar.res.list$dev)$z
input.umap.res <- fread("../dat/1908/Islet_123.MNN_corrected.cluster_labels.filt.txt", 
    header = T) %>% separate(cluster, into = c("cell_type_overall", "subtype"), remove = F)


“Expected 2 pieces. Missing pieces filled with `NA` in 600 rows [5, 18, 19, 50, 81, 114, 128, 147, 169, 175, 176, 190, 209, 250, 260, 345, 353, 376, 389, 413, ...].”

In [4]:
input.umap.res%>%head(1)

barcodes,UMAP1,UMAP2,cluster,cell_type_overall,subtype,log10_n_counts,log10_n_peaks,Islet1,Islet2,Islet3
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>
Islet3-fresh_CTGAAGCTTGCAGCTACTCTCTATTATAGCCT,6.029433,-3.429818,beta_2,beta,2,4.520863,4.152013,0,0,1


In [5]:

input.chromVar.jaspar.z <- assays(input.chromVar.res.list$dev)$z

table(input.umap.res %>% filter(barcodes %in% colnames(input.chromVar.jaspar.z)[-1]) %>% 
    pull(cell_type_overall))

table(input.umap.res %>% pull(cell_type_overall))


      alpha        beta       delta endothelial    exocrine       gamma 
       5535        7108         709         136         113         205 
      glial      immune    stellate 
         34          58         134 


      alpha        beta       delta endothelial    exocrine       gamma 
       5594        7170         718         157         131         206 
      glial      immune    stellate 
         39          71         153 

###  T test (sub vs. sub)

In [6]:
input.chromVar.jaspar.z <- data.table(assays(input.chromVar.res.list$dev)$z, keep.rownames = T)

# aggregate data --------------------------------------------------------------
# melt
input.chromVar.jaspar.z.agg <- melt(input.chromVar.jaspar.z, id = "rn", variable.name = "barcodes", 
    value.name = "zval")

# add celltype
input.chromVar.jaspar.z.agg <- merge(input.chromVar.jaspar.z.agg, input.umap.res)

table(input.chromVar.jaspar.z.agg%>%pull(cell_type_overall))


      alpha        beta       delta endothelial    exocrine       gamma 
    2136510     2744074      273674       52496       43618       79130 
      glial      immune    stellate 
      13124       22388       51724 

In [7]:
input.chromVar.jaspar.z.agg %>% dim
input.chromVar.jaspar.z.agg %>% head(1)
input.chromVar.jaspar.z.agg %>% filter(cell_type_overall %in% c("alpha", "beta", 
    "delta")) %>% dim
input.chromVar.jaspar.z.agg %>% filter(cell_type_overall %in% c("alpha", "beta", 
    "delta")) %>% pull(cluster) %>% table
input.chromVar.jaspar.z.agg %>% filter(cell_type_overall %in% c("alpha", "beta", 
    "delta")) %>% select(barcodes, cluster) %>% unique %>% pull(cluster) %>% table
input.chromVar.jaspar.z.agg <- input.chromVar.jaspar.z.agg %>% filter(cell_type_overall %in% c("alpha", "beta", 
    "delta")) 

barcodes,rn,zval,UMAP1,UMAP2,cluster,cell_type_overall,subtype,log10_n_counts,log10_n_peaks,Islet1,Islet2,Islet3
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>
Islet1-fresh_AGACACCTAAGAGGCAAAGGAGTACCTAT,MA0025.1_NFIL3,-0.3430543,6.975175,-3.232226,beta_1,beta,1,3.694693,3.384174,1,0,0


.
alpha_1 alpha_2  beta_1  beta_2 delta_1 delta_2 
1639342  497168 1676398 1067676  260550   13124 

.
alpha_1 alpha_2  beta_1  beta_2 delta_1 delta_2 
   4247    1288    4343    2766     675      34 

In [8]:
input.chromVar.jaspar.z.agg <- input.chromVar.jaspar.z.agg %>% select(rn, zval, cluster, 
    cell_type_overall)
range(input.chromVar.jaspar.z.agg$zval)
sum(!complete.cases(input.chromVar.jaspar.z.agg))
dim(input.chromVar.jaspar.z.agg)
head(input.chromVar.jaspar.z.agg, 1)
table(input.chromVar.jaspar.z.agg %>% pull(cluster))

rn,zval,cluster,cell_type_overall
<chr>,<dbl>,<chr>,<chr>
MA0025.1_NFIL3,-0.3430543,beta_1,beta



alpha_1 alpha_2  beta_1  beta_2 delta_1 delta_2 
1639342  497168 1676398 1067676  260550   13124 

In [9]:
(test.all <- list(alpha = c("alpha_1", "alpha_2"), beta = c("beta_1", "beta_2"), delta = c("delta_1", 
    "delta_2")))
test.motifs <- input.chromVar.jaspar.z.agg %>% pull(rn) %>% unique()
length(test.motifs)

In [10]:
ntest=names(test.all)[1]
celltype.test <- test.all[[ntest]]
paste(celltype.test,collapse = '.vs.')

In [30]:
require(parallel)

ttest.res.ct <- do.call(rbind, lapply(names(test.all), function(ntest) {
    celltype.test <- test.all[[ntest]]
    ttest.res <- do.call(rbind, mclapply(test.motifs, function(motif) {
        pd <- input.chromVar.jaspar.z.agg %>% filter(rn == motif) %>% mutate(cluster %in% 
            celltype.test)
        test.res <- t.test(pd %>% filter(cluster == celltype.test[1]) %>% select(zval), 
            pd %>% filter(cluster == celltype.test[2]) %>% select(zval))
        (data.frame(motif = motif, mean_x = test.res$estimate[1], mean_y = test.res$estimate[2], 
            pval = test.res$p.value/2))
    }, mc.cores = 10)) %>% mutate(test = paste(celltype.test, collapse = ".vs."))
})) %>% separate(test, into = c("x", "y"), sep = ".vs.", remove = F) %>% mutate(enrichedIn = ifelse(mean_x > 
    mean_y, x, y)) %>% group_by(test) %>% mutate(FDR = p.adjust(pval, "BH"), padj = p.adjust(pval, 
    "bonferroni"))
head(ttest.res.ct, 1)

motif,mean_x,mean_y,pval,test,x,y,enrichedIn,FDR,padj
<fct>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>
MA0025.1_NFIL3,-0.2750475,-0.1587938,0.0004149447,alpha_1.vs.alpha_2,alpha_1,alpha_2,alpha_2,0.0005286094,0.1601686


In [31]:
(celltype.test.all <- ttest.res.ct%>%pull(test)%>%unique)
(ttest.res.ct %>% separate(motif, into = c("jaspar.id", "motif"), sep = "_") %>% 
    head(1) %>% left_join(fread("~/Dropbox (UCSD_Epigenomics)/workReports/2019-10_islet_rev/fig1E_one_vs_other.csv") %>% 
    select(jaspar.id, family.id, family.name, class.id, class.name, superclass.id, 
        superclass.name) %>% unique))[, c(2:11, 1, 12:17)]
ttest.res.ct <- (ttest.res.ct %>% separate(motif, into = c("jaspar.id", "motif"), 
    sep = "_") %>% left_join(fread("~/Dropbox (UCSD_Epigenomics)/workReports/2019-10_islet_rev/fig1E_one_vs_other.csv") %>% 
    select(jaspar.id, family.id, family.name, class.id, class.name, superclass.id, 
        superclass.name) %>% unique))[, c(2:11, 1, 12:17)] %>% group_by(test) %>% 
    arrange(enrichedIn, pval, desc(mean_x))

Joining, by = "jaspar.id"


motif,mean_x,mean_y,pval,test,x,y,enrichedIn,FDR,padj,jaspar.id,family.id,family.name,class.id,class.name,superclass.id,superclass.name
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>,<int>,<chr>
NFIL3,-0.2750475,-0.1587938,0.0004149447,alpha_1.vs.alpha_2,alpha_1,alpha_2,alpha_2,0.0005286094,0.1601686,MA0025.1,1.1.8,CEBP-related,1.1,Basic leucine zipper factors (bZIP),1,Basic domains


Joining, by = "jaspar.id"


In [32]:
ntest = "alpha_1"
sub(".[12]$", "", all.test[[ntest]])
ttest.res.ct %>% filter(test == sub(".[12]$", "", all.test[[ntest]]))%>%head(1)
ttest.res.ct %>% filter(test == sub(".[12]$", "", all.test[[ntest]]))%>%dim

motif,mean_x,mean_y,pval,test,x,y,enrichedIn,FDR,padj,jaspar.id,family.id,family.name,class.id,class.name,superclass.id,superclass.name
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>,<int>,<chr>
TAL1::TCF3,0.4377107,-1.215011,1.608224e-293,alpha_1.vs.alpha_2,alpha_1,alpha_2,alpha_1,5.173121e-292,6.207746e-291,MA0091.1,1.2.3,Tal-related,1.2,Basic helix-loop-helix factors (bHLH),1,Basic domains


In [33]:
(all.test <- c("alpha_1.vs.alpha_2.1", "alpha_1.vs.alpha_2.2", "beta_1.vs.beta_2.1", 
    "beta_1.vs.beta_2.2", "delta_1.vs.delta_2.1", "delta_1.vs.delta_2.2"))
names(all.test) <- c("alpha_1", "alpha_2", "beta_1", "beta_2", "delta_1", "delta_2")

ttest.res.list <- lapply(names(all.test), function(ntest) ttest.res.ct %>% filter(test == 
    sub(".[12]$", "", all.test[[ntest]]) & enrichedIn == ntest))
names(ttest.res.list) <- as.character(all.test)
require(writexl)
write_xlsx(ttest.res.list, "~/Dropbox (UCSD_Epigenomics)/workReports/2019-10_islet_rev/fig2d_sub_vs_sub.xlsx")
fwrite(ttest.res.ct, "~/Dropbox (UCSD_Epigenomics)/workReports/2019-10_islet_rev/fig2d_sub_vs_sub.csv")